% Journal article: J. Korea Soc. Comput. Inf., volume 30, issue 10 (2025).
% The citation key must be followed by a comma (not a closing brace) so that
% the fields below are parsed as part of this entry.
@article{ART003258853,
  author  = {Jeon, Doo-Yong},
  title   = {Fine-Tuning Large Language Models for Security Log Data Labeling},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2025},
  volume  = {30},
  number  = {10},
  pages   = {143--154},
}
TY - JOUR
AU - Doo-Yong Jeon
TI - Fine-Tuning Large Language Models for Security Log Data Labeling
JO - Journal of The Korea Society of Computer and Information
PY - 2025
VL - 30
IS - 10
PB - The Korean Society of Computer and Information
SP - 143
EP - 154
SN - 1598-849X
AB - This study proposes a data sampling method called CoreShot Filter to address the high cost and subjective judgment issues in labeling security log data. CoreShot Filter combines the concepts of representativeness and uncertainty from active learning to select optimal data for fine-tuning large language models (LLMs). It defines uncertainty using discrepancies between weak learners and manual labels, while representativeness is measured through similarity with persona data generated by genetic algorithms. From over 310,000 logs, 204 core samples were selected and used to fine-tune GPT-4o mini. Experimental results demonstrate that CoreShot Filter outperforms stratified, outlier, and coreset sampling in terms of accuracy, recall, and F1-score. In particular, it achieved superior performance in abnormal detection (Recall 0.8901) and precision (0.9489), proving that CoreShot Filter is an effective method for improving security log analysis and LLM-based labeling efficiency.
KW - AI Security
KW - Data Labeling
KW - LLM
KW - Fine Tuning
KW - Data Sampling
KW - CoreShot Filter
DO -
UR -
ER -
Doo-Yong Jeon. (2025). Fine-Tuning Large Language Models for Security Log Data Labeling. Journal of The Korea Society of Computer and Information, 30(10), 143-154.
Doo-Yong Jeon. 2025, "Fine-Tuning Large Language Models for Security Log Data Labeling", Journal of The Korea Society of Computer and Information, vol.30, no.10, pp.143-154.
Doo-Yong Jeon "Fine-Tuning Large Language Models for Security Log Data Labeling" Journal of The Korea Society of Computer and Information 30.10 pp.143-154 (2025) : 143.
Doo-Yong Jeon. Fine-Tuning Large Language Models for Security Log Data Labeling. 2025; 30(10), 143-154.
Doo-Yong Jeon. "Fine-Tuning Large Language Models for Security Log Data Labeling" Journal of The Korea Society of Computer and Information 30, no.10 (2025) : 143-154.
Doo-Yong Jeon. Fine-Tuning Large Language Models for Security Log Data Labeling. Journal of The Korea Society of Computer and Information, 30(10), 143-154.
Doo-Yong Jeon. Fine-Tuning Large Language Models for Security Log Data Labeling. Journal of The Korea Society of Computer and Information. 2025; 30(10) 143-154.
Doo-Yong Jeon. Fine-Tuning Large Language Models for Security Log Data Labeling. 2025; 30(10), 143-154.
Doo-Yong Jeon. "Fine-Tuning Large Language Models for Security Log Data Labeling" Journal of The Korea Society of Computer and Information 30, no.10 (2025) : 143-154.