% Journal article record (key ART003277342).
% Author names are stored in "Last, First" form so the surname split is explicit.
% NOTE(review): the last author was listed as "Shin DongMyung", which looks
% family-name-first; "Shin" is assumed to be the surname -- confirm against the
% published paper.
@article{ART003277342,
  author  = {Kim, Dong-Wan and Kim, Hyun-Soo and Park, Kyung-Yeob and Kim, MinSoo and Shin, DongMyung},
  title   = {An {LLM}-based Log Embedding Representation Learning Approach for Detecting Early-stage Malware Anomalies},
  journal = {Journal of Software Assessment and Valuation},
  issn    = {2092-8114},
  year    = {2025},
  volume  = {21},
  number  = {4},
  pages   = {1--17},
}
TY - JOUR
AU - Dong-Wan Kim
AU - Hyun-Soo Kim
AU - Kyung-Yeob Park
AU - MinSoo Kim
AU - Shin DongMyung
TI - An LLM-based Log Embedding Representation Learning Approach for Detecting Early-stage Malware Anomalies
JO - Journal of Software Assessment and Valuation
PY - 2025
VL - 21
IS - 4
PB - Korea Software Assessment and Valuation Society
SP - 1
EP - 17
SN - 2092-8114
AB - In large-scale information systems and web services, log-based anomaly detection is a key means of capturing early signs of ransomware and other malware. However, unsupervised methods that rely on raw log text and limited feature engineering perform poorly on real security logs with imbalanced labels and multi-stage attacks. This paper proposes an LLM-based log embedding pipeline that combines three representations—raw logs, embeddings from pre-trained Llama language models, and domain-fine-tuned embeddings for security logs—with statistical and deep anomaly detection models, using about 800,000 web access and system audit log entries. Under a common data split, embedding-based representations raise the binary F1-score of most models to roughly 2.5 times the raw-log baseline and more than threefold for rare attack types, demonstrating their effectiveness as a common input representation for malware anomaly detection and early-warning systems.
KW - Anomaly detection;LLM;Log embedding;Malware intrusion detection;Representation learning
DO -
UR -
ER -
Dong-Wan Kim, Hyun-Soo Kim, Kyung-Yeob Park, MinSoo Kim and Shin DongMyung. (2025). An LLM-based Log Embedding Representation Learning Approach for Detecting Early-stage Malware Anomalies. Journal of Software Assessment and Valuation, 21(4), 1-17.
Dong-Wan Kim, Hyun-Soo Kim, Kyung-Yeob Park, MinSoo Kim and Shin DongMyung. 2025, "An LLM-based Log Embedding Representation Learning Approach for Detecting Early-stage Malware Anomalies", Journal of Software Assessment and Valuation, vol.21, no.4, pp.1-17.
Dong-Wan Kim, Hyun-Soo Kim, Kyung-Yeob Park, MinSoo Kim, Shin DongMyung. "An LLM-based Log Embedding Representation Learning Approach for Detecting Early-stage Malware Anomalies" Journal of Software Assessment and Valuation 21.4 (2025): 1-17.
Dong-Wan Kim, Hyun-Soo Kim, Kyung-Yeob Park, MinSoo Kim, Shin DongMyung. An LLM-based Log Embedding Representation Learning Approach for Detecting Early-stage Malware Anomalies. Journal of Software Assessment and Valuation. 2025; 21(4), 1-17.
Dong-Wan Kim, Hyun-Soo Kim, Kyung-Yeob Park, MinSoo Kim and Shin DongMyung. "An LLM-based Log Embedding Representation Learning Approach for Detecting Early-stage Malware Anomalies" Journal of Software Assessment and Valuation 21, no.4 (2025) : 1-17.
Dong-Wan Kim; Hyun-Soo Kim; Kyung-Yeob Park; MinSoo Kim; Shin DongMyung. An LLM-based Log Embedding Representation Learning Approach for Detecting Early-stage Malware Anomalies. Journal of Software Assessment and Valuation, 2025, 21(4), 1-17.
Dong-Wan Kim; Hyun-Soo Kim; Kyung-Yeob Park; MinSoo Kim; Shin DongMyung. An LLM-based Log Embedding Representation Learning Approach for Detecting Early-stage Malware Anomalies. Journal of Software Assessment and Valuation. 2025; 21(4), 1-17.
Dong-Wan Kim, Hyun-Soo Kim, Kyung-Yeob Park, MinSoo Kim, Shin DongMyung. An LLM-based Log Embedding Representation Learning Approach for Detecting Early-stage Malware Anomalies. Journal of Software Assessment and Valuation. 2025; 21(4), 1-17.
Dong-Wan Kim, Hyun-Soo Kim, Kyung-Yeob Park, MinSoo Kim and Shin DongMyung. "An LLM-based Log Embedding Representation Learning Approach for Detecting Early-stage Malware Anomalies" Journal of Software Assessment and Valuation 21, no.4 (2025) : 1-17.