% Journal article record for citation key ART003300147.
% Authors are in Last, First form so the lowercase given-name token in the
% first author is not parsed as a von-particle. Assumes the family names are
% Song, Hong and Kang (the last token of each exported name) - TODO confirm.
@article{ART003300147,
  author  = {Song, Eun ji and Hong, Seong-Cho and Kang, Ah Reum},
  title   = {An Implementation of {TF-IDF} Feature Extraction and Machine Learning Based Web Attack Detection System},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2026},
  volume  = {31},
  number  = {1},
  pages   = {109--120},
}
TY - JOUR
AU - Song, Eun ji
AU - Hong, Seong-Cho
AU - Kang, Ah Reum
TI - An Implementation of TF-IDF Feature Extraction and Machine Learning Based Web Attack Detection System
JO - Journal of The Korea Society of Computer and Information
PY - 2026
VL - 31
IS - 1
PB - The Korean Society Of Computer And Information
SP - 109
EP - 120
SN - 1598-849X
AB - With the rapid proliferation of web applications, HTTP-based cyberattacks continue to rise, underscoring the need for effective web attack detection systems. This study proposes and evaluates a detection system that combines TF-IDF feature extraction with multiple machine-learning classifiers.
Treating HTTP request data as text, we apply natural language processing techniques and assess Logistic Regression, Random Forest, and XGBoost using the CSIC 2010 HTTP dataset. Experiments show that XGBoost achieves the best performance with 98.77% accuracy, 0.994 ROC AUC, PR AUC, while Random Forest and Logistic Regression attain accuracies of 97.50% and 97.83%, respectively. All models deliver precision above 96%, demonstrating their viability for deployment in real-world environments. The results indicate that interpretable machine-learning approaches can achieve competitive performance without resorting to complex deep learning models.
KW - Web Attack Detection;TF-IDF;Logistic Regression;Random Forest;XGBoost;CSIC 2010
DO -
UR -
ER -
Eun ji Song, Seong-Cho Hong and Ah Reum Kang. (2026). An Implementation of TF-IDF Feature Extraction and Machine Learning Based Web Attack Detection System. Journal of The Korea Society of Computer and Information, 31(1), 109-120.
Eun ji Song, Seong-Cho Hong and Ah Reum Kang. 2026, "An Implementation of TF-IDF Feature Extraction and Machine Learning Based Web Attack Detection System", Journal of The Korea Society of Computer and Information, vol.31, no.1, pp.109-120.
Eun ji Song, Seong-Cho Hong, Ah Reum Kang. "An Implementation of TF-IDF Feature Extraction and Machine Learning Based Web Attack Detection System" Journal of The Korea Society of Computer and Information 31.1 (2026) : 109-120.
Eun ji Song, Seong-Cho Hong, Ah Reum Kang. An Implementation of TF-IDF Feature Extraction and Machine Learning Based Web Attack Detection System. Journal of The Korea Society of Computer and Information. 2026; 31(1), 109-120.
Eun ji Song, Seong-Cho Hong and Ah Reum Kang. "An Implementation of TF-IDF Feature Extraction and Machine Learning Based Web Attack Detection System" Journal of The Korea Society of Computer and Information 31, no.1 (2026) : 109-120.
Eun ji Song; Seong-Cho Hong; Ah Reum Kang. An Implementation of TF-IDF Feature Extraction and Machine Learning Based Web Attack Detection System. Journal of The Korea Society of Computer and Information, 2026, 31(1), 109-120.
Eun ji Song; Seong-Cho Hong; Ah Reum Kang. An Implementation of TF-IDF Feature Extraction and Machine Learning Based Web Attack Detection System. Journal of The Korea Society of Computer and Information. 2026; 31(1): 109-120.
Eun ji Song, Seong-Cho Hong, Ah Reum Kang. An Implementation of TF-IDF Feature Extraction and Machine Learning Based Web Attack Detection System. Journal of The Korea Society of Computer and Information. 2026; 31(1), 109-120.
Eun ji Song, Seong-Cho Hong and Ah Reum Kang. "An Implementation of TF-IDF Feature Extraction and Machine Learning Based Web Attack Detection System" Journal of The Korea Society of Computer and Information 31, no.1 (2026) : 109-120.