@comment{Journal article entry. Acronyms and proper nouns in the title are brace-protected so that sentence-casing styles keep their capitalisation.}
@article{ART003306002,
  author  = {Song, JongHwi},
  title   = {A Comparative Analysis of {BERTopic} and {LDA} for Topic Modeling of {Korean} Sleep Health Discourse on Social Media},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2026},
  volume  = {31},
  number  = {2},
  pages   = {231--239},
  doi     = {10.9708/jksci.2026.31.02.231},
}
TY  - JOUR
AU  - Song, JongHwi
TI  - A Comparative Analysis of BERTopic and LDA for Topic Modeling of Korean Sleep Health Discourse on Social Media
JO  - Journal of The Korea Society of Computer and Information
PY  - 2026
VL  - 31
IS  - 2
PB  - The Korean Society Of Computer And Information
SP  - 231
EP  - 239
SN  - 1598-849X
AB  - This study compared the performance of BERTopic and Latent Dirichlet Allocation (LDA) for topic modeling of Korean sleep health-related social media text. A total of 8,002 blog posts were collected from Naver using nine sleep-related keywords between March and October 2025. Both methods were applied to the same dataset, and their performance was evaluated using metrics including the number of topics, noise ratio, distribution entropy, and topic coherence. The results indicated that BERTopic identified 9 topics with a noise ratio of 22.8%, whereas LDA yielded 6 effective topics with a significantly lower noise ratio of 0.9%. BERTopic demonstrated higher distribution uniformity (0.852) compared to LDA (0.804), indicating more balanced topic assignments. LDA achieved a coherence score (C_V) of 0.5287. The cross-tabulation analysis revealed that BERTopic's "Melatonin/Hormone" topic showed 84.1% concentration in LDA's "Insomnia General" topic, demonstrating high consistency for well-defined topics. This study provides methodological insights for researchers selecting topic modeling approaches for Korean health-related text analysis.
KW  - Topic Modeling
KW  - BERTopic
KW  - LDA
KW  - Sleep Health
KW  - Social Media Analysis
KW  - Text Mining
DO  - 10.9708/jksci.2026.31.02.231
ER  - 
JongHwi Song. (2026). A Comparative Analysis of BERTopic and LDA for Topic Modeling of Korean Sleep Health Discourse on Social Media. Journal of The Korea Society of Computer and Information, 31(2), 231-239.
JongHwi Song. 2026, "A Comparative Analysis of BERTopic and LDA for Topic Modeling of Korean Sleep Health Discourse on Social Media", Journal of The Korea Society of Computer and Information, vol.31, no.2, pp.231-239. Available from: doi:10.9708/jksci.2026.31.02.231
JongHwi Song "A Comparative Analysis of BERTopic and LDA for Topic Modeling of Korean Sleep Health Discourse on Social Media" Journal of The Korea Society of Computer and Information 31.2 pp.231-239 (2026) : 231.
JongHwi Song. A Comparative Analysis of BERTopic and LDA for Topic Modeling of Korean Sleep Health Discourse on Social Media. Journal of The Korea Society of Computer and Information. 2026; 31(2), 231-239. Available from: doi:10.9708/jksci.2026.31.02.231
JongHwi Song. "A Comparative Analysis of BERTopic and LDA for Topic Modeling of Korean Sleep Health Discourse on Social Media" Journal of The Korea Society of Computer and Information 31, no.2 (2026) : 231-239. doi: 10.9708/jksci.2026.31.02.231
JongHwi Song. A Comparative Analysis of BERTopic and LDA for Topic Modeling of Korean Sleep Health Discourse on Social Media. Journal of The Korea Society of Computer and Information, 31(2), 231-239. doi: 10.9708/jksci.2026.31.02.231
JongHwi Song. A Comparative Analysis of BERTopic and LDA for Topic Modeling of Korean Sleep Health Discourse on Social Media. Journal of The Korea Society of Computer and Information. 2026; 31(2) 231-239. doi: 10.9708/jksci.2026.31.02.231
JongHwi Song. A Comparative Analysis of BERTopic and LDA for Topic Modeling of Korean Sleep Health Discourse on Social Media. Journal of The Korea Society of Computer and Information. 2026; 31(2), 231-239. Available from: doi:10.9708/jksci.2026.31.02.231
JongHwi Song. "A Comparative Analysis of BERTopic and LDA for Topic Modeling of Korean Sleep Health Discourse on Social Media" Journal of The Korea Society of Computer and Information 31, no.2 (2026) : 231-239. doi: 10.9708/jksci.2026.31.02.231