@comment{Journal article entry. The page range uses the double-hyphen form and the DOI is stored bare, without a resolver prefix.}
@article{ART002998485,
  author  = {Yoon, Seobin and Kim, Namgyu},
  title   = {Document Classification Methodology Using Autoencoder-based Keywords Embedding},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2023},
  volume  = {28},
  number  = {9},
  pages   = {35--46},
  doi     = {10.9708/jksci.2023.28.09.035},
}
TY  - JOUR
AU  - Seobin Yoon
AU  - Namgyu Kim
TI  - Document Classification Methodology Using Autoencoder-based Keywords Embedding
JO  - Journal of The Korea Society of Computer and Information
PY  - 2023
VL  - 28
IS  - 9
PB  - The Korean Society Of Computer And Information
SP  - 35
EP  - 46
SN  - 1598-849X
AB  - In this study, we propose a Dual Approach methodology to enhance the accuracy of document classifiers by utilizing both contextual and keyword information. Firstly, contextual information is extracted using Google's BERT, a pre-trained language model known for its outstanding performance in various natural language understanding tasks. Specifically, we employ KoBERT, a pre-trained model on the Korean corpus, to extract contextual information in the form of the CLS token. Secondly, keyword information is generated for each document by encoding the set of keywords into a single vector using an Autoencoder. We applied the proposed approach to 40,130 documents related to healthcare and medicine from the National R&D Projects database of the National Science and Technology Information Service (NTIS). The experimental results demonstrate that the proposed methodology outperforms existing methods that rely solely on document or word information in terms of accuracy for document classification.
KW  - Deep Learning;Document Classification;Keyword Embedding;Document Embedding;Pre-Trained Language Model
DO  - 10.9708/jksci.2023.28.09.035
ER  -
Seobin Yoon and Namgyu Kim. (2023). Document Classification Methodology Using Autoencoder-based Keywords Embedding. Journal of The Korea Society of Computer and Information, 28(9), 35-46.
Seobin Yoon and Namgyu Kim. 2023, "Document Classification Methodology Using Autoencoder-based Keywords Embedding", Journal of The Korea Society of Computer and Information, vol. 28, no. 9, pp. 35-46. Available from: doi:10.9708/jksci.2023.28.09.035
Seobin Yoon, Namgyu Kim. "Document Classification Methodology Using Autoencoder-based Keywords Embedding." Journal of The Korea Society of Computer and Information 28.9 (2023): 35-46.
Seobin Yoon, Namgyu Kim. Document Classification Methodology Using Autoencoder-based Keywords Embedding. Journal of The Korea Society of Computer and Information. 2023; 28(9), 35-46. Available from: doi:10.9708/jksci.2023.28.09.035
Seobin Yoon and Namgyu Kim. "Document Classification Methodology Using Autoencoder-based Keywords Embedding" Journal of The Korea Society of Computer and Information 28, no.9 (2023) : 35-46. doi: 10.9708/jksci.2023.28.09.035
Seobin Yoon; Namgyu Kim. Document Classification Methodology Using Autoencoder-based Keywords Embedding. Journal of The Korea Society of Computer and Information, 2023, 28(9), 35-46. doi: 10.9708/jksci.2023.28.09.035
Seobin Yoon; Namgyu Kim. Document Classification Methodology Using Autoencoder-based Keywords Embedding. Journal of The Korea Society of Computer and Information. 2023; 28(9): 35-46. doi: 10.9708/jksci.2023.28.09.035
Seobin Yoon, Namgyu Kim. Document Classification Methodology Using Autoencoder-based Keywords Embedding. Journal of The Korea Society of Computer and Information. 2023; 28(9), 35-46. Available from: doi:10.9708/jksci.2023.28.09.035
Seobin Yoon and Namgyu Kim. "Document Classification Methodology Using Autoencoder-based Keywords Embedding" Journal of The Korea Society of Computer and Information 28, no.9 (2023) : 35-46. doi: 10.9708/jksci.2023.28.09.035