% Journal article: Yun and Kim (2020), Journal of The Korea Society of Computer and Information 25(5).
@article{ART002588860,
  author  = {Yun, Yeo Il and Kim, Namgyu},
  title   = {Self-Supervised Document Representation Method},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2020},
  volume  = {25},
  number  = {5},
  pages   = {187--197},
  doi     = {10.9708/jksci.2020.25.05.187},
}
TY  - JOUR
AU  - Yun Yeo Il
AU  - Namgyu Kim
TI  - Self-Supervised Document Representation Method
JO  - Journal of The Korea Society of Computer and Information
PY  - 2020
VL  - 25
IS  - 5
PB  - The Korean Society Of Computer And Information
SP  - 187
EP  - 197
SN  - 1598-849X
AB  - Recently, various methods of text embedding using deep learning algorithms have been proposed. Especially, the way of using pre-trained language model which uses tremendous amount of text data in training is mainly applied for embedding new text data. However, traditional pre-trained language model has some limitations that it is hard to understand unique context of new text data when the text has too many tokens. In this paper, we propose self-supervised learning-based fine tuning method for pre-trained language model to infer vectors of long-text. Also, we applied our method to news articles and classified them into categories and compared classification accuracy with traditional models. As a result, it was confirmed that the vector generated by the proposed model more accurately expresses the inherent characteristics of the document than the vectors generated by the traditional models.
KW  - Deep Learning;Document Embedding;Pre-Trained Language Model;Self-Supervised Learning;Text Mining
DO  - 10.9708/jksci.2020.25.05.187
ER  - 
Yun Yeo Il and Namgyu Kim. (2020). Self-Supervised Document Representation Method. Journal of The Korea Society of Computer and Information, 25(5), 187-197.
Yun Yeo Il and Namgyu Kim. 2020, "Self-Supervised Document Representation Method", Journal of The Korea Society of Computer and Information, vol.25, no.5 pp.187-197. Available from: doi:10.9708/jksci.2020.25.05.187
Yun Yeo Il, Namgyu Kim. "Self-Supervised Document Representation Method" Journal of The Korea Society of Computer and Information 25.5 (2020) : 187-197.
Yun Yeo Il, Namgyu Kim. Self-Supervised Document Representation Method. Journal of The Korea Society of Computer and Information. 2020; 25(5), 187-197. Available from: doi:10.9708/jksci.2020.25.05.187
Yun Yeo Il and Namgyu Kim. "Self-Supervised Document Representation Method" Journal of The Korea Society of Computer and Information 25, no.5 (2020) : 187-197. doi: 10.9708/jksci.2020.25.05.187
Yun Yeo Il; Namgyu Kim. Self-Supervised Document Representation Method. Journal of The Korea Society of Computer and Information, 25(5), 187-197. doi: 10.9708/jksci.2020.25.05.187
Yun Yeo Il; Namgyu Kim. Self-Supervised Document Representation Method. Journal of The Korea Society of Computer and Information. 2020; 25(5) 187-197. doi: 10.9708/jksci.2020.25.05.187
Yun Yeo Il, Namgyu Kim. Self-Supervised Document Representation Method. Journal of The Korea Society of Computer and Information. 2020; 25(5), 187-197. Available from: doi:10.9708/jksci.2020.25.05.187
Yun Yeo Il and Namgyu Kim. "Self-Supervised Document Representation Method" Journal of The Korea Society of Computer and Information 25, no.5 (2020) : 187-197. doi: 10.9708/jksci.2020.25.05.187