@article{ART002813390,
  author={Ji-Mo Jang and Jae-Ok Min and Han-Sung Noh},
  title={KorPatELECTRA: A Pre-trained Language Model for Korean Patent Literature to improve performance in the field of natural language processing (Korean Patent ELECTRA)},
  journal={Journal of The Korea Society of Computer and Information},
  issn={1598-849X},
  year={2022},
  volume={27},
  number={2},
  pages={15--23},
  doi={10.9708/jksci.2022.27.02.015}
}
TY - JOUR
AU - Ji-Mo Jang
AU - Jae-Ok Min
AU - Han-Sung Noh
TI - KorPatELECTRA: A Pre-trained Language Model for Korean Patent Literature to improve performance in the field of natural language processing (Korean Patent ELECTRA)
JO - Journal of The Korea Society of Computer and Information
PY - 2022
VL - 27
IS - 2
PB - The Korean Society Of Computer And Information
SP - 15
EP - 23
SN - 1598-849X
AB - In the field of patents, NLP (Natural Language Processing) is a challenging task due to the linguistic specificity of patent literature, so there is a pressing need for a language model optimized for Korean patent documents. Recently, the NLP field has seen continuous attempts to build pre-trained language models for specific domains in order to improve performance on the tasks of those domains.
Among them, ELECTRA is a pre-trained language model released by Google after BERT; it increases training efficiency with a new method called RTD (Replaced Token Detection; a minimal sketch follows this record). This paper proposes KorPatELECTRA, a model pre-trained on a large corpus of Korean patent literature. In addition, pre-training was optimized by preprocessing the training corpus according to the characteristics of patent literature and by applying a patent-specific vocabulary and tokenizer. To confirm its performance, KorPatELECTRA was evaluated on NER (Named Entity Recognition), MRC (Machine Reading Comprehension), and patent classification tasks using actual patent data, and it achieved the best performance on all three tasks compared with general-purpose language models.
KW - Patent; ELECTRA; pre-training; NLP; tokenizer; language model
DO - 10.9708/jksci.2022.27.02.015
ER -
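
The abstract above describes ELECTRA's RTD (Replaced Token Detection) objective, in which a small generator corrupts some input tokens and a discriminator learns to label every token as original or replaced. The following is a minimal sketch of what RTD discrimination looks like at inference time; since KorPatELECTRA's checkpoint name is not given in this record, the public Korean ELECTRA discriminator "monologg/koelectra-base-v3-discriminator" stands in as an assumption, accessed through the Hugging Face transformers API.

import torch
from transformers import ElectraForPreTraining, ElectraTokenizerFast

# Assumption: a public Korean ELECTRA discriminator stands in for
# KorPatELECTRA, whose checkpoint name is not given in this record.
MODEL = "monologg/koelectra-base-v3-discriminator"
tokenizer = ElectraTokenizerFast.from_pretrained(MODEL)
model = ElectraForPreTraining.from_pretrained(MODEL)

# RTD: the discriminator scores every token as original (logit < 0) or
# replaced (logit > 0); during pre-training these labels come from a
# small generator that corrupts the input text.
sentence = "특허 문헌은 고유한 언어적 특성을 가진다."  # "Patent literature has unique linguistic characteristics."
inputs = tokenizer(sentence, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits.squeeze()

tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"].squeeze().tolist())
for token, logit in zip(tokens, logits):
    print(f"{token}\t{'replaced' if logit > 0 else 'original'}")

During actual pre-training the replaced/original labels are produced by a jointly trained masked-language-model generator; only the discriminator is kept afterwards and fine-tuned on downstream tasks such as NER or patent classification.
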
Ji-Mo Jang, Jae-Ok Min and Han-Sung Noh. (2022). KorPatELECTRA: A Pre-trained Language Model for Korean Patent Literature to improve performance in the field of natural language processing (Korean Patent ELECTRA). Journal of The Korea Society of Computer and Information, 27(2), 15-23.
Ji-Mo Jang, Jae-Ok Min and Han-Sung Noh. 2022, "KorPatELECTRA: A Pre-trained Language Model for Korean Patent Literature to improve performance in the field of natural language processing (Korean Patent ELECTRA)", Journal of The Korea Society of Computer and Information, vol. 27, no. 2, pp. 15-23. Available from: doi:10.9708/jksci.2022.27.02.015
Ji-Mo Jang, Jae-Ok Min, Han-Sung Noh "KorPatELECTRA : A Pre-trained Language Model for Korean Patent Literature to improve performance in the field of natural language processing(Korean Patent ELECTRA)" Journal of The Korea Society of Computer and Information 27.2 pp.15-23 (2022) : 15.
Ji-Mo Jang, Jae-Ok Min, Han-Sung Noh. KorPatELECTRA: A Pre-trained Language Model for Korean Patent Literature to improve performance in the field of natural language processing (Korean Patent ELECTRA). Journal of The Korea Society of Computer and Information. 2022; 27(2): 15-23. Available from: doi:10.9708/jksci.2022.27.02.015
Ji-Mo Jang, Jae-Ok Min and Han-Sung Noh. "KorPatELECTRA : A Pre-trained Language Model for Korean Patent Literature to improve performance in the field of natural language processing(Korean Patent ELECTRA)" Journal of The Korea Society of Computer and Information 27, no.2 (2022) : 15-23.doi: 10.9708/jksci.2022.27.02.015
Ji-Mo Jang; Jae-Ok Min; Han-Sung Noh. KorPatELECTRA: A Pre-trained Language Model for Korean Patent Literature to improve performance in the field of natural language processing (Korean Patent ELECTRA). Journal of The Korea Society of Computer and Information 2022, 27(2), 15-23. doi: 10.9708/jksci.2022.27.02.015
Ji-Mo Jang; Jae-Ok Min; Han-Sung Noh. KorPatELECTRA: A Pre-trained Language Model for Korean Patent Literature to improve performance in the field of natural language processing (Korean Patent ELECTRA). Journal of The Korea Society of Computer and Information. 2022; 27(2): 15-23. doi: 10.9708/jksci.2022.27.02.015