% Journal article: Oh (2019), comparison of word embedding techniques for
% traditional Korean medicine text analysis.
% Author is stored in "Family, Given" form so BibTeX does not treat "Junho" as
% the surname (assumes "Oh" is the family name -- confirm against the publisher record).
@article{ART002437840,
  author  = {Oh, Junho},
  title   = {Comparison between Word Embedding Techniques in Traditional {Korean} Medicine for Data Analysis: Implementation of a Natural Language Processing Method},
  journal = {The Journal Of Korean Medical Classics},
  issn    = {1229-8328},
  year    = {2019},
  volume  = {32},
  number  = {1},
  pages   = {61--74},
  doi     = {10.14369/jkmc.2019.32.1.061},
}
TY - JOUR
AU - Oh, Junho
TI - Comparison between Word Embedding Techniques in Traditional Korean Medicine for Data Analysis: Implementation of a Natural Language Processing Method
JO - The Journal Of Korean Medical Classics
PY - 2019
VL - 32
IS - 1
PB - 대한한의학원전학회
SP - 61
EP - 74
SN - 1229-8328
AB - Objectives : The purpose of this study is to help select an appropriate word embedding method when analyzing East Asian traditional medicine texts as data.
Methods : Based on prescription data that imply traditional methods in traditional East Asian medicine, we have examined 4 count-based word embedding and 2 prediction-based word embedding methods. In order to intuitively compare these word embedding methods, we proposed a "prescription generating game" and compared its results with those from the application of the 6 methods.
Results : When the adjacent vectors are extracted, the count-based word embedding method derives the main herbs that are frequently used in conjunction with each other. On the other hand, in the prediction-based word embedding method, the synonyms of the herbs were derived.
Conclusions : Count-based word embedding methods seem to be more effective than prediction-based word embedding methods in analyzing the use of domesticated herbs. Among count-based word embedding methods, the TF-vector method tends to exaggerate the frequency effect, and hence the TF-IDF vector or co-word vector may be a more reasonable choice. Also, the t-score vector may be recommended in search for unusual information that could not be found in frequency. On the other hand, prediction-based embedding seems to be effective when deriving the bases of similar meanings in context.
KW - Word Embedding
KW - East Asian Traditional Medicine
KW - Korean Medicine
KW - Data Analysis
KW - Natural Language Processing
DO - 10.14369/jkmc.2019.32.1.061
ER -
Oh Junho. (2019). Comparison between Word Embedding Techniques in Traditional Korean Medicine for Data Analysis: Implementation of a Natural Language Processing Method. The Journal Of Korean Medical Classics, 32(1), 61-74.
Oh Junho. 2019, "Comparison between Word Embedding Techniques in Traditional Korean Medicine for Data Analysis: Implementation of a Natural Language Processing Method", The Journal Of Korean Medical Classics, vol.32, no.1 pp.61-74. Available from: doi:10.14369/jkmc.2019.32.1.061
Oh Junho "Comparison between Word Embedding Techniques in Traditional Korean Medicine for Data Analysis: Implementation of a Natural Language Processing Method" The Journal Of Korean Medical Classics 32.1 pp.61-74 (2019) : 61.
Oh Junho. Comparison between Word Embedding Techniques in Traditional Korean Medicine for Data Analysis: Implementation of a Natural Language Processing Method. The Journal Of Korean Medical Classics. 2019; 32(1), 61-74. Available from: doi:10.14369/jkmc.2019.32.1.061
Oh Junho. "Comparison between Word Embedding Techniques in Traditional Korean Medicine for Data Analysis: Implementation of a Natural Language Processing Method" The Journal Of Korean Medical Classics 32, no.1 (2019) : 61-74. doi: 10.14369/jkmc.2019.32.1.061
Oh Junho. Comparison between Word Embedding Techniques in Traditional Korean Medicine for Data Analysis: Implementation of a Natural Language Processing Method. The Journal Of Korean Medical Classics, 32(1), 61-74. doi: 10.14369/jkmc.2019.32.1.061
Oh Junho. Comparison between Word Embedding Techniques in Traditional Korean Medicine for Data Analysis: Implementation of a Natural Language Processing Method. The Journal Of Korean Medical Classics. 2019; 32(1) 61-74. doi: 10.14369/jkmc.2019.32.1.061
Oh Junho. Comparison between Word Embedding Techniques in Traditional Korean Medicine for Data Analysis: Implementation of a Natural Language Processing Method. The Journal Of Korean Medical Classics. 2019; 32(1), 61-74. Available from: doi:10.14369/jkmc.2019.32.1.061
Oh Junho. "Comparison between Word Embedding Techniques in Traditional Korean Medicine for Data Analysis: Implementation of a Natural Language Processing Method" The Journal Of Korean Medical Classics 32, no.1 (2019) : 61-74. doi: 10.14369/jkmc.2019.32.1.061