@comment{Journal article record. The citation key is kept exactly as exported so that existing citations of this key continue to resolve.}
@article{ART002587867,
  author  = {Oh, Junho},
  title   = {A Comparative Study of Feature Extraction Methods for Authorship Attribution in the Text of Traditional {East} {Asian} Medicine with a Focus on Function Words},
  journal = {The Journal Of Korean Medical Classics},
  issn    = {1229-8328},
  year    = {2020},
  volume  = {33},
  number  = {2},
  pages   = {51--59},
  doi     = {10.14369/jkmc.2020.33.2.051},
}
TY - JOUR
AU - Oh Junho
TI - A Comparative Study of Feature Extraction Methods for Authorship Attribution in the Text of Traditional East Asian Medicine with a Focus on Function Words
JO - The Journal Of Korean Medical Classics
PY - 2020
VL - 33
IS - 2
PB - 대한한의학원전학회
SP - 51
EP - 59
SN - 1229-8328
AB - Objectives : We would like to study what is the most appropriate "feature" to effectively perform authorship attribution of the text of Traditional East Asian Medicine. Methods : The authorship attribution performance of the Support Vector Machine (SVM) was compared by cross validation, depending on whether the function words or content words, single word or collocations, and IDF weights were applied or not, using ‘Variorum of the Nanjing’ as an experimental Corpus.
Results : When using the combination of 'function words/uni-bigram/TF', the performance was best with accuracy of 0.732, and the combination of 'content words/unigram/TFIDF' showed the lowest accuracy of 0.351.
Conclusions : This shows the following facts from the authorship attribution of the text of East Asian traditional medicine. First, function words play an important role in comparison to content words. Second, collocations were relatively important in content words, but single words have more important meanings in function words. Third, unlike general text analysis, IDF weighting resulted in worse performance.
KW - authorship attribution;Function words;Korean Medical Classics;East Asian traditional medicine;Variorum of the Nanjing
DO - 10.14369/jkmc.2020.33.2.051
ER -
Oh Junho. (2020). A Comparative Study of Feature Extraction Methods for Authorship Attribution in the Text of Traditional East Asian Medicine with a Focus on Function Words. The Journal Of Korean Medical Classics, 33(2), 51-59.
Oh Junho. 2020, "A Comparative Study of Feature Extraction Methods for Authorship Attribution in the Text of Traditional East Asian Medicine with a Focus on Function Words", The Journal Of Korean Medical Classics, vol.33, no.2, pp.51-59. Available from: doi:10.14369/jkmc.2020.33.2.051
Oh Junho "A Comparative Study of Feature Extraction Methods for Authorship Attribution in the Text of Traditional East Asian Medicine with a Focus on Function Words" The Journal Of Korean Medical Classics 33.2 pp.51-59 (2020) : 51.
Oh Junho. A Comparative Study of Feature Extraction Methods for Authorship Attribution in the Text of Traditional East Asian Medicine with a Focus on Function Words. The Journal Of Korean Medical Classics. 2020; 33(2), 51-59. Available from: doi:10.14369/jkmc.2020.33.2.051
Oh Junho. "A Comparative Study of Feature Extraction Methods for Authorship Attribution in the Text of Traditional East Asian Medicine with a Focus on Function Words" The Journal Of Korean Medical Classics 33, no.2 (2020) : 51-59. doi: 10.14369/jkmc.2020.33.2.051
Oh Junho. A Comparative Study of Feature Extraction Methods for Authorship Attribution in the Text of Traditional East Asian Medicine with a Focus on Function Words. The Journal Of Korean Medical Classics, 33(2), 51-59. doi: 10.14369/jkmc.2020.33.2.051
Oh Junho. A Comparative Study of Feature Extraction Methods for Authorship Attribution in the Text of Traditional East Asian Medicine with a Focus on Function Words. The Journal Of Korean Medical Classics. 2020; 33(2) 51-59. doi: 10.14369/jkmc.2020.33.2.051
Oh Junho. A Comparative Study of Feature Extraction Methods for Authorship Attribution in the Text of Traditional East Asian Medicine with a Focus on Function Words. The Journal Of Korean Medical Classics. 2020; 33(2), 51-59. Available from: doi:10.14369/jkmc.2020.33.2.051
Oh Junho. "A Comparative Study of Feature Extraction Methods for Authorship Attribution in the Text of Traditional East Asian Medicine with a Focus on Function Words" The Journal Of Korean Medical Classics 33, no.2 (2020) : 51-59. doi: 10.14369/jkmc.2020.33.2.051