% Journal article record; all fields must sit inside the braces opened after the citation key.
@article{ART001677693,
  author  = {KIM, YONG HWAN and Chung, Young-Mee},
  title   = {An Experimental Study on Feature Selection Using {Wikipedia} for Text Categorization},
  journal = {Journal of the Korean Society for Information Management},
  issn    = {1013-0799},
  year    = {2012},
  volume  = {29},
  number  = {2},
  pages   = {155--171},
  doi     = {10.3743/KOSIM.2012.29.2.155},
}
TY  - JOUR
AU  - KIM, YONG HWAN
AU  - Young-Mee Chung
TI  - An Experimental Study on Feature Selection Using Wikipedia for Text Categorization
JO  - Journal of the Korean Society for Information Management
PY  - 2012
VL  - 29
IS  - 2
PB  - 한국정보관리학회
SP  - 155
EP  - 171
SN  - 1013-0799
AB  - In text categorization, core terms of an input document are hardly selected as classification features if they do not occur in a training document set. Besides, synonymous terms with the same concept are usually treated as different features. This study aims to improve text categorization performance by integrating synonyms into a single feature and by replacing input terms not in the training document set with the most similar term occurring in training documents using Wikipedia. For the selection of classification features, experiments were performed in various settings composed of three different conditions: the use of category information of non-training terms, the part of Wikipedia used for measuring term-term similarity, and the type of similarity measures. The categorization performance of a kNN classifier was improved by 0.35~1.85% in F1 value in all the experimental settings when non-learning terms were replaced by the learning term with the highest similarity above the threshold value. Although the improvement ratio is not as high as expected, several semantic as well as structural devices of Wikipedia could be used for selecting more effective classification features.
KW  - text categorization;document classification;feature selection;Wikipedia;term similarity;non-learning term
DO  - 10.3743/KOSIM.2012.29.2.155
ER  - 
KIM, YONG HWAN and Young-Mee Chung. (2012). An Experimental Study on Feature Selection Using Wikipedia for Text Categorization. Journal of the Korean Society for Information Management, 29(2), 155-171.
KIM, YONG HWAN and Young-Mee Chung. 2012, "An Experimental Study on Feature Selection Using Wikipedia for Text Categorization", Journal of the Korean Society for Information Management, vol.29, no.2, pp.155-171. Available from: doi:10.3743/KOSIM.2012.29.2.155
KIM, YONG HWAN, Young-Mee Chung "An Experimental Study on Feature Selection Using Wikipedia for Text Categorization" Journal of the Korean Society for Information Management 29.2 pp.155-171 (2012) : 155.
KIM, YONG HWAN, Young-Mee Chung. An Experimental Study on Feature Selection Using Wikipedia for Text Categorization. Journal of the Korean Society for Information Management. 2012; 29(2), 155-171. Available from: doi:10.3743/KOSIM.2012.29.2.155
KIM, YONG HWAN and Young-Mee Chung. "An Experimental Study on Feature Selection Using Wikipedia for Text Categorization" Journal of the Korean Society for Information Management 29, no.2 (2012) : 155-171. doi: 10.3743/KOSIM.2012.29.2.155
KIM, YONG HWAN; Young-Mee Chung. An Experimental Study on Feature Selection Using Wikipedia for Text Categorization. Journal of the Korean Society for Information Management, 29(2), 155-171. doi: 10.3743/KOSIM.2012.29.2.155
KIM, YONG HWAN; Young-Mee Chung. An Experimental Study on Feature Selection Using Wikipedia for Text Categorization. Journal of the Korean Society for Information Management. 2012; 29(2) 155-171. doi: 10.3743/KOSIM.2012.29.2.155
KIM, YONG HWAN, Young-Mee Chung. An Experimental Study on Feature Selection Using Wikipedia for Text Categorization. Journal of the Korean Society for Information Management. 2012; 29(2), 155-171. Available from: doi:10.3743/KOSIM.2012.29.2.155
KIM, YONG HWAN and Young-Mee Chung. "An Experimental Study on Feature Selection Using Wikipedia for Text Categorization" Journal of the Korean Society for Information Management 29, no.2 (2012) : 155-171. doi: 10.3743/KOSIM.2012.29.2.155