% Journal article: Lee (2005), Journal of the Korean Society for Library and Information Science 39(2).
@article{ART001103698,
  author  = {Lee, Jae Yun},
  title   = {Empirical Study on Improving the Performance of Text Categorization Considering the Relationships between Feature Selection Criteria and Weighting Methods},
  journal = {Journal of the Korean Society for Library and Information Science},
  issn    = {1225-598X},
  year    = {2005},
  volume  = {39},
  number  = {2},
  pages   = {123--146},
}
TY - JOUR
AU - Lee, Jae Yun
TI - Empirical Study on Improving the Performance of Text Categorization Considering the Relationships between Feature Selection Criteria and Weighting Methods
JO - Journal of the Korean Society for Library and Information Science
PY - 2005
VL - 39
IS - 2
PB - 한국문헌정보학회
SP - 123
EP - 146
SN - 1225-598X
AB - This study aims to find consistent strategies for feature selection and feature weighting methods, which can improve the effectiveness and efficiency of the kNN text classifier. Feature selection criteria and feature weighting methods are as important a factor as classification algorithms in achieving good performance of text categorization systems. Most of the former studies chose conflicting strategies for feature selection criteria and weighting methods. In this study, the performance of several feature selection criteria is measured considering the storage space for inverted index records and the classification time. The classification experiments in this study are conducted to examine the performance of IDF as a feature selection criterion and the performance of conventional feature selection criteria, e.g. mutual information, as feature weighting methods. The results of these experiments suggest that by using those measures which prefer low-frequency features both as the feature selection criterion and as the feature weighting method, we can increase the classification speed up to three or five times without losing classification accuracy.
KW - Text Categorization;Automatic Classification;Feature Selection;Feature Weighting Methods;kNN Classifier
DO -
UR -
ER -
Lee, Jae Yun. (2005). Empirical Study on Improving the Performance of Text Categorization Considering the Relationships between Feature Selection Criteria and Weighting Methods. Journal of the Korean Society for Library and Information Science, 39(2), 123-146.
Lee, Jae Yun. 2005, "Empirical Study on Improving the Performance of Text Categorization Considering the Relationships between Feature Selection Criteria and Weighting Methods", Journal of the Korean Society for Library and Information Science, vol.39, no.2 pp.123-146.
Lee, Jae Yun "Empirical Study on Improving the Performance of Text Categorization Considering the Relationships between Feature Selection Criteria and Weighting Methods" Journal of the Korean Society for Library and Information Science 39.2 pp.123-146 (2005) : 123.
Lee, Jae Yun. Empirical Study on Improving the Performance of Text Categorization Considering the Relationships between Feature Selection Criteria and Weighting Methods. Journal of the Korean Society for Library and Information Science. 2005; 39(2), 123-146.
Lee, Jae Yun. "Empirical Study on Improving the Performance of Text Categorization Considering the Relationships between Feature Selection Criteria and Weighting Methods" Journal of the Korean Society for Library and Information Science 39, no.2 (2005) : 123-146.
Lee, Jae Yun. Empirical Study on Improving the Performance of Text Categorization Considering the Relationships between Feature Selection Criteria and Weighting Methods. Journal of the Korean Society for Library and Information Science, 39(2), 123-146.
Lee, Jae Yun. Empirical Study on Improving the Performance of Text Categorization Considering the Relationships between Feature Selection Criteria and Weighting Methods. Journal of the Korean Society for Library and Information Science. 2005; 39(2) 123-146.
Lee, Jae Yun. Empirical Study on Improving the Performance of Text Categorization Considering the Relationships between Feature Selection Criteria and Weighting Methods. Journal of the Korean Society for Library and Information Science. 2005; 39(2), 123-146.
Lee, Jae Yun. "Empirical Study on Improving the Performance of Text Categorization Considering the Relationships between Feature Selection Criteria and Weighting Methods" Journal of the Korean Society for Library and Information Science 39, no.2 (2005) : 123-146.