% Journal article: paragraph-based focused web crawler (Kang and Kim, 2020).
@article{ART002663790,
  author  = {Kang, NamOh and Kim, Jae Ho},
  title   = {A Design and Implementation of Paragraph-based Focused Web Crawler Using Semantic Priority of Link},
  journal = {Journal of Knowledge Information Technology and Systems},
  issn    = {1975-7700},
  year    = {2020},
  volume  = {15},
  number  = {6},
  pages   = {1075--1083},
  doi     = {10.34163/jkits.2020.15.6.015},
}
TY - JOUR
AU - NamOh Kang
AU - Jae Ho Kim
TI - A Design and Implementation of Paragraph-based Focused Web Crawler Using Semantic Priority of Link
JO - Journal of Knowledge Information Technology and Systems
PY - 2020
VL - 15
IS - 6
PB - Korea Knowledge Information Technology Society
SP - 1075
EP - 1083
SN - 1975-7700
AB - A search engine maintaining whole Web consistency is very important to retrieve information correctly and efficiently. However, as the size of Web is rapidly growing and content is also dynamically changing, it is impossible for the search engine to achieve the goal by using limited resources such as hardware, network and computing time. In order to solve this problem, a focused web crawler has been introduced which can identify and visit the most promising links related to a specific topic and avoid downloading off-topic documents efficiently under limited resources. In this research, we propose a paragraph-based focused web crawler using semantic priority of link. The proposed system selects promising links from a downloaded web page by measuring similarity between a topic and link's data such as anchor text and a paragraph containing the link. In this paper, different from existing methods, we proposed a novel similarity function for calculating a link priority by using WordNet. And we introduced a method to visit high-priority link first. We conducted experiments to prove the performance of the proposed paragraph-based web focused crawler by using some topics. The experimental result showed the paragraph-based web focused crawler using semantic priority of link improves the term frequency of document retrieval.
KW - Web search engines;Focused web crawlers;Link priorities;Semantic webs;Information retrievals;WordNet
DO - 10.34163/jkits.2020.15.6.015
ER -
NamOh Kang and Jae Ho Kim. (2020). A Design and Implementation of Paragraph-based Focused Web Crawler Using Semantic Priority of Link. Journal of Knowledge Information Technology and Systems, 15(6), 1075-1083.
NamOh Kang and Jae Ho Kim. 2020, "A Design and Implementation of Paragraph-based Focused Web Crawler Using Semantic Priority of Link", Journal of Knowledge Information Technology and Systems, vol.15, no.6, pp.1075-1083. Available from: doi:10.34163/jkits.2020.15.6.015
NamOh Kang, Jae Ho Kim "A Design and Implementation of Paragraph-based Focused Web Crawler Using Semantic Priority of Link" Journal of Knowledge Information Technology and Systems 15.6 (2020): 1075-1083.
NamOh Kang, Jae Ho Kim. A Design and Implementation of Paragraph-based Focused Web Crawler Using Semantic Priority of Link. Journal of Knowledge Information Technology and Systems. 2020; 15(6), 1075-1083. Available from: doi:10.34163/jkits.2020.15.6.015
NamOh Kang and Jae Ho Kim. "A Design and Implementation of Paragraph-based Focused Web Crawler Using Semantic Priority of Link" Journal of Knowledge Information Technology and Systems 15, no.6 (2020): 1075-1083. doi: 10.34163/jkits.2020.15.6.015
NamOh Kang; Jae Ho Kim. A Design and Implementation of Paragraph-based Focused Web Crawler Using Semantic Priority of Link. Journal of Knowledge Information Technology and Systems, 2020, 15(6), 1075-1083. doi: 10.34163/jkits.2020.15.6.015
NamOh Kang; Jae Ho Kim. A Design and Implementation of Paragraph-based Focused Web Crawler Using Semantic Priority of Link. Journal of Knowledge Information Technology and Systems. 2020; 15(6) 1075-1083. doi: 10.34163/jkits.2020.15.6.015
NamOh Kang, Jae Ho Kim. A Design and Implementation of Paragraph-based Focused Web Crawler Using Semantic Priority of Link. Journal of Knowledge Information Technology and Systems. 2020; 15(6), 1075-1083. Available from: doi:10.34163/jkits.2020.15.6.015
NamOh Kang and Jae Ho Kim. "A Design and Implementation of Paragraph-based Focused Web Crawler Using Semantic Priority of Link" Journal of Knowledge Information Technology and Systems 15, no.6 (2020): 1075-1083. doi: 10.34163/jkits.2020.15.6.015