% Journal article record for citation key ART001295258.
% NOTE(review): the second author is stored as the run-together token
% "BAEJONGMIN"; presumably a romanized "family-name given-name" pair that
% should be entered as "Last, First" -- confirm the split with the publisher
% record before changing it.
% NOTE(review): the first author is written in Hangul (UTF-8); this needs a
% Unicode-aware backend such as Biber -- confirm the toolchain.
@article{ART001295258,
  author  = {곽준영 and BAEJONGMIN},
  title   = {An Automatic Schema Generation System based on the Contents for Integrating {Web} Information Sources},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2008},
  volume  = {13},
  number  = {6},
  pages   = {77--86},
}
TY - JOUR
AU - 곽준영
AU - BAEJONGMIN
TI - An Automatic Schema Generation System based on the Contents for Integrating Web Information Sources
JO - Journal of The Korea Society of Computer and Information
PY - 2008
VL - 13
IS - 6
PB - The Korean Society Of Computer And Information
SP - 77
EP - 86
SN - 1598-849X
AB - The Web information sources can be regarded as the largest distributed database to the users. By virtually integrating the distributed information sources and regarding them as a single huge database, we can query the database to extract information. This capability is important to develop Web application programs. We have to infer a database schema from browsing-oriented Web documents in order to integrate databases.
This paper presents a heuristic algorithm to infer the XML Schema fully automatically from semi-structured Web documents. The algorithm first extracts candidate pattern regions based on predefined structure-making tags, and determines a target pattern region using a few heuristic factors, and then derives XML Schema extraction rules from the target pattern region. The schema extraction rule is represented in XQuery, which makes development of various application systems possible using open standard XML tools. We also present the experimental results for several public web sources to show the effectiveness of the algorithm.
KW - 정보추출(Information Extraction);XML스키마(XML Schema);XML;반복패턴(Repeated Pattern);정보통합(Information Integration)
DO -
UR -
ER -
곽준영 and BAEJONGMIN. (2008). An Automatic Schema Generation System based on the Contents for Integrating Web Information Sources. Journal of The Korea Society of Computer and Information, 13(6), 77-86.
곽준영 and BAEJONGMIN. 2008, "An Automatic Schema Generation System based on the Contents for Integrating Web Information Sources", Journal of The Korea Society of Computer and Information, vol.13, no.6 pp.77-86.
곽준영, BAEJONGMIN "An Automatic Schema Generation System based on the Contents for Integrating Web Information Sources" Journal of The Korea Society of Computer and Information 13.6 (2008) : 77-86.
곽준영, BAEJONGMIN. An Automatic Schema Generation System based on the Contents for Integrating Web Information Sources. Journal of The Korea Society of Computer and Information. 2008; 13(6), 77-86.
곽준영 and BAEJONGMIN. "An Automatic Schema Generation System based on the Contents for Integrating Web Information Sources" Journal of The Korea Society of Computer and Information 13, no.6 (2008) : 77-86.
곽준영; BAEJONGMIN. An Automatic Schema Generation System based on the Contents for Integrating Web Information Sources. Journal of The Korea Society of Computer and Information, 13(6), 77-86.
곽준영; BAEJONGMIN. An Automatic Schema Generation System based on the Contents for Integrating Web Information Sources. Journal of The Korea Society of Computer and Information. 2008; 13(6) 77-86.
곽준영, BAEJONGMIN. An Automatic Schema Generation System based on the Contents for Integrating Web Information Sources. Journal of The Korea Society of Computer and Information. 2008; 13(6), 77-86.
곽준영 and BAEJONGMIN. "An Automatic Schema Generation System based on the Contents for Integrating Web Information Sources" Journal of The Korea Society of Computer and Information 13, no.6 (2008) : 77-86.