% Journal article record. The citation key is unchanged so existing citations of ART003084945 still resolve.
@article{ART003084945,
  author  = {Song, Chiho},
  title   = {A method for metadata extraction from a collection of records using Named Entity Recognition in Natural Language Processing},
  journal = {Journal of Korean Society of Archives and Records Management},
  issn    = {1598-1487},
  year    = {2024},
  volume  = {24},
  number  = {2},
  pages   = {65--88},
  doi     = {10.14404/JKSARM.2024.24.2.065},
}
TY  - JOUR
AU  - Song, Chiho
TI  - A method for metadata extraction from a collection of records using Named Entity Recognition in Natural Language Processing
JO  - Journal of Korean Society of Archives and Records Management
PY  - 2024
VL  - 24
IS  - 2
PB  - Korean Society of Archives and Records Management
SP  - 65
EP  - 88
SN  - 1598-1487
AB  - This pilot study explores a method of extracting metadata values and descriptions from records using named entity recognition (NER), a technique in natural language processing (NLP), a subfield of artificial intelligence. The study focuses on handwritten records from the Guro Industrial Complex, produced during the 1960s and 1970s, comprising approximately 1,200 pages and 80,000 words.
After the preprocessing process of the records, which included digitization, the study employed a publicly available language API based on Google’s Bidirectional Encoder Representations from Transformers (BERT) language model to recognize entity names within the text. As a result, 173 names of people and 314 of organizations and institutions were extracted from the Guro Industrial Complex’s past records. These extracted entities are expected to serve as direct search terms for accessing the contents of the records.
Furthermore, the study identified challenges that arose when applying the theoretical methodology of NLP to real-world records consisting of semistructured text. It also presents potential solutions and implications to consider when addressing these issues.
KW  - AI
KW  - NLP
KW  - Metadata
KW  - LLM
KW  - NER
DO  - 10.14404/JKSARM.2024.24.2.065
ER  - 
Chiho Song. (2024). A method for metadata extraction from a collection of records using Named Entity Recognition in Natural Language Processing. Journal of Korean Society of Archives and Records Management, 24(2), 65-88.
Chiho Song. 2024, "A method for metadata extraction from a collection of records using Named Entity Recognition in Natural Language Processing", Journal of Korean Society of Archives and Records Management, vol.24, no.2 pp.65-88. Available from: doi:10.14404/JKSARM.2024.24.2.065
Chiho Song. "A method for metadata extraction from a collection of records using Named Entity Recognition in Natural Language Processing." Journal of Korean Society of Archives and Records Management 24.2 (2024): 65-88.
Chiho Song. A method for metadata extraction from a collection of records using Named Entity Recognition in Natural Language Processing. Journal of Korean Society of Archives and Records Management. 2024; 24(2), 65-88. Available from: doi:10.14404/JKSARM.2024.24.2.065
Chiho Song. "A method for metadata extraction from a collection of records using Named Entity Recognition in Natural Language Processing" Journal of Korean Society of Archives and Records Management 24, no.2 (2024) : 65-88. doi: 10.14404/JKSARM.2024.24.2.065
Chiho Song. A method for metadata extraction from a collection of records using Named Entity Recognition in Natural Language Processing. Journal of Korean Society of Archives and Records Management, 24(2), 65-88. doi: 10.14404/JKSARM.2024.24.2.065
Chiho Song. A method for metadata extraction from a collection of records using Named Entity Recognition in Natural Language Processing. Journal of Korean Society of Archives and Records Management. 2024; 24(2) 65-88. doi: 10.14404/JKSARM.2024.24.2.065
Chiho Song. A method for metadata extraction from a collection of records using Named Entity Recognition in Natural Language Processing. Journal of Korean Society of Archives and Records Management. 2024; 24(2), 65-88. Available from: doi:10.14404/JKSARM.2024.24.2.065
Chiho Song. "A method for metadata extraction from a collection of records using Named Entity Recognition in Natural Language Processing" Journal of Korean Society of Archives and Records Management 24, no.2 (2024) : 65-88. doi: 10.14404/JKSARM.2024.24.2.065