@comment{Journal article record. NOTE(review): author names are kept exactly as given by the source; "Hwang Sung Hun" looks family-name-first, so confirm each name and convert to "Last, First" form before relying on name parsing.}
@article{ART003277346,
  author  = {Hwang Sung Hun and Milandu Keith Moussavou Boussougou and Dong Joo Park},
  title   = {Performance and Stability Analysis of a Fine-Tuned {BERT-BiLSTM-CRF} {NER} Model for Automated Information Extraction in Openly Licensed Works},
  journal = {Journal of Software Assessment and Valuation},
  issn    = {2092-8114},
  year    = {2025},
  volume  = {21},
  number  = {4},
  pages   = {53--61},
}
TY - JOUR
AU - Hwang Sung Hun
AU - Milandu Keith Moussavou Boussougou
AU - Dong Joo Park
TI - Performance and Stability Analysis of a Fine-Tuned BERT-BiLSTM-CRF NER Model for Automated Information Extraction in Openly Licensed Works
JO - Journal of Software Assessment and Valuation
PY - 2025
VL - 21
IS - 4
PB - Korea Software Assessment and Valuation Society
SP - 53
EP - 61
SN - 2092-8114
AB - Korean legal documents pose challenges for information extraction due to complex layouts, Optical Character Recognition (OCR) noise, and agglutinative morphology. This paper proposes an automated Named-Entity Recognition (NER) pipeline that integrates Qwen-VL-based OCR, a Begin-Inside-Outside (B-I-O)-tagged training dataset, and fine-tuned BERT-family encoders with a BiLSTM-Conditional Random Field (CRF) decoder. We fine-tune mBERT, KLUE-RoBERTa-Large, and XLM-RoBERTa-Large under both Pure and BiLSTM-CRF settings, incorporating 30% OCR-style noise. A 5-fold cross-validation demonstrates that CRF-enhanced models achieve more stable and structurally consistent predictions, with XLM-RoBERTa-Large-CRF reaching an average F1-score of 0.998. The results highlight a practical design for robust NER in noisy OCR environments.
KW - Named-Entity Recognition (NER);BERT;Conditional Random Field (CRF);Public Domain
DO -
UR -
ER -
Hwang Sung Hun, Milandu Keith Moussavou Boussougou and Dong Joo Park. (2025). Performance and Stability Analysis of a Fine-Tuned BERT-BiLSTM-CRF NER Model for Automated Information Extraction in Openly Licensed Works. Journal of Software Assessment and Valuation, 21(4), 53-61.
Hwang Sung Hun, Milandu Keith Moussavou Boussougou and Dong Joo Park. 2025, "Performance and Stability Analysis of a Fine-Tuned BERT-BiLSTM-CRF NER Model for Automated Information Extraction in Openly Licensed Works", Journal of Software Assessment and Valuation, vol.21, no.4, pp.53-61.
Hwang Sung Hun, Milandu Keith Moussavou Boussougou, Dong Joo Park "Performance and Stability Analysis of a Fine-Tuned BERT-BiLSTM-CRF NER Model for Automated Information Extraction in Openly Licensed Works" Journal of Software Assessment and Valuation 21.4 pp.53-61 (2025) : 53.
Hwang Sung Hun, Milandu Keith Moussavou Boussougou, Dong Joo Park. Performance and Stability Analysis of a Fine-Tuned BERT-BiLSTM-CRF NER Model for Automated Information Extraction in Openly Licensed Works. Journal of Software Assessment and Valuation. 2025; 21(4), 53-61.
Hwang Sung Hun, Milandu Keith Moussavou Boussougou and Dong Joo Park. "Performance and Stability Analysis of a Fine-Tuned BERT-BiLSTM-CRF NER Model for Automated Information Extraction in Openly Licensed Works" Journal of Software Assessment and Valuation 21, no.4 (2025) : 53-61.
Hwang Sung Hun; Milandu Keith Moussavou Boussougou; Dong Joo Park. Performance and Stability Analysis of a Fine-Tuned BERT-BiLSTM-CRF NER Model for Automated Information Extraction in Openly Licensed Works. Journal of Software Assessment and Valuation, 21(4), 53-61.
Hwang Sung Hun; Milandu Keith Moussavou Boussougou; Dong Joo Park. Performance and Stability Analysis of a Fine-Tuned BERT-BiLSTM-CRF NER Model for Automated Information Extraction in Openly Licensed Works. Journal of Software Assessment and Valuation. 2025; 21(4) 53-61.
Hwang Sung Hun, Milandu Keith Moussavou Boussougou, Dong Joo Park. Performance and Stability Analysis of a Fine-Tuned BERT-BiLSTM-CRF NER Model for Automated Information Extraction in Openly Licensed Works. Journal of Software Assessment and Valuation. 2025; 21(4), 53-61.
Hwang Sung Hun, Milandu Keith Moussavou Boussougou and Dong Joo Park. "Performance and Stability Analysis of a Fine-Tuned BERT-BiLSTM-CRF NER Model for Automated Information Extraction in Openly Licensed Works" Journal of Software Assessment and Valuation 21, no.4 (2025) : 53-61.