% Journal article record, key ART003212671. {OCR} is braced so sentence-casing
% styles keep the acronym upper-case; the page range uses "--" (en-dash).
@article{ART003212671,
  author  = {Song, Minchae and Park, Jaeyoung},
  title   = {Quantitative Assessment of {OCR} for Complex Documents on Retrieval-Augmented Generation Performance},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2025},
  volume  = {30},
  number  = {6},
  pages   = {65--76},
}
TY - JOUR
AU - Minchae Song
AU - Jaeyoung Park
TI - Quantitative Assessment of OCR for Complex Documents on Retrieval-Augmented Generation Performance
JO - Journal of The Korea Society of Computer and Information
PY - 2025
VL - 30
IS - 6
PB - The Korean Society Of Computer And Information
SP - 65
EP - 76
SN - 1598-849X
AB - Retrieval-Augmented Generation (RAG) enhances the accuracy of generative AI services by allowing Large Language Models (LLMs) to reference external knowledge bases rather than relying solely on pre-trained knowledge. This study analyzes various types of financial document images to examine the impact of document image structure on RAG effectiveness. The results reveal that, although OCR achieves high recognition accuracy even with handwritten text, the overall performance of RAG remains suboptimal. This suggests that increased structural complexity in original document images hinders contextual understanding, which in turn degrades performance across the retrieval, chunking, and generation stages of the RAG pipeline. Therefore, assuming OCR text quality exceeds a certain threshold, structuring input data into a format that is more readily interpretable by machines through post-processing plays a more critical role in enhancing RAG performance.
KW - Generative Artificial Intelligence
KW - Finance
KW - OCR
KW - RAG
KW - Word Error Rate
DO -
UR -
ER -
Minchae Song and Jaeyoung Park. (2025). Quantitative Assessment of OCR for Complex Documents on Retrieval-Augmented Generation Performance. Journal of The Korea Society of Computer and Information, 30(6), 65-76.
Minchae Song and Jaeyoung Park. 2025, "Quantitative Assessment of OCR for Complex Documents on Retrieval-Augmented Generation Performance", Journal of The Korea Society of Computer and Information, vol.30, no.6, pp.65-76.
Minchae Song, Jaeyoung Park "Quantitative Assessment of OCR for Complex Documents on Retrieval-Augmented Generation Performance" Journal of The Korea Society of Computer and Information 30.6 (2025) : 65-76.
Minchae Song, Jaeyoung Park. Quantitative Assessment of OCR for Complex Documents on Retrieval-Augmented Generation Performance. Journal of The Korea Society of Computer and Information. 2025; 30(6), 65-76.
Minchae Song and Jaeyoung Park. "Quantitative Assessment of OCR for Complex Documents on Retrieval-Augmented Generation Performance" Journal of The Korea Society of Computer and Information 30, no.6 (2025) : 65-76.
Minchae Song; Jaeyoung Park. Quantitative Assessment of OCR for Complex Documents on Retrieval-Augmented Generation Performance. Journal of The Korea Society of Computer and Information, 2025, 30(6), 65-76.
Minchae Song; Jaeyoung Park. Quantitative Assessment of OCR for Complex Documents on Retrieval-Augmented Generation Performance. Journal of The Korea Society of Computer and Information. 2025; 30(6): 65-76.
Minchae Song, Jaeyoung Park. Quantitative Assessment of OCR for Complex Documents on Retrieval-Augmented Generation Performance. Journal of The Korea Society of Computer and Information. 2025; 30(6), 65-76.
Minchae Song and Jaeyoung Park. "Quantitative Assessment of OCR for Complex Documents on Retrieval-Augmented Generation Performance" Journal of The Korea Society of Computer and Information 30, no.6 (2025) : 65-76.