% Journal article. Authors are written in "Family, Given" form so BibTeX splits
% the names correctly; acronyms in the title are braced to survive style recasing.
@article{ART003348242,
  author  = {Ahn, Cheolbum and Kim, Jin Hong},
  title   = {A Study on a Multimodal {RAG} Framework for Overcoming the Limitations of Multimodal Large Language Models ({MLLMs})},
  journal = {Journal of Software Forensics},
  issn    = {3092-541X},
  year    = {2026},
  volume  = {22},
  number  = {2},
  pages   = {179--191},
  doi     = {10.29056/jsf.2026.06.16},
}
TY  - JOUR
AU  - Ahn cheolbum
AU  - KIM, JIN HONG
TI  - A Study on a Multimodal RAG Framework for Overcoming the Limitations of Multimodal Large Language Models (MLLMs)
JO  - Journal of Software Forensics
PY  - 2026
VL  - 22
IS  - 2
PB  - Korea Software Assessment and Valuation Society
SP  - 179
EP  - 191
SN  - 3092-541X
AB  - Recent multimodal large language models (MLLMs) have achieved remarkable advances in visual information processing; however, they continue to face two persistent limitations. First, these models are unable to access up-to-date information or domain-specific private data that falls outside their training datasets. Second, the phenomenon known as visual hallucination—whereby models misinterpret factual relationships and generate plausible yet erroneous outputs—occurs with considerable frequency. These limitations serve as critical obstacles to the practical adoption of MLLMs in fields that demand high levels of reliability, such as medical diagnosis, legal analysis, and precision manufacturing inspection. In response, this paper proposes a novel framework that integrates Retrieval-Augmented Generation (RAG) technology into image analysis as a means of overcoming these challenges, and systematically examines the key implementation hurdles alongside the prospects for future advancement.
KW  - MLLMs;hallucination;RAG;domain-specific private data;framework
DO  - 10.29056/jsf.2026.06.16
ER  - 
Ahn cheolbum and KIM, JIN HONG. (2026). A Study on a Multimodal RAG Framework for Overcoming the Limitations of Multimodal Large Language Models (MLLMs). Journal of Software Forensics, 22(2), 179-191.
Ahn cheolbum and KIM, JIN HONG. 2026, "A Study on a Multimodal RAG Framework for Overcoming the Limitations of Multimodal Large Language Models (MLLMs)", Journal of Software Forensics, vol.22, no.2, pp.179-191. Available from: doi:10.29056/jsf.2026.06.16
Ahn cheolbum, KIM, JIN HONG "A Study on a Multimodal RAG Framework for Overcoming the Limitations of Multimodal Large Language Models (MLLMs)" Journal of Software Forensics 22.2 pp.179-191 (2026) : 179.
Ahn cheolbum, KIM, JIN HONG. A Study on a Multimodal RAG Framework for Overcoming the Limitations of Multimodal Large Language Models (MLLMs). Journal of Software Forensics. 2026; 22(2), 179-191. Available from: doi:10.29056/jsf.2026.06.16
Ahn cheolbum and KIM, JIN HONG. "A Study on a Multimodal RAG Framework for Overcoming the Limitations of Multimodal Large Language Models (MLLMs)" Journal of Software Forensics 22, no.2 (2026) : 179-191. doi: 10.29056/jsf.2026.06.16
Ahn cheolbum; KIM, JIN HONG. A Study on a Multimodal RAG Framework for Overcoming the Limitations of Multimodal Large Language Models (MLLMs). Journal of Software Forensics, 22(2), 179-191. doi: 10.29056/jsf.2026.06.16
Ahn cheolbum; KIM, JIN HONG. A Study on a Multimodal RAG Framework for Overcoming the Limitations of Multimodal Large Language Models (MLLMs). Journal of Software Forensics. 2026; 22(2) 179-191. doi: 10.29056/jsf.2026.06.16
Ahn cheolbum, KIM, JIN HONG. A Study on a Multimodal RAG Framework for Overcoming the Limitations of Multimodal Large Language Models (MLLMs). Journal of Software Forensics. 2026; 22(2), 179-191. Available from: doi:10.29056/jsf.2026.06.16
Ahn cheolbum and KIM, JIN HONG. "A Study on a Multimodal RAG Framework for Overcoming the Limitations of Multimodal Large Language Models (MLLMs)" Journal of Software Forensics 22, no.2 (2026) : 179-191. doi: 10.29056/jsf.2026.06.16