% Journal article record. Braced words in the title keep their capitalisation
% under bibliography styles that sentence-case titles.
@article{ART003305929,
  author  = {Lee, Misun},
  title   = {A Comparative Study on the Performance of {GPT-4o-mini}, {Claude 4 Sonnet}, and {Gemini 2.5 Flash} Models Using the {Prompt Runner} Framework},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2026},
  volume  = {31},
  number  = {2},
  pages   = {43--50},
  doi     = {10.9708/jksci.2026.31.02.043},
}
TY  - JOUR
AU  - Misun Lee
TI  - A Comparative Study on the Performance of GPT-4o-mini, Claude 4 Sonnet, and Gemini 2.5 Flash Models Using the Prompt Runner Framework
JO  - Journal of The Korea Society of Computer and Information
PY  - 2026
VL  - 31
IS  - 2
PB  - The Korean Society Of Computer And Information
SP  - 43
EP  - 50
SN  - 1598-849X
AB  - This study presents a comparative analysis of three large language models (LLMs)—GPT-4o-mini, Claude 4 Sonnet, and Gemini 2.5 Flash—using a novel evaluation framework called Prompt Runner. The framework systematically measures the models’ performance across nine linguistic and reasoning prompt types, totaling 90 items. Evaluation criteria include Accuracy, Consistency, Logic, Creativity, and Response Time. Accuracy was computed through Sentence-BERT-based cosine similarity, with Consistency and Logic derived by applying weight factors (0.95, 0.9 respectively). Creativity was assessed based on a weighted sum of Novelty, Diversity, and Fluency (0.5N + 0.3D + 0.2F). The analysis revealed that Claude 4 Sonnet demonstrated superior performance in logical reasoning (0.58) and creativity (0.44), while GPT-4o-mini exhibited faster response times. Gemini 2.5 Flash showed higher performance in accuracy (0.66) and consistency (0.62). Notably, Claude 4 Sonnet achieved the most stable and consistent performance in balancing overall capability and response time, thereby being evaluated as a model that effectively ensures both efficiency and quality. This study systematically identified the characteristics and performance differences of large language models (LLMs) across various prompt types by conducting a comparative analysis of quantitative performance indicators based on each model’s API.
KW  - Prompt Runner
KW  - LLM Evaluation
KW  - GPT-4o-mini
KW  - Claude 4 Sonnet
KW  - Gemini 2.5 Flash
DO  - 10.9708/jksci.2026.31.02.043
ER  -
Misun Lee. (2026). A Comparative Study on the Performance of GPT-4o-mini, Claude 4 Sonnet, and Gemini 2.5 Flash Models Using the Prompt Runner Framework. Journal of The Korea Society of Computer and Information, 31(2), 43-50.
Misun Lee. 2026, "A Comparative Study on the Performance of GPT-4o-mini, Claude 4 Sonnet, and Gemini 2.5 Flash Models Using the Prompt Runner Framework", Journal of The Korea Society of Computer and Information, vol. 31, no. 2, pp. 43-50. Available from: doi:10.9708/jksci.2026.31.02.043
Misun Lee. "A Comparative Study on the Performance of GPT-4o-mini, Claude 4 Sonnet, and Gemini 2.5 Flash Models Using the Prompt Runner Framework." Journal of The Korea Society of Computer and Information 31.2 (2026): 43-50.
Misun Lee. A Comparative Study on the Performance of GPT-4o-mini, Claude 4 Sonnet, and Gemini 2.5 Flash Models Using the Prompt Runner Framework. Journal of The Korea Society of Computer and Information. 2026; 31(2), 43-50. Available from: doi:10.9708/jksci.2026.31.02.043
Misun Lee. "A Comparative Study on the Performance of GPT-4o-mini, Claude 4 Sonnet, and Gemini 2.5 Flash Models Using the Prompt Runner Framework" Journal of The Korea Society of Computer and Information 31, no.2 (2026) : 43-50. doi: 10.9708/jksci.2026.31.02.043
Misun Lee. (2026). A Comparative Study on the Performance of GPT-4o-mini, Claude 4 Sonnet, and Gemini 2.5 Flash Models Using the Prompt Runner Framework. Journal of The Korea Society of Computer and Information, 31(2), 43-50. doi: 10.9708/jksci.2026.31.02.043
Misun Lee. A Comparative Study on the Performance of GPT-4o-mini, Claude 4 Sonnet, and Gemini 2.5 Flash Models Using the Prompt Runner Framework. Journal of The Korea Society of Computer and Information. 2026; 31(2) 43-50. doi: 10.9708/jksci.2026.31.02.043
Misun Lee. A Comparative Study on the Performance of GPT-4o-mini, Claude 4 Sonnet, and Gemini 2.5 Flash Models Using the Prompt Runner Framework. Journal of The Korea Society of Computer and Information. 2026; 31(2), 43-50. Available from: doi:10.9708/jksci.2026.31.02.043
Misun Lee. "A Comparative Study on the Performance of GPT-4o-mini, Claude 4 Sonnet, and Gemini 2.5 Flash Models Using the Prompt Runner Framework" Journal of The Korea Society of Computer and Information 31, no.2 (2026) : 43-50. doi: 10.9708/jksci.2026.31.02.043