% Journal article entry; the citation key is unchanged so existing \cite commands still resolve.
@article{ART003212655,
  author  = {Lee, Hansle and Kim, Dong-Hyun and Kim, Hyeong-Seok and Yoo, Jaesung},
  title   = {Preference-Aligned {sLLM} for Safe and Helpful {RAG}-Based Battlefield Analysis System},
  journal = {Journal of The Korea Society of Computer and Information},
  year    = {2025},
  volume  = {30},
  number  = {6},
  pages   = {31--46},
  issn    = {1598-849X},
}
TY - JOUR
AU - Hansle Lee
AU - Dong-Hyun Kim
AU - Hyeong-Seok Kim
AU - Jaesung Yoo
TI - Preference-Aligned sLLM for Safe and Helpful RAG-Based Battlefield Analysis System
JO - Journal of The Korea Society of Computer and Information
PY - 2025
VL - 30
IS - 6
PB - The Korean Society Of Computer And Information
SP - 31
EP - 46
SN - 1598-849X
AB - In the modern battlefield environment, where vast amounts of information are distributed in real time, there is a growing need for AI-based battlefield situation analysis systems to support commanders in analyzing massive volumes of data. This study aims to align the preferences of a small large language model (sLLM) tailored for a Retrieval-Augmented Generation (RAG) system designed for battlefield situation analysis. To this end, we redefine "safety" in the military domain from the perspective of minimizing hallucinations and construct a Direct Preference Optimization (DPO) dataset using a Teacher Critique-based Inference-with-Hint technique. This technique achieved improvements in hallucination-related safety preferences of 47.35% based on human evaluation and 78.42% based on LLM-as-Judge evaluation. Subsequently, through DPO-based preference learning, we identified the optimal hyperparameter configuration for battlefield environments as β=0.9, epoch=15. Under this setting, the model achieved improvements of +24.41% in safety and +3.77% in helpfulness compared to the SFT baseline. Furthermore, it achieved a performance gain of +85.58 points in the normalized safety-focused Z-score metric, demonstrating the effectiveness of the proposed method in reducing hallucinations. This study demonstrates the potential of developing an sLLM that effectively balances safety and helpfulness in defense applications.
KW - Large Language Models;Language Model Safety;Hallucination;Direct Preference Optimization;Retrieval-Augmented Generation;Battlefield Situation Analysis
DO -
UR -
ER -
Hansle Lee, Dong-Hyun Kim, Hyeong-Seok Kim and Jaesung Yoo. (2025). Preference-Aligned sLLM for Safe and Helpful RAG-Based Battlefield Analysis System. Journal of The Korea Society of Computer and Information, 30(6), 31-46.
Hansle Lee, Dong-Hyun Kim, Hyeong-Seok Kim and Jaesung Yoo. 2025, "Preference-Aligned sLLM for Safe and Helpful RAG-Based Battlefield Analysis System", Journal of The Korea Society of Computer and Information, vol.30, no.6, pp.31-46.
Hansle Lee, Dong-Hyun Kim, Hyeong-Seok Kim, Jaesung Yoo "Preference-Aligned sLLM for Safe and Helpful RAG-Based Battlefield Analysis System" Journal of The Korea Society of Computer and Information 30.6 pp.31-46 (2025) : 31.
Hansle Lee, Dong-Hyun Kim, Hyeong-Seok Kim, Jaesung Yoo. Preference-Aligned sLLM for Safe and Helpful RAG-Based Battlefield Analysis System. Journal of The Korea Society of Computer and Information. 2025; 30(6), 31-46.
Hansle Lee, Dong-Hyun Kim, Hyeong-Seok Kim and Jaesung Yoo. "Preference-Aligned sLLM for Safe and Helpful RAG-Based Battlefield Analysis System" Journal of The Korea Society of Computer and Information 30, no.6 (2025) : 31-46.
Hansle Lee; Dong-Hyun Kim; Hyeong-Seok Kim; Jaesung Yoo. Preference-Aligned sLLM for Safe and Helpful RAG-Based Battlefield Analysis System. Journal of The Korea Society of Computer and Information, 30(6), 31-46.
Hansle Lee; Dong-Hyun Kim; Hyeong-Seok Kim; Jaesung Yoo. Preference-Aligned sLLM for Safe and Helpful RAG-Based Battlefield Analysis System. Journal of The Korea Society of Computer and Information. 2025; 30(6) 31-46.
Hansle Lee, Dong-Hyun Kim, Hyeong-Seok Kim, Jaesung Yoo. Preference-Aligned sLLM for Safe and Helpful RAG-Based Battlefield Analysis System. Journal of The Korea Society of Computer and Information. 2025; 30(6), 31-46.
Hansle Lee, Dong-Hyun Kim, Hyeong-Seok Kim and Jaesung Yoo. "Preference-Aligned sLLM for Safe and Helpful RAG-Based Battlefield Analysis System" Journal of The Korea Society of Computer and Information 30, no.6 (2025) : 31-46.