% Journal article record for the KM-AVSR paper (citation key: ART003234202).
% Names use the unambiguous "Last, First" form; braces in the title protect
% the proper noun and acronyms from style-driven sentence casing.
@article{ART003234202,
  author  = {Yoon, Hee-Dong and Lee, Se-Uk and Moon, Dong-Kyu and Kim, Myung-Ho},
  title   = {Design and Implementation of Sentence-Level Lip-reading with a {Korean} Morpheme-Based Multimodal {AVSR} Model ({KM-AVSR})},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2025},
  volume  = {30},
  number  = {8},
  pages   = {75--85},
}
TY - JOUR
AU - Hee-Dong Yoon
AU - Se-Uk Lee
AU - Dong-Kyu Moon
AU - Myung-Ho Kim
TI - Design and Implementation of Sentence-Level Lip-reading with a Korean Morpheme-Based Multimodal AVSR Model (KM-AVSR)
JO - Journal of The Korea Society of Computer and Information
PY - 2025
VL - 30
IS - 8
PB - The Korean Society Of Computer And Information
SP - 75
EP - 85
SN - 1598-849X
AB - In this paper, we propose KM-AVSR, a Korean Morpheme-based Multimodal Audio-Visual Speech Recognition (AVSR) model designed to enhance sentence-level lip-reading accuracy. Lip-reading has become increasingly valuable for understanding speech in noisy environments or in the absence of audio, with promising applications in Korean language education, assistive technologies, and surveillance. To address the challenges posed by the syllabic and agglutinative nature of Korean, KM-AVSR adopts morpheme-based subword tokenization. The model independently encodes visual (lip movements) and auditory (raw waveform) inputs using separate encoders, fuses the modalities through a multilayer perceptron, and decodes the output using a hybrid Connectionist Temporal Classification (CTC) and Transformer-based decoder. Evaluations on a Korean lip-reading dataset demonstrate that KM-AVSR achieves a Character Error Rate (CER) of 15.66%, representing a 39.35% improvement over a conventional CNN-based AVSR model. These results highlight the effectiveness of morpheme-level subword modeling and hybrid decoding in Korean AVSR.
KW - Korean lip-reading;Audio-visual speech recognition;Morpheme-based subwords;Deep learning;Natural language processing
DO -
UR -
ER -
Hee-Dong Yoon, Se-Uk Lee, Dong-Kyu Moon and Myung-Ho Kim. (2025). Design and Implementation of Sentence-Level Lip-reading with a Korean Morpheme-Based Multimodal AVSR Model (KM-AVSR). Journal of The Korea Society of Computer and Information, 30(8), 75-85.
Hee-Dong Yoon, Se-Uk Lee, Dong-Kyu Moon and Myung-Ho Kim. 2025, "Design and Implementation of Sentence-Level Lip-reading with a Korean Morpheme-Based Multimodal AVSR Model (KM-AVSR)", Journal of The Korea Society of Computer and Information, vol.30, no.8, pp.75-85.
Hee-Dong Yoon, Se-Uk Lee, Dong-Kyu Moon, Myung-Ho Kim "Design and Implementation of Sentence-Level Lip-reading with a Korean Morpheme-Based Multimodal AVSR Model (KM-AVSR)" Journal of The Korea Society of Computer and Information 30.8 (2025) : 75-85.
Hee-Dong Yoon, Se-Uk Lee, Dong-Kyu Moon, Myung-Ho Kim. Design and Implementation of Sentence-Level Lip-reading with a Korean Morpheme-Based Multimodal AVSR Model (KM-AVSR). Journal of The Korea Society of Computer and Information. 2025; 30(8), 75-85.
Hee-Dong Yoon, Se-Uk Lee, Dong-Kyu Moon and Myung-Ho Kim. "Design and Implementation of Sentence-Level Lip-reading with a Korean Morpheme-Based Multimodal AVSR Model (KM-AVSR)" Journal of The Korea Society of Computer and Information 30, no.8 (2025) : 75-85.
Hee-Dong Yoon; Se-Uk Lee; Dong-Kyu Moon; Myung-Ho Kim. Design and Implementation of Sentence-Level Lip-reading with a Korean Morpheme-Based Multimodal AVSR Model (KM-AVSR). Journal of The Korea Society of Computer and Information, 2025, 30(8), 75-85.
Hee-Dong Yoon; Se-Uk Lee; Dong-Kyu Moon; Myung-Ho Kim. Design and Implementation of Sentence-Level Lip-reading with a Korean Morpheme-Based Multimodal AVSR Model (KM-AVSR). Journal of The Korea Society of Computer and Information. 2025; 30(8) 75-85.
Hee-Dong Yoon, Se-Uk Lee, Dong-Kyu Moon, Myung-Ho Kim. Design and Implementation of Sentence-Level Lip-reading with a Korean Morpheme-Based Multimodal AVSR Model (KM-AVSR). Journal of The Korea Society of Computer and Information. 2025; 30(8), 75-85.
Hee-Dong Yoon, Se-Uk Lee, Dong-Kyu Moon and Myung-Ho Kim. "Design and Implementation of Sentence-Level Lip-reading with a Korean Morpheme-Based Multimodal AVSR Model (KM-AVSR)" Journal of The Korea Society of Computer and Information 30, no.8 (2025) : 75-85.