@comment{
  Journal article record, citation key ART003277374.
  Fixes relative to the raw export: the entry header no longer closes the
  entry immediately after the key (which left every field outside the
  entry), the entry is properly terminated, and the page range uses the
  BibTeX en-dash form.
  NOTE(review): author names are kept exactly as exported, in
  "Given Family" order; confirm which token is the family name of the
  third author before converting to "Family, Given" form.
}
@article{ART003277374,
  author  = {Hyeon-Ji Ko and Won-Hu Seo and Chol Yong Soo},
  title   = {Enhancing Voice Recognition Accuracy through Sequential Application of Visual Speech Detection and Noise Reduction},
  journal = {Journal of Software Assessment and Valuation},
  issn    = {2092-8114},
  year    = {2025},
  volume  = {21},
  number  = {4},
  pages   = {211--222},
}
TY - JOUR
AU - Hyeon-Ji Ko
AU - Won-Hu Seo
AU - Chol Yong Soo
TI - Enhancing Voice Recognition Accuracy through Sequential Application of Visual Speech Detection and Noise Reduction
JO - Journal of Software Assessment and Valuation
PY - 2025
VL - 21
IS - 4
PB - Korea Software Assessment and Valuation Society
SP - 211
EP - 222
SN - 2092-8114
AB - Traditional energy-based voice activity detection (VAD) fails to clearly distinguish between speech and non-speech in noisy environments and leads to unnecessary computations. This paper presents a pre-processing pipeline that addresses the loss of speech recognition accuracy caused by background noise on low-spec edge devices. The study designs a four-stage sequential pipeline that uses visual voice activity detection as a computational gating mechanism. The proposed system selectively performs noise reduction and speech-to-text (STT) only on verified speech segments, demonstrating that real-time performance can be achieved and non-speech noise can be effectively blocked using only a basic combination of algorithms, without the need for deep learning-based models. Experimental results show that the system maintains a real-time factor (RTF) of 0.134 while improving noise reduction performance in speech-active segments to 15.67 dB; as background noise is removed, a Speech Loss of 14.59 dB is observed, demonstrating overall improved performance compared to conventional noise-removal-based VAD.
KW - Video VAD;noise reduction;STT accuracy;high performance on low-spec;digital signage
DO -
UR -
ER -
Hyeon-Ji Ko, Won-Hu Seo and Chol Yong Soo. (2025). Enhancing Voice Recognition Accuracy through Sequential Application of Visual Speech Detection and Noise Reduction. Journal of Software Assessment and Valuation, 21(4), 211-222.
Hyeon-Ji Ko, Won-Hu Seo and Chol Yong Soo. 2025, "Enhancing Voice Recognition Accuracy through Sequential Application of Visual Speech Detection and Noise Reduction", Journal of Software Assessment and Valuation, vol.21, no.4, pp.211-222.
Hyeon-Ji Ko, Won-Hu Seo, Chol Yong Soo "Enhancing Voice Recognition Accuracy through Sequential Application of Visual Speech Detection and Noise Reduction" Journal of Software Assessment and Valuation 21.4 pp.211-222 (2025) : 211.
Hyeon-Ji Ko, Won-Hu Seo, Chol Yong Soo. Enhancing Voice Recognition Accuracy through Sequential Application of Visual Speech Detection and Noise Reduction. Journal of Software Assessment and Valuation. 2025; 21(4), 211-222.
Hyeon-Ji Ko, Won-Hu Seo and Chol Yong Soo. "Enhancing Voice Recognition Accuracy through Sequential Application of Visual Speech Detection and Noise Reduction" Journal of Software Assessment and Valuation 21, no.4 (2025) : 211-222.
Hyeon-Ji Ko; Won-Hu Seo; Chol Yong Soo. Enhancing Voice Recognition Accuracy through Sequential Application of Visual Speech Detection and Noise Reduction. Journal of Software Assessment and Valuation, 21(4), 211-222.
Hyeon-Ji Ko; Won-Hu Seo; Chol Yong Soo. Enhancing Voice Recognition Accuracy through Sequential Application of Visual Speech Detection and Noise Reduction. Journal of Software Assessment and Valuation. 2025; 21(4) 211-222.
Hyeon-Ji Ko, Won-Hu Seo, Chol Yong Soo. Enhancing Voice Recognition Accuracy through Sequential Application of Visual Speech Detection and Noise Reduction. Journal of Software Assessment and Valuation. 2025; 21(4), 211-222.
Hyeon-Ji Ko, Won-Hu Seo and Chol Yong Soo. "Enhancing Voice Recognition Accuracy through Sequential Application of Visual Speech Detection and Noise Reduction" Journal of Software Assessment and Valuation 21, no.4 (2025) : 211-222.