% Journal article: Shin and Kim (2025), J. Korea Society of Computer and Information 30(11).
% The citation key is kept unchanged so existing citations of this entry still resolve.
@article{ART003266063,
  author  = {Shin, Do Kyung and Kim, Young Dae},
  title   = {Multi-Domain {ESQ} Metrics for Quality Assessment of Augmented Emotional Speech},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2025},
  volume  = {30},
  number  = {11},
  pages   = {37--62},
}
TY  - JOUR
AU  - Do Kyung Shin
AU  - Young Dae Kim
TI  - Multi-Domain ESQ Metrics for Quality Assessment of Augmented Emotional Speech
JO  - Journal of The Korea Society of Computer and Information
PY  - 2025
VL  - 30
IS  - 11
PB  - The Korean Society Of Computer And Information
SP  - 37
EP  - 62
SN  - 1598-849X
AB  - Recent advances in Automatic Speech Recognition (ASR) technology have driven active research in Speech Emotion Recognition (SER) applications. While SER performance heavily depends on data quality and quantity, data scarcity remains a persistent challenge, making data augmentation techniques essential. Existing voice quality evaluation metrics such as PESQ and STOI are single-dimensional evaluation methods, and have the disadvantage of not being able to comprehensively evaluate the quality of audio data with high-dimensional and multi-dimensional characteristics, such as emotional speech. This study proposes ESQ (Emotion-Specific Quality Assessment) metrics for evaluating the quality of augmented emotional speech data. To validate the ESQ metrics, we utilized the EMO dataset augmented across quality levels using MetricGAN. Experimental results demonstrate consistent score improvements across all seven groups as quality levels increase, achieving an overall average improvement rate of 90.75%.
KW  - SER
KW  - Audio Quality Assessment
KW  - Data Augmentation
KW  - PESQ
KW  - STOI
KW  - MetricGAN
DO  - 
UR  - 
ER  - 
Do Kyung Shin and Young Dae Kim. (2025). Multi-Domain ESQ Metrics for Quality Assessment of Augmented Emotional Speech. Journal of The Korea Society of Computer and Information, 30(11), 37-62.
Do Kyung Shin and Young Dae Kim. 2025, "Multi-Domain ESQ Metrics for Quality Assessment of Augmented Emotional Speech", Journal of The Korea Society of Computer and Information, vol.30, no.11, pp.37-62.
Do Kyung Shin, Young Dae Kim. "Multi-Domain ESQ Metrics for Quality Assessment of Augmented Emotional Speech." Journal of The Korea Society of Computer and Information 30.11 (2025): 37-62.
Do Kyung Shin, Young Dae Kim. Multi-Domain ESQ Metrics for Quality Assessment of Augmented Emotional Speech. Journal of The Korea Society of Computer and Information. 2025; 30(11), 37-62.
Do Kyung Shin and Young Dae Kim. "Multi-Domain ESQ Metrics for Quality Assessment of Augmented Emotional Speech" Journal of The Korea Society of Computer and Information 30, no.11 (2025) : 37-62.
Do Kyung Shin; Young Dae Kim. Multi-Domain ESQ Metrics for Quality Assessment of Augmented Emotional Speech. Journal of The Korea Society of Computer and Information, 2025, 30(11), 37-62.
Do Kyung Shin; Young Dae Kim. Multi-Domain ESQ Metrics for Quality Assessment of Augmented Emotional Speech. Journal of The Korea Society of Computer and Information. 2025; 30(11): 37-62.
Do Kyung Shin, Young Dae Kim. Multi-Domain ESQ Metrics for Quality Assessment of Augmented Emotional Speech. Journal of The Korea Society of Computer and Information. 2025; 30(11), 37-62.
Do Kyung Shin and Young Dae Kim. "Multi-Domain ESQ Metrics for Quality Assessment of Augmented Emotional Speech" Journal of The Korea Society of Computer and Information 30, no.11 (2025) : 37-62.