@comment{Journal article record. Only a start page is available for this
article (no end page is given), so the pages field holds a single page number.}
@article{ART003353669,
  author  = {Yoon, Hye-Rim and Lee, Hyun-Seung and KIM, TAEKOOK},
  title   = {Implementation of {CNN-WavLM}-based Environmental Sound and Speech Emotion Recognition Platform},
  journal = {Journal of Internet of Things and Convergence},
  issn    = {2466-0078},
  year    = {2026},
  volume  = {12},
  number  = {3},
  pages   = {23},
}
TY  - JOUR
AU  - Yoon, Hye-Rim
AU  - Lee, Hyun-Seung
AU  - KIM, TAEKOOK
TI  - Implementation of CNN-WavLM-based Environmental Sound and Speech Emotion Recognition Platform
JO  - Journal of Internet of Things and Convergence
PY  - 2026
VL  - 12
IS  - 3
PB  - The Korea Internet of Things Society
SP  - 23
EP  - 
SN  - 2466-0078
AB  - This study implements an auditory-centered emotion recording platform that enables users to record and recall their emotions using speech and environmental sounds. Conventional recording methods based on photos, videos, and text have limitations in capturing the emotional atmosphere and contextual information conveyed by everyday sounds. To address this issue, this study uses sounds directly recorded by users as input data and applies artificial intelligence-based emotion analysis techniques to implement an automatic emotion-tagging function. The proposed system is designed as a branching structure that first determines whether the input audio contains speech. If speech is detected, a WavLM-based speech emotion recognition model is applied. If speech is not detected, a Mel-spectrogram-based CNN model is used to analyze environmental sounds. The CNN model classifies environmental sounds at the scene level and then converts the classification results into emotion tags based on predefined scene-emotion mapping rules. In addition, the platform stores the emotion tags together with the time, location, and user records at the moment of recording, allowing users to explore and recall their records by emotion, date, and location. Through this implementation, this study integrates speech emotion recognition and environmental sound analysis models into an actual service flow and demonstrates the feasibility of an emotion archiving platform that utilizes auditory information.
KW  - Emotion Recognition
KW  - Internet of Things (IoT)
KW  - Speech Emotion Analysis
KW  - Environmental Sound Analysis
KW  - WavLM
KW  - Convolutional Neural Network (CNN)
KW  - Artificial Intelligence (AI)
DO  - 
UR  - 
ER  - 
Hye-Rim Yoon, Hyun-Seung Lee and KIM, TAEKOOK. (2026). Implementation of CNN-WavLM-based Environmental Sound and Speech Emotion Recognition Platform. Journal of Internet of Things and Convergence, 12(3), 23.
Hye-Rim Yoon, Hyun-Seung Lee and KIM, TAEKOOK. 2026, "Implementation of CNN-WavLM-based Environmental Sound and Speech Emotion Recognition Platform", Journal of Internet of Things and Convergence, vol.12, no.3, p.23.
Hye-Rim Yoon, Hyun-Seung Lee, KIM, TAEKOOK "Implementation of CNN-WavLM-based Environmental Sound and Speech Emotion Recognition Platform" Journal of Internet of Things and Convergence 12.3 23 (2026) : 23.
Hye-Rim Yoon, Hyun-Seung Lee, KIM, TAEKOOK. Implementation of CNN-WavLM-based Environmental Sound and Speech Emotion Recognition Platform. Journal of Internet of Things and Convergence. 2026; 12(3), 23.
Hye-Rim Yoon, Hyun-Seung Lee and KIM, TAEKOOK. "Implementation of CNN-WavLM-based Environmental Sound and Speech Emotion Recognition Platform" Journal of Internet of Things and Convergence 12, no.3 (2026) : 23.
Hye-Rim Yoon; Hyun-Seung Lee; KIM, TAEKOOK. Implementation of CNN-WavLM-based Environmental Sound and Speech Emotion Recognition Platform. Journal of Internet of Things and Convergence, 2026, 12(3), 23.
Hye-Rim Yoon; Hyun-Seung Lee; KIM, TAEKOOK. Implementation of CNN-WavLM-based Environmental Sound and Speech Emotion Recognition Platform. Journal of Internet of Things and Convergence. 2026; 12(3) 23.
Hye-Rim Yoon, Hyun-Seung Lee, KIM, TAEKOOK. Implementation of CNN-WavLM-based Environmental Sound and Speech Emotion Recognition Platform. Journal of Internet of Things and Convergence. 2026; 12(3), 23.
Hye-Rim Yoon, Hyun-Seung Lee and KIM, TAEKOOK. "Implementation of CNN-WavLM-based Environmental Sound and Speech Emotion Recognition Platform" Journal of Internet of Things and Convergence 12, no.3 (2026) : 23.