@comment{Journal article record for citation key ART003148643.
  The key is followed by a comma only (no closing brace) so that the fields
  below belong to the entry, and the entry is closed after the last field.
  The page range uses the double-hyphen form expected by bibliography styles.}
@article{ART003148643,
  author  = {Back, Moon-Ki and Shim, Hyoung-Seop},
  title   = {Time-Frequency Representations for Improving Environmental Sound Classification with Deep Learning Models},
  journal = {Journal of Software Assessment and Valuation},
  issn    = {2092-8114},
  year    = {2024},
  volume  = {20},
  number  = {4},
  pages   = {233--242},
  doi     = {10.29056/jsav.2024.12.24},
}
TY - JOUR
AU - Moon-Ki Back
AU - Hyoung-Seop Shim
TI - Time-Frequency Representations for Improving Environmental Sound Classification with Deep Learning Models
JO - Journal of Software Assessment and Valuation
PY - 2024
VL - 20
IS - 4
PB - Korea Software Assessment and Valuation Society
SP - 233
EP - 242
SN - 2092-8114
AB - Spectrograms are widely utilized in audio signal processing research to effectively analyze the magnitude of frequency components, but they are limited in representing time-varying phase information.
To overcome this limitation, this paper explores a time-frequency representation that combines Power Spectrogram (PS) and Instantaneous Frequency (IF) features and validates its effectiveness through environmental sound classification tasks using various deep learning architectures. Experiments on the ESC-50 dataset demonstrate that the ConvNeXt model, leveraging the vertical integration of PS and IF, achieves a classification accuracy of 87.16%, reflecting a 1.7% improvement over conventional methods.
The confusion matrix analysis reveals that misclassifications often occur for water-related sounds and sirens, as they exhibit highly similar time-frequency patterns, making them challenging to distinguish.
This study highlights the potential of the proposed approach to enhance the performance of deep learning models in audio-related tasks, particularly for small- to medium-scale datasets, and anticipates broad applicability in sound-related applications.
KW - Time-Frequency;Environmental Sound;Spectrogram;Instantaneous Frequency;Deep Learning
DO - 10.29056/jsav.2024.12.24
ER -
Moon-Ki Back and Hyoung-Seop Shim. (2024). Time-Frequency Representations for Improving Environmental Sound Classification with Deep Learning Models. Journal of Software Assessment and Valuation, 20(4), 233-242.
Moon-Ki Back and Hyoung-Seop Shim. 2024, "Time-Frequency Representations for Improving Environmental Sound Classification with Deep Learning Models", Journal of Software Assessment and Valuation, vol.20, no.4, pp.233-242. Available from: doi:10.29056/jsav.2024.12.24
Moon-Ki Back, Hyoung-Seop Shim "Time-Frequency Representations for Improving Environmental Sound Classification with Deep Learning Models" Journal of Software Assessment and Valuation 20.4 pp.233-242 (2024) : 233.
Moon-Ki Back, Hyoung-Seop Shim. Time-Frequency Representations for Improving Environmental Sound Classification with Deep Learning Models. Journal of Software Assessment and Valuation. 2024; 20(4), 233-242. Available from: doi:10.29056/jsav.2024.12.24
Moon-Ki Back and Hyoung-Seop Shim. "Time-Frequency Representations for Improving Environmental Sound Classification with Deep Learning Models" Journal of Software Assessment and Valuation 20, no.4 (2024) : 233-242. doi: 10.29056/jsav.2024.12.24
Moon-Ki Back; Hyoung-Seop Shim. Time-Frequency Representations for Improving Environmental Sound Classification with Deep Learning Models. Journal of Software Assessment and Valuation, 20(4), 233-242. doi: 10.29056/jsav.2024.12.24
Moon-Ki Back; Hyoung-Seop Shim. Time-Frequency Representations for Improving Environmental Sound Classification with Deep Learning Models. Journal of Software Assessment and Valuation. 2024; 20(4) 233-242. doi: 10.29056/jsav.2024.12.24
Moon-Ki Back, Hyoung-Seop Shim. Time-Frequency Representations for Improving Environmental Sound Classification with Deep Learning Models. Journal of Software Assessment and Valuation. 2024; 20(4), 233-242. Available from: doi:10.29056/jsav.2024.12.24
Moon-Ki Back and Hyoung-Seop Shim. "Time-Frequency Representations for Improving Environmental Sound Classification with Deep Learning Models" Journal of Software Assessment and Valuation 20, no.4 (2024) : 233-242. doi: 10.29056/jsav.2024.12.24