% Journal article by Jun and Min (2024).
% The citation key must be followed by a comma (not a closing brace) so that
% the fields below are parsed as part of the entry; the entry is closed by the
% final brace on its own line.
@article{ART003074080,
  author  = {Jun, Jin-Young and Min, Youn-A},
  title   = {Missing Value Imputation Technique for Water Quality Dataset},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2024},
  volume  = {29},
  number  = {4},
  pages   = {39--46},
  doi     = {10.9708/jksci.2024.29.04.039},
}
TY - JOUR
AU - Jin-Young Jun
AU - Youn-A Min
TI - Missing Value Imputation Technique for Water Quality Dataset
JO - Journal of The Korea Society of Computer and Information
PY - 2024
VL - 29
IS - 4
PB - The Korean Society Of Computer And Information
SP - 39
EP - 46
SN - 1598-849X
AB - Many researchers make efforts to evaluate water quality using various models. Such models require a dataset without missing values, but in the real world, most datasets include missing values for various reasons. Simple deletion of samples having missing value(s) could distort the distribution of the underlying data and pose a significant risk of biasing the model’s inference when the missing mechanism is not MCAR. In this study, to explore the most appropriate technique for handling missing values in water quality data, several imputation techniques were tested, based on existing KNN and MICE imputation with/without the generative neural network models Autoencoder (AE) and Denoising Autoencoder (DAE). The results show that KNN and MICE combined imputation without generative networks provides the closest estimated values to the true values. When binary classification models based on support vector machine and ensemble algorithms are evaluated after applying the combined imputation technique to the observed water quality dataset with missing values, they show better performance in terms of Accuracy, F1 score, ROC-AUC score and MCC compared to those evaluated after deleting samples having missing values.
KW - Water Quality Data;Missing Value;MCAR;MICE Imputation;Combined Imputation
DO - 10.9708/jksci.2024.29.04.039
ER -
Jin-Young Jun and Youn-A Min. (2024). Missing Value Imputation Technique for Water Quality Dataset. Journal of The Korea Society of Computer and Information, 29(4), 39-46.
Jin-Young Jun and Youn-A Min. 2024, "Missing Value Imputation Technique for Water Quality Dataset", Journal of The Korea Society of Computer and Information, vol.29, no.4 pp.39-46. Available from: doi:10.9708/jksci.2024.29.04.039
Jin-Young Jun, Youn-A Min "Missing Value Imputation Technique for Water Quality Dataset" Journal of The Korea Society of Computer and Information 29.4 pp.39-46 (2024) : 39.
Jin-Young Jun, Youn-A Min. Missing Value Imputation Technique for Water Quality Dataset. Journal of The Korea Society of Computer and Information. 2024; 29(4), 39-46. Available from: doi:10.9708/jksci.2024.29.04.039
Jin-Young Jun and Youn-A Min. "Missing Value Imputation Technique for Water Quality Dataset" Journal of The Korea Society of Computer and Information 29, no.4 (2024) : 39-46. doi: 10.9708/jksci.2024.29.04.039
Jin-Young Jun; Youn-A Min. Missing Value Imputation Technique for Water Quality Dataset. Journal of The Korea Society of Computer and Information, 2024, 29(4), 39-46. doi: 10.9708/jksci.2024.29.04.039
Jin-Young Jun; Youn-A Min. Missing Value Imputation Technique for Water Quality Dataset. Journal of The Korea Society of Computer and Information. 2024; 29(4) 39-46. doi: 10.9708/jksci.2024.29.04.039
Jin-Young Jun, Youn-A Min. Missing Value Imputation Technique for Water Quality Dataset. Journal of The Korea Society of Computer and Information. 2024; 29(4), 39-46. Available from: doi:10.9708/jksci.2024.29.04.039
Jin-Young Jun and Youn-A Min. "Missing Value Imputation Technique for Water Quality Dataset" Journal of The Korea Society of Computer and Information 29, no.4 (2024) : 39-46. doi: 10.9708/jksci.2024.29.04.039