@comment{Journal article: Kim and Moon (2024), Journal of Internet of Things and Convergence 10(5). Braced title words keep their capitalisation under sentence-casing styles.}
@article{ART003132786,
  author  = {Kim, Huiseong and Moon, Jihoon},
  title   = {Performance Comparison and {SHAP} Interpretation of Movie Box Office Prediction Models Based on {CatBoost} and {PyCaret}},
  journal = {Journal of Internet of Things and Convergence},
  issn    = {2466-0078},
  year    = {2024},
  volume  = {10},
  number  = {5},
  pages   = {213--226},
}
TY  - JOUR
AU  - Kim, Huiseong
AU  - Moon, Jihoon
TI  - Performance Comparison and SHAP Interpretation of Movie Box Office Prediction Models Based on CatBoost and PyCaret
JO  - Journal of Internet of Things and Convergence
PY  - 2024
VL  - 10
IS  - 5
PB  - The Korea Internet of Things Society
SP  - 213
EP  - 226
SN  - 2466-0078
AB  - This study uses box office data collected by the Korean Film Council (KOFIC) to develop and compare predictive models for cinema attendance and revenue. Data preprocessing removed irrelevant variables and handled missing values separately for categorical and numerical data to ensure consistency. Exploratory data analysis identified key variables, including Seoul audience size, revenue, total number of screens, film genre, rating, and month of release, which revealed a strong correlation between Seoul audience size and revenue with box office performance. Based on this analysis, predictive models were developed using CatBoost and PyCaret AutoML. CatBoost was chosen for its effectiveness in handling categorical variables such as director name, production company, and genre, while PyCaret AutoML was chosen for its ability to automate the modeling process, making it easy for non-experts to compare different models. The performance of the models was evaluated using mean absolute error (MAE), root mean squared error (RMSE), and R-squared (R²), with CatBoost demonstrating superior accuracy. In addition, the SHAP technique was used to interpret the models, identifying Seoul's audience size and revenue as the most significant predictors. This research presents reliable box office prediction models that will improve decision-making in the film industry and support the development of data-driven strategies.
KW  - Box Office Prediction
KW  - Exploratory Data Analysis
KW  - Machine Learning
KW  - Categorical Boosting
KW  - Automated Machine Learning
KW  - SHapley Additive exPlanations
DO  - 
UR  - 
ER  - 
Huiseong Kim and Jihoon Moon. (2024). Performance Comparison and SHAP Interpretation of Movie Box Office Prediction Models Based on CatBoost and PyCaret. Journal of Internet of Things and Convergence, 10(5), 213-226.
Huiseong Kim and Jihoon Moon. 2024, "Performance Comparison and SHAP Interpretation of Movie Box Office Prediction Models Based on CatBoost and PyCaret", Journal of Internet of Things and Convergence, vol.10, no.5, pp.213-226.
Huiseong Kim, Jihoon Moon. "Performance Comparison and SHAP Interpretation of Movie Box Office Prediction Models Based on CatBoost and PyCaret." Journal of Internet of Things and Convergence 10.5 (2024): 213-226.
Huiseong Kim, Jihoon Moon. Performance Comparison and SHAP Interpretation of Movie Box Office Prediction Models Based on CatBoost and PyCaret. Journal of Internet of Things and Convergence. 2024; 10(5), 213-226.
Huiseong Kim and Jihoon Moon. "Performance Comparison and SHAP Interpretation of Movie Box Office Prediction Models Based on CatBoost and PyCaret" Journal of Internet of Things and Convergence 10, no.5 (2024) : 213-226.
Huiseong Kim; Jihoon Moon. Performance Comparison and SHAP Interpretation of Movie Box Office Prediction Models Based on CatBoost and PyCaret. Journal of Internet of Things and Convergence, 2024, 10(5), 213-226.
Huiseong Kim; Jihoon Moon. Performance Comparison and SHAP Interpretation of Movie Box Office Prediction Models Based on CatBoost and PyCaret. Journal of Internet of Things and Convergence. 2024; 10(5) 213-226.
Huiseong Kim, Jihoon Moon. Performance Comparison and SHAP Interpretation of Movie Box Office Prediction Models Based on CatBoost and PyCaret. Journal of Internet of Things and Convergence. 2024; 10(5), 213-226.
Huiseong Kim and Jihoon Moon. "Performance Comparison and SHAP Interpretation of Movie Box Office Prediction Models Based on CatBoost and PyCaret" Journal of Internet of Things and Convergence 10, no.5 (2024) : 213-226.