% Journal article record; the citation key is the exporting database's article ID.
@article{ART002700669,
  author  = {Lee, Chang-Soo},
  title   = {Machine Learning Classification of Literary Translation Samples by Human and Machine Translators},
  journal = {The Journal of Translation Studies},
  issn    = {1229-795X},
  year    = {2021},
  volume  = {22},
  number  = {1},
  pages   = {199--217},
  doi     = {10.15749/jts.2021.22.1.008},
}
TY - JOUR
AU - Chang-Soo Lee
TI - Machine Learning Classification of Literary Translation Samples by Human and Machine Translators
JO - The Journal of Translation Studies
PY - 2021
VL - 22
IS - 1
PB - The Korean Association for Translation Studies
SP - 199
EP - 217
SN - 1229-795X
AB - The current paper reports the results of a text classification experiment on literary translation samples by human and machine translators. The original data consists of the English translations of 28 short and long Korean novels by a set of human translators and 3 Web-based neural machine translators – Google Translate (Google), Bing (Microsoft), and Papago (Naver). Machine translation samples were collected twice, in February 2019 and February 2020. One hundred most frequent words were extracted from the data and subjected to supervised classification by two machine learning algorithms – random forest (RF) and linear discriminant analysis (LDA) – for cross-reference tests. The most important findings are as follows. First, both RF and LDA classified human and machine translation samples from both 2019 and 2020 with high accuracy, with prediction accuracy rates topping 90 percent. This indicated a clear distinction in word use patterns between human and machine translators, which did not change much over the 1-year period. Second, in both RF and LDA tests, most of the 2019 machine translation samples were accurately classified according to their translators with prediction accuracy rates ranging between 78 and 100 percent. Classification accuracy, however, fell visibly for Bing and Papago in 2020, with Papago plunging from 100 and 80 percent to 41 percent. This meant that over the 1-year period the three machine translators moved in closer toward each other, suggesting a trend toward homogeneity in word use patterns over time.
KW - machine translation;literary translation;machine learning text classification;random forest;linear discriminant analysis
DO - 10.15749/jts.2021.22.1.008
ER -
Chang-Soo Lee. (2021). Machine Learning Classification of Literary Translation Samples by Human and Machine Translators. The Journal of Translation Studies, 22(1), 199-217.
Chang-Soo Lee. 2021, "Machine Learning Classification of Literary Translation Samples by Human and Machine Translators", The Journal of Translation Studies, vol.22, no.1 pp.199-217. Available from: doi:10.15749/jts.2021.22.1.008
Chang-Soo Lee "Machine Learning Classification of Literary Translation Samples by Human and Machine Translators" The Journal of Translation Studies 22.1 pp.199-217 (2021) : 199.
Chang-Soo Lee. Machine Learning Classification of Literary Translation Samples by Human and Machine Translators. The Journal of Translation Studies. 2021; 22(1), 199-217. Available from: doi:10.15749/jts.2021.22.1.008
Chang-Soo Lee. "Machine Learning Classification of Literary Translation Samples by Human and Machine Translators" The Journal of Translation Studies 22, no.1 (2021) : 199-217. doi: 10.15749/jts.2021.22.1.008
Chang-Soo Lee. Machine Learning Classification of Literary Translation Samples by Human and Machine Translators. The Journal of Translation Studies, 22(1), 199-217. doi: 10.15749/jts.2021.22.1.008
Chang-Soo Lee. Machine Learning Classification of Literary Translation Samples by Human and Machine Translators. The Journal of Translation Studies. 2021; 22(1) 199-217. doi: 10.15749/jts.2021.22.1.008
Chang-Soo Lee. Machine Learning Classification of Literary Translation Samples by Human and Machine Translators. The Journal of Translation Studies. 2021; 22(1), 199-217. Available from: doi:10.15749/jts.2021.22.1.008
Chang-Soo Lee. "Machine Learning Classification of Literary Translation Samples by Human and Machine Translators" The Journal of Translation Studies 22, no.1 (2021) : 199-217. doi: 10.15749/jts.2021.22.1.008