% Journal article record, citation key ART003125826.
% Author names use the unambiguous "Last, First" form; the proper noun in the
% title is brace-protected so sentence-casing styles keep its capital letter.
@article{ART003125826,
  author  = {Noh, Kang San and Kim, Soo Yeon and Choi, Hye-Won and Jang, Hayeun and Song, Sanghoun},
  title   = {Fine-tuned {Korean} Language Models for Sociolinguistic Studies},
  journal = {The Sociolinguistic Journal of Korea},
  issn    = {1226-4822},
  year    = {2024},
  volume  = {32},
  number  = {3},
  pages   = {41--64},
}
TY  - JOUR
AU  - Kang San Noh
AU  - Kim, Soo Yeon
AU  - Hye-Won Choi
AU  - JANG, HAYEUN
AU  - Sanghoun Song
TI  - Fine-tuned Korean Language Models for Sociolinguistic Studies
JO  - The Sociolinguistic Journal of Korea
PY  - 2024
VL  - 32
IS  - 3
PB  - The Sociolinguistic Society Of Korea
SP  - 41
EP  - 64
SN  - 1226-4822
AB  - This paper aims to test deep-learning-based Korean language models’ capacity to learn and detect social registers embedded in speech data, specifically age, gender, and regional dialects. A comprehensive understanding of linguistic phenomena requires contextualizing speech based on speakers’ age, gender, and geographic background, along with the processing of syntactic structures. To bridge the gap between human language understanding and model processing, we fine-tuned three representative Korean language models—KR-BERT, KoELECTRA-base, and KLUE-RoBERTa-base—using transcribed data from 4,000 hours of speech by middle-aged and elderly Korean speakers. The findings reveal that KoELECTRA-base outperformed the other two models across all social registers, which is likely attributed to its larger vocabulary and parameters size. Among the dialects, the Jeju dialect showed the highest accuracy in inference, which is attributed to its distinctiveness, making it easier for the models to detect. In addition to the fine-tuning process, we have made our fine-tuned models publicly available to support researchers interested in Korean computational sociolinguistics.
KW  - age
KW  - dialect
KW  - gender
KW  - Korean language model
KW  - social register
DO  -
UR  -
ER  -
Kang San Noh, Kim, Soo Yeon, Hye-Won Choi, JANG, HAYEUN and Sanghoun Song. (2024). Fine-tuned Korean Language Models for Sociolinguistic Studies. The Sociolinguistic Journal of Korea, 32(3), 41-64.
Kang San Noh, Kim, Soo Yeon, Hye-Won Choi, JANG, HAYEUN and Sanghoun Song. 2024, "Fine-tuned Korean Language Models for Sociolinguistic Studies", The Sociolinguistic Journal of Korea, vol.32, no.3, pp.41-64.
Kang San Noh, Kim, Soo Yeon, Hye-Won Choi, JANG, HAYEUN, Sanghoun Song "Fine-tuned Korean Language Models for Sociolinguistic Studies" The Sociolinguistic Journal of Korea 32.3 pp.41-64 (2024) : 41.
Kang San Noh, Kim, Soo Yeon, Hye-Won Choi, JANG, HAYEUN, Sanghoun Song. Fine-tuned Korean Language Models for Sociolinguistic Studies. The Sociolinguistic Journal of Korea. 2024; 32(3), 41-64.
Kang San Noh, Kim, Soo Yeon, Hye-Won Choi, JANG, HAYEUN and Sanghoun Song. "Fine-tuned Korean Language Models for Sociolinguistic Studies" The Sociolinguistic Journal of Korea 32, no.3 (2024) : 41-64.
Kang San Noh; Kim, Soo Yeon; Hye-Won Choi; JANG, HAYEUN; Sanghoun Song. Fine-tuned Korean Language Models for Sociolinguistic Studies. The Sociolinguistic Journal of Korea, 2024, 32(3), 41-64.
Kang San Noh; Kim, Soo Yeon; Hye-Won Choi; JANG, HAYEUN; Sanghoun Song. Fine-tuned Korean Language Models for Sociolinguistic Studies. The Sociolinguistic Journal of Korea. 2024; 32(3) 41-64.
Kang San Noh, Kim, Soo Yeon, Hye-Won Choi, JANG, HAYEUN, Sanghoun Song. Fine-tuned Korean Language Models for Sociolinguistic Studies. The Sociolinguistic Journal of Korea. 2024; 32(3), 41-64.
Kang San Noh, Kim, Soo Yeon, Hye-Won Choi, JANG, HAYEUN and Sanghoun Song. "Fine-tuned Korean Language Models for Sociolinguistic Studies" The Sociolinguistic Journal of Korea 32, no.3 (2024) : 41-64.