% Kang (2018): reference string recognition via word sequence tagging and post-processing.
@article{ART002349439,
  author  = {Kang, In-Su},
  title   = {Reference String Recognition based on Word Sequence Tagging and Post-processing: Evaluation with {English} and {German} Datasets},
  journal = {Journal of The Korea Society of Computer and Information},
  issn    = {1598-849X},
  year    = {2018},
  volume  = {23},
  number  = {5},
  pages   = {1--7},
  doi     = {10.9708/jksci.2018.23.05.001},
}
TY - JOUR
AU - In-Su Kang
TI - Reference String Recognition based on Word Sequence Tagging and Post-processing: Evaluation with English and German Datasets
JO - Journal of The Korea Society of Computer and Information
PY - 2018
VL - 23
IS - 5
PB - The Korean Society Of Computer And Information
SP - 1
EP - 7
SN - 1598-849X
AB - Reference string recognition is to extract individual reference strings from a reference section of an academic article, which consists of a sequence of reference lines. This task has been attacked by heuristic-based, clustering-based, classification-based approaches, exploiting lexical and layout characteristics of reference lines. Most classification-based methods have used sequence labeling to assign labels to either a sequence of tokens within reference lines, or a sequence of reference lines.
Unlike the previous token-level sequence labeling approach, this study attempts to assign different labels to the beginning, intermediate and terminating tokens of a reference string. After that, post-processing is applied to identify reference strings by predicting their beginning and/or terminating tokens. Experimental evaluation using English and German reference string recognition datasets shows that the proposed method obtains above 94% in the macro-averaged F1.
KW - Reference String Recognition
KW - Sequence Labeling
KW - Citation
DO - 10.9708/jksci.2018.23.05.001
ER -
In-Su Kang. (2018). Reference String Recognition based on Word Sequence Tagging and Post-processing: Evaluation with English and German Datasets. Journal of The Korea Society of Computer and Information, 23(5), 1-7.
In-Su Kang. 2018, "Reference String Recognition based on Word Sequence Tagging and Post-processing: Evaluation with English and German Datasets", Journal of The Korea Society of Computer and Information, vol.23, no.5, pp.1-7. Available from: doi:10.9708/jksci.2018.23.05.001
In-Su Kang "Reference String Recognition based on Word Sequence Tagging and Post-processing: Evaluation with English and German Datasets" Journal of The Korea Society of Computer and Information 23.5 pp.1-7 (2018) : 1.
In-Su Kang. Reference String Recognition based on Word Sequence Tagging and Post-processing: Evaluation with English and German Datasets. Journal of The Korea Society of Computer and Information. 2018; 23(5), 1-7. Available from: doi:10.9708/jksci.2018.23.05.001
In-Su Kang. "Reference String Recognition based on Word Sequence Tagging and Post-processing: Evaluation with English and German Datasets" Journal of The Korea Society of Computer and Information 23, no.5 (2018) : 1-7. doi: 10.9708/jksci.2018.23.05.001
In-Su Kang. Reference String Recognition based on Word Sequence Tagging and Post-processing: Evaluation with English and German Datasets. Journal of The Korea Society of Computer and Information, 23(5), 1-7. doi: 10.9708/jksci.2018.23.05.001
In-Su Kang. Reference String Recognition based on Word Sequence Tagging and Post-processing: Evaluation with English and German Datasets. Journal of The Korea Society of Computer and Information. 2018; 23(5) 1-7. doi: 10.9708/jksci.2018.23.05.001
In-Su Kang. Reference String Recognition based on Word Sequence Tagging and Post-processing: Evaluation with English and German Datasets. Journal of The Korea Society of Computer and Information. 2018; 23(5), 1-7. Available from: doi:10.9708/jksci.2018.23.05.001
In-Su Kang. "Reference String Recognition based on Word Sequence Tagging and Post-processing: Evaluation with English and German Datasets" Journal of The Korea Society of Computer and Information 23, no.5 (2018) : 1-7. doi: 10.9708/jksci.2018.23.05.001