License: Creative Commons Attribution 3.0 Unported license (CC BY 3.0)
When quoting this document, please refer to the following
DOI: 10.4230/OASIcs.SLATE.2013.249
URN: urn:nbn:de:0030-drops-40441
URL: http://dagstuhl.sunsite.rwth-aachen.de/volltexte/2013/4044/
Orosz, György ;
Laki, László János ;
Novák, Attila ;
Siklósi, Borbála
Combining Language Independent Part-of-Speech Tagging Tools
Abstract
Part-of-speech tagging is a fundamental task of natural language processing. For languages with a very rich agglutinating morphology, generic PoS tagging algorithms do not yield very high accuracy due to data sparseness issues. Though integrating a morphological analyzer can efficiently solve this problem, this is a resource-intensive solution. In this paper we show a method of combining language independent statistical solutions -- including a statistical machine translation tool -- of PoS-tagging to effectively boost tagging accuracy. Our experiments show that, using the same training set, our combination of language independent tools yields an accuracy that approaches that of a language dependent system with an integrated morphological analyzer.
BibTeX - Entry
@comment{Conference paper in the OASIcs series, volume 29. Names are stored in Last, First form and the title is single-braced so the bibliography style controls casing.}
@inproceedings{orosz_et_al:OASIcs:2013:4044,
  author    = {Orosz, Gy{\"o}rgy and Laki, L{\'a}szl{\'o} J{\'a}nos and Nov{\'a}k, Attila and Sikl{\'o}si, Borb{\'a}la},
  title     = {Combining Language Independent Part-of-Speech Tagging Tools},
  booktitle = {2nd Symposium on Languages, Applications and Technologies},
  editor    = {Leal, Jos{\'e} Paulo and Rocha, Ricardo and Sim{\~o}es, Alberto},
  series    = {OpenAccess Series in Informatics (OASIcs)},
  volume    = {29},
  pages     = {249--257},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2013},
  isbn      = {978-3-939897-52-1},
  issn      = {2190-6807},
  doi       = {10.4230/OASIcs.SLATE.2013.249},
  url       = {http://drops.dagstuhl.de/opus/volltexte/2013/4044},
  urn       = {urn:nbn:de:0030-drops-40441},
  annote    = {Keywords: part-of-speech tagging, combination, agglutinative languages, machine learning, machine translation},
}
Keywords: part-of-speech tagging, combination, agglutinative languages, machine learning, machine translation
Collection: 2nd Symposium on Languages, Applications and Technologies
Issue Date: 2013
Date of publication: 05.06.2013