% Stratos and Collins (2015), "Simple semi-supervised POS tagging":
% workshop paper, pages 79--87, DOI 10.3115/v1/w15-1511.
% The full event name is stored once, in booktitle; the exporter had also
% copied it into a series field, which standard styles would print a second
% time. LaTeX specials in the abstract are escaped so the field stays safe
% if a style chooses to typeset it.
@inproceedings{961742726e684079a2ac2ab8039ca3b9,
  author    = {Stratos, Karl and Collins, Michael},
  title     = {Simple semi-supervised {POS} tagging},
  booktitle = {1st Workshop on Vector Space Modeling for Natural Language Processing, VS 2015 at the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2015},
  editor    = {Blunsom, Phil and Cohen, Shay and Dhillon, Paramveer and Liang, Percy},
  pages     = {79--87},
  publisher = {Association for Computational Linguistics (ACL)},
  year      = {2015},
  doi       = {10.3115/v1/w15-1511},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2015 The North American Chapter of the Association for Computational Linguistics.; 1st Workshop on Vector Space Modeling for Natural Language Processing, VS 2015 at the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2015 ; Conference date: 05-06-2015},
  abstract  = {We tackle the question: how much supervision is needed to achieve state-of-the-art performance in part-of-speech (POS) tagging, if we leverage lexical representations given by the model of Brown et al. (1992)? It has become a standard practice to use automatically induced ``Brown clusters'' in place of POS tags. We claim that the underlying sequence model for these clusters is particularly well-suited for capturing POS tags. We empirically demonstrate this claim by drastically reducing supervision in POS tagging with these representations. Using either the bit-string form given by the algorithm of Brown et al. (1992) or the (less well-known) embedding form given by the canonical correlation analysis algorithm of Stratos et al. (2014), we can obtain 93\% tagging accuracy with just 400 labeled words and achieve state-of-the-art accuracy ($>$ 97\%) with less than 1 percent of the original training data.},
}