speer-conceptnet.bib

# Guidelines for putting things in this file:
#
# - Google Scholar can usually give you the BibTeX data for any paper.
#   It usually provides a reasonable-looking ID of the form
#   [author name][year][keyword], so let's follow this format as much as
#   possible. The keyword it picks is the first word of the title; you
#   can change it to something more memorable.
#
# - Add a 'url' entry indicating where you can read the paper online.
#
# - Add a 'comment' entry indicating why the paper is important and what key
#   ideas we should cite it for.
#
# - Try to keep the papers organized into sections, so we know where to look
#   when writing about particular topics.
#
# - Try to keep each section in chronological order, at least by year.

# Knowledge sources for ConceptNet
# --------------------------------

@book{miller1998wordnet,
  author    = {Miller, George and Fellbaum, Christiane and Tengi, Randee and Wakefield, P and Langone, H and Haskell, BR},
  title     = {{WordNet}},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1998}
}

@book{lenat1989cyc,
  author    = {Douglas B Lenat and Ramanathan V Guha},
  title     = {Building large knowledge-based systems: representation and inference in the {C}yc project},
  publisher = {Addison-Wesley Longman},
  year      = {1989}
}

@inproceedings{singh2002omcs,
  author       = {Push Singh},
  title        = {The public acquisition of commonsense knowledge},
  booktitle    = {Proceedings of AAAI Spring Symposium: Acquiring (and Using) Linguistic (and World) Knowledge for Information Access},
  organization = {AAAI},
  year         = {2002},
  comment      = {The original publication of OMCS}
}

@incollection{singh2002omcs2,
  author    = {Singh, Push and Lin, Thomas and Mueller, Erik T and Lim, Grace and Perkins, Travell and Zhu, Wan Li},
  title     = {{O}pen {M}ind {C}ommon {S}ense: Knowledge acquisition from the general public},
  booktitle = {On the move to meaningful internet systems 2002: CoopIS, DOA, and ODBASE},
  pages     = {1223--1237},
  publisher = {Springer},
  year      = {2002},
  comment   = {The second OMCS paper}
}

@inproceedings{breen2004jmdict,
  author       = {Breen, James},
  title        = {{JM}{D}ict: a {J}apanese-multilingual dictionary},
  booktitle    = {Proceedings of the Workshop on Multilingual Linguistic Resources},
  pages        = {71--79},
  organization = {Association for Computational Linguistics},
  year         = {2004}
}

@incollection{anacleto2006portuguese,
  author    = {Anacleto, Junia and Lieberman, Henry and Tsutsumi, Marie and Neris, V{\^a}nia and Carvalho, Aparecido and Espinosa, Jose and Godoi, Muriel and Zem-Mascarenhas, Silvia},
  title     = {Can common sense uncover cultural differences in computer applications?},
  booktitle = {Artificial intelligence in theory and practice},
  pages     = {1--10},
  publisher = {Springer},
  year      = {2006}
}

@phdthesis{chung2006globalmind,
  author = {Chung, Hyemin},
  title  = {{G}lobal{M}ind: bridging the gap between different cultures and languages with common-sense computing},
  school = {Massachusetts Institute of Technology},
  year   = {2006}
}

@inproceedings{matuszek2006cyc,
  author    = {Matuszek, Cynthia and Cabral, John and Witbrock, Michael J and DeOliveira, John},
  title     = {An Introduction to the Syntax and Content of {C}yc},
  booktitle = {AAAI Spring Symposium: Formalizing and Compiling Background Knowledge and Its Applications to Knowledge Representation and Question Answering},
  pages     = {44--49},
  year      = {2006}
}

@inproceedings{vonahn2006verbosity,
  author       = {von Ahn, Luis and Kedia, Mihir and Blum, Manuel},
  title        = {Verbosity: a game for collecting common-sense facts},
  booktitle    = {Proceedings of the {SIGCHI} Conference on Human Factors in Computing Systems},
  pages        = {75--78},
  organization = {ACM},
  year         = {2006}
}

@incollection{auer2007dbpedia,
  author    = {Auer, S{\"o}ren and Bizer, Christian and Kobilarov, Georgi and Lehmann, Jens and Cyganiak, Richard and Ives, Zachary},
  title     = {{DBpedia}: A nucleus for a web of open data},
  booktitle = {The Semantic Web},
  series    = {Lecture Notes in Computer Science},
  volume    = {4825},
  pages     = {722--735},
  publisher = {Springer},
  year      = {2007}
}

@techreport{bergman2008umbel,
  author      = {Bergman, Michael K and Giasson, Fr{\'e}d{\'e}rick},
  title       = {{UMBEL} ontology},
  institution = {Structured Dynamics LLC},
  year        = {2008},
  note        = {Documentation retrieved from \url{http://umbel.org/resources/about/} on 2015-07-28.}
}

@techreport{davis2013unicode,
  author      = {Mark Davis and Ken Whistler},
  title       = {{Unicode} Standard Annex \#15: {Unicode} Normalization Forms},
  institution = {Unicode Consortium},
  year        = {2013},
  note        = {\url{http://www.unicode.org/reports/tr15/tr15-39.html}}
}

@techreport{bcp47,
  author      = {Addison Phillips and Mark Davis},
  title       = {{BCP} 47: Tags for Identifying Languages},
  institution = {Internet Engineering Task Force},
  year        = {2009},
  note        = {\url{https://tools.ietf.org/html/rfc5646}}
}

@inproceedings{demelo2009uwn,
  author       = {De Melo, Gerard and Weikum, Gerhard},
  title        = {Towards a universal wordnet by learning from combined evidence},
  booktitle    = {Proceedings of the 18th ACM conference on Information and knowledge management},
  pages        = {513--522},
  organization = {ACM},
  year         = {2009}
}

@inproceedings{kuo2009petgame,
  author       = {Kuo, Yen-Ling and Lee, Jong-Chuan and Chiang, Kai-Yang and Wang, Rex and Shen, Edward and Chan, Cheng-Wei and Hsu, Jane Yung-Jen},
  title        = {Community-based game design: experiments on social games for commonsense data collection},
  booktitle    = {Proceedings of the ACM SIGKDD Workshop on Human Computation},
  pages        = {15--22},
  organization = {ACM},
  year         = {2009}
}

@article{nakahara2011nadya,
  author    = {Kazuhiro Nakahara and Shigeo Yamada},
  title     = {{D}evelopment and Evaluation of a {W}eb-Based Game for Common-Sense Knowledge Acquisition in {J}apan},
  journal   = {Unisys Technical Report},
  volume    = {30},
  number    = {4},
  pages     = {295--305},
  publisher = {Nihon Unisys},
  year      = {2011}
}

@article{singhal2012googleblog,
  author  = {Singhal, Amit},
  title   = {Introducing the knowledge graph: things, not strings},
  journal = {Official {G}oogle blog},
  year    = {2012},
  url     = {https://googleblog.blogspot.com/2012/05/introducing-knowledge-graph-things-not.html},
  note    = {Retrieved from \url{https://googleblog.blogspot.com/2012/05/introducing-knowledge-graph-things-not.html} on Dec. 1, 2016}
}

@inproceedings{bond2013linking,
  author    = {Bond, Francis and Foster, Ryan},
  title     = {Linking and Extending an {Open Multilingual Wordnet}},
  booktitle = {51st Annual Meeting of the Association for Computational Linguistics: ACL-2013},
  pages     = {1352--1362},
  year      = {2013}
}


@misc{wiktionary2014de,
  author = {Wiktionary},
  title  = {Wiktionary{,} The Free Dictionary --- {G}erman data export},
  year   = {2014},
  url    = {https://dumps.wikimedia.org/dewiktionary/},
  note   = {(A collaborative project with thousands of authors.) Retrieved from \url{https://dumps.wikimedia.org/dewiktionary/} on 2014-08-26}
}

@misc{wiktionary2014en,
  author = {Wiktionary},
  title  = {Wiktionary{,} The Free Dictionary --- {E}nglish data export},
  year   = {2014},
  url    = {https://dumps.wikimedia.org/enwiktionary/},
  note   = {(A collaborative project with thousands of authors.) Retrieved from \url{https://dumps.wikimedia.org/enwiktionary/} on 2014-08-26}
}


# Influences
# ----------

@article{liu2004conceptnet,
  author    = {Hugo Liu and Push Singh},
  title     = {{ConceptNet} -- A Practical Commonsense Reasoning Tool-Kit},
  journal   = {{BT} Technology Journal},
  volume    = {22},
  number    = {4},
  pages     = {211--226},
  month     = oct,
  year      = {2004},
  publisher = {Springer Science+Business Media},
  doi       = {10.1023/b:bttj.0000047600.45421.6d},
  url       = {http://dx.doi.org/10.1023/b:bttj.0000047600.45421.6d}
}


@article{pustejovsky1991generative,
  author    = {Pustejovsky, James},
  title     = {The generative lexicon},
  journal   = {Computational Linguistics},
  volume    = {17},
  number    = {4},
  pages     = {409--441},
  publisher = {MIT Press},
  year      = {1991}
}

@inproceedings{nickel2015holographic,
  author    = {Nickel, Maximilian and Rosasco, Lorenzo and Poggio, Tomaso},
  title     = {Holographic Embeddings of Knowledge Graphs},
  booktitle = {AAAI},
  year      = {2016}
}

@inproceedings{xiao2014distributed,
  author    = {Xiao, Min and Guo, Yuhong},
  title     = {Distributed Word Representation Learning for Cross-Lingual Dependency Parsing},
  booktitle = {CoNLL},
  pages     = {119--129},
  year      = {2014}
}

# Existing implementations of word similarity
# -------------------------------------------

@article{mikolov2013word2vec,
  author = {Tomas Mikolov and Kai Chen and Greg Corrado and Jeffrey Dean},
  title = {Efficient Estimation of Word Representations in Vector Space},
  journal = {CoRR},
  volume = {abs/1301.3781},
  year = {2013},
  url = {http://arxiv.org/abs/1301.3781},
  timestamp = {Thu, 07 May 2015 20:02:01 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1301-3781},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  comment = {
    The first publication of word2vec, with its CBOW and skip-gram models.
  }
}

@incollection{mikolov2013distributed,
  author    = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S and Dean, Jeff},
  title     = {Distributed Representations of Words and Phrases and their Compositionality},
  booktitle = {Advances in Neural Information Processing Systems 26},
  editor    = {C. J. C. Burges and L. Bottou and M. Welling and Z. Ghahramani and K. Q. Weinberger},
  pages     = {3111--3119},
  publisher = {Curran Associates, Inc.},
  year      = {2013},
  url       = {http://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf},
  comment   = {
      The NIPS followup to word2vec.

      Extends word2vec's skip-gram model to phrases, so it can learn that
      "Air Canada" is not just "Air" + "Canada".
  }
}


@inproceedings{zou2013bilingual,
  author    = {Zou, Will Y and Socher, Richard and Cer, Daniel M and Manning, Christopher D},
  title     = {Bilingual Word Embeddings for Phrase-Based Machine Translation},
  booktitle = {EMNLP},
  pages     = {1393--1398},
  year      = {2013}
}

@inproceedings{pennington2014glove,
  author    = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher D},
  title     = {{GloVe}: Global vectors for word representation},
  booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  pages     = {1532--1543},
  year      = {2014},
  url       = {http://www-nlp.stanford.edu/pubs/glove.pdf},
  comment   = {
      GloVe is the best starting point for word embeddings in a vector space, in our view.
  }
}

@inproceedings{faruqui2015retrofitting,
  author = {Faruqui, Manaal and Dodge, Jesse and Jauhar, Sujay K. and Dyer, Chris and Hovy, Eduard and Smith, Noah A.},
  title = {Retrofitting Word Vectors to Semantic Lexicons},
  booktitle = {Proceedings of NAACL},
  year = {2015},
  url = {http://arxiv.org/abs/1411.4166},
  comment = {
    We're using this method to combine ConceptNet with GloVe.
  }
}

@article{levy2015embeddings,
  author  = {Levy, Omer and Goldberg, Yoav and Dagan, Ido},
  title   = {Improving distributional similarity with lessons learned from word embeddings},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {3},
  pages   = {211--225},
  year    = {2015},
  url     = {https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/viewFile/570/124},
  comment = {
    This is a survey of word similarity methods and how they perform with
    different parameter settings, aiming to "compare apples to apples" when
    evaluating the claims of word2vec and GloVe.

    One thing it examines is the effect of L2-normalizing the columns of GloVe.

    It also implements a method based on the SVD of the pointwise mutual
    information matrix, which achieves the best score I've seen on the
    rare words dataset, besides ours (rho=.514).
  }
}

@inproceedings{rothe2015autoextend,
  author    = {Rothe, Sascha and Sch{\"u}tze, Hinrich},
  title     = {{AutoExtend}: Extending Word Embeddings to Embeddings for Synsets and Lexemes},
  booktitle = {Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = jul,
  year      = {2015},
  address   = {Beijing, China},
  publisher = {Association for Computational Linguistics},
  pages     = {1793--1803},
  url       = {http://www.aclweb.org/anthology/P15-1173}
}

@article{speer2016ensemble,
  author        = {Speer, Robert and Chin, Joshua},
  title         = {An Ensemble Method to Produce High-Quality Word Embeddings},
  journal       = {arXiv preprint arXiv:1604.01692},
  year          = {2016},
  eprint        = {1604.01692},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1604.01692},
  comment       = {Our previous paper that was turned down by NAACL.}
}

@article{fasttext,
  author        = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas},
  title         = {Enriching word vectors with subword information},
  journal       = {arXiv preprint arXiv:1607.04606},
  year          = {2016},
  eprint        = {1607.04606},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1607.04606}
}

@article{salle2016lexvec,
  author        = {Salle, Alexandre and Idiart, Marco and Villavicencio, Aline},
  title         = {Enhancing the {LexVec} Distributed Word Representation Model Using Positional Contexts and External Memory},
  journal       = {arXiv preprint arXiv:1606.01283},
  year          = {2016},
  eprint        = {1606.01283},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1606.01283}
}

# Other analogy systems
# ---------------------

@article{turney2006lra,
  author    = {Peter D. Turney},
  title     = {Similarity of semantic relations},
  journal   = {Computational Linguistics},
  volume    = {32},
  number    = {3},
  pages     = {379--416},
  publisher = {MIT Press},
  year      = {2006}
}

@inproceedings{herdagdelen2009bagpack,
  author    = {Herda{\u{g}}delen, Ama{\c{c}} and Baroni, Marco},
  title     = {{BagPack}: A General Framework to Represent Semantic Relations},
  booktitle = {Proceedings of the Workshop on Geometrical Models of Natural Language Semantics},
  series    = {GEMS '09},
  year      = {2009},
  location  = {Athens, Greece},
  pages     = {33--40},
  numpages  = {8},
  url       = {http://dl.acm.org/citation.cfm?id=1705415.1705420},
  acmid     = {1705420},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA}
}


@article{turney2013supersim,
  author  = {Turney, Peter D},
  title   = {Distributional Semantics Beyond Words: Supervised Learning of Analogy and Paraphrase},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {1},
  pages   = {353--366},
  year    = {2013},
  url     = {http://aclweb.org/anthology/Q/Q13/Q13-1029.pdf},
  comment = {
    References Turney's current SAT data set, and provides the best performance on it without
    Web searching.
  }
}

# Ways to extend word similarity
# ------------------------------

@inproceedings{nickel2011rescal,
  author    = {Nickel, Maximilian and Tresp, Volker and Kriegel, Hans-Peter},
  title     = {A three-way model for collective learning on multi-relational data},
  booktitle = {Proceedings of the 28th international conference on machine learning (ICML-11)},
  pages     = {809--816},
  year      = {2011}
}

# NOTE: a second entry with the key "mikolov2013distributed" used to be
# defined here. That key is already defined in the "Existing implementations
# of word similarity" section, and BibTeX reports a repeated key as an error,
# so the duplicate is disabled. Cite the earlier entry instead.
#
# The duplicate's comment is preserved here:
#   Extends word2vec's skip-gram model to phrases, so it can learn that
#   "Air Canada" is not just "Air" + "Canada".

@inproceedings{soricut2015unsupervised,
  author    = {Soricut, Radu and Och, Franz},
  title     = {Unsupervised morphology induction using word embeddings},
  booktitle = {Proceedings of NAACL},
  year      = {2015}
}

@inproceedings{zhao2015learning,
  author    = {Zhao, Kai and Hassan, Hany and Auli, Michael},
  title     = {Learning Translation Models from Monolingual Continuous Representations},
  booktitle = {Proceedings of NAACL},
  year      = {2015},
  comment   = {
    This inspired the way that we merge word2vec with GloVe.
  }
}

@inproceedings{kiros2015skip,
  author    = {Kiros, Ryan and Zhu, Yukun and Salakhutdinov, Ruslan R and Zemel, Richard and Urtasun, Raquel and Torralba, Antonio and Fidler, Sanja},
  title     = {Skip-thought vectors},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {3276--3284},
  year      = {2015}
}

@article{trask2015sense2vec,
  author = {Andrew Trask and Phil Michalak and John Liu},
  title = {sense2vec - {A} Fast and Accurate Method for Word Sense Disambiguation In Neural Word Embeddings},
  journal = {CoRR},
  volume = {abs/1511.06388},
  year = {2015},
  url = {http://arxiv.org/abs/1511.06388},
  timestamp = {Tue, 01 Dec 2015 19:22:34 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/TraskML15},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

# Corpora and resources
# ---------------------
@inproceedings{ganitkevitch2013ppdb,
  author    = {Ganitkevitch, Juri and Van Durme, Benjamin and Callison-Burch, Chris},
  title     = {{PPDB}: The Paraphrase Database},
  booktitle = {HLT-NAACL},
  pages     = {758--764},
  year      = {2013},
  url       = {http://www.aclweb.org/anthology/N13-1#page=796},
  comment   = {
    The external resource that Faruqui used for retrofitting.
  }
}

@article{marcus1993treebank,
  author    = {Marcus, Mitchell P and Marcinkiewicz, Mary Ann and Santorini, Beatrice},
  title     = {Building a large annotated corpus of {English}: The {Penn Treebank}},
  journal   = {Computational Linguistics},
  volume    = {19},
  number    = {2},
  pages     = {313--330},
  publisher = {MIT Press},
  year      = {1993},
  url       = {http://repository.upenn.edu/cgi/viewcontent.cgi?article=1246&context=cis_reports},
  comment   = {The publication of the Penn Treebank corpus}
}

@article{wang2015solving,
  author        = {Wang, Huazheng and Gao, Bin and Bian, Jiang and Tian, Fei and Liu, Tie-Yan},
  title         = {Solving Verbal Comprehension Questions in {IQ} Test by Knowledge-Powered Word Embedding},
  journal       = {arXiv preprint arXiv:1505.07909},
  year          = {2015},
  eprint        = {1505.07909},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1505.07909},
  comment       = {
    Not referred to yet, but provides a source of "IQ test" analogies.
  }
}

@inproceedings{miller2014alignment,
  author    = {Miller, Tristan and Gurevych, Iryna},
  title     = {{WordNet-Wikipedia-Wiktionary}: Construction of a Three-way Alignment},
  booktitle = {LREC},
  pages     = {2094--2100},
  year      = {2014}
}


# Evaluation data
# ---------------

@article{rubenstein1965rg,
  author    = {Rubenstein, Herbert and Goodenough, John B},
  title     = {Contextual correlates of synonymy},
  journal   = {Communications of the ACM},
  volume    = {8},
  number    = {10},
  pages     = {627--633},
  publisher = {ACM},
  year      = {1965},
  comment   = {
    The publication of the RG-65 dataset. Full text not available online,
    except you can usually find a cached PDF somewhere by Googling for it.
  }
}

@article{miller1991mc,
  author    = {Miller, George A and Charles, Walter G},
  title     = {Contextual correlates of semantic similarity},
  journal   = {Language and Cognitive Processes},
  volume    = {6},
  number    = {1},
  pages     = {1--28},
  publisher = {Taylor \& Francis},
  year      = {1991},
  comment   = {The publication of the MC dataset, paper not freely available}
}

@inproceedings{finkelstein2001ws,
  author       = {Finkelstein, Lev and Gabrilovich, Evgeniy and Matias, Yossi and Rivlin, Ehud and Solan, Zach and Wolfman, Gadi and Ruppin, Eytan},
  title        = {Placing search in context: The concept revisited},
  booktitle    = {Proceedings of the 10th international conference on World Wide Web},
  pages        = {406--414},
  organization = {ACM},
  year         = {2001},
  url          = {http://www.iicm.tugraz.at/thesis/cguetl_diss/literatur/Kapitel07/References/Finkelstein_et_al._2002/p116-finkelstein.pdf},
  comment      = {The publication of the WordSim-353 dataset}
}

@inproceedings{huang2012scws,
  author    = {Eric H. Huang and Richard Socher and Christopher D. Manning and Andrew Y. Ng},
  title     = {Improving Word Representations via Global Context and Multiple Word Prototypes},
  booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2012},
  url       = {http://www.socher.org/index.php/Main/ImprovingWordRepresentationsViaGlobalContextAndMultipleWordPrototypes},
  comment   = {
    The publication of the SCWS dataset, which compares words in context, even
    though we may need to ignore the context.
  }
}

@inproceedings{halawi2012mturk,
  author       = {Halawi, Guy and Dror, Gideon and Gabrilovich, Evgeniy and Koren, Yehuda},
  title        = {Large-scale learning of word relatedness with constraints},
  booktitle    = {Proceedings of the 18th ACM SIGKDD international conference on Knowledge discovery and data mining},
  pages        = {1406--1414},
  organization = {ACM},
  year         = {2012}
}

@inproceedings{luong2013rw,
  author    = {Luong, Minh-Thang and Socher, Richard and Manning, Christopher D},
  title     = {Better word representations with recursive neural networks for morphology},
  booktitle = {Proceedings of the Seventeenth Conference on Computational Natural Language Learning (CoNLL-2013)},
  pages     = {104--113},
  publisher = {Association for Computational Linguistics},
  year      = {2013},
  url       = {http://www.aclweb.org/anthology/W13-3512},
  comment   = {The publication of the Stanford Rare Words (RW) dataset.}
}

@article{bruni2014men,
  author  = {Bruni, Elia and Tran, Nam-Khanh and Baroni, Marco},
  title   = {Multimodal Distributional Semantics},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {49},
  pages   = {1--47},
  year    = {2014},
  url     = {http://clic.cimec.unitn.it/~elia.bruni/publications/bruni2014multimodal.pdf},
  comment = {The publication of the MEN-3000 dataset}
}

@inproceedings{hassan2009crosslingual,
  author    = {Samer Hassan and Rada Mihalcea},
  title     = {Cross-lingual Semantic Relatedness Using Encyclopedic Knowledge},
  booktitle = {Proceedings of the conference on Empirical Methods in Natural Language Processing},
  address   = {Singapore},
  year      = {2009},
  url       = {https://6bc8a60a-a-62cb3a1a-s-sites.googlegroups.com/site/samerhassan/documents/Hassan09a.pdf},
  comment   = {Translations of WS-353 and MC-30 into Spanish, Arabic, and Romanian.}
}

@incollection{gurevych2005german,
  author    = {Gurevych, Iryna},
  title     = {Using the structure of a conceptual network in computing semantic relatedness},
  booktitle = {Natural Language Processing--IJCNLP 2005},
  pages     = {767--778},
  publisher = {Springer},
  year      = {2005},
  comment   = {The Gur65 dataset, a translation of RG-65 into German.}
}

@incollection{joubarne2011french,
  author    = {Joubarne, Colette and Inkpen, Diana},
  title     = {Comparison of semantic similarity for different languages using the {G}oogle {N}-gram corpus and second-order co-occurrence measures},
  booktitle = {Advances in Artificial Intelligence},
  pages     = {216--221},
  publisher = {Springer},
  year      = {2011},
  comment   = {Provides a translation of RG-65 into French.}
}

@inproceedings{mostafazadeh2016cloze,
  author    = {Mostafazadeh, Nasrin and Chambers, Nathanael and He, Xiaodong and Parikh, Devi and Batra, Dhruv and Vanderwende, Lucy and Kohli, Pushmeet and Allen, James},
  title     = {A Corpus and Cloze Evaluation for Deeper Understanding of Commonsense Stories},
  booktitle = {Proceedings of NAACL: Human Language Technologies},
  month     = jun,
  year      = {2016},
  address   = {San Diego, California},
  publisher = {Association for Computational Linguistics},
  pages     = {839--849},
  url       = {http://www.aclweb.org/anthology/N16-1098}
}

# Background on how we got here
# -----------------------------

@inproceedings{speer2008analogyspace,
  author    = {Speer, Robert and Havasi, Catherine and Lieberman, Henry},
  title     = {{AnalogySpace}: Reducing the Dimensionality of Common Sense Knowledge},
  booktitle = {AAAI},
  pages     = {548--553},
  year      = {2008}
}

@article{havasi2009digital,
  author    = {Havasi, Catherine and Speer, Robert and Pustejovsky, James and Lieberman, Henry},
  title     = {Digital intuition: Applying common sense using dimensionality reduction},
  journal   = {{IEEE} Intelligent Systems},
  volume    = {24},
  number    = {4},
  pages     = {24--35},
  publisher = {IEEE},
  year      = {2009},
  url       = {http://dspace.mit.edu/openaccess-disseminate/1721.1/51870},
  comment   = {
    This is the paper underlying what eventually became Luminoso, though our
    methods have changed significantly.

    The paper describes the "blending" operation in particular, which is how
    we combine ConceptNet with domain-specific co-occurrences.

    Don't go looking in this paper for any kind of evaluation that can be
    compared to anything else; the only task we evaluated was to infer
    ConceptNet-like assertions.
  }
}

@inproceedings{havasi2010coarse,
  author    = {Havasi, Catherine and Speer, Robert and Pustejovsky, James},
  title     = {Coarse Word-Sense Disambiguation Using Common Sense},
  booktitle = {AAAI Fall Symposium: Commonsense Knowledge},
  year      = {2010}
}

@inproceedings{havasi2010color,
  author    = {Havasi, Catherine and Speer, Robert and Holmgren, Justin},
  title     = {Automated Color Selection Using Semantic Knowledge},
  booktitle = {AAAI Fall Symposium: Commonsense Knowledge},
  year      = {2010}
}

@inproceedings{speer2012conceptnet,
  author    = {Speer, Robert and Havasi, Catherine},
  title     = {Representing General Relational Knowledge in {C}oncept{N}et 5},
  booktitle = {LREC},
  pages     = {3679--3686},
  year      = {2012},
  url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/1072_Paper.pdf},
  comment   = {The most recent open access publication of ConceptNet.}
}

@incollection{speer2013conceptnet,
  author    = {Speer, Robert and Havasi, Catherine},
  title     = {{ConceptNet} 5: A large semantic network for relational knowledge},
  booktitle = {The People's Web Meets {NLP}},
  pages     = {161--176},
  publisher = {Springer},
  year      = {2013}
}

# General background
# ------------------

@book{zipf1949human,
  author    = {Zipf, George Kingsley},
  title     = {Human behavior and the principle of least effort: an introduction to human ecology},
  publisher = {Addison-Wesley Press},
  year      = {1949},
  lccn      = {49007787},
  url       = {https://books.google.com/books?id=1tx9AAAAIAAJ},
  comment   = {
    The book that gave us Zipf's law. Apparently considered a bad work
    of anthropology but an excellent work of corpus linguistics.
  }
}

@article{deerwester1990indexing,
  author  = {Deerwester, Scott C. and Dumais, Susan T. and Landauer, Thomas K. and Furnas, George W. and Harshman, Richard A.},
  title   = {Indexing by latent semantic analysis},
  journal = {Journal of the American Society for Information Science},
  volume  = {41},
  number  = {6},
  pages   = {391--407},
  year    = {1990},
  comment = {
    The original, unreliable way to get word and document embeddings.
  }
}

@article{hinton2006deep,
  author    = {Hinton, Geoffrey E and Osindero, Simon and Teh, Yee-Whye},
  title     = {A fast learning algorithm for deep belief nets},
  journal   = {Neural Computation},
  volume    = {18},
  number    = {7},
  pages     = {1527--1554},
  publisher = {MIT Press},
  year      = {2006},
  comment   = {
    The original publication that hints at the term "deep learning".
  }
}

@inproceedings{agirre2009similarity,
  author       = {Agirre, Eneko and Alfonseca, Enrique and Hall, Keith and Kravalova, Jana and Pa{\c{s}}ca, Marius and Soroa, Aitor},
  title        = {A study on similarity and relatedness using distributional and {W}ord{N}et-based approaches},
  booktitle    = {Proceedings of NAACL: Human Language Technologies},
  pages        = {19--27},
  organization = {Association for Computational Linguistics},
  year         = {2009},
  url          = {http://dl.acm.org/citation.cfm?id=1620758},
  comment      = {
    Distinguishes similarity and relatedness in wordsim-353. Also recognizes
    the difference between distributional similarity and lexical resources.
  }
}

@inproceedings{linzen2016issues,
  author       = {Tal Linzen},
  title        = {Issues in evaluating semantic spaces using word analogies},
  booktitle    = {Proceedings of the 1st Workshop on Evaluating Vector Space Representations for NLP},
  pages        = {13--18},
  organization = {Association for Computational Linguistics},
  year         = {2016},
  url          = {http://aclweb.org/anthology/W/W16/W16-2503.pdf},
  comment      = {
    "Destroys" the Google analogy evaluation.
  }
}

@misc{speer2016wordfreq,
  author = {Robert Speer and Joshua Chin and Andrew Lin and Lance Nathan and Sara Jewett},
  title = {wordfreq: v1.5.1},
  month = sep,
  year = 2016,
  note = {DOI 10.5281/zenodo.61937},
  doi = {10.5281/zenodo.61937},
  url = {https://doi.org/10.5281/zenodo.61937}
}


# Miscellaneous tools
# -------------------
@article{fisher1915frequency,
  author    = {Fisher, Ronald A},
  title     = {Frequency distribution of the values of the correlation coefficient in samples from an indefinitely large population},
  journal   = {Biometrika},
  volume    = {10},
  number    = {4},
  pages     = {507--521},
  publisher = {JSTOR},
  year      = {1915}
}

@article{micallef2014euler,
    author = {Micallef, Luana and Rodgers, Peter},
    title = {{eulerAPE}: Drawing Area-Proportional 3-Venn Diagrams Using Ellipses},
    journal = {PLoS ONE},
    publisher = {Public Library of Science},
    volume = {9},
    number = {7},
    pages = {e101717},
    month = jul,
    year = {2014},
    doi = {10.1371/journal.pone.0101717},
    url = {https://doi.org/10.1371/journal.pone.0101717},
    comment = {Used to illustrate the overlap of vocabularies.}
}

@article{bonett2000sample,
  author    = {Bonett, Douglas G and Wright, Thomas A},
  title     = {Sample size requirements for estimating {P}earson, {K}endall and {S}pearman correlations},
  journal   = {Psychometrika},
  volume    = {65},
  number    = {1},
  pages     = {23--28},
  publisher = {Springer},
  year      = {2000}
}

@article{koster2012snakemake,
  author    = {K{\"o}ster, Johannes and Rahmann, Sven},
  title     = {{Snakemake}---a scalable bioinformatics workflow engine},
  journal   = {Bioinformatics},
  volume    = {28},
  number    = {19},
  pages     = {2520--2522},
  publisher = {Oxford Univ Press},
  year      = {2012}
}