# speer-conceptnet.bib
# Guidelines for putting things in this file:
#
# - Google Scholar can usually give you the BibTeX data for any paper.
# It usually provides a reasonable-looking ID of the form
# [author name][year][keyword], so let's follow this format as much as
# possible. The keyword it picks is the first word of the title; you
# can change it to something more memorable.
#
# - Add a 'url' entry indicating where you can read the paper online.
#
# - Add a 'comment' entry indicating why the paper is important and what key
# ideas we should cite it for.
#
# - Try to keep the papers organized into sections, so we know where to look
# when writing about particular topics.
#
# - Try to keep each section in chronological order, at least by year.
# Knowledge sources for ConceptNet
# --------------------------------
@book{miller1998wordnet,
  author    = {Miller, George and Fellbaum, Christiane and Tengi, Randee and Wakefield, P. and Langone, H. and Haskell, B. R.},
  title     = {{WordNet}},
  year      = {1998},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
}
@book{lenat1989cyc,
  author    = {Douglas B Lenat and Ramanathan V Guha},
  title     = {Building large knowledge-based systems: representation and inference in the {C}yc project},
  publisher = {Addison-Wesley Longman},
  year      = {1989},
}
@inproceedings{singh2002omcs,
  author       = {Push Singh},
  title        = {The public acquisition of commonsense knowledge},
  booktitle    = {Proceedings of AAAI Spring Symposium: Acquiring (and Using) Linguistic (and World) Knowledge for Information Access},
  organization = {AAAI},
  year         = {2002},
  comment      = {The original publication of OMCS},
}
@incollection{singh2002omcs2,
  author    = {Singh, Push and Lin, Thomas and Mueller, Erik T and Lim, Grace and Perkins, Travell and Zhu, Wan Li},
  title     = {{O}pen {M}ind {C}ommon {S}ense: Knowledge acquisition from the general public},
  booktitle = {On the move to meaningful internet systems 2002: CoopIS, DOA, and ODBASE},
  pages     = {1223--1237},
  publisher = {Springer},
  year      = {2002},
  comment   = {The second OMCS paper},
}
@inproceedings{breen2004jmdict,
  author       = {Breen, James},
  title        = {{JM}{D}ict: a {J}apanese-multilingual dictionary},
  booktitle    = {Proceedings of the Workshop on Multilingual Linguistic Resources},
  pages        = {71--79},
  organization = {Association for Computational Linguistics},
  year         = {2004},
}
@incollection{anacleto2006portuguese,
  author    = {Anacleto, Junia and Lieberman, Henry and Tsutsumi, Marie and Neris, V{\^a}nia and Carvalho, Aparecido and Espinosa, Jose and Godoi, Muriel and Zem-Mascarenhas, Silvia},
  title     = {Can common sense uncover cultural differences in computer applications?},
  booktitle = {Artificial intelligence in theory and practice},
  pages     = {1--10},
  publisher = {Springer},
  year      = {2006},
}
@phdthesis{chung2006globalmind,
  author = {Chung, Hyemin},
  title  = {{G}lobal{M}ind: bridging the gap between different cultures and languages with common-sense computing},
  school = {Massachusetts Institute of Technology},
  year   = {2006},
}
@inproceedings{matuszek2006cyc,
  author    = {Matuszek, Cynthia and Cabral, John and Witbrock, Michael J and DeOliveira, John},
  title     = {An Introduction to the Syntax and Content of {C}yc},
  booktitle = {AAAI Spring Symposium: Formalizing and Compiling Background Knowledge and Its Applications to Knowledge Representation and Question Answering},
  pages     = {44--49},
  year      = {2006},
}
@inproceedings{vonahn2006verbosity,
  author       = {Luis von Ahn and Kedia, Mihir and Blum, Manuel},
  title        = {Verbosity: a game for collecting common-sense facts},
  booktitle    = {Proceedings of the SIGCHI conference on Human Factors in computing systems},
  pages        = {75--78},
  organization = {ACM},
  year         = {2006},
}
@incollection{auer2007dbpedia,
  author    = {Auer, S{\"o}ren and Bizer, Christian and Kobilarov, Georgi and Lehmann, Jens and Cyganiak, Richard and Ives, Zachary},
  title     = {{DBpedia}: A nucleus for a web of open data},
  booktitle = {The Semantic Web},
  series    = {Lecture Notes in Computer Science},
  volume    = {4825},
  pages     = {722--735},
  year      = {2007},
  publisher = {Springer},
}
@techreport{bergman2008umbel,
  author      = {Bergman, Michael K and Giasson, Fr{\'e}d{\'e}rick},
  title       = {{UMBEL} ontology},
  institution = {Structured Dynamics LLC},
  year        = {2008},
  note        = {Documentation retrieved from \url{http://umbel.org/resources/about/} on 2015-07-28.},
}
@techreport{davis2013unicode,
  author      = {Mark Davis and Ken Whistler},
  title       = {{Unicode} Standard Annex \#15: {Unicode} Normalization Forms},
  institution = {Unicode Consortium},
  year        = {2013},
  note        = {\url{http://www.unicode.org/reports/tr15/tr15-39.html}},
}
@techreport{bcp47,
  author      = {Addison Phillips and Mark Davis},
  title       = {{BCP} 47: Tags for Identifying Languages},
  institution = {Internet Engineering Task Force},
  year        = {2009},
  note        = {\url{https://tools.ietf.org/html/rfc5646}},
}
@inproceedings{demelo2009uwn,
  author       = {De Melo, Gerard and Weikum, Gerhard},
  title        = {Towards a universal wordnet by learning from combined evidence},
  booktitle    = {Proceedings of the 18th ACM conference on Information and knowledge management},
  pages        = {513--522},
  organization = {ACM},
  year         = {2009},
}
@inproceedings{kuo2009petgame,
  author       = {Kuo, Yen-Ling and Lee, Jong-Chuan and Chiang, Kai-Yang and Wang, Rex and Shen, Edward and Chan, Cheng-Wei and Hsu, Jane Yung-Jen},
  title        = {Community-based game design: experiments on social games for commonsense data collection},
  booktitle    = {Proceedings of the ACM SIGKDD Workshop on Human Computation},
  pages        = {15--22},
  organization = {ACM},
  year         = {2009},
}
@article{nakahara2011nadya,
  author    = {Kazuhiro Nakahara and Shigeo Yamada},
  title     = {{D}evelopment and Evaluation of a {W}eb-Based Game for Common-Sense Knowledge Acquisition in {J}apan},
  journal   = {Unisys Technical Report},
  volume    = {30},
  number    = {4},
  pages     = {295--305},
  publisher = {Nihon Unisys},
  year      = {2011},
}
@article{singhal2012googleblog,
  author  = {Singhal, Amit},
  title   = {Introducing the knowledge graph: things, not strings},
  journal = {Official {G}oogle blog},
  year    = {2012},
  url     = {https://googleblog.blogspot.com/2012/05/introducing-knowledge-graph-things-not.html},
  note    = {Retrieved from \url{https://googleblog.blogspot.com/2012/05/introducing-knowledge-graph-things-not.html} on Dec. 1, 2016},
}
@inproceedings{bond2013linking,
  author    = {Bond, Francis and Foster, Ryan},
  title     = {Linking and Extending an {Open Multilingual Wordnet}},
  booktitle = {51st Annual Meeting of the Association for Computational Linguistics: ACL-2013},
  pages     = {1352--1362},
  year      = {2013},
}
@misc{wiktionary2014de,
  author = {Wiktionary},
  title  = {Wiktionary{,} The Free Dictionary --- {G}erman data export},
  year   = {2014},
  url    = {https://dumps.wikimedia.org/dewiktionary/},
  note   = {(A collaborative project with thousands of authors.) Retrieved from \url{https://dumps.wikimedia.org/dewiktionary/} on 2014-08-26},
}
@misc{wiktionary2014en,
  author = {Wiktionary},
  title  = {Wiktionary{,} The Free Dictionary --- {E}nglish data export},
  year   = {2014},
  url    = {https://dumps.wikimedia.org/enwiktionary/},
  note   = {(A collaborative project with thousands of authors.) Retrieved from \url{https://dumps.wikimedia.org/enwiktionary/} on 2014-08-26},
}
# Influences
# ----------
@article{liu2004conceptnet,
  author    = {Hugo Liu and Push Singh},
  title     = {{ConceptNet} -- A Practical Commonsense Reasoning Tool-Kit},
  journal   = {{BT} Technology Journal},
  volume    = {22},
  number    = {4},
  pages     = {211--226},
  year      = {2004},
  month     = oct,
  publisher = {Springer Science+Business Media},
  doi       = {10.1023/b:bttj.0000047600.45421.6d},
  url       = {http://dx.doi.org/10.1023/b:bttj.0000047600.45421.6d},
}
@article{pustejovsky1991generative,
  author    = {Pustejovsky, James},
  title     = {The generative lexicon},
  journal   = {Computational linguistics},
  volume    = {17},
  number    = {4},
  pages     = {409--441},
  publisher = {MIT press},
  year      = {1991},
}
@inproceedings{nickel2015holographic,
  author    = {Nickel, Maximilian and Rosasco, Lorenzo and Poggio, Tomaso},
  title     = {Holographic Embeddings of Knowledge Graphs},
  booktitle = {AAAI},
  year      = {2016},
}
@inproceedings{xiao2014distributed,
  author    = {Xiao, Min and Guo, Yuhong},
  title     = {Distributed Word Representation Learning for Cross-Lingual Dependency Parsing},
  booktitle = {CoNLL},
  pages     = {119--129},
  year      = {2014},
}
# Existing implementations of word similarity
# -------------------------------------------
@article{mikolov2013word2vec,
  author    = {Tomas Mikolov and Kai Chen and Greg Corrado and Jeffrey Dean},
  title     = {Efficient Estimation of Word Representations in Vector Space},
  journal   = {CoRR},
  volume    = {abs/1301.3781},
  year      = {2013},
  url       = {http://arxiv.org/abs/1301.3781},
  timestamp = {Thu, 07 May 2015 20:02:01 +0200},
  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1301-3781},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  comment   = {
    The first publication of word2vec, with its CBOW and skip-gram models.
  },
}
@incollection{mikolov2013distributed,
  author    = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S and Dean, Jeff},
  title     = {Distributed Representations of Words and Phrases and their Compositionality},
  booktitle = {Advances in Neural Information Processing Systems 26},
  editor    = {C. J. C. Burges and L. Bottou and M. Welling and Z. Ghahramani and K. Q. Weinberger},
  pages     = {3111--3119},
  year      = {2013},
  publisher = {Curran Associates, Inc.},
  url       = {http://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf},
  comment   = {
    The NIPS followup to word2vec.
    Extends word2vec's skip-gram model to phrases, so it can learn that
    "Air Canada" is not just "Air" + "Canada".
  },
}
@inproceedings{zou2013bilingual,
  author    = {Zou, Will Y and Socher, Richard and Cer, Daniel M and Manning, Christopher D},
  title     = {Bilingual Word Embeddings for Phrase-Based Machine Translation},
  booktitle = {EMNLP},
  pages     = {1393--1398},
  year      = {2013},
}
@inproceedings{pennington2014glove,
  author    = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher D},
  title     = {{GloVe}: Global vectors for word representation},
  booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  pages     = {1532--1543},
  year      = {2014},
  url       = {http://www-nlp.stanford.edu/pubs/glove.pdf},
  comment   = {
    GloVe is the best starting point for word embeddings in a vector space, in our view.
  },
}
@inproceedings{faruqui2015retrofitting,
  author    = {Faruqui, Manaal and Dodge, Jesse and Jauhar, Sujay K. and Dyer, Chris and Hovy, Eduard and Smith, Noah A.},
  title     = {Retrofitting Word Vectors to Semantic Lexicons},
  booktitle = {Proceedings of NAACL},
  year      = {2015},
  url       = {http://arxiv.org/abs/1411.4166},
  comment   = {
    We're using this method to combine ConceptNet with GloVe.
  },
}
@article{levy2015embeddings,
  author  = {Levy, Omer and Goldberg, Yoav and Dagan, Ido},
  title   = {Improving distributional similarity with lessons learned from word embeddings},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {3},
  pages   = {211--225},
  year    = {2015},
  url     = {https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/viewFile/570/124},
  comment = {
    This is a survey of word similarity methods and how they perform with
    different parameter settings, aiming to "compare apples to apples" when
    evaluating the claims of word2vec and GloVe.
    One thing it examines is the effect of L2-normalizing the columns of GloVe.
    It also implements a method based on the SVD of the pointwise mutual
    information matrix, which achieves the best score I've seen on the
    rare words dataset, besides ours (rho=.514).
  },
}
@inproceedings{rothe2015autoextend,
  author    = {Rothe, Sascha and Sch{\"u}tze, Hinrich},
  title     = {{AutoExtend}: Extending Word Embeddings to Embeddings for Synsets and Lexemes},
  booktitle = {Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  pages     = {1793--1803},
  year      = {2015},
  month     = jul,
  publisher = {Association for Computational Linguistics},
  address   = {Beijing, China},
  url       = {http://www.aclweb.org/anthology/P15-1173},
}
@article{speer2016ensemble,
  author        = {Speer, Robert and Chin, Joshua},
  title         = {An Ensemble Method to Produce High-Quality Word Embeddings},
  journal       = {arXiv preprint arXiv:1604.01692},
  year          = {2016},
  eprint        = {1604.01692},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1604.01692},
  comment       = {Our previous paper that was turned down by NAACL.},
}
@article{fasttext,
  author        = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas},
  title         = {Enriching word vectors with subword information},
  journal       = {arXiv preprint arXiv:1607.04606},
  year          = {2016},
  eprint        = {1607.04606},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1607.04606},
}
@article{salle2016lexvec,
  author        = {Salle, Alexandre and Idiart, Marco and Villavicencio, Aline},
  title         = {Enhancing the {LexVec} Distributed Word Representation Model Using Positional Contexts and External Memory},
  journal       = {arXiv preprint arXiv:1606.01283},
  year          = {2016},
  eprint        = {1606.01283},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1606.01283},
}
# Other analogy systems
# ---------------------
@article{turney2006lra,
  author    = {Peter D. Turney},
  title     = {Similarity of semantic relations},
  journal   = {Computational Linguistics},
  volume    = {32},
  number    = {3},
  pages     = {379--416},
  publisher = {MIT Press},
  year      = {2006},
}
@inproceedings{herdagdelen2009bagpack,
  author    = {Herda{\v{g}}delen, Ama{\c{c}} and Baroni, Marco},
  title     = {{BagPack}: A General Framework to Represent Semantic Relations},
  booktitle = {Proceedings of the Workshop on Geometrical Models of Natural Language Semantics},
  series    = {GEMS '09},
  pages     = {33--40},
  year      = {2009},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Athens, Greece},
  numpages  = {8},
  acmid     = {1705420},
  url       = {http://dl.acm.org/citation.cfm?id=1705415.1705420},
}
@article{turney2013supersim,
  author  = {Turney, Peter D},
  title   = {Distributional Semantics Beyond Words: Supervised Learning of Analogy and Paraphrase},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {1},
  pages   = {353--366},
  year    = {2013},
  url     = {http://aclweb.org/anthology/Q/Q13/Q13-1029.pdf},
  comment = {
    References Turney's current SAT data set, and provides the best performance on it without
    Web searching.
  },
}
# Ways to extend word similarity
# ------------------------------
@inproceedings{nickel2011rescal,
  author    = {Nickel, Maximilian and Tresp, Volker and Kriegel, Hans-Peter},
  title     = {A three-way model for collective learning on multi-relational data},
  booktitle = {Proceedings of the 28th international conference on machine learning (ICML-11)},
  pages     = {809--816},
  year      = {2011},
}
# The key mikolov2013distributed is already defined earlier in this file, in
# the section on existing implementations of word similarity. Defining it a
# second time here made BibTeX report a "repeated entry" error and ignore this
# copy, so the copy is disabled. Cite the paper with the same key as before.
# The annotation that was attached to this copy:
#
#   Extends word2vec's skip-gram model to phrases, so it can learn that
#   "Air Canada" is not just "Air" + "Canada".
@inproceedings{soricut2015unsupervised,
  author    = {Soricut, Radu and Och, Franz},
  title     = {Unsupervised morphology induction using word embeddings},
  booktitle = {Proceedings of NAACL},
  year      = {2015},
}
@inproceedings{zhao2015learning,
  author    = {Zhao, Kai and Hassan, Hany and Auli, Michael},
  title     = {Learning Translation Models from Monolingual Continuous Representations},
  booktitle = {Proceedings of NAACL},
  year      = {2015},
  comment   = {
    This inspired the way that we merge word2vec with GloVe.
  },
}
@inproceedings{kiros2015skip,
  author    = {Kiros, Ryan and Zhu, Yukun and Salakhutdinov, Ruslan R and Zemel, Richard and Urtasun, Raquel and Torralba, Antonio and Fidler, Sanja},
  title     = {Skip-thought vectors},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {3276--3284},
  year      = {2015},
}
@article{trask2015sense2vec,
  author    = {Andrew Trask and Phil Michalak and John Liu},
  title     = {sense2vec - {A} Fast and Accurate Method for Word Sense Disambiguation In Neural Word Embeddings},
  journal   = {CoRR},
  volume    = {abs/1511.06388},
  year      = {2015},
  url       = {http://arxiv.org/abs/1511.06388},
  timestamp = {Tue, 01 Dec 2015 19:22:34 +0100},
  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/TraskML15},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
# Corpora and resources
# ---------------------
@inproceedings{ganitkevitch2013ppdb,
  author    = {Ganitkevitch, Juri and Van Durme, Benjamin and Callison-Burch, Chris},
  title     = {{PPDB}: The {Paraphrase Database}},
  booktitle = {HLT-NAACL},
  pages     = {758--764},
  year      = {2013},
  url       = {http://www.aclweb.org/anthology/N13-1#page=796},
  comment   = {
    The external resource that Faruqui used for retrofitting.
  },
}
@article{marcus1993treebank,
  author    = {Marcus, Mitchell P and Marcinkiewicz, Mary Ann and Santorini, Beatrice},
  title     = {Building a large annotated corpus of {English}: The {Penn Treebank}},
  journal   = {Computational linguistics},
  volume    = {19},
  number    = {2},
  pages     = {313--330},
  year      = {1993},
  publisher = {MIT Press},
  url       = {http://repository.upenn.edu/cgi/viewcontent.cgi?article=1246&context=cis_reports},
  comment   = {The publication of the Penn Treebank Parser},
}
@article{wang2015solving,
  author        = {Wang, Huazheng and Gao, Bin and Bian, Jiang and Tian, Fei and Liu, Tie-Yan},
  title         = {Solving Verbal Comprehension Questions in {IQ} Test by Knowledge-Powered Word Embedding},
  journal       = {arXiv preprint arXiv:1505.07909},
  year          = {2015},
  eprint        = {1505.07909},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1505.07909},
  comment       = {
    Not referred to yet, but provides a source of "IQ test" analogies.
  },
}
@inproceedings{miller2014alignment,
  author    = {Miller, Tristan and Gurevych, Iryna},
  title     = {{WordNet}-{Wikipedia}-{Wiktionary}: Construction of a Three-way Alignment},
  booktitle = {LREC},
  pages     = {2094--2100},
  year      = {2014},
}
# Evaluation data
# ---------------
@article{rubenstein1965rg,
  author    = {Rubenstein, Herbert and Goodenough, John B},
  title     = {Contextual correlates of synonymy},
  journal   = {Communications of the ACM},
  volume    = {8},
  number    = {10},
  pages     = {627--633},
  publisher = {ACM},
  year      = {1965},
  comment   = {
    The publication of the RG-65 dataset. Full text not available online,
    except you can usually find a cached PDF somewhere by Googling for it.
  },
}
@article{miller1991mc,
  author    = {Miller, George A and Charles, Walter G},
  title     = {Contextual correlates of semantic similarity},
  journal   = {Language and cognitive processes},
  volume    = {6},
  number    = {1},
  pages     = {1--28},
  publisher = {Taylor \& Francis},
  year      = {1991},
  comment   = {The publication of the MC dataset, paper not freely available},
}
@inproceedings{finkelstein2001ws,
  author       = {Finkelstein, Lev and Gabrilovich, Evgeniy and Matias, Yossi and Rivlin, Ehud and Solan, Zach and Wolfman, Gadi and Ruppin, Eytan},
  title        = {Placing search in context: The concept revisited},
  booktitle    = {Proceedings of the 10th international conference on World Wide Web},
  pages        = {406--414},
  year         = {2001},
  organization = {ACM},
  url          = {http://www.iicm.tugraz.at/thesis/cguetl_diss/literatur/Kapitel07/References/Finkelstein_et_al._2002/p116-finkelstein.pdf},
  comment      = {The publication of the WordSim-353 dataset},
}
@inproceedings{huang2012scws,
  author    = {Eric H. Huang and Richard Socher and Christopher D. Manning and Andrew Y. Ng},
  title     = {Improving Word Representations via Global Context and Multiple Word Prototypes},
  booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = 2012,
  url       = {http://www.socher.org/index.php/Main/ImprovingWordRepresentationsViaGlobalContextAndMultipleWordPrototypes},
  comment   = {
    The publication of the SCWS dataset, which compares words in context, even
    though we may need to ignore the context.
  },
}
@inproceedings{halawi2012mturk,
  author       = {Halawi, Guy and Dror, Gideon and Gabrilovich, Evgeniy and Koren, Yehuda},
  title        = {Large-scale learning of word relatedness with constraints},
  booktitle    = {Proceedings of the 18th ACM SIGKDD international conference on Knowledge discovery and data mining},
  pages        = {1406--1414},
  organization = {ACM},
  year         = {2012},
}
@inproceedings{luong2013rw,
  author    = {Luong, Minh-Thang and Socher, Richard and Manning, Christopher D},
  title     = {Better word representations with recursive neural networks for morphology},
  booktitle = {Proceedings of the Seventeenth Conference on Computational Natural Language Learning (CoNLL)},
  pages     = {104--113},
  year      = {2013},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.377.5234\&rep=rep1\&type=pdf#page=116},
  comment   = {The publication of the Stanford Rare Words (RW) dataset.},
}
@article{bruni2014men,
  author  = {Bruni, Elia and Tran, Nam-Khanh and Baroni, Marco},
  title   = {Multimodal Distributional Semantics},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {49},
  pages   = {1--47},
  year    = {2014},
  url     = {http://clic.cimec.unitn.it/~elia.bruni/publications/bruni2014multimodal.pdf},
  comment = {The publication of the MEN-3000 dataset},
}
@inproceedings{hassan2009crosslingual,
  author    = {Samer Hassan and Rada Mihalcea},
  title     = {Cross-lingual Semantic Relatedness Using Encyclopedic Knowledge},
  booktitle = {Proceedings of the conference on Empirical Methods in Natural Language Processing},
  year      = {2009},
  address   = {Singapore},
  url       = {https://6bc8a60a-a-62cb3a1a-s-sites.googlegroups.com/site/samerhassan/documents/Hassan09a.pdf},
  comment   = {Translations of WS-353 and MC-30 into Spanish, Arabic, and Romanian.},
}
@incollection{gurevych2005german,
  author    = {Gurevych, Iryna},
  title     = {Using the structure of a conceptual network in computing semantic relatedness},
  booktitle = {Natural Language Processing--IJCNLP 2005},
  pages     = {767--778},
  publisher = {Springer},
  year      = {2005},
  comment   = {The Gur65 dataset, a translation of RG-65 into German.},
}
@incollection{joubarne2011french,
  author    = {Joubarne, Colette and Inkpen, Diana},
  title     = {Comparison of semantic similarity for different languages using the {G}oogle {N}-gram corpus and second-order co-occurrence measures},
  booktitle = {Advances in Artificial Intelligence},
  pages     = {216--221},
  publisher = {Springer},
  year      = {2011},
  comment   = {Provides a translation of RG-65 into French.},
}
@inproceedings{mostafazadeh2016cloze,
  author    = {Mostafazadeh, Nasrin and Chambers, Nathanael and He, Xiaodong and Parikh, Devi and Batra, Dhruv and Vanderwende, Lucy and Kohli, Pushmeet and Allen, James},
  title     = {A Corpus and Cloze Evaluation for Deeper Understanding of Commonsense Stories},
  booktitle = {Proceedings of NAACL: Human Language Technologies},
  pages     = {839--849},
  year      = {2016},
  month     = jun,
  publisher = {Association for Computational Linguistics},
  address   = {San Diego, California},
  url       = {http://www.aclweb.org/anthology/N16-1098},
}
# Background on how we got here
# -----------------------------
@inproceedings{speer2008analogyspace,
  author    = {Speer, Robert and Havasi, Catherine and Lieberman, Henry},
  title     = {{AnalogySpace}: Reducing the Dimensionality of Common Sense Knowledge},
  booktitle = {AAAI},
  pages     = {548--553},
  year      = {2008},
}
@article{havasi2009digital,
  author    = {Havasi, Catherine and Speer, Robert and Pustejovsky, James and Lieberman, Henry},
  title     = {Digital intuition: Applying common sense using dimensionality reduction},
  journal   = {IEEE Intelligent Systems},
  volume    = {24},
  number    = {4},
  pages     = {24--35},
  year      = {2009},
  publisher = {IEEE},
  url       = {http://dspace.mit.edu/openaccess-disseminate/1721.1/51870},
  comment   = {
    This is the paper underlying what eventually became Luminoso, though our
    methods have changed significantly.
    The paper describes the "blending" operation in particular, which is how
    we combine ConceptNet with domain-specific co-occurrences.
    Don't go looking in this paper for any kind of evaluation that can be
    compared to anything else; the only task we evaluated was to infer
    ConceptNet-like assertions.
  },
}
@inproceedings{havasi2010coarse,
  author    = {Havasi, Catherine and Speer, Robert and Pustejovsky, James},
  title     = {Coarse Word-Sense Disambiguation Using Common Sense},
  booktitle = {AAAI Fall Symposium: Commonsense Knowledge},
  year      = {2010},
}
@inproceedings{havasi2010color,
  author    = {Havasi, Catherine and Speer, Robert and Holmgren, Justin},
  title     = {Automated Color Selection Using Semantic Knowledge},
  booktitle = {AAAI Fall Symposium: Commonsense Knowledge},
  year      = {2010},
}
@inproceedings{speer2012conceptnet,
  author    = {Speer, Robert and Havasi, Catherine},
  title     = {Representing General Relational Knowledge in {C}oncept{N}et 5},
  booktitle = {LREC},
  pages     = {3679--3686},
  year      = {2012},
  url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/1072_Paper.pdf},
  comment   = {The most recent open access publication of ConceptNet.},
}
@incollection{speer2013conceptnet,
  author    = {Speer, Robert and Havasi, Catherine},
  title     = {{ConceptNet} 5: A large semantic network for relational knowledge},
  booktitle = {The People's Web Meets {NLP}},
  pages     = {161--176},
  year      = {2013},
  publisher = {Springer},
}
# General background
# ------------------
@book{zipf1949human,
  author    = {Zipf, George Kingsley},
  title     = {Human behavior and the principle of least effort: an introduction to human ecology},
  year      = {1949},
  publisher = {Addison-Wesley Press},
  lccn      = {49007787},
  url       = {https://books.google.com/books?id=1tx9AAAAIAAJ},
  comment   = {
    The book that gave us Zipf's law. Apparently considered a bad work
    of anthropology but an excellent work of corpus linguistics.
  },
}
@article{deerwester1990indexing,
  author  = {Deerwester, Scott C. and Dumais, Susan T and Landauer, Thomas K. and Furnas, George W. and Harshman, Richard A.},
  title   = {Indexing by latent semantic analysis},
  journal = {Journal of the American Society for Information Science},
  volume  = {41},
  number  = {6},
  pages   = {391--407},
  year    = {1990},
  comment = {
    The original, unreliable way to get word and document embeddings.
  },
}
@article{hinton2006deep,
  author    = {Hinton, Geoffrey E and Osindero, Simon and Teh, Yee-Whye},
  title     = {A fast learning algorithm for deep belief nets},
  journal   = {Neural computation},
  volume    = {18},
  number    = {7},
  pages     = {1527--1554},
  publisher = {MIT Press},
  year      = {2006},
  comment   = {
    The original publication that hints at the term "deep learning".
  },
}
@inproceedings{agirre2009similarity,
  author       = {Agirre, Eneko and Alfonseca, Enrique and Hall, Keith and Kravalova, Jana and Pa{\c{s}}ca, Marius and Soroa, Aitor},
  title        = {A study on similarity and relatedness using distributional and {W}ord{N}et-based approaches},
  booktitle    = {Proceedings of NAACL: Human Language Technologies},
  pages        = {19--27},
  organization = {Association for Computational Linguistics},
  year         = {2009},
  url          = {http://dl.acm.org/citation.cfm?id=1620758},
  comment      = {
    Distinguishes similarity and relatedness in wordsim-353. Also recognizes
    the difference between distributional similarity and lexical resources.
  },
}
@inproceedings{linzen2016issues,
  author       = {Tal Linzen},
  title        = {Issues in evaluating semantic spaces using word analogies},
  booktitle    = {Proceedings of the 1st Workshop on Evaluating Vector Space Representations for NLP},
  pages        = {13--18},
  organization = {Association for Computational Linguistics},
  year         = {2016},
  url          = {http://aclweb.org/anthology/W/W16/W16-2503.pdf},
  comment      = {
    "Destroys" the Google analogy evaluation.
  },
}
@misc{speer2016wordfreq,
  author = {Robert Speer and Joshua Chin and Andrew Lin and Lance Nathan and Sara Jewett},
  title  = {wordfreq: v1.5.1},
  year   = 2016,
  month  = sep,
  doi    = {10.5281/zenodo.61937},
  url    = {https://doi.org/10.5281/zenodo.61937},
  note   = {DOI 10.5281/zenodo.61937},
}
# Miscellaneous tools
# -------------------
@article{fisher1915frequency,
  author    = {Fisher, Ronald A},
  title     = {Frequency distribution of the values of the correlation coefficient in samples from an indefinitely large population},
  journal   = {Biometrika},
  volume    = {10},
  number    = {4},
  pages     = {507--521},
  year      = {1915},
  publisher = {JSTOR},
}
@article{micallef2014euler,
  author    = {Micallef, Luana and Rodgers, Peter},
  title     = {{eulerAPE}: Drawing Area-Proportional 3-{Venn} Diagrams Using Ellipses},
  journal   = {PLoS ONE},
  volume    = {9},
  number    = {7},
  pages     = {e101717},
  year      = {2014},
  month     = jul,
  publisher = {Public Library of Science},
  doi       = {10.1371/journal.pone.0101717},
  url       = {https://doi.org/10.1371/journal.pone.0101717},
  comment   = {Used to illustrate the overlap of vocabularies.},
}
@article{bonett2000sample,
  author    = {Bonett, Douglas G and Wright, Thomas A},
  title     = {Sample size requirements for estimating {P}earson, {K}endall and {S}pearman correlations},
  journal   = {Psychometrika},
  volume    = {65},
  number    = {1},
  pages     = {23--28},
  publisher = {Springer},
  year      = {2000},
}
@article{koster2012snakemake,
  author    = {K{\"o}ster, Johannes and Rahmann, Sven},
  title     = {Snakemake---a scalable bioinformatics workflow engine},
  journal   = {Bioinformatics},
  volume    = {28},
  number    = {19},
  pages     = {2520--2522},
  year      = {2012},
  publisher = {Oxford Univ Press},
}