visual_reasoning.bib

@inproceedings{amizadehNeuroSymbolicVisualReasoning2020,
  author    = {Amizadeh, Saeed and Palangi, Hamid and Polozov, Alex and Huang, Yichen and Koishida, Kazuhito},
  title     = {Neuro-Symbolic Visual Reasoning: Disentangling ``Visual'' from ``Reasoning''},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {279--290},
  publisher = {{PMLR}},
  year      = {2020},
  url       = {http://proceedings.mlr.press/v119/amizadeh20a.html},
  timestamp = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/AmizadehPPHK20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{andreasNeuralModuleNetworks2016,
  author    = {Andreas, Jacob and Rohrbach, Marcus and Darrell, Trevor and Klein, Dan},
  title     = {Neural Module Networks},
  booktitle = {2016 {IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR} 2016, Las Vegas, NV, USA, June 27-30, 2016},
  pages     = {39--48},
  publisher = {{IEEE} Computer Society},
  year      = {2016},
  doi       = {10.1109/CVPR.2016.12},
  url       = {https://doi.org/10.1109/CVPR.2016.12},
  timestamp = {Thu, 25 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/cvpr/AndreasRDK16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{bakhtinPHYRENewBenchmark2019,
  author    = {Bakhtin, Anton and van der Maaten, Laurens and Johnson, Justin and Gustafson, Laura and Girshick, Ross B.},
  editor    = {Wallach, Hanna M. and Larochelle, Hugo and Beygelzimer, Alina and d'Alch{\'{e}}{-}Buc, Florence and Fox, Emily B. and Garnett, Roman},
  title     = {{PHYRE:} {A} New Benchmark for Physical Reasoning},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages     = {5083--5094},
  year      = {2019},
  url       = {https://proceedings.neurips.cc/paper/2019/hash/4191ef5f6c1576762869ac49281130c9-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/BakhtinM0GG19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{baradelCoPhyCounterfactualLearning2020a,
  author    = {Baradel, Fabien and Neverova, Natalia and Mille, Julien and Mori, Greg and Wolf, Christian},
  title     = {{CoPhy}: Counterfactual Learning of Physical Dynamics},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=SkeyppEFvS},
  timestamp = {Thu, 07 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/BaradelNMM020.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{barrettMeasuringabstractreasoning2018,
  author    = {Santoro, Adam and Hill, Felix and Barrett, David G. T. and Morcos, Ari S. and Lillicrap, Timothy P.},
  editor    = {Dy, Jennifer G. and Krause, Andreas},
  title     = {Measuring abstract reasoning in neural networks},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July 10-15, 2018},
  series    = {Proceedings of Machine Learning Research},
  volume    = {80},
  pages     = {4477--4486},
  publisher = {{PMLR}},
  year      = {2018},
  url       = {http://proceedings.mlr.press/v80/santoro18a.html},
  timestamp = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SantoroHBML18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{bennyScaleLocalizedAbstractReasoning2021,
  author    = {Benny, Yaniv and Pekar, Niv and Wolf, Lior},
  title     = {Scale-Localized Abstract Reasoning},
  booktitle = {2021 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2021},
  pages     = {12552--12560},
  year      = {2021},
  url       = {https://ieeexplore.ieee.org/document/9577474/},
  urldate   = {2022-10-25}
}

@inproceedings{bittonWinoGAViLGamifiedAssociation2022,
  author    = {Bitton, Yonatan and Guetta, Nitzan Bitton and Yosef, Ron and Elovici, Yuval and Bansal, Mohit and Stanovsky, Gabriel and Schwartz, Roy},
  title     = {{WinoGAViL}: Gamified Association Benchmark to Challenge Vision-and-Language Models},
  booktitle = {Thirty-Sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
  year      = {2022},
  url       = {https://openreview.net/forum?id=aJtVdI251Vv},
  urldate   = {2022-10-25}
}

@inproceedings{chenGroundingPhysicalConcepts2022,
  author    = {Chen, Zhenfang and Mao, Jiayuan and Wu, Jiajun and Wong, Kwan{-}Yee Kenneth and Tenenbaum, Joshua B. and Gan, Chuang},
  title     = {Grounding Physical Concepts of Objects and Events Through Dynamic Visual Reasoning},
  booktitle = {9th International Conference on Learning Representations, {ICLR} 2021, Virtual Event, Austria, May 3-7, 2021},
  publisher = {OpenReview.net},
  year      = {2021},
  url       = {https://openreview.net/forum?id=bhCDO\_cEGCz},
  timestamp = {Wed, 23 Jun 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/ChenM0WTG21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{chenMetaModuleNetwork2021,
  author    = {Chen, Wenhu and Gan, Zhe and Li, Linjie and Cheng, Yu and Wang, William and Liu, Jingjing},
  title     = {Meta Module Network for Compositional Visual Reasoning},
  booktitle = {2021 {IEEE} Winter Conference on Applications of Computer Vision, {WACV} 2021},
  pages     = {655--664},
  year      = {2021},
  url       = {https://ieeexplore.ieee.org/document/9423385/},
  urldate   = {2022-10-25}
}

@inproceedings{chenREXReasoningawareGrounded2022,
  author    = {Chen, Shi and Zhao, Qi},
  title     = {{REX}: Reasoning-aware and Grounded Explanation},
  booktitle = {2022 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2022},
  pages     = {15565--15574},
  year      = {2022},
  url       = {https://ieeexplore.ieee.org/document/9879365/},
  urldate   = {2022-11-01}
}

@inproceedings{dingDynamicVisualReasoning2021,
  author    = {Ding, Mingyu and Chen, Zhenfang and Du, Tao and Luo, Ping and Tenenbaum, Josh and Gan, Chuang},
  title     = {Dynamic Visual Reasoning by Learning Differentiable Physics Models from Video and Language},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {34},
  pages     = {887--899},
  year      = {2021},
  url       = {https://proceedings.neurips.cc/paper/2021/hash/07845cd9aefa6cde3f8926d25138a3a2-Abstract.html},
  urldate   = {2022-10-24}
}

@inproceedings{eyzaguirreDifferentiableAdaptiveComputation2020,
  author    = {Eyzaguirre, Crist{\'{o}}bal and Soto, {\'{A}}lvaro},
  title     = {Differentiable Adaptive Computation Time for Visual Reasoning},
  booktitle = {2020 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2020, Seattle, WA, USA, June 13-19, 2020},
  pages     = {12814--12822},
  publisher = {{IEEE}},
  year      = {2020},
  doi       = {10.1109/CVPR42600.2020.01283},
  url       = {https://doi.org/10.1109/CVPR42600.2020.01283},
  timestamp = {Tue, 11 Aug 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/cvpr/EyzaguirreS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{girdharCATERdiagnosticdataset2020,
  author    = {Girdhar, Rohit and Ramanan, Deva},
  title     = {{CATER:} {A} diagnostic dataset for {Compositional} {Actions} {\&} {TEmporal} {Reasoning}},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=HJgzt2VKPB},
  timestamp = {Thu, 07 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/GirdharR20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@misc{girdharForwardPredictionPhysical2021,
  author        = {Girdhar, Rohit and Gustafson, Laura and Adcock, Aaron and van der Maaten, Laurens},
  title         = {Forward Prediction for Physical Reasoning},
  journal       = {ArXiv preprint},
  volume        = {abs/2006.10734},
  year          = {2020},
  eprint        = {2006.10734},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2006.10734}
}

@misc{hesselAbductionSherlockHolmes2022,
  author        = {Hessel, Jack and Hwang, Jena D. and Park, Jae Sung and Zellers, Rowan and Bhagavatula, Chandra and Rohrbach, Anna and Saenko, Kate and Choi, Yejin},
  title         = {The Abduction of {Sherlock Holmes}: A Dataset for Visual Abductive Reasoning},
  journal       = {ArXiv preprint},
  volume        = {abs/2202.04800},
  year          = {2022},
  eprint        = {2202.04800},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2202.04800}
}

@inproceedings{hongTransformationDrivenVisual2021,
  author    = {Hong, Xin and Lan, Yanyan and Pang, Liang and Guo, Jiafeng and Cheng, Xueqi},
  title     = {Transformation Driven Visual Reasoning},
  booktitle = {2021 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2021},
  pages     = {6899--6908},
  year      = {2021},
  url       = {https://ieeexplore.ieee.org/document/9578722/},
  urldate   = {2022-07-12}
}

@inproceedings{huangVisualStorytelling2016,
  author    = {Huang, Ting-Hao Kenneth and Ferraro, Francis and Mostafazadeh, Nasrin and Misra, Ishan and Agrawal, Aishwarya and Devlin, Jacob and Girshick, Ross and He, Xiaodong and Kohli, Pushmeet and Batra, Dhruv and Zitnick, C. Lawrence and Parikh, Devi and Vanderwende, Lucy and Galley, Michel and Mitchell, Margaret},
  title     = {Visual Storytelling},
  booktitle = {Proceedings of the 2016 Conference of the North {American} Chapter of the Association for Computational Linguistics: Human Language Technologies},
  pages     = {1233--1239},
  publisher = {Association for Computational Linguistics},
  address   = {San Diego, California},
  year      = {2016},
  doi       = {10.18653/v1/N16-1147},
  url       = {https://aclanthology.org/N16-1147}
}

@inproceedings{hudsonCompositionalAttentionNetworks2018,
  author    = {Hudson, Drew A. and Manning, Christopher D.},
  title     = {Compositional Attention Networks for Machine Reasoning},
  booktitle = {6th International Conference on Learning Representations, {ICLR} 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher = {OpenReview.net},
  year      = {2018},
  url       = {https://openreview.net/forum?id=S1Euwz-Rb},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/HudsonM18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{hudsonGQANewDataset2019,
  author    = {Hudson, Drew A. and Manning, Christopher D.},
  title     = {{GQA:} {A} New Dataset for Real-World Visual Reasoning and Compositional Question Answering},
  booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR} 2019, Long Beach, CA, USA, June 16-20, 2019},
  pages     = {6700--6709},
  publisher = {Computer Vision Foundation / {IEEE}},
  year      = {2019},
  doi       = {10.1109/CVPR.2019.00686},
  url       = {http://openaccess.thecvf.com/content\_CVPR\_2019/html/Hudson\_GQA\_A\_New\_Dataset\_for\_Real-World\_Visual\_Reasoning\_and\_Compositional\_CVPR\_2019\_paper.html},
  timestamp = {Fri, 27 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/cvpr/HudsonM19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{hudsonLearningAbstractionNeural2019a,
  author    = {Hudson, Drew A. and Manning, Christopher D.},
  editor    = {Wallach, Hanna M. and Larochelle, Hugo and Beygelzimer, Alina and d'Alch{\'{e}}{-}Buc, Florence and Fox, Emily B. and Garnett, Roman},
  title     = {Learning by Abstraction: The Neural State Machine},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages     = {5901--5914},
  year      = {2019},
  url       = {https://proceedings.neurips.cc/paper/2019/hash/c20a7ce2a627ba838cfbff082db35197-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/HudsonM19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{huLearningReasonEndtoEnd2017,
  author    = {Hu, Ronghang and Andreas, Jacob and Rohrbach, Marcus and Darrell, Trevor and Saenko, Kate},
  title     = {Learning to Reason: End-to-End Module Networks for Visual Question Answering},
  booktitle = {{IEEE} International Conference on Computer Vision, {ICCV} 2017, Venice, Italy, October 22-29, 2017},
  pages     = {804--813},
  publisher = {{IEEE} Computer Society},
  year      = {2017},
  doi       = {10.1109/ICCV.2017.93},
  url       = {https://doi.org/10.1109/ICCV.2017.93},
  timestamp = {Thu, 11 Jan 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iccv/HuARDS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{huStratifiedRuleAwareNetwork2021,
  author    = {Hu, Sheng and Ma, Yuqing and Liu, Xianglong and Wei, Yanlu and Bai, Shihao},
  title     = {Stratified Rule-Aware Network for Abstract Visual Reasoning},
  journal   = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  volume    = {35},
  number    = {2},
  pages     = {1567--1574},
  year      = {2021},
  issn      = {2374-3468},
  url       = {https://ojs.aaai.org/index.php/AAAI/article/view/16248},
  urldate   = {2022-10-25},
  keywords  = {Visual Reasoning \& Symbolic Representations},
  copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence}
}

@inproceedings{jiangBongardHOIBenchmarkingFewShot2022a,
  author    = {Jiang, Huaizu and Ma, Xiaojian and Nie, Weili and Yu, Zhiding and Zhu, Yuke and Anandkumar, Anima},
  title     = {{Bongard-HOI}: Benchmarking Few-Shot Visual Reasoning for Human-Object Interactions},
  booktitle = {2022 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2022},
  pages     = {19034--19043},
  year      = {2022},
  url       = {https://ieeexplore.ieee.org/document/9878697/},
  urldate   = {2022-11-06}
}

@inproceedings{jingMaintainingReasoningConsistency2022,
  author    = {Jing, Chenchen and Jia, Yunde and Wu, Yuwei and Liu, Xinyu and Wu, Qi},
  title     = {Maintaining Reasoning Consistency in Compositional Visual Question Answering},
  booktitle = {2022 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2022},
  pages     = {5089--5098},
  year      = {2022},
  url       = {https://ieeexplore.ieee.org/document/9879826/},
  urldate   = {2022-11-01}
}

@inproceedings{johnsonCLEVRDiagnosticDataset2017,
  author    = {Johnson, Justin and Hariharan, Bharath and van der Maaten, Laurens and Fei{-}Fei, Li and Zitnick, C. Lawrence and Girshick, Ross B.},
  title     = {{CLEVR:} {A} Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning},
  booktitle = {2017 {IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR} 2017, Honolulu, HI, USA, July 21-26, 2017},
  pages     = {1988--1997},
  publisher = {{IEEE} Computer Society},
  year      = {2017},
  doi       = {10.1109/CVPR.2017.215},
  url       = {https://doi.org/10.1109/CVPR.2017.215},
  timestamp = {Sat, 30 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/cvpr/JohnsonHMFZG17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{johnsonInferringExecutingPrograms2017,
  author    = {Johnson, Justin and Hariharan, Bharath and van der Maaten, Laurens and Hoffman, Judy and Fei{-}Fei, Li and Zitnick, C. Lawrence and Girshick, Ross B.},
  title     = {Inferring and Executing Programs for Visual Reasoning},
  booktitle = {{IEEE} International Conference on Computer Vision, {ICCV} 2017, Venice, Italy, October 22-29, 2017},
  pages     = {3008--3017},
  publisher = {{IEEE} Computer Society},
  year      = {2017},
  doi       = {10.1109/ICCV.2017.325},
  url       = {https://doi.org/10.1109/ICCV.2017.325},
  timestamp = {Sat, 19 Oct 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iccv/JohnsonHMHFZG17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{kimVisualReasoningProgressive2022,
  author    = {Kim, Seung Wook and Tapaswi, Makarand and Fidler, Sanja},
  title     = {Visual Reasoning by Progressive Module Networks},
  booktitle = {7th International Conference on Learning Representations, {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019},
  publisher = {OpenReview.net},
  year      = {2019},
  url       = {https://openreview.net/forum?id=B1fpDsAqt7},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/KimTF19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{liangVisualAbductiveReasoning2022,
  author        = {Liang, Chen and Wang, Wenguan and Zhou, Tianfei and Yang, Yi},
  title         = {Visual Abductive Reasoning},
  booktitle     = {2022 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2022},
  pages         = {15544--15554},
  year          = {2022},
  url           = {https://ieeexplore.ieee.org/document/9880226/},
  urldate       = {2022-05-22},
  eprint        = {2203.14040},
  eprinttype    = {arxiv},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition}
}

@inproceedings{liQLEVRDiagnosticDataset2022,
  author    = {Li, Zechen and S{\o}gaard, Anders},
  title     = {{QLEVR}: A Diagnostic Dataset for Quantificational Language and Elementary Visual Reasoning},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2022},
  pages     = {980--996},
  publisher = {Association for Computational Linguistics},
  address   = {Seattle, United States},
  year      = {2022},
  doi       = {10.18653/v1/2022.findings-naacl.73},
  url       = {https://aclanthology.org/2022.findings-naacl.73}
}

@inproceedings{liRepresentationReasoningboth2022,
  author    = {Li, Jiangtong and Niu, Li and Zhang, Liqing},
  title     = {From Representation to Reasoning: Towards Both Evidence and Commonsense Reasoning for Video Question-Answering},
  booktitle = {2022 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2022},
  pages     = {21241--21250},
  year      = {2022},
  url       = {https://ieeexplore.ieee.org/document/9878800/},
  urldate   = {2022-11-01}
}

@inproceedings{liuCLEVRRefDiagnosingVisual2019,
  author    = {Liu, Runtao and Liu, Chenxi and Bai, Yutong and Yuille, Alan L.},
  title     = {{CLEVR-Ref+}: Diagnosing Visual Reasoning With Referring Expressions},
  booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR} 2019, Long Beach, CA, USA, June 16-20, 2019},
  pages     = {4185--4194},
  publisher = {Computer Vision Foundation / {IEEE}},
  year      = {2019},
  doi       = {10.1109/CVPR.2019.00431},
  url       = {http://openaccess.thecvf.com/content\_CVPR\_2019/html/Liu\_CLEVR-Ref\_Diagnosing\_Visual\_Reasoning\_With\_Referring\_Expressions\_CVPR\_2019\_paper.html},
  timestamp = {Mon, 20 Jan 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/cvpr/LiuLBY19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@misc{liuVisualSpatialReasoning2022,
  author        = {Liu, Fangyu and Emerson, Guy and Collier, Nigel},
  title         = {Visual Spatial Reasoning},
  journal       = {ArXiv preprint},
  volume        = {abs/2205.00363},
  year          = {2022},
  eprint        = {2205.00363},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2205.00363}
}

@misc{malkinskiDeepLearningMethods2022,
  author        = {Ma{\l}ki{\'{n}}ski, Miko{\l}aj and Ma{\'{n}}dziuk, Jacek},
  title         = {Deep Learning Methods for Abstract Visual Reasoning: A Survey on {Raven's} Progressive Matrices},
  journal       = {ArXiv preprint},
  volume        = {abs/2201.12382},
  year          = {2022},
  eprint        = {2201.12382},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2201.12382}
}

@misc{malkinskiReviewEmergingResearch2022,
  author        = {Ma{\l}ki{\'{n}}ski, Miko{\l}aj and Ma{\'{n}}dziuk, Jacek},
  title         = {A Review of Emerging Research Directions in Abstract Visual Reasoning},
  journal       = {ArXiv preprint},
  volume        = {abs/2202.10284},
  year          = {2022},
  eprint        = {2202.10284},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2202.10284}
}

@inproceedings{maoGrammarBasedGroundedLexicon2022,
  author    = {Mao, Jiayuan and Shi, Freda H. and Wu, Jiajun and Levy, Roger P. and Tenenbaum, Joshua B.},
  title     = {Grammar-Based Grounded Lexicon Learning},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2021},
  url       = {https://openreview.net/forum?id=iI6nkEZkOl},
  urldate   = {2022-10-24}
}

@inproceedings{maoNeuroSymbolicConceptLearner2019,
  author    = {Mao, Jiayuan and Gan, Chuang and Kohli, Pushmeet and Tenenbaum, Joshua B. and Wu, Jiajun},
  title     = {The Neuro-Symbolic Concept Learner: Interpreting Scenes, Words, and Sentences From Natural Supervision},
  booktitle = {7th International Conference on Learning Representations, {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019},
  publisher = {OpenReview.net},
  year      = {2019},
  url       = {https://openreview.net/forum?id=rJgMlhRctm},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/MaoGKTW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{maRelViTConceptguidedVision2022a,
  author    = {Ma, Xiaojian and Nie, Weili and Yu, Zhiding and Jiang, Huaizu and Xiao, Chaowei and Zhu, Yuke and Zhu, Song-Chun and Anandkumar, Anima},
  title     = {{RelViT}: Concept-guided Vision Transformer for Visual Relational Reasoning},
  booktitle = {International Conference on Learning Representations},
  year      = {2022},
  url       = {https://openreview.net/forum?id=afoV8W3-IYp},
  urldate   = {2022-10-25}
}

@inproceedings{minhleDynamicLanguageBinding2020,
  author    = {Le, Thao Minh and Le, Vuong and Venkatesh, Svetha and Tran, Truyen},
  editor    = {Bessiere, Christian},
  title     = {Dynamic Language Binding in Relational Visual Reasoning},
  booktitle = {Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence, {IJCAI} 2020},
  pages     = {818--824},
  publisher = {ijcai.org},
  year      = {2020},
  doi       = {10.24963/ijcai.2020/114},
  url       = {https://doi.org/10.24963/ijcai.2020/114},
  timestamp = {Tue, 29 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ijcai/LeLV020.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{nieBongardLOGONewBenchmark2020a,
  author    = {Nie, Weili and Yu, Zhiding and Mao, Lei and Patel, Ankit B. and Zhu, Yuke and Anandkumar, Anima},
  editor    = {Larochelle, Hugo and Ranzato, Marc'Aurelio and Hadsell, Raia and Balcan, Maria{-}Florina and Lin, Hsuan{-}Tien},
  title     = {{Bongard-LOGO}: {A} New Benchmark for Human-Level Concept Learning and Reasoning},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/bf15e9bbff22c7719020f9df4badc20a-Abstract.html},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/NieYMPZA20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{parkRobustChangeCaptioning2019b,
  author    = {Park, Dong Huk and Darrell, Trevor and Rohrbach, Anna},
  title     = {Robust Change Captioning},
  booktitle = {2019 {IEEE/CVF} International Conference on Computer Vision, {ICCV} 2019, Seoul, Korea (South), October 27 - November 2, 2019},
  pages     = {4623--4632},
  publisher = {{IEEE}},
  year      = {2019},
  doi       = {10.1109/ICCV.2019.00472},
  url       = {https://doi.org/10.1109/ICCV.2019.00472},
  timestamp = {Thu, 05 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iccv/ParkDR19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{parkVisualCOMETReasoningDynamic2020c,
  author    = {Park, Jae Sung and Bhagavatula, Chandra and Mottaghi, Roozbeh and Farhadi, Ali and Choi, Yejin},
  title     = {{VisualCOMET}: Reasoning About the Dynamic Context of a Still Image},
  booktitle = {Computer Vision -- {ECCV} 2020: 16th European Conference, Glasgow, {UK}, August 23--28, 2020, Proceedings, Part {V}},
  pages     = {508--524},
  year      = {2020},
  doi       = {10.1007/978-3-030-58558-7_30},
  url       = {https://doi.org/10.1007/978-3-030-58558-7_30},
  urldate   = {2022-10-18}
}

@inproceedings{perezFiLMVisualReasoning2017,
  author    = {Perez, Ethan and Strub, Florian and de Vries, Harm and Dumoulin, Vincent and Courville, Aaron C.},
  editor    = {McIlraith, Sheila A. and Weinberger, Kilian Q.},
  title     = {{FiLM}: Visual Reasoning with a General Conditioning Layer},
  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence, (AAAI-18), the 30th innovative Applications of Artificial Intelligence (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February 2-7, 2018},
  pages     = {3942--3951},
  publisher = {{AAAI} Press},
  year      = {2018},
  url       = {https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/view/16528},
  timestamp = {Mon, 22 Oct 2018 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/PerezSVDC18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{riochetIntPhys2019Benchmark2022,
  author   = {Riochet, Ronan and Castro, Mario Ynocente and Bernard, Mathieu and Lerer, Adam and Fergus, Rob and Izard, V{\'{e}}ronique and Dupoux, Emmanuel},
  title    = {{IntPhys} 2019: A Benchmark for Visual Intuitive Physics Understanding},
  journal  = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume   = {44},
  number   = {9},
  pages    = {5016--5025},
  year     = {2022},
  issn     = {1939-3539},
  keywords = {Benchmark testing,Motion pictures,Physics,Predictive models,Shape,Task analysis,Visualization}
}

@misc{sampatReasoningActionsVisual2022a,
  author        = {Sampat, Shailaja Keyur and Patel, Maitreya and Das, Subhasish and Yang, Yezhou and Baral, Chitta},
  title         = {Reasoning about Actions over Visual and Linguistic Modalities: A Survey},
  journal       = {ArXiv preprint},
  volume        = {abs/2207.07568},
  year          = {2022},
  eprint        = {2207.07568},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2207.07568}
}

@inproceedings{santorosimpleneuralnetwork2017,
  author    = {Santoro, Adam and Raposo, David and Barrett, David G. T. and Malinowski, Mateusz and Pascanu, Razvan and Battaglia, Peter W. and Lillicrap, Tim},
  editor    = {Guyon, Isabelle and von Luxburg, Ulrike and Bengio, Samy and Wallach, Hanna M. and Fergus, Rob and Vishwanathan, S. V. N. and Garnett, Roman},
  title     = {A simple neural network module for relational reasoning},
  booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages     = {4967--4976},
  year      = {2017},
  url       = {https://proceedings.neurips.cc/paper/2017/hash/e6acf4b0f69f6f6e60e9a815938aa1ff-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/SantoroRBMPBL17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{shiExplainableExplicitVisual2019,
  author    = {Shi, Jiaxin and Zhang, Hanwang and Li, Juanzi},
  title     = {Explainable and Explicit Visual Reasoning Over Scene Graphs},
  booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR} 2019, Long Beach, CA, USA, June 16-20, 2019},
  pages     = {8376--8384},
  publisher = {Computer Vision Foundation / {IEEE}},
  year      = {2019},
  doi       = {10.1109/CVPR.2019.00857},
  url       = {http://openaccess.thecvf.com/content\_CVPR\_2019/html/Shi\_Explainable\_and\_Explicit\_Visual\_Reasoning\_Over\_Scene\_Graphs\_CVPR\_2019\_paper.html},
  timestamp = {Fri, 25 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/cvpr/ShiZL19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{suhrCorpusNaturalLanguage2017,
  author    = {Suhr, Alane and Lewis, Mike and Yeh, James and Artzi, Yoav},
  title     = {A Corpus of Natural Language for Visual Reasoning},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  pages     = {217--223},
  publisher = {Association for Computational Linguistics},
  address   = {Vancouver, Canada},
  year      = {2017},
  doi       = {10.18653/v1/P17-2034},
  url       = {https://aclanthology.org/P17-2034}
}

@inproceedings{suhrCorpusReasoningNatural2019a,
  author    = {Suhr, Alane and Zhou, Stephanie and Zhang, Ally and Zhang, Iris and Bai, Huajun and Artzi, Yoav},
  title     = {A Corpus for Reasoning about Natural Language Grounded in Photographs},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  pages     = {6418--6428},
  publisher = {Association for Computational Linguistics},
  address   = {Florence, Italy},
  year      = {2019},
  doi       = {10.18653/v1/P19-1644},
  url       = {https://aclanthology.org/P19-1644}
}

@inproceedings{teneyVPROMBenchmarkVisual2020,
  author    = {Teney, Damien and Wang, Peng and Cao, Jiewei and Liu, Lingqiao and Shen, Chunhua and van den Hengel, Anton},
  title     = {{V-PROM:} {A} Benchmark for Visual Reasoning Using Visual Progressive Matrices},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages     = {12071--12078},
  publisher = {{AAAI} Press},
  year      = {2020},
  url       = {https://aaai.org/ojs/index.php/AAAI/article/view/6885},
  timestamp = {Tue, 02 Feb 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/aaai/TeneyWCLSH20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{thrushWinogroundProbingVision2022a,
  author    = {Thrush, Tristan and Jiang, Ryan and Bartolo, Max and Singh, Amanpreet and Williams, Adina and Kiela, Douwe and Ross, Candace},
  title     = {Winoground: Probing Vision and Language Models for Visio-Linguistic Compositionality},
  booktitle = {2022 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2022},
  pages     = {5228--5238},
  year      = {2022},
  url       = {https://ieeexplore.ieee.org/document/9878945/},
  urldate   = {2022-10-25}
}

@inproceedings{wangInterpretableVisualReasoning2021,
  author    = {Wang, Zhonghao and Wang, Kai and Yu, Mo and Xiong, Jinjun and Hwu, Wen-Mei and {Hasegawa-Johnson}, Mark and Shi, Humphrey},
  title     = {Interpretable Visual Reasoning via Induced Symbolic Space},
  booktitle = {2021 {IEEE/CVF} International Conference on Computer Vision, {ICCV} 2021},
  pages     = {1858--1867},
  year      = {2021},
  url       = {https://ieeexplore.ieee.org/document/9710153/},
  urldate   = {2022-10-25}
}

@inproceedings{wuChainReasoningVisual2018,
  author    = {Wu, Chenfei and Liu, Jinlai and Wang, Xiaojie and Dong, Xuan},
  editor    = {Bengio, Samy and Wallach, Hanna M. and Larochelle, Hugo and Grauman, Kristen and Cesa{-}Bianchi, Nicol{\`{o}} and Garnett, Roman},
  title     = {Chain of Reasoning for Visual Question Answering},
  booktitle = {Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3-8, 2018, Montr{\'{e}}al, Canada},
  pages     = {273--283},
  year      = {2018},
  url       = {https://proceedings.neurips.cc/paper/2018/hash/31fefc0e570cb3860f2a6d4b38c6490d-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/WuLWD18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{xiangSelfsupervisedSpatialReasoning2022,
  author    = {Xiang, Siyuan and Yang, Anbang and Xue, Yanfei and Yang, Yaoqing and Feng, Chen},
  title     = {Self-Supervised Spatial Reasoning on Multi-View Line Drawings},
  booktitle = {2022 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2022},
  pages     = {12735--12744},
  year      = {2022},
  url       = {https://ieeexplore.ieee.org/document/9879170/},
  urldate   = {2022-11-01}
}

@misc{xieVisualEntailmentNovel2019,
  author        = {Xie, Ning and Lai, Farley and Doran, Derek and Kadav, Asim},
  title         = {Visual Entailment: A Novel Task for Fine-Grained Image Understanding},
  journal       = {ArXiv preprint},
  volume        = {abs/1901.06706},
  year          = {2019},
  eprint        = {1901.06706},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1901.06706}
}

@inproceedings{xuWhatCanNeural2022,
  author    = {Xu, Keyulu and Li, Jingling and Zhang, Mozhi and Du, Simon S. and Kawarabayashi, Ken{-}ichi and Jegelka, Stefanie},
  title     = {What Can Neural Networks Reason About?},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=rJxbJeHFPS},
  timestamp = {Thu, 07 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/XuLZDKJ20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% ICLR 2020 paper; metadata follows the dblp record given in biburl.
@inproceedings{yiCLEVRERCollisionEvents2020a,
 author = {Yi, Kexin and
   Gan, Chuang and
   Li, Yunzhu and
   Kohli, Pushmeet and
   Wu, Jiajun and
   Torralba, Antonio and
   Tenenbaum, Joshua B.},
 bibsource = {dblp computer science bibliography, https://dblp.org},
 biburl = {https://dblp.org/rec/conf/iclr/YiGLK0TT20.bib},
 booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
 publisher = {OpenReview.net},
 timestamp = {Thu, 07 May 2020 01:00:00 +0200},
 title = {{CLEVRER:} Collision Events for Video Representation and Reasoning},
 url = {https://openreview.net/forum?id=HkxYzANYDB},
 year = {2020}
}

% NeurIPS 2018 paper; metadata follows the dblp record given in biburl, except
% that the last author is spelled Joshua B. Tenenbaum to match the CLEVRER entry
% in this file, so the same person is rendered identically in the bibliography.
% NOTE(review): the citation key ends in 2019 although year is 2018; the key is
% kept unchanged because documents cite it.
@inproceedings{yiNeuralSymbolicVQADisentangling2019,
 author = {Kexin Yi and
Jiajun Wu and
Chuang Gan and
Antonio Torralba and
Pushmeet Kohli and
Joshua B. Tenenbaum},
 bibsource = {dblp computer science bibliography, https://dblp.org},
 biburl = {https://dblp.org/rec/conf/nips/Yi0G0KT18.bib},
 booktitle = {Advances in Neural Information Processing Systems 31: Annual Conference
on Neural Information Processing Systems 2018, NeurIPS 2018, December
3-8, 2018, Montr{\'{e}}al, Canada},
 editor = {Samy Bengio and
Hanna M. Wallach and
Hugo Larochelle and
Kristen Grauman and
Nicol{\`{o}} Cesa{-}Bianchi and
Roman Garnett},
 pages = {1039--1050},
 timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
 title = {Neural-Symbolic {VQA:} Disentangling Reasoning from Vision and Language
Understanding},
 url = {https://proceedings.neurips.cc/paper/2018/hash/5e388103a391daabe3de1d76a6739ccd-Abstract.html},
 year = {2018}
}

% CVPR 2019 paper; metadata follows the dblp record given in biburl.
% The url keeps the escaped underscores exactly as exported.
@inproceedings{zellersRecognitionCognitionVisual2019,
 author = {Zellers, Rowan and
   Bisk, Yonatan and
   Farhadi, Ali and
   Choi, Yejin},
 bibsource = {dblp computer science bibliography, https://dblp.org},
 biburl = {https://dblp.org/rec/conf/cvpr/ZellersBFC19.bib},
 booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR} 2019, Long Beach, CA, USA, June 16-20, 2019},
 doi = {10.1109/CVPR.2019.00688},
 pages = {6720--6731},
 publisher = {Computer Vision Foundation / {IEEE}},
 timestamp = {Mon, 20 Jan 2020 00:00:00 +0100},
 title = {From Recognition to Cognition: Visual Commonsense Reasoning},
 url = {http://openaccess.thecvf.com/content\_CVPR\_2019/html/Zellers\_From\_Recognition\_to\_Cognition\_Visual\_Commonsense\_Reasoning\_CVPR\_2019\_paper.html},
 year = {2019}
}

% ECCV 2022 paper (Springer LNCS). The proceedings title is spelled out, the
% dash and the accented editor name use LaTeX escapes like the rest of this
% file, and the doi is the one embedded in the url.
@inproceedings{zhangLearningAlgebraicRepresentation2022,
 author = {Zhang, Chi and Xie, Sirui and Jia, Baoxiong and Wu, Ying Nian and Zhu, Song-Chun and Zhu, Yixin},
 booktitle = {Computer Vision -- {ECCV} 2022},
 doi = {10.1007/978-3-031-19842-7_40},
 editor = {Avidan, Shai and Brostow, Gabriel and Ciss{\'e}, Moustapha and Farinella, Giovanni Maria and Hassner, Tal},
 pages = {692--709},
 publisher = {Springer},
 series = {Lecture Notes in Computer Science},
 title = {Learning Algebraic Representation for Systematic Generalization in Abstract Reasoning},
 url = {https://link.springer.com/content/pdf/10.1007/978-3-031-19842-7_40.pdf},
 year = {2022}
}

% NeurIPS 2019 paper; metadata follows the dblp record given in biburl.
@inproceedings{zhangLearningPerceptualInference2019,
 author = {Zhang, Chi and
   Jia, Baoxiong and
   Gao, Feng and
   Zhu, Yixin and
   Lu, Hongjing and
   Zhu, Song{-}Chun},
 bibsource = {dblp computer science bibliography, https://dblp.org},
 biburl = {https://dblp.org/rec/conf/nips/ZhangJGZLZ19.bib},
 booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
 editor = {Wallach, Hanna M. and
   Larochelle, Hugo and
   Beygelzimer, Alina and
   d'Alch{\'{e}}{-}Buc, Florence and
   Fox, Emily B. and
   Garnett, Roman},
 pages = {1073--1085},
 timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
 title = {Learning Perceptual Inference by Contrasting},
 url = {https://proceedings.neurips.cc/paper/2019/hash/6766aa2750c19aad2fa1b32f36ed4aee-Abstract.html},
 year = {2019}
}

% CVPR 2019 paper; metadata follows the dblp record given in biburl.
% The final title word is brace-protected because its mixed capitalisation is
% deliberate and sentence-casing styles would otherwise lowercase it.
@inproceedings{zhangRAVENDatasetRelational2019,
 author = {Chi Zhang and
Feng Gao and
Baoxiong Jia and
Yixin Zhu and
Song{-}Chun Zhu},
 bibsource = {dblp computer science bibliography, https://dblp.org},
 biburl = {https://dblp.org/rec/conf/cvpr/ZhangGJZZ19.bib},
 booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR}
2019, Long Beach, CA, USA, June 16-20, 2019},
 doi = {10.1109/CVPR.2019.00546},
 pages = {5317--5327},
 publisher = {Computer Vision Foundation / {IEEE}},
 timestamp = {Tue, 29 Dec 2020 00:00:00 +0100},
 title = {{RAVEN:} {A} Dataset for Relational and Analogical Visual {REasoNing}},
 url = {http://openaccess.thecvf.com/content\_CVPR\_2019/html/Zhang\_RAVEN\_A\_Dataset\_for\_Relational\_and\_Analogical\_Visual\_REasoNing\_CVPR\_2019\_paper.html},
 year = {2019}
}

% Journal article. The journal name is stored in full so that styles decide on
% abbreviation; in the title only the dataset name is brace-protected.
@article{zhaoVideoABCRealWorldVideo2022a,
 author = {Zhao, Wenliang and Rao, Yongming and Tang, Yansong and Zhou, Jie and Lu, Jiwen},
 issn = {1941-0042},
 journal = {{IEEE} Transactions on Image Processing},
 keywords = {abductive reasoning,Benchmark testing,Cognition,Convolutional neural networks,instruction video,Machine vision,Question answering (information retrieval),Task analysis,video understanding,Visual reasoning,Visualization},
 pages = {6048--6061},
 title = {{VideoABC}: A Real-World Video Dataset for Abductive Visual Reasoning},
 url = {https://ieeexplore.ieee.org/abstract/document/9893026},
 volume = {31},
 year = {2022}
}