@inproceedings{bb236100,
  author    = {Chaudhry, R. and Shekhar, S. and Gupta, U. and Maneriker, P. and Bansal, P. and Joshi, A.},
  title     = {LEAF-QA: Locate, Encode \& Attend for Figure Question Answering},
  booktitle = WACV20,
  year      = {2020},
  pages     = {3501-3510},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231065},
}
@inproceedings{bb236101,
  author    = {Liang, Y.Z. and Bai, Y.L. and Zhang, W. and Qian, X.M. and Zhu, L. and Mei, T.},
  title     = {VrR-VG: Refocusing Visually-Relevant Relationships},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {10402-10411},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231066},
}
@inproceedings{bb236102,
  author    = {Bhattacharya, N. and Li, Q. and Gurari, D.},
  title     = {Why Does a Visual Question Have Different Answers?},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4270-4279},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231067},
}
@inproceedings{bb236103,
  author    = {Li, L.J. and Gan, Z. and Cheng, Y. and Liu, J.J.},
  title     = {Relation-Aware Graph Attention Network for Visual Question Answering},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {10312-10321},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231068},
}
@inproceedings{bb236104,
  author    = {Do, T. and Tran, H. and Do, T. and Tjiputra, E. and Tran, Q.},
  title     = {Compact Trilinear Interaction for Visual Question Answering},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {392-401},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231069},
}
@inproceedings{bb236105,
  author    = {Schwartz, I. and Yu, S. and Hazan, T. and Schwing, A.G.},
  title     = {Factor Graph Attention},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {2039-2048},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231070},
}
@inproceedings{bb236106,
  author    = {Kolesnikov, A. and Beyer, L. and Zhai, X.H. and Puigcerver, J. and Yung, J. and Gelly, S. and Houlsby, N.},
  title     = {Big Transfer ({BiT}): General Visual Representation Learning},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {V:491-507},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231071},
}
@inproceedings{bb236107,
  author    = {Kolesnikov, A. and Zhai, X.H. and Beyer, L.},
  title     = {Revisiting Self-Supervised Visual Representation Learning},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {1920-1929},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231072},
}
@inproceedings{bb236108,
  author    = {Xiong, P.X. and Zhan, H.Y. and Wang, X. and Sinha, B. and Wu, Y.},
  title     = {Visual Query Answering by Entity-Attribute Graph Matching and Reasoning},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {8349-8358},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231073},
}
@inproceedings{bb236109,
  author    = {Singh, A. and Natarajan, V. and Shah, M. and Jiang, Y. and Chen, X.L. and Batra, D. and Parikh, D. and Rohrbach, M.},
  title     = {Towards VQA Models That Can Read},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {8309-8318},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231074},
}
@inproceedings{bb236110,
  author    = {Manjunatha, V. and Saini, N. and Davis, L.S.},
  title     = {Explicit Bias Discovery in Visual Question Answering Models},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {9554-9563},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231075},
}
@inproceedings{bb236111,
  author    = {Shrestha, R. and Kafle, K. and Kanan, C.},
  title     = {Answer Them All! Toward Universal Visual Question Answering Models},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {10464-10473},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231076},
}
@inproceedings{bb236112,
  author    = {Noh, H. and Kim, T. and Mun, J. and Han, B.H.},
  title     = {Transfer Learning via Unsupervised Task Discovery for Visual Question Answering},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {8377-8386},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231077},
}
@inproceedings{bb236113,
  author    = {Wijmans, E. and Datta, S. and Maksymets, O. and Das, A. and Gkioxari, G. and Lee, S. and Essa, I. and Parikh, D. and Batra, D.},
  title     = {Embodied Question Answering in Photorealistic Environments With Point Cloud Perception},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6652-6661},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231078},
}
@inproceedings{bb236114,
  author    = {Shah, M. and Chen, X.L. and Rohrbach, M. and Parikh, D.},
  title     = {Cycle-Consistency for Robust Visual Question Answering},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6642-6651},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231079},
}
@inproceedings{bb236115,
  author    = {Li, H. and Wang, P. and Shen, C.H. and van den Hengel, A.},
  title     = {Visual Question Answering as Reading Comprehension},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6312-6321},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231080},
}
@inproceedings{bb236116,
  author    = {Yu, L.C. and Chen, X.L. and Gkioxari, G. and Bansal, M. and Berg, T.L. and Batra, D.},
  title     = {Multi-Target Embodied Question Answering},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6302-6311},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231081},
}
@inproceedings{bb236117,
  author    = {Yu, Z. and Yu, J. and Cui, Y.H. and Tao, D.C. and Tian, Q.},
  title     = {Deep Modular Co-Attention Networks for Visual Question Answering},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6274-6283},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231082},
}
@inproceedings{bb236118,
  author    = {Abbasnejad, E. and Wu, Q. and Shi, Q.F. and van den Hengel, A.},
  title     = {What's to Know? Uncertainty as a Guide to Asking Goal-Oriented Questions},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {4150-4159},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231083},
}
@inproceedings{bb236119,
  author    = {Schwenk, D. and Khandelwal, A. and Clark, C. and Marino, K. and Mottaghi, R.},
  title     = {A-OKVQA: A Benchmark for Visual Question Answering Using World Knowledge},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {VIII:146-162},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231084},
}
@inproceedings{bb236120,
  author    = {Marino, K. and Rastegari, M. and Farhadi, A. and Mottaghi, R.},
  title     = {OK-VQA: A Visual Question Answering Benchmark Requiring External Knowledge},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {3190-3199},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231085},
}
@inproceedings{bb236121,
  author    = {Krishna, R. and Bernstein, M. and Fei-Fei, L.},
  title     = {Information Maximizing Visual Question Generation},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {2008-2018},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231086},
}
@inproceedings{bb236122,
  author    = {Haurilet, M. and Roitberg, A. and Stiefelhagen, R.},
  title     = {It's Not About the Journey; It's About the Destination: Following Soft Paths Under Question-Guidance for Visual Reasoning},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {1930-1939},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231087},
}
@inproceedings{bb236123,
  author    = {Qiu, Y. and Satoh, Y. and Suzuki, R. and Kataoka, H.},
  title     = {Incorporating 3D Information Into Visual Question Answering},
  booktitle = {3DV19},
  year      = {2019},
  pages     = {756-765},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231088},
}
@inproceedings{bb236124,
  author    = {Liu, F. and Liu, J. and Fang, Z. and Lu, H.},
  title     = {Language and Visual Relations Encoding for Visual Question Answering},
  booktitle = ICIP19,
  year      = {2019},
  pages     = {3307-3311},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231089},
}
@inproceedings{bb236125,
  author    = {Fang, Z.W. and Liu, J. and Tang, Q. and Li, Y. and Lu, H.Q.},
  title     = {Answer Distillation for Visual Question Answering},
  booktitle = ACCV18,
  year      = {2018},
  pages     = {I:72-87},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231090},
}
@inproceedings{bb236126,
  author    = {Kuhnle, A. and Xie, H.Y. and Copestake, A.},
  title     = {How Clever Is the FiLM Model, and How Clever Can it Be?},
  booktitle = VL18,
  year      = {2018},
  pages     = {IV:162-172},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231091},
}
@inproceedings{bb236127,
  author    = {Li, W. and Yuan, Z.H. and Fang, X.Z. and Wang, C.},
  title     = {Knowing Where to Look? Analysis on Attention of Visual Question Answering System},
  booktitle = VL18,
  year      = {2018},
  pages     = {IV:145-152},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231092},
}
@inproceedings{bb236128,
  author    = {Wagner, M. and Basevi, H. and Shetty, R. and Li, W.B. and Malinowski, M. and Fritz, M. and Leonardis, A.},
  title     = {Answering Visual What-If Questions: From Actions to Predicted Scene Descriptions},
  booktitle = VLEASE18,
  year      = {2018},
  pages     = {I:521-537},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231093},
}
@inproceedings{bb236129,
  author    = {Duke, B. and Taylor, G.W.},
  title     = {Generalized Hadamard-Product Fusion Operators for Visual Question Answering},
  booktitle = CRV18,
  year      = {2018},
  pages     = {39-46},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231094},
}
@inproceedings{bb236130,
  author    = {Das, A. and Datta, S. and Gkioxari, G. and Lee, S. and Parikh, D. and Batra, D.},
  title     = {Embodied Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {1-10},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231095},
}
@inproceedings{bb236131,
  author    = {Misra, I. and Girshick, R. and Fergus, R. and Hebert, M. and Gupta, A. and van der Maaten, L.},
  title     = {Learning by Asking Questions},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {11-20},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231096},
}
@inproceedings{bb236132,
  author    = {Gurari, D. and Li, Q. and Stangl, A.J. and Guo, A. and Lin, C. and Grauman, K. and Luo, J. and Bigham, J.P.},
  title     = {VizWiz Grand Challenge: Answering Visual Questions from Blind People},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {3608-3617},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231097},
}
@inproceedings{bb236133,
  author    = {Li, J. and Su, H. and Zhu, J. and Wang, S. and Zhang, B.},
  title     = {Textbook Question Answering Under Instructor Guidance with Memory Networks},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {3655-3663},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231098},
}
@inproceedings{bb236134,
  author    = {Gordon, D. and Kembhavi, A. and Rastegari, M. and Redmon, J. and Fox, D. and Farhadi, A.},
  title     = {IQA: Visual Question Answering in Interactive Environments},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {4089-4098},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231099},
}
@inproceedings{bb236135,
  author    = {Agrawal, A. and Batra, D. and Parikh, D. and Kembhavi, A.},
  title     = {Don't Just Assume; Look and Answer: Overcoming Priors for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {4971-4980},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231100},
}
@inproceedings{bb236136,
  author    = {Hu, H. and Chao, W. and Sha, F.},
  title     = {Learning Answer Embeddings for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {5428-5436},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231101},
}
@inproceedings{bb236137,
  author    = {Kafle, K. and Price, B. and Cohen, S. and Kanan, C.},
  title     = {DVQA: Understanding Data Visualizations via Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {5648-5656},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231102},
}
@inproceedings{bb236138,
  author    = {Chao, W. and Hu, H. and Sha, F.},
  title     = {Cross-Dataset Adaptation for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {5716-5725},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231103},
}
@inproceedings{bb236139,
  author    = {Anderson, P. and He, X. and Buehler, C. and Teney, D. and Johnson, M. and Gould, S. and Zhang, L.},
  title     = {Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6077-6086},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231104},
}
@inproceedings{bb236140,
  author    = {Nguyen, D. and Okatani, T.},
  title     = {Improved Fusion of Visual and Language Representations by Dense Symmetric Co-attention for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6087-6096},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231105},
}
@inproceedings{bb236141,
  author    = {Patro, B. and Namboodiri, V.P.},
  title     = {Differential Attention for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7680-7688},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231106},
}
@inproceedings{bb236142,
  author    = {Su, Z. and Zhu, C. and Dong, Y.P. and Cai, D.Q. and Chen, Y.R. and Li, J.G.},
  title     = {Learning Visual Knowledge Memory Networks for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7736-7745},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231107},
}
@comment{NOTE(review): the pages value below (2135-213509) looks like an export artifact rather than a real page range; confirm against the workshop proceedings before citing. Value left unchanged.}
@inproceedings{bb236143,
  author    = {Das, A. and Datta, S. and Gkioxari, G. and Lee, S. and Parikh, D. and Batra, D.},
  title     = {Embodied Question Answering},
  booktitle = DeepLearnRV18,
  year      = {2018},
  pages     = {2135-213509},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231108},
}
@inproceedings{bb236144,
  author    = {Cheng, W. and Huang, Y. and Wang, L.},
  title     = {Towards Unconstrained Pointing Problem of Visual Question Answering: A Retrieval-based Method},
  booktitle = ICPR18,
  year      = {2018},
  pages     = {3303-3308},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231109},
}
@inproceedings{bb236145,
  author    = {Zhou, B. and Sun, Y.Y. and Bau, D. and Torralba, A.B.},
  title     = {Interpretable Basis Decomposition for Visual Explanation},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VIII: 122-138},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231110},
}
@inproceedings{bb236146,
  author    = {Shi, Y. and Furlanello, T. and Zha, S. and Anandkumar, A.},
  title     = {Question Type Guided Attention in Visual Question Answering},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {II: 158-175},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231111},
}
@inproceedings{bb236147,
  author    = {Narasimhan, M. and Schwing, A.G.},
  title     = {Straight to the Facts: Learning Knowledge Base Retrieval for Factual Visual Question Answering},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VIII: 460-477},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231112},
}
@inproceedings{bb236148,
  author    = {Malinowski, M. and Doersch, C. and Santoro, A. and Battaglia, P.},
  title     = {Learning Visual Question Answering by Bootstrapping Hard Attention},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VI: 3-20},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231113},
}
@inproceedings{bb236149,
  author    = {Li, Q. and Tao, Q.Y. and Joty, S. and Cai, J.F. and Luo, J.B.},
  title     = {VQA-E: Explaining, Elaborating, and Enhancing Your Answers for Visual Questions},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VII: 570-586},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231114},
}
@inproceedings{bb236150,
  author    = {Yu, D. and Gao, X. and Xiong, H.},
  title     = {Structured Semantic Representation for Visual Question Answering},
  booktitle = ICIP18,
  year      = {2018},
  pages     = {2286-2290},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231115},
}
@inproceedings{bb236151,
  author    = {Huang, L. and Kulkarni, K. and Jha, A. and Lohit, S. and Jayasuriya, S. and Turaga, P.K.},
  title     = {CS-VQA: Visual Question Answering with Compressively Sensed Images},
  booktitle = ICIP18,
  year      = {2018},
  pages     = {1283-1287},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231116},
}
@inproceedings{bb236152,
  author    = {Desta, M.T. and Chen, L. and Kornuta, T.},
  title     = {Object-Based Reasoning in VQA},
  booktitle = WACV18,
  year      = {2018},
  pages     = {1814-1823},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231117},
}
@inproceedings{bb236153,
  author    = {Zhao, H. and Fan, Q. and Gutfreund, D. and Fu, Y.},
  title     = {Semantically Guided Visual Question Answering},
  booktitle = WACV18,
  year      = {2018},
  pages     = {1852-1860},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231118},
}
@inproceedings{bb236154,
  author    = {Wang, Z. and Liu, X. and Wang, L. and Qiao, Y. and Xie, X. and Fowlkes, C.C.},
  title     = {Structured Triplet Learning with POS-Tag Guided Attention for Visual Question Answering},
  booktitle = WACV18,
  year      = {2018},
  pages     = {1888-1896},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231119},
}
@inproceedings{bb236155,
  author    = {Chowdhury, I. and Nguyen, K. and Fookes, C. and Sridharan, S.},
  title     = {A cascaded long short-term memory (LSTM) driven generic visual question answering (VQA)},
  booktitle = ICIP17,
  year      = {2017},
  pages     = {1842-1846},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231120},
}
@inproceedings{bb236156,
  author    = {Jain, U. and Zhang, Z.Y. and Schwing, A.},
  title     = {Creativity: Generating Diverse Questions Using Variational Autoencoders},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {5415-5424},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231121},
}
@inproceedings{bb236157,
  author    = {Zhu, Y. and Lim, J.J. and Fei-Fei, L.},
  title     = {Knowledge Acquisition for Visual Question Answering via Iterative Querying},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {6146-6155},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231122},
}
@inproceedings{bb236158,
  author    = {Lin, Y.T. and Pang, Z.Y. and Li, Y. and Wang, D.H.},
  title     = {Simple and effective visual question answering in a single modality},
  booktitle = ICIP16,
  year      = {2016},
  pages     = {2276-2280},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231123},
}
@inproceedings{bb236159,
  author    = {Ganju, S. and Russakovsky, O. and Gupta, A.},
  title     = {What's in a Question: Using Visual Questions as a Form of Supervision},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {6422-6431},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231124},
}
@inproceedings{bb236160,
  author    = {Xu, H.J. and Saenko, K.},
  title     = {Ask, Attend and Answer: Exploring Question-Guided Spatial Attention for Visual Question Answering},
  booktitle = ECCV16,
  year      = {2016},
  pages     = {VII: 451-466},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231125},
}
@inproceedings{bb236161,
  author    = {Jabri, A. and Joulin, A. and van der Maaten, L.},
  title     = {Revisiting Visual Question Answering Baselines},
  booktitle = ECCV16,
  year      = {2016},
  pages     = {VIII: 727-739},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231126},
}
@inproceedings{bb236162,
  author    = {Yang, Z.C. and He, X.D. and Gao, J.F. and Deng, L. and Smola, A.},
  title     = {Stacked Attention Networks for Image Question Answering},
  booktitle = CVPR16,
  year      = {2016},
  pages     = {21-29},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231127},
}
@inproceedings{bb236163,
  author    = {Sadeghi, F. and Divvala, S.K. and Farhadi, A.},
  title     = {VisKE: Visual knowledge extraction and question answering by visual verification of relation phrases},
  booktitle = CVPR15,
  year      = {2015},
  pages     = {1456-1464},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231128},
}
@inproceedings{bb236164,
  author    = {Liu, Y. and Liu, J. and Wang, D. and Cheng, J.},
  title     = {A robust multivariate reranking algorithm for Question Answering enrichment},
  booktitle = ICIP12,
  year      = {2012},
  pages     = {1917-1920},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231129},
}
@inproceedings{bb236165,
  author    = {Varekamp, C. and van de Walle, P. and de Putter, M.},
  title     = {Question interface for 3D picture creation on an autostereoscopic digital picture frame},
  booktitle = {3DTV09},
  year      = {2009},
  pages     = {1-4},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT231130},
}
@article{bb236166,
  author    = {Osman, A. and Samek, W.},
  title     = {DRAU: Dual Recurrent Attention Units for Visual Question Answering},
  journal   = CVIU,
  volume    = {185},
  year      = {2019},
  pages     = {24-30},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231131},
}
@article{bb236167,
  author    = {Li, W. and Sun, J.H. and Liu, G. and Zhao, L.L. and Fang, X.Z.},
  title     = {Visual question answering with attention transfer and a cross-modal gating mechanism},
  journal   = PRL,
  volume    = {133},
  year      = {2020},
  pages     = {334-340},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231132},
}
@article{bb236168,
  author    = {Yu, J. and Zhu, Z.H. and Wang, Y.J. and Zhang, W.F. and Hu, Y. and Tan, J.L.},
  title     = {Cross-modal knowledge reasoning for knowledge-based visual question answering},
  journal   = PR,
  volume    = {108},
  year      = {2020},
  pages     = {107563},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231133},
}
@inproceedings{bb236169,
  author    = {Yang, Z.Q. and Qin, Z.C. and Yu, J. and Wan, T.},
  title     = {Prior Visual Relationship Reasoning For Visual Question Answering},
  booktitle = ICIP20,
  year      = {2020},
  pages     = {1411-1415},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231134},
}
@article{bb236170,
  author    = {Yu, J. and Zhang, W.F. and Lu, Y.H. and Qin, Z.C. and Hu, Y. and Tan, J.L. and Wu, Q.},
  title     = {Reasoning on the Relation: Enhancing Visual Representation for Visual Question Answering and Cross-Modal Retrieval},
  journal   = MultMed,
  volume    = {22},
  number    = {12},
  month     = {December},
  year      = {2020},
  pages     = {3196-3209},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231135},
}
@article{bb236171,
  author    = {Wu, Y.R. and Ma, Y.T. and Wan, S.H.},
  title     = {Multi-scale relation reasoning for multi-modal Visual Question Answering},
  journal   = SP:IC,
  volume    = {96},
  year      = {2021},
  pages     = {116319},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231136},
}
@inproceedings{bb236172,
  author    = {Ma, Y.T. and Lu, T. and Wu, Y.R.},
  title     = {Multi-scale Relational Reasoning with Regional Attention for Visual Question Answering},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {5642-5649},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231137},
}
@article{bb236173,
  author    = {Hu, J. and Qian, S.S. and Fang, Q. and Xu, C.S.},
  title     = {Heterogeneous Community Question Answering via Social-Aware Multi-Modal Co-Attention Convolutional Matching},
  journal   = MultMed,
  volume    = {23},
  year      = {2021},
  pages     = {2321-2334},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231138},
}
@article{bb236174,
  author    = {Farazi, M. and Khan, S. and Barnes, N.M.},
  title     = {Accuracy vs. complexity: A trade-off in visual question answering models},
  journal   = PR,
  volume    = {120},
  year      = {2021},
  pages     = {108106},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231139},
}
@article{bb236175,
  author    = {Liu, F. and Liu, J. and Fang, Z.W. and Hong, R.C. and Lu, H.Q.},
  title     = {Visual Question Answering With Dense Inter- and Intra-Modality Interactions},
  journal   = MultMed,
  volume    = {23},
  year      = {2021},
  pages     = {3518-3529},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231140},
}
@article{bb236176,
  author    = {Wu, J.J. and Du, J. and Wang, F. and Yang, C. and Jiang, X.Z. and Hu, J. and Yin, B. and Zhang, J.S. and Dai, L.R.},
  title     = {A multimodal attention fusion network with a dynamic vocabulary for TextVQA},
  journal   = PR,
  volume    = {122},
  year      = {2022},
  pages     = {108214},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231141},
}
@article{bb236177,
  author    = {Peng, L. and Yang, Y. and Wang, Z. and Huang, Z. and Shen, H.T.},
  title     = {MRA-Net: Improving VQA Via Multi-Modal Relation Attention Network},
  journal   = PAMI,
  volume    = {44},
  number    = {1},
  month     = {January},
  year      = {2022},
  pages     = {318-329},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231142},
}
@article{bb236178,
  author    = {Shuang, K. and Guo, J. and Wang, Z.H.},
  title     = {Comprehensive-perception dynamic reasoning for visual question answering},
  journal   = PR,
  volume    = {131},
  year      = {2022},
  pages     = {108878},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231143},
}
@article{bb236179,
  author    = {Xie, J.Y. and Fang, W.H. and Cai, Y. and Huang, Q.B. and Li, Q.},
  title     = {Knowledge-Based Visual Question Generation},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {11},
  month     = {November},
  year      = {2022},
  pages     = {7547-7558},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231144},
}
@article{bb236180,
  author    = {Gao, C.Y. and Zhu, Q. and Wang, P. and Li, H. and Liu, Y.L. and van den Hengel, A.J. and Wu, Q.},
  title     = {Structured Multimodal Attentions for TextVQA},
  journal   = PAMI,
  volume    = {44},
  number    = {12},
  month     = {December},
  year      = {2022},
  pages     = {9603-9614},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231145},
}
@article{bb236181,
  author    = {Xu, F.Z. and Lin, Q. and Liu, J. and Zhang, L.L. and Zhao, T.Z. and Chai, Q. and Pan, Y. and Huang, Y. and Wang, Q.Y.},
  title     = {MoCA: Incorporating domain pretraining and cross attention for textbook question answering},
  journal   = PR,
  volume    = {140},
  year      = {2023},
  pages     = {109588},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231146},
}
@article{bb236182,
  author    = {Mohamud, S.A.M. and Jalali, A. and Lee, M.H.},
  title     = {Encoder-decoder cycle for visual question answering based on perception-action cycle},
  journal   = PR,
  volume    = {144},
  year      = {2023},
  pages     = {109848},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231147},
}
@article{bb236183,
  author    = {Tito, R. and Karatzas, D. and Valveny, E.},
  title     = {Hierarchical multimodal transformers for Multipage DocVQA},
  journal   = PR,
  volume    = {144},
  year      = {2023},
  pages     = {109834},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231148},
}
@article{bb236184,
  author    = {Biswas, K. and Shivakumara, P. and Pal, U. and Liu, C.L. and Lu, Y.},
  title     = {VQAPT: A New visual question answering model for personality traits in social media images},
  journal   = PRL,
  volume    = {175},
  year      = {2023},
  pages     = {66-73},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231149},
}
@article{bb236185,
  author    = {Cho, J.W. and Argaw, D.M. and Oh, Y. and Kim, D.J. and Kweon, I.S.},
  title     = {Empirical study on using adapters for debiased Visual Question Answering},
  journal   = CVIU,
  volume    = {237},
  year      = {2023},
  pages     = {103842},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231150},
}
@inproceedings{bb236186,
  author    = {Cho, J.W. and Kim, D.J. and Choi, J. and Jung, Y. and Kweon, I.S.},
  title     = {Dealing with Missing Modalities in the Visual Question Answer-Difference Prediction Task through Knowledge Distillation},
  booktitle = MULA21,
  year      = {2021},
  pages     = {1592-1601},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231151},
}
@inproceedings{bb236187,
  author    = {Cho, J.W. and Kim, D.J. and Ryu, H. and Kweon, I.S.},
  title     = {Generative Bias for Robust Visual Question Answering},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {11681-11690},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231152},
}
@article{bb236188,
  author    = {Mashrur, A. and Luo, W. and Zaidi, N.A. and Robles-Kelly, A.},
  title     = {Robust visual question answering via semantic cross modal augmentation},
  journal   = CVIU,
  volume    = {238},
  year      = {2024},
  pages     = {103862},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231153},
}
@article{bb236189,
  author    = {Yao, H.B. and Wang, L.P. and Cai, C.T. and Sun, Y.X. and Zhang, Z. and Luo, Y.K.},
  title     = {Multi-modal spatial relational attention networks for visual question answering},
  journal   = IVC,
  volume    = {140},
  year      = {2023},
  pages     = {104840},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231154},
}
@article{bb236190,
  author    = {Zheng, W.B. and Yan, L. and Wang, F.Y.},
  title     = {So Many Heads, So Many Wits: Multimodal Graph Reasoning for Text-Based Visual Question Answering},
  journal   = SMCS,
  volume    = {54},
  number    = {2},
  month     = {February},
  year      = {2024},
  pages     = {854-865},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231155},
}
@article{bb236191,
  author    = {Bi, Y.D. and Jiang, H. and Hu, Y.L. and Sun, Y.F. and Yin, B.C.},
  title     = {See and Learn More: Dense Caption-Aware Representation for Visual Question Answering},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {2},
  month     = {February},
  year      = {2024},
  pages     = {1135-1146},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231156},
}
@article{bb236192,
  author    = {Jiang, J.J. and Liu, Z.Y. and Zheng, N.N.},
  title     = {Correlation Information Bottleneck: Towards Adapting Pretrained Multimodal Models for Robust Visual Question Answering},
  journal   = IJCV,
  volume    = {132},
  number    = {1},
  month     = {January},
  year      = {2024},
  pages     = {185-207},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231157},
}
@article{bb236193,
  author    = {Zhang, S. and Chen, Y. and Sun, Y. and Wang, F. and Shi, H.B. and Wang, H.R.},
  title     = {LOIS: Looking Out of Instance Semantics for Visual Question Answering},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {6202-6214},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231158},
}
@article{bb236194,
  author    = {Xie, J.Y. and Cai, Y. and Chen, J.L. and Xu, R.H. and Wang, J.X. and Li, Q.},
  title     = {Knowledge-Augmented Visual Question Answering With Natural Language Explanation},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {2652-2664},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231159},
}
@article{bb236195,
  author    = {Wang, J.J. and Ma, A.L. and Chen, Z.H. and Zheng, Z. and Wan, Y.T. and Zhang, L.P. and Zhong, Y.F.},
  title     = {EarthVQANet: Multi-task visual question answering for remote sensing image understanding},
  journal   = PandRS,
  volume    = {212},
  year      = {2024},
  pages     = {422-439},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231160},
}
@article{bb236196,
  author    = {Qian, S. and Liu, B.Q. and Sun, C.J. and Xu, Z. and Ma, L. and Wang, B.},
  title     = {CroMIC-QA: The Cross-Modal Information Complementation Based Question Answering},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {8348-8359},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231161},
}
@article{bb236197,
  author    = {Uehara, K. and Harada, T.},
  title     = {Learning by Asking Questions for Knowledge-Based Novel Object Recognition},
  journal   = IJCV,
  volume    = {132},
  number    = {6},
  month     = {June},
  year      = {2024},
  pages     = {2290-2309},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231162},
}
@inproceedings{bb236198,
  author    = {Uehara, K. and Harada, T.},
  title     = {K-VQG: Knowledge-aware Visual Question Generation for Common-sense Acquisition},
  booktitle = WACV23,
  year      = {2023},
  pages     = {4390-4398},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231163},
}
@inproceedings{bb236199,
  author    = {Uehara, K. and Duan, N. and Harada, T.},
  title     = {Learning to Ask Informative Sub-Questions for Visual Question Answering},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4680-4689},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT231164},
}
Last update: Nov 26, 2025 at 20:24:09