% Das et al. (2018), "Embodied Question Answering". Venue is the bare string macro CVPR18 (not defined in this excerpt).
@inproceedings{bb233400,
        AUTHOR = "Das, A. and Datta, S. and Gkioxari, G. and Lee, S. and Parikh, D. and Batra, D.",
        TITLE = "Embodied Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "1--10",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228376"}

% Misra et al. (2018), "Learning by Asking Questions". Venue is the bare string macro CVPR18 (not defined in this excerpt).
@inproceedings{bb233401,
        AUTHOR = "Misra, I. and Girshick, R. and Fergus, R. and Hebert, M. and Gupta, A. and van der Maaten, L.",
        TITLE = "Learning by Asking Questions",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "11--20",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228377"}

% Gurari et al. (2018), VizWiz challenge paper. Venue is the bare string macro CVPR18 (not defined in this excerpt).
% Initials are space-separated so each one is its own name token; the dataset name is braced to survive sentence-casing.
@inproceedings{bb233402,
        AUTHOR = "Gurari, D. and Li, Q. and Stangl, A. J. and Guo, A. and Lin, C. and Grauman, K. and Luo, J. and Bigham, J. P.",
        TITLE = "{VizWiz} Grand Challenge: Answering Visual Questions from Blind People",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "3608--3617",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228378"}

% Li et al. (2018), textbook question answering with memory networks. Venue is the bare string macro CVPR18 (not defined in this excerpt).
@inproceedings{bb233403,
        AUTHOR = "Li, J. and Su, H. and Zhu, J. and Wang, S. and Zhang, B.",
        TITLE = "Textbook Question Answering Under Instructor Guidance with Memory
Networks",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "3655--3663",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228379"}

% Gordon et al. (2018), IQA paper. Venue is the bare string macro CVPR18 (not defined in this excerpt).
% The acronym is braced so sentence-casing styles do not lowercase it.
@inproceedings{bb233404,
        AUTHOR = "Gordon, D. and Kembhavi, A. and Rastegari, M. and Redmon, J. and Fox, D. and Farhadi, A.",
        TITLE = "{IQA}: Visual Question Answering in Interactive Environments",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "4089--4098",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228380"}

% Agrawal et al. (2018), overcoming priors in visual question answering. Venue is the bare string macro CVPR18 (not defined in this excerpt).
@inproceedings{bb233405,
        AUTHOR = "Agrawal, A. and Batra, D. and Parikh, D. and Kembhavi, A.",
        TITLE = "Don't Just Assume; Look and Answer: Overcoming Priors for Visual
Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "4971--4980",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228381"}

% Answer-embedding paper (2018). Venue is the bare string macro CVPR18 (not defined in this excerpt).
% NOTE(review): the author order here may not match the published byline -- verify against the proceedings.
@inproceedings{bb233406,
        AUTHOR = "Sha, F. and Chao, W. and Hu, H.",
        TITLE = "Learning Answer Embeddings for Visual Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "5428--5436",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228382"}

% Kafle et al. (2018), DVQA paper. Venue is the bare string macro CVPR18 (not defined in this excerpt).
% The acronym is braced so sentence-casing styles do not lowercase it.
@inproceedings{bb233407,
        AUTHOR = "Kafle, K. and Price, B. and Cohen, S. and Kanan, C.",
        TITLE = "{DVQA}: Understanding Data Visualizations via Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "5648--5656",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228383"}

% Cross-dataset adaptation paper (2018). Venue is the bare string macro CVPR18 (not defined in this excerpt).
% NOTE(review): the author order here may not match the published byline -- verify against the proceedings.
@inproceedings{bb233408,
        AUTHOR = "Sha, F. and Hu, H. and Chao, W.",
        TITLE = "Cross-Dataset Adaptation for Visual Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "5716--5725",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228384"}

% Anderson et al. (2018), bottom-up and top-down attention. Venue is the bare string macro CVPR18 (not defined in this excerpt).
@inproceedings{bb233409,
        AUTHOR = "Anderson, P. and He, X. and Buehler, C. and Teney, D. and Johnson, M. and Gould, S. and Zhang, L.",
        TITLE = "Bottom-Up and Top-Down Attention for Image Captioning and Visual
Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6077--6086",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228385"}

% Nguyen and Okatani (2018), dense symmetric co-attention. Venue is the bare string macro CVPR18 (not defined in this excerpt).
@inproceedings{bb233410,
        AUTHOR = "Nguyen, D. and Okatani, T.",
        TITLE = "Improved Fusion of Visual and Language Representations by Dense
Symmetric Co-attention for Visual Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6087--6096",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228386"}

% Patro and Namboodiri (2018), differential attention. Venue is the bare string macro CVPR18 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233411,
        AUTHOR = "Patro, B. and Namboodiri, V. P.",
        TITLE = "Differential Attention for Visual Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "7680--7688",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228387"}

% Su et al. (2018), visual knowledge memory networks. Venue is the bare string macro CVPR18 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233412,
        AUTHOR = "Su, Z. and Zhu, C. and Dong, Y. P. and Cai, D. Q. and Chen, Y. R. and Li, J. G.",
        TITLE = "Learning Visual Knowledge Memory Networks for Visual Question
Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "7736--7745",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228388"}

% Das et al. (2018), "Embodied Question Answering", listed under the DeepLearnRV18 string macro (not defined in this excerpt).
% Same authors and title as entry bb233400, but a different BOOKTITLE.
% NOTE(review): the end page 213509 is kept as given by the source but looks implausible -- verify.
@inproceedings{bb233413,
        AUTHOR = "Das, A. and Datta, S. and Gkioxari, G. and Lee, S. and Parikh, D. and Batra, D.",
        TITLE = "Embodied Question Answering",
        BOOKTITLE = DeepLearnRV18,
        YEAR = "2018",
        PAGES = "2135--213509",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228389"}

% Cheng et al. (2018), retrieval-based pointing for visual question answering. Venue is the bare string macro ICPR18 (not defined in this excerpt).
@inproceedings{bb233414,
        AUTHOR = "Cheng, W. and Huang, Y. and Wang, L.",
        TITLE = "Towards Unconstrained Pointing Problem of Visual Question Answering:
A Retrieval-based Method",
        BOOKTITLE = ICPR18,
        YEAR = "2018",
        PAGES = "3303--3308",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228390"}

% Zhou et al. (2018), interpretable basis decomposition. Venue is the bare string macro ECCV18 (not defined in this excerpt).
% PAGES keeps the source's volume prefix; NOTE(review): presumably the proceedings part number -- confirm.
@inproceedings{bb233415,
        AUTHOR = "Zhou, B. and Sun, Y. Y. and Bau, D. and Torralba, A. B.",
        TITLE = "Interpretable Basis Decomposition for Visual Explanation",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "VIII: 122--138",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228391"}

% Shi et al. (2018), question-type guided attention. Venue is the bare string macro ECCV18 (not defined in this excerpt).
% PAGES keeps the source's volume prefix; NOTE(review): presumably the proceedings part number -- confirm.
@inproceedings{bb233416,
        AUTHOR = "Shi, Y. and Furlanello, T. and Zha, S. and Anandkumar, A.",
        TITLE = "Question Type Guided Attention in Visual Question Answering",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "II: 158--175",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228392"}

% Narasimhan and Schwing (2018), knowledge-base retrieval for factual visual question answering. Venue is the bare string macro ECCV18 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233417,
        AUTHOR = "Narasimhan, M. and Schwing, A. G.",
        TITLE = "Straight to the Facts: Learning Knowledge Base Retrieval for Factual
Visual Question Answering",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "VIII: 460--477",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228393"}

% Malinowski et al. (2018), bootstrapping hard attention. Venue is the bare string macro ECCV18 (not defined in this excerpt).
@inproceedings{bb233418,
        AUTHOR = "Malinowski, M. and Doersch, C. and Santoro, A. and Battaglia, P.",
        TITLE = "Learning Visual Question Answering by Bootstrapping Hard Attention",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "VI: 3--20",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228394"}

% Li et al. (2018), VQA-E paper. Venue is the bare string macro ECCV18 (not defined in this excerpt).
% Initials are space-separated; the model name is braced to survive sentence-casing.
@inproceedings{bb233419,
        AUTHOR = "Li, Q. and Tao, Q. Y. and Joty, S. and Cai, J. F. and Luo, J. B.",
        TITLE = "{VQA-E}: Explaining, Elaborating, and Enhancing Your Answers for Visual
Questions",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "VII: 570--586",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228395"}

% Yu et al. (2018), structured semantic representation. Venue is the bare string macro ICIP18 (not defined in this excerpt).
@inproceedings{bb233420,
        AUTHOR = "Yu, D. and Gao, X. and Xiong, H.",
        TITLE = "Structured Semantic Representation for Visual Question Answering",
        BOOKTITLE = ICIP18,
        YEAR = "2018",
        PAGES = "2286--2290",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228396"}

% Huang et al. (2018), CS-VQA paper. Venue is the bare string macro ICIP18 (not defined in this excerpt).
% Initials are space-separated; the acronym is braced to survive sentence-casing.
@inproceedings{bb233421,
        AUTHOR = "Huang, L. and Kulkarni, K. and Jha, A. and Lohit, S. and Jayasuriya, S. and Turaga, P. K.",
        TITLE = "{CS-VQA}: Visual Question Answering with Compressively Sensed Images",
        BOOKTITLE = ICIP18,
        YEAR = "2018",
        PAGES = "1283--1287",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228397"}

% Desta et al. (2018), object-based reasoning. Venue is the bare string macro WACV18 (not defined in this excerpt).
% Initials are space-separated; the acronym is braced to survive sentence-casing.
@inproceedings{bb233422,
        AUTHOR = "Desta, M. T. and Chen, L. and Kornuta, T.",
        TITLE = "Object-Based Reasoning in {VQA}",
        BOOKTITLE = WACV18,
        YEAR = "2018",
        PAGES = "1814--1823",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228398"}

% Zhao et al. (2018), semantically guided visual question answering. Venue is the bare string macro WACV18 (not defined in this excerpt).
@inproceedings{bb233423,
        AUTHOR = "Zhao, H. and Fan, Q. and Gutfreund, D. and Fu, Y.",
        TITLE = "Semantically Guided Visual Question Answering",
        BOOKTITLE = WACV18,
        YEAR = "2018",
        PAGES = "1852--1860",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228399"}

% Wang et al. (2018), structured triplet learning with POS-tag guided attention. Venue is the bare string macro WACV18 (not defined in this excerpt).
% Initials are space-separated; the acronym POS is braced to survive sentence-casing.
@inproceedings{bb233424,
        AUTHOR = "Wang, Z. and Liu, X. and Wang, L. and Qiao, Y. and Xie, X. and Fowlkes, C. C.",
        TITLE = "Structured Triplet Learning with {POS}-Tag Guided Attention for Visual
Question Answering",
        BOOKTITLE = WACV18,
        YEAR = "2018",
        PAGES = "1888--1896",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228400"}

% Chowdhury et al. (2017), cascaded LSTM for visual question answering. Venue is the bare string macro ICIP17 (not defined in this excerpt).
% Both parenthesised acronyms are braced to survive sentence-casing.
@inproceedings{bb233425,
        AUTHOR = "Chowdhury, I. and Nguyen, K. and Fookes, C. and Sridharan, S.",
        TITLE = "A cascaded long short-term memory ({LSTM}) driven generic visual
question answering ({VQA})",
        BOOKTITLE = ICIP17,
        YEAR = "2017",
        PAGES = "1842--1846",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228401"}

% Jain et al. (2017), diverse question generation with variational autoencoders. Venue is the bare string macro CVPR17 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233426,
        AUTHOR = "Jain, U. and Zhang, Z. Y. and Schwing, A.",
        TITLE = "Creativity: Generating Diverse Questions Using Variational
Autoencoders",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "5415--5424",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228402"}

% Zhu et al. (2017), knowledge acquisition via iterative querying. Venue is the bare string macro CVPR17 (not defined in this excerpt).
% The last author's surname is written with its hyphen (the source had a space); initials are space-separated.
@inproceedings{bb233427,
        AUTHOR = "Zhu, Y. and Lim, J. J. and Fei-Fei, L.",
        TITLE = "Knowledge Acquisition for Visual Question Answering via Iterative
Querying",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6146--6155",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228403"}

% Lin et al. (2016), single-modality visual question answering. Venue is the bare string macro ICIP16 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233428,
        AUTHOR = "Lin, Y. T. and Pang, Z. Y. and Li, Y. and Wang, D. H.",
        TITLE = "Simple and effective visual question answering in a single modality",
        BOOKTITLE = ICIP16,
        YEAR = "2016",
        PAGES = "2276--2280",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228404"}

% Ganju et al. (2017), visual questions as a form of supervision. Venue is the bare string macro CVPR17 (not defined in this excerpt).
@inproceedings{bb233429,
        AUTHOR = "Ganju, S. and Russakovsky, O. and Gupta, A.",
        TITLE = "What's in a Question:
Using Visual Questions as a Form of Supervision",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6422--6431",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228405"}

% Xu and Saenko (2016), question-guided spatial attention. Venue is the bare string macro ECCV16 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233430,
        AUTHOR = "Xu, H. J. and Saenko, K.",
        TITLE = "Ask, Attend and Answer:
Exploring Question-Guided Spatial Attention for Visual Question Answering",
        BOOKTITLE = ECCV16,
        YEAR = "2016",
        PAGES = "VII: 451--466",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228406"}

% Jabri et al. (2016), visual question answering baselines. Venue is the bare string macro ECCV16 (not defined in this excerpt).
@inproceedings{bb233431,
        AUTHOR = "Jabri, A. and Joulin, A. and van der Maaten, L.",
        TITLE = "Revisiting Visual Question Answering Baselines",
        BOOKTITLE = ECCV16,
        YEAR = "2016",
        PAGES = "VIII: 727--739",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228407"}

% Yang et al. (2016), stacked attention networks. Venue is the bare string macro CVPR16 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233432,
        AUTHOR = "Yang, Z. C. and He, X. D. and Gao, J. F. and Deng, L. and Smola, A.",
        TITLE = "Stacked Attention Networks for Image Question Answering",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "21--29",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228408"}

% Sadeghi et al. (2015), VisKE paper. Venue is the bare string macro CVPR15 (not defined in this excerpt).
% Initials are space-separated; the system name is braced to survive sentence-casing.
@inproceedings{bb233433,
        AUTHOR = "Sadeghi, F. and Divvala, S. K. and Farhadi, A.",
        TITLE = "{VisKE}: Visual knowledge extraction and question answering by visual
verification of relation phrases",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "1456--1464",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228409"}

% Liu et al. (2012), multivariate reranking for question answering enrichment. Venue is the bare string macro ICIP12 (not defined in this excerpt).
@inproceedings{bb233434,
        AUTHOR = "Liu, Y. and Liu, J. and Wang, D. and Cheng, J.",
        TITLE = "A robust multivariate reranking algorithm for Question Answering
enrichment",
        BOOKTITLE = ICIP12,
        YEAR = "2012",
        PAGES = "1917--1920",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228410"}

% Varekamp et al. (2009), question interface for 3D picture creation.
% BOOKTITLE is a quoted literal, not a macro reference, so it is printed verbatim as 3DTV09.
% NOTE(review): consider replacing it with the full conference name once confirmed.
@inproceedings{bb233435,
        AUTHOR = "Varekamp, C. and van de Walle, P. and de Putter, M.",
        TITLE = "Question interface for {3D} picture creation on an autostereoscopic
digital picture frame",
        BOOKTITLE = "3DTV09",
        YEAR = "2009",
        PAGES = "1--4",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228411"}

% Osman and Samek (2019), DRAU paper. Journal is the bare string macro CVIU (not defined in this excerpt).
% The acronym is braced so sentence-casing styles do not lowercase it.
@article{bb233436,
        AUTHOR = "Osman, A. and Samek, W.",
        TITLE = "{DRAU}: Dual Recurrent Attention Units for Visual Question Answering",
        JOURNAL = CVIU,
        VOLUME = "185",
        YEAR = "2019",
        PAGES = "24--30",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228412"}

% Li et al. (2020), attention transfer with cross-modal gating. Journal is the bare string macro PRL (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@article{bb233437,
        AUTHOR = "Li, W. and Sun, J. H. and Liu, G. and Zhao, L. L. and Fang, X. Z.",
        TITLE = "Visual question answering with attention transfer and a cross-modal
gating mechanism",
        JOURNAL = PRL,
        VOLUME = "133",
        YEAR = "2020",
        PAGES = "334--340",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228413"}

% Yu et al. (2020), cross-modal knowledge reasoning. Journal is the bare string macro PR (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated so abbreviating styles keep both.
@article{bb233438,
        AUTHOR = "Yu, J. and Zhu, Z. H. and Wang, Y. J. and Zhang, W. F. and Hu, Y. and Tan, J. L.",
        TITLE = "Cross-modal knowledge reasoning for knowledge-based visual question
answering",
        JOURNAL = PR,
        VOLUME = "108",
        YEAR = "2020",
        PAGES = "107563",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228414"}

% Yang et al. (2020), prior visual relationship reasoning. Venue is the bare string macro ICIP20 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233439,
        AUTHOR = "Yang, Z. Q. and Qin, Z. C. and Yu, J. and Wan, T.",
        TITLE = "Prior Visual Relationship Reasoning For Visual Question Answering",
        BOOKTITLE = ICIP20,
        YEAR = "2020",
        PAGES = "1411--1415",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228415"}

% Yu et al. (2020), relation reasoning for visual question answering and cross-modal retrieval. Journal is the bare string macro MultMed (not defined in this excerpt).
% MONTH uses the standard three-letter macro so the style controls how it is printed; initials are space-separated.
@article{bb233440,
        AUTHOR = "Yu, J. and Zhang, W. F. and Lu, Y. H. and Qin, Z. C. and Hu, Y. and Tan, J. L. and Wu, Q.",
        TITLE = "Reasoning on the Relation: Enhancing Visual Representation for Visual
Question Answering and Cross-Modal Retrieval",
        JOURNAL = MultMed,
        VOLUME = "22",
        YEAR = "2020",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "3196--3209",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228416"}

% Wu et al. (2021), multi-scale relation reasoning. Journal is the bare string macro SP:IC (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated so abbreviating styles keep both.
@article{bb233441,
        AUTHOR = "Wu, Y. R. and Ma, Y. T. and Wan, S. H.",
        TITLE = "Multi-scale relation reasoning for multi-modal Visual Question
Answering",
        JOURNAL = SP:IC,
        VOLUME = "96",
        YEAR = "2021",
        PAGES = "116319",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228417"}

% Ma et al. (2021), multi-scale relational reasoning with regional attention. Venue is the bare string macro ICPR21 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233442,
        AUTHOR = "Ma, Y. T. and Lu, T. and Wu, Y. R.",
        TITLE = "Multi-scale Relational Reasoning with Regional Attention for Visual
Question Answering",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "5642--5649",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228418"}

% Hu et al. (2021), heterogeneous community question answering. Journal is the bare string macro MultMed (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@article{bb233443,
        AUTHOR = "Hu, J. and Qian, S. S. and Fang, Q. and Xu, C. S.",
        TITLE = "Heterogeneous Community Question Answering via Social-Aware
Multi-Modal Co-Attention Convolutional Matching",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "2321--2334",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228419"}

% Farazi et al. (2021), accuracy vs. complexity trade-off. Journal is the bare string macro PR (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated so abbreviating styles keep both.
@article{bb233444,
        AUTHOR = "Farazi, M. and Khan, S. and Barnes, N. M.",
        TITLE = "Accuracy vs. complexity: A trade-off in visual question answering
models",
        JOURNAL = PR,
        VOLUME = "120",
        YEAR = "2021",
        PAGES = "108106",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228420"}

% Liu et al. (2021), dense inter- and intra-modality interactions. Journal is the bare string macro MultMed (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@article{bb233445,
        AUTHOR = "Liu, F. and Liu, J. and Fang, Z. W. and Hong, R. C. and Lu, H. Q.",
        TITLE = "Visual Question Answering With Dense Inter- and Intra-Modality
Interactions",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "3518--3529",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228421"}

% Wu et al. (2022), multimodal attention fusion for TextVQA. Journal is the bare string macro PR (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated; the task name is braced to survive sentence-casing.
@article{bb233446,
        AUTHOR = "Wu, J. J. and Du, J. and Wang, F. and Yang, C. and Jiang, X. Z. and Hu, J. and Yin, B. and Zhang, J. S. and Dai, L. R.",
        TITLE = "A multimodal attention fusion network with a dynamic vocabulary for
{TextVQA}",
        JOURNAL = PR,
        VOLUME = "122",
        YEAR = "2022",
        PAGES = "108214",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228422"}

% Peng et al. (2022), MRA-Net paper. Journal is the bare string macro PAMI (not defined in this excerpt).
% MONTH uses the standard macro; initials are space-separated; acronyms are braced to survive sentence-casing.
@article{bb233447,
        AUTHOR = "Peng, L. and Yang, Y. and Wang, Z. and Huang, Z. and Shen, H. T.",
        TITLE = "{MRA-Net}: Improving {VQA} Via Multi-Modal Relation Attention Network",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "318--329",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228423"}

% Shuang et al. (2022), comprehensive-perception dynamic reasoning. Journal is the bare string macro PR (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated so abbreviating styles keep both.
@article{bb233448,
        AUTHOR = "Shuang, K. and Guo, J. and Wang, Z. H.",
        TITLE = "Comprehensive-perception dynamic reasoning for visual question
answering",
        JOURNAL = PR,
        VOLUME = "131",
        YEAR = "2022",
        PAGES = "108878",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228424"}

% Xie et al. (2022), knowledge-based visual question generation. Journal is the bare string macro CirSysVideo (not defined in this excerpt).
% MONTH uses the standard macro; initials are space-separated.
@article{bb233449,
        AUTHOR = "Xie, J. Y. and Fang, W. H. and Cai, Y. and Huang, Q. B. and Li, Q.",
        TITLE = "Knowledge-Based Visual Question Generation",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "7547--7558",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228425"}

% Gao et al. (2022), structured multimodal attentions for TextVQA. Journal is the bare string macro PAMI (not defined in this excerpt).
% MONTH uses the standard macro; initials are space-separated; the task name is braced to survive sentence-casing.
@article{bb233450,
        AUTHOR = "Gao, C. Y. and Zhu, Q. and Wang, P. and Li, H. and Liu, Y. L. and van den Hengel, A. J. and Wu, Q.",
        TITLE = "Structured Multimodal Attentions for {TextVQA}",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "9603--9614",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228426"}

% Xu et al. (2023), MoCA paper on textbook question answering. Journal is the bare string macro PR (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated; the model name is braced to survive sentence-casing.
@article{bb233451,
        AUTHOR = "Xu, F. Z. and Lin, Q. and Liu, J. and Zhang, L. L. and Zhao, T. Z. and Chai, Q. and Pan, Y. and Huang, Y. and Wang, Q. Y.",
        TITLE = "{MoCA}: Incorporating domain pretraining and cross attention for
textbook question answering",
        JOURNAL = PR,
        VOLUME = "140",
        YEAR = "2023",
        PAGES = "109588",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228427"}

% Mohamud et al. (2023), encoder-decoder cycle for visual question answering. Journal is the bare string macro PR (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated so abbreviating styles keep all of them.
@article{bb233452,
        AUTHOR = "Mohamud, S. A. M. and Jalali, A. and Lee, M. H.",
        TITLE = "Encoder-decoder cycle for visual question answering based on
perception-action cycle",
        JOURNAL = PR,
        VOLUME = "144",
        YEAR = "2023",
        PAGES = "109848",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228428"}

% Tito et al. (2023), hierarchical multimodal transformers for multipage DocVQA. Journal is the bare string macro PR (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% The task name is braced so sentence-casing styles do not lowercase it.
@article{bb233453,
        AUTHOR = "Tito, R. and Karatzas, D. and Valveny, E.",
        TITLE = "Hierarchical multimodal transformers for Multipage {DocVQA}",
        JOURNAL = PR,
        VOLUME = "144",
        YEAR = "2023",
        PAGES = "109834",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228429"}

% Biswas et al. (2023), VQAPT paper. Journal is the bare string macro PRL (not defined in this excerpt).
% Initials are space-separated; the model name is braced to survive sentence-casing.
@article{bb233454,
        AUTHOR = "Biswas, K. and Shivakumara, P. and Pal, U. and Liu, C. L. and Lu, Y.",
        TITLE = "{VQAPT}: A New visual question answering model for personality traits
in social media images",
        JOURNAL = PRL,
        VOLUME = "175",
        YEAR = "2023",
        PAGES = "66--73",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228430"}

% Cho et al. (2023), adapters for debiased visual question answering. Journal is the bare string macro CVIU (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated so abbreviating styles keep both.
@article{bb233455,
        AUTHOR = "Cho, J. W. and Argaw, D. M. and Oh, Y. and Kim, D. J. and Kweon, I. S.",
        TITLE = "Empirical study on using adapters for debiased Visual Question
Answering",
        JOURNAL = CVIU,
        VOLUME = "237",
        YEAR = "2023",
        PAGES = "103842",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228431"}

% Cho et al. (2021), missing modalities in answer-difference prediction. Venue is the bare string macro MULA21 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233456,
        AUTHOR = "Cho, J. W. and Kim, D. J. and Choi, J. and Jung, Y. and Kweon, I. S.",
        TITLE = "Dealing with Missing Modalities in the Visual Question
Answer-Difference Prediction Task through Knowledge Distillation",
        BOOKTITLE = MULA21,
        YEAR = "2021",
        PAGES = "1592--1601",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228432"}

% Cho et al. (2023), generative bias for robust visual question answering. Venue is the bare string macro CVPR23 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233457,
        AUTHOR = "Cho, J. W. and Kim, D. J. and Ryu, H. and Kweon, I. S.",
        TITLE = "Generative Bias for Robust Visual Question Answering",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "11681--11690",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228433"}

% Mashrur et al. (2024), semantic cross-modal augmentation. Journal is the bare string macro CVIU (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% The last author's surname is written with its hyphen (the source had a space); initials are space-separated.
@article{bb233458,
        AUTHOR = "Mashrur, A. and Luo, W. and Zaidi, N. A. and Robles-Kelly, A.",
        TITLE = "Robust visual question answering via semantic cross modal
augmentation",
        JOURNAL = CVIU,
        VOLUME = "238",
        YEAR = "2024",
        PAGES = "103862",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228434"}

% Yao et al. (2023), multi-modal spatial relational attention networks. Journal is the bare string macro IVC (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated so abbreviating styles keep both.
@article{bb233459,
        AUTHOR = "Yao, H. B. and Wang, L. P. and Cai, C. T. and Sun, Y. X. and Zhang, Z. and Luo, Y. K.",
        TITLE = "Multi-modal spatial relational attention networks for visual question
answering",
        JOURNAL = IVC,
        VOLUME = "140",
        YEAR = "2023",
        PAGES = "104840",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228435"}

% Zheng et al. (2024), multimodal graph reasoning for text-based visual question answering. Journal is the bare string macro SMCS (not defined in this excerpt).
% MONTH uses the standard macro; initials are space-separated.
@article{bb233460,
        AUTHOR = "Zheng, W. B. and Yan, L. and Wang, F. Y.",
        TITLE = "So Many Heads, So Many Wits: Multimodal Graph Reasoning for
Text-Based Visual Question Answering",
        JOURNAL = SMCS,
        VOLUME = "54",
        YEAR = "2024",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "854--865",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228436"}

% Bi et al. (2024), dense caption-aware representation. Journal is the bare string macro CirSysVideo (not defined in this excerpt).
% MONTH uses the standard macro; initials are space-separated.
@article{bb233461,
        AUTHOR = "Bi, Y. D. and Jiang, H. and Hu, Y. L. and Sun, Y. F. and Yin, B. C.",
        TITLE = "See and Learn More: Dense Caption-Aware Representation for Visual
Question Answering",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "1135--1146",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228437"}

% Jiang et al. (2024), correlation information bottleneck. Journal is the bare string macro IJCV (not defined in this excerpt).
% MONTH uses the standard macro; initials are space-separated.
@article{bb233462,
        AUTHOR = "Jiang, J. J. and Liu, Z. Y. and Zheng, N. N.",
        TITLE = "Correlation Information Bottleneck: Towards Adapting Pretrained
Multimodal Models for Robust Visual Question Answering",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "185--207",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228438"}

% Zhang et al. (2024), LOIS paper. Journal is the bare string macro MultMed (not defined in this excerpt).
% Initials are space-separated; the acronym is braced to survive sentence-casing.
@article{bb233463,
        AUTHOR = "Zhang, S. and Chen, Y. and Sun, Y. and Wang, F. and Shi, H. B. and Wang, H. R.",
        TITLE = "{LOIS}: Looking Out of Instance Semantics for Visual Question Answering",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "6202--6214",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228439"}

% Xie et al. (2024), knowledge-augmented visual question answering with explanations. Journal is the bare string macro IP (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@article{bb233464,
        AUTHOR = "Xie, J. Y. and Cai, Y. and Chen, J. L. and Xu, R. H. and Wang, J. X. and Li, Q.",
        TITLE = "Knowledge-Augmented Visual Question Answering With Natural Language
Explanation",
        JOURNAL = IP,
        VOLUME = "33",
        YEAR = "2024",
        PAGES = "2652--2664",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228440"}

% Wang et al. (2024), EarthVQANet paper. Journal is the bare string macro PandRS (not defined in this excerpt).
% Initials are space-separated; the model name is braced to survive sentence-casing.
@article{bb233465,
        AUTHOR = "Wang, J. J. and Ma, A. L. and Chen, Z. H. and Zheng, Z. and Wan, Y. T. and Zhang, L. P. and Zhong, Y. F.",
        TITLE = "{EarthVQANet}: Multi-task visual question answering for remote sensing
image understanding",
        JOURNAL = PandRS,
        VOLUME = "212",
        YEAR = "2024",
        PAGES = "422--439",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228441"}

% Qian et al. (2024), CroMIC-QA paper. Journal is the bare string macro MultMed (not defined in this excerpt).
% Initials are space-separated; the model name is braced to survive sentence-casing.
@article{bb233466,
        AUTHOR = "Qian, S. and Liu, B. Q. and Sun, C. J. and Xu, Z. and Ma, L. and Wang, B.",
        TITLE = "{CroMIC-QA}: The Cross-Modal Information Complementation Based Question
Answering",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "8348--8359",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228442"}

% Uehara and Harada (2024), learning by asking for novel object recognition. Journal is the bare string macro IJCV (not defined in this excerpt).
% MONTH uses the standard macro so the style controls how it is printed.
@article{bb233467,
        AUTHOR = "Uehara, K. and Harada, T.",
        TITLE = "Learning by Asking Questions for Knowledge-Based Novel Object
Recognition",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "2290--2309",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228443"}

% Uehara and Harada (2023), K-VQG paper. Venue is the bare string macro WACV23 (not defined in this excerpt).
% The acronym is braced so sentence-casing styles do not lowercase it.
@inproceedings{bb233468,
        AUTHOR = "Uehara, K. and Harada, T.",
        TITLE = "{K-VQG}: Knowledge-aware Visual Question Generation for Common-sense
Acquisition",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "4390--4398",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228444"}

% Uehara et al. (2022), informative sub-questions. Venue is the bare string macro MULA22 (not defined in this excerpt).
@inproceedings{bb233469,
        AUTHOR = "Uehara, K. and Duan, N. and Harada, T.",
        TITLE = "Learning to Ask Informative Sub-Questions for Visual Question
Answering",
        BOOKTITLE = MULA22,
        YEAR = "2022",
        PAGES = "4680--4689",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228445"}

% Li et al. (2018), question generation as the dual task of question answering. Venue is the bare string macro CVPR18 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep both.
@inproceedings{bb233470,
        AUTHOR = "Li, Y. K. and Duan, N. and Zhou, B. L. and Chu, X. and Ouyang, W. L. and Wang, X. G. and Zhou, M.",
        TITLE = "Visual Question Generation as Dual Task of Visual Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6116--6124",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228446"}

% Gao et al. (2018), question-guided hybrid convolution. Venue is the bare string macro ECCV18 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep all of them.
@inproceedings{bb233471,
        AUTHOR = "Gao, P. and Li, H. S. and Li, S. and Lu, P. and Li, Y. K. and Hoi, S. C. H. and Wang, X. G.",
        TITLE = "Question-Guided Hybrid Convolution for Visual Question Answering",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "I: 485--501",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228447"}

% Gao et al. (2019), intra- and inter-modality attention flow. Venue is the bare string macro CVPR19 (not defined in this excerpt).
% Initials are space-separated so abbreviating styles keep all of them.
@inproceedings{bb233472,
        AUTHOR = "Gao, P. and Jiang, Z. K. and You, H. X. and Lu, P. and Hoi, S. C. H. and Wang, X. G. and Li, H. S.",
        TITLE = "Dynamic Fusion With Intra- and Inter-Modality Attention Flow for Visual
Question Answering",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "6632--6641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228448"}

% Vosoughi et al. (2024), cross-modality bias from a causal view. Journal is the bare string macro MultMed (not defined in this excerpt).
% Initials are space-separated; the acronym is braced to survive sentence-casing.
@article{bb233473,
        AUTHOR = "Vosoughi, A. and Deng, S. J. and Zhang, S. Y. and Tian, Y. P. and Xu, C. L. and Luo, J. B.",
        TITLE = "Cross Modality Bias in Visual Question Answering:
A Causal View With Possible Worlds {VQA}",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "8609--8624",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228449"}

% Guo et al. (2024), UNK-VQA dataset paper. Journal is the bare string macro PAMI (not defined in this excerpt).
% MONTH uses the standard macro; initials are space-separated; the dataset name is braced to survive sentence-casing.
@article{bb233474,
        AUTHOR = "Guo, Y. Y. and Jiao, F. and Shen, Z. Q. and Nie, L. Q. and Kankanhalli, M.",
        TITLE = "{UNK-VQA}: A Dataset and a Probe Into the Abstention Ability of
Multi-Modal Large Models",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "10284--10296",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228450"}

% Chen et al. (2025), GEXMERT paper. Journal is the bare string macro PR (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated; the model name is braced to survive sentence-casing.
@article{bb233475,
        AUTHOR = "Chen, F. Y. and Tang, X. S. and Hao, K. R.",
        TITLE = "{GEXMERT}: Geometrically enhanced cross-modality encoder representations
from transformers inspired by higher-order visual percepts",
        JOURNAL = PR,
        VOLUME = "158",
        YEAR = "2025",
        PAGES = "111047",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228451"}

% Zhang et al. (2024), VADS attack paper. Journal is the bare string macro CVIU (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated; the acronym and the camel-case name are braced to survive sentence-casing.
@article{bb233476,
        AUTHOR = "Zhang, B. and Li, J. X. and Shi, Y. C. and Han, Y. and Hu, Q. H.",
        TITLE = "{VADS}: Visuo-Adaptive {DualStrike} attack on visual question answer",
        JOURNAL = CVIU,
        VOLUME = "249",
        YEAR = "2024",
        PAGES = "104137",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228452"}

% Peng and Li (2025), unbiased VQA via modal interaction and question transformation. Journal is the bare string macro PR (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated; the acronym is braced to survive sentence-casing.
@article{bb233477,
        AUTHOR = "Peng, D. and Li, Z. X.",
        TITLE = "Unbiased {VQA} via modal information interaction and question
transformation",
        JOURNAL = PR,
        VOLUME = "162",
        YEAR = "2025",
        PAGES = "111394",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228453"}

% Fan et al. (2025), Cycle-VQA paper. Journal is the bare string macro PR (not defined in this excerpt).
% PAGES holds a single number rather than a range; NOTE(review): presumably an article number -- confirm.
% Initials are space-separated; the framework name is braced to survive sentence-casing.
@article{bb233478,
        AUTHOR = "Fan, L. and Gong, X. and Zheng, C. Y. and Tan, X. L. and Li, J. and Ou, Y. F.",
        TITLE = "{Cycle-VQA}: A Cycle-Consistent Framework for Robust Medical Visual
Question Answering",
        JOURNAL = PR,
        VOLUME = "165",
        YEAR = "2025",
        PAGES = "111609",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228454"}

% Lin et al. (2025), diffusion-based generation for difference-aware medical VQA. Journal is the bare string macro IP (not defined in this excerpt).
% Initials are space-separated; the acronym is braced to survive sentence-casing.
@article{bb233479,
        AUTHOR = "Lin, Q. and He, K. and Zhu, Y. F. and Xu, F. Z. and Cambria, E. and Feng, M. L.",
        TITLE = "Cross-Modal Knowledge Diffusion-Based Generation for Difference-Aware
Medical {VQA}",
        JOURNAL = IP,
        VOLUME = "34",
        YEAR = "2025",
        PAGES = "2421--2434",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228455"}

% Kim et al. (2025), visual question answering survey. Journal is the bare string macro Surveys (not defined in this excerpt).
% The source carried a placeholder page value (xx-yy) that would print literally; it is omitted here.
% NOTE(review): add PAGES with the real page range or article number once known.
% MONTH uses the standard macro; initials are space-separated.
@article{bb233480,
        AUTHOR = "Kim, B. S. and Kim, J. and Lee, D. and Jang, B.",
        TITLE = "Visual Question Answering: A Survey of Methods, Datasets, Evaluation,
and Challenges",
        JOURNAL = Surveys,
        VOLUME = "57",
        YEAR = "2025",
        NUMBER = "10",
        MONTH = may,
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228456"}

% Kim et al. (2025), question-aware Gaussian experts. Venue is the bare string macro CVPR25 (not defined in this excerpt).
% Initials are space-separated; the proper adjective is braced to survive sentence-casing.
@inproceedings{bb233481,
        AUTHOR = "Kim, H. Y. and Jung, I. and Suh, D. and Zhang, Y. and Lee, S. and Hong, S.",
        TITLE = "Question-Aware {Gaussian} Experts for Audio-Visual Question Answering",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "13681--13690",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228457"}

% Huang et al. (2025), FRAMES-VQA benchmark paper. Venue is the bare string macro CVPR25 (not defined in this excerpt).
% Initials are space-separated; the benchmark name is braced to survive sentence-casing.
@inproceedings{bb233482,
        AUTHOR = "Huang, C. Y. and Maneechotesuwan, B. and Chopra, S. and Kira, Z.",
        TITLE = "{FRAMES-VQA}: Benchmarking Fine-Tuning Robustness across Multi-Modal
Shifts in Visual Question Answering",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "3909--3918",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228458"}

@inproceedings{bb233483,
        AUTHOR = "Wu, K.X. and Li, X. and Li, X. and Hu, C. and Wu, G.L.",
        TITLE = "{AVQACL}: A Novel Benchmark for Audio-Visual Question Answering
Continual Learning",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "3252-3261",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228459"}

@inproceedings{bb233484,
  author    = {Zhao, X.Y. and Bai, Z.W. and Zhou, M.L. and Ren, X.C. and Wang, Y.Q. and Wang, L.C.},
  title     = {Integrating Dynamic Routing with Reinforcement Learning and
Multimodal Techniques for Visual Question Answering},
  booktitle = ICIVC24,
  year      = {2024},
  pages     = {295-301},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228460},
}

@inproceedings{bb233485,
  author    = {Park, K.R. and Lee, H.J. and Kim, J.U.},
  title     = {Learning Trimodal Relation for Audio-visual Question Answering with
Missing Modality},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XV: 42-59},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228461},
}

@inproceedings{bb233486,
  author    = {Mishra, A. and Agarwala, A. and Tiwari, U. and Rajendiran, V.N. and Miriyala, S.S.},
  title     = {Efficient Visual Question Answering on Embedded Devices:
Cross-Modality Attention with Evolutionary Quantization},
  booktitle = ICIP24,
  year      = {2024},
  pages     = {2142-2148},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228462},
}

@inproceedings{bb233487,
        AUTHOR = "Jiang, X. and Wang, G.M. and Guo, J.H. and Li, J.C. and Zhang, W.Q. and Lu, R.X. and Tang, S.L.",
        TITLE = "{DIEM}: Decomposition-Integration Enhancing Multimodal Insights",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "27294-27303",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228463"}

@inproceedings{bb233488,
  author    = {Reichman, B. and Heck, L.},
  title     = {Cross-Modal Dense Passage Retrieval for Outside Knowledge Visual
Question Answering},
  booktitle = CLVL23,
  year      = {2023},
  pages     = {2829-2834},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228464},
}

@inproceedings{bb233489,
  author    = {Qian, Z. and Wang, X. and Duan, X.G. and Qin, P. and Li, Y.H. and Zhu, W.W.},
  title     = {Decouple Before Interact: Multi-Modal Prompt Learning for Continual
Visual Question Answering},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2941-2950},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228465},
}

@inproceedings{bb233490,
        AUTHOR = "Li, B.J. and Wang, J. and Zhao, M. and Zhou, S.",
        TITLE = "Two-stage Multimodality Fusion for High-performance Text-based Visual
Question Answering",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "IV: 658-674",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228466"}

@inproceedings{bb233491,
  author    = {Chai, Z. and Wan, X.J. and Han, S.C. and Poon, J.},
  title     = {Visual Question Generation Under Multi-granularity Cross-Modal
Interaction},
  booktitle = MMMod23,
  year      = {2023},
  pages     = {I: 255-266},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228467},
}

@inproceedings{bb233492,
  author    = {Wang, J.H. and Hu, M.H. and Song, Y.G. and Yang, X.S.},
  title     = {Health-Oriented Multimodal Food Question Answering},
  booktitle = MMMod23,
  year      = {2023},
  pages     = {I: 191-203},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228468},
}

% NOTE(review): BOOKTITLE below is the literal string "ICPR22", whereas sibling
% entries use bare string macros (e.g. ICPR21). Left unchanged here; confirm
% whether an ICPR22 macro is defined before switching to it.
@inproceedings{bb233493,
        AUTHOR = "Zhang, H.T. and Wu, W.",
        TITLE = "{CAT}: {Re-Conv} Attention in Transformer for Visual Question Answering",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "1471-1477",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228469"}

@inproceedings{bb233494,
  author    = {Dancette, C. and Cadene, R. and Teney, D. and Cord, M.},
  title     = {Beyond Question-Based Biases:
Assessing Multimodal Shortcut Learning in Visual Question Answering},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1554-1563},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228470},
}

% NOTE(review): the upstream title ended with ": the International Conference on
% Digital Image Computing: Techniques and Applications (DICTA 2021)". That is
% venue text, presumably already conveyed by the DICTA21 booktitle macro, so it
% is dropped from TITLE here. Confirm against the publisher record.
@inproceedings{bb233495,
        AUTHOR = "Felix, R. and Repasky, B. and Hodge, S. and Zolfaghari, R. and Abbasnejad, E. and Sherrah, J.",
        TITLE = "Cross-Modal Visual Question Answering for Remote Sensing Data",
        BOOKTITLE = DICTA21,
        YEAR = "2021",
        PAGES = "1-9",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228471"}

@inproceedings{bb233496,
  author    = {Chen, H.Y. and Liu, R.F. and Peng, B.},
  title     = {Cross-modal Relational Reasoning Network for Visual Question
Answering},
  booktitle = MAIR2-21,
  year      = {2021},
  pages     = {3939-3948},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228472},
}

@inproceedings{bb233497,
  author    = {Farazi, M. and Khan, S. and Barnes, N.M.},
  title     = {Question-Agnostic Attention for Visual Question Answering},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {3542-3549},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228473},
}

@inproceedings{bb233498,
  author    = {Li, Y. and Lin, Y. and Zhao, H.H. and Wang, D.H.},
  title     = {Dual Path Multi-Modal High-Order Features for Textual Content based
Visual Question Answering},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {4324-4331},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228474},
}

@inproceedings{bb233499,
  author    = {Huang, H.T. and Han, T. and Han, W. and Yap, D. and Chiang, C.M.},
  title     = {Answer-checking in Context:
A Multi-modal Fully Attention Network for Visual Question Answering},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {1173-1180},
  bibsource = {http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228475},
}

Last update: Sep 10, 2025 at 12:00:25