% Visual question answering (VQA) references: conference papers and journal articles.
% Venue values written without quotes (CVPR18, ECCV18, PR, PAMI, MultMed, ...) are
% string macros; their definitions are not part of this section and are presumably
% supplied elsewhere -- confirm they are loaded before these entries.
% Conventions used below: page ranges use "--", months use the standard
% three-letter macros, initials are space-separated, and acronyms or coined
% names in titles are braced so that sentence-casing styles cannot downcase them.
% ECCV/ACCV/MMMod page values carry a roman-numeral volume prefix ("VIII: 122--138").

@inproceedings{bb233400,
  AUTHOR    = "Das, A. and Datta, S. and Gkioxari, G. and Lee, S. and Parikh, D. and Batra, D.",
  TITLE     = "Embodied Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "1--10",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228376"
}

@inproceedings{bb233401,
  AUTHOR    = "Misra, I. and Girshick, R. and Fergus, R. and Hebert, M. and Gupta, A. and van der Maaten, L.",
  TITLE     = "Learning by Asking Questions",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "11--20",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228377"
}

@inproceedings{bb233402,
  AUTHOR    = "Gurari, D. and Li, Q. and Stangl, A. J. and Guo, A. and Lin, C. and Grauman, K. and Luo, J. and Bigham, J. P.",
  TITLE     = "{VizWiz} Grand Challenge: Answering Visual Questions from Blind People",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "3608--3617",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228378"
}

@inproceedings{bb233403,
  AUTHOR    = "Li, J. and Su, H. and Zhu, J. and Wang, S. and Zhang, B.",
  TITLE     = "Textbook Question Answering Under Instructor Guidance with Memory Networks",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "3655--3663",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228379"
}

@inproceedings{bb233404,
  AUTHOR    = "Gordon, D. and Kembhavi, A. and Rastegari, M. and Redmon, J. and Fox, D. and Farhadi, A.",
  TITLE     = "{IQA}: Visual Question Answering in Interactive Environments",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "4089--4098",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228380"
}

@inproceedings{bb233405,
  AUTHOR    = "Agrawal, A. and Batra, D. and Parikh, D. and Kembhavi, A.",
  TITLE     = "Don't Just Assume; Look and Answer: Overcoming Priors for Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "4971--4980",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228381"
}

@inproceedings{bb233406,
  AUTHOR    = "Sha, F. and Chao, W. and Hu, H.",
  TITLE     = "Learning Answer Embeddings for Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "5428--5436",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228382"
}

@inproceedings{bb233407,
  AUTHOR    = "Kafle, K. and Price, B. and Cohen, S. and Kanan, C.",
  TITLE     = "{DVQA}: Understanding Data Visualizations via Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "5648--5656",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228383"
}

@inproceedings{bb233408,
  AUTHOR    = "Sha, F. and Hu, H. and Chao, W.",
  TITLE     = "Cross-Dataset Adaptation for Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "5716--5725",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228384"
}

@inproceedings{bb233409,
  AUTHOR    = "Anderson, P. and He, X. and Buehler, C. and Teney, D. and Johnson, M. and Gould, S. and Zhang, L.",
  TITLE     = "Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "6077--6086",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228385"
}

@inproceedings{bb233410,
  AUTHOR    = "Nguyen, D. and Okatani, T.",
  TITLE     = "Improved Fusion of Visual and Language Representations by Dense Symmetric Co-attention for Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "6087--6096",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228386"
}

@inproceedings{bb233411,
  AUTHOR    = "Patro, B. and Namboodiri, V. P.",
  TITLE     = "Differential Attention for Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "7680--7688",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228387"
}

@inproceedings{bb233412,
  AUTHOR    = "Su, Z. and Zhu, C. and Dong, Y. P. and Cai, D. Q. and Chen, Y. R. and Li, J. G.",
  TITLE     = "Learning Visual Knowledge Memory Networks for Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "7736--7745",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228388"
}

% NOTE(review): bb233413 has the same authors and title as bb233400 but a
% different venue macro. Its page range is reproduced as found (only the hyphen
% was doubled) and looks unusual -- TODO verify against the publisher record.
@inproceedings{bb233413,
  AUTHOR    = "Das, A. and Datta, S. and Gkioxari, G. and Lee, S. and Parikh, D. and Batra, D.",
  TITLE     = "Embodied Question Answering",
  BOOKTITLE = DeepLearnRV18,
  YEAR      = "2018",
  PAGES     = "2135--213509",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228389"
}

@inproceedings{bb233414,
  AUTHOR    = "Cheng, W. and Huang, Y. and Wang, L.",
  TITLE     = "Towards Unconstrained Pointing Problem of Visual Question Answering: A Retrieval-based Method",
  BOOKTITLE = ICPR18,
  YEAR      = "2018",
  PAGES     = "3303--3308",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228390"
}

@inproceedings{bb233415,
  AUTHOR    = "Zhou, B. and Sun, Y. Y. and Bau, D. and Torralba, A. B.",
  TITLE     = "Interpretable Basis Decomposition for Visual Explanation",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "VIII: 122--138",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228391"
}

@inproceedings{bb233416,
  AUTHOR    = "Shi, Y. and Furlanello, T. and Zha, S. and Anandkumar, A.",
  TITLE     = "Question Type Guided Attention in Visual Question Answering",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "II: 158--175",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228392"
}

@inproceedings{bb233417,
  AUTHOR    = "Narasimhan, M. and Schwing, A. G.",
  TITLE     = "Straight to the Facts: Learning Knowledge Base Retrieval for Factual Visual Question Answering",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "VIII: 460--477",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228393"
}

@inproceedings{bb233418,
  AUTHOR    = "Malinowski, M. and Doersch, C. and Santoro, A. and Battaglia, P.",
  TITLE     = "Learning Visual Question Answering by Bootstrapping Hard Attention",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "VI: 3--20",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228394"
}

@inproceedings{bb233419,
  AUTHOR    = "Li, Q. and Tao, Q. Y. and Joty, S. and Cai, J. F. and Luo, J. B.",
  TITLE     = "{VQA-E}: Explaining, Elaborating, and Enhancing Your Answers for Visual Questions",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "VII: 570--586",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228395"
}

@inproceedings{bb233420,
  AUTHOR    = "Yu, D. and Gao, X. and Xiong, H.",
  TITLE     = "Structured Semantic Representation for Visual Question Answering",
  BOOKTITLE = ICIP18,
  YEAR      = "2018",
  PAGES     = "2286--2290",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228396"
}

@inproceedings{bb233421,
  AUTHOR    = "Huang, L. and Kulkarni, K. and Jha, A. and Lohit, S. and Jayasuriya, S. and Turaga, P. K.",
  TITLE     = "{CS-VQA}: Visual Question Answering with Compressively Sensed Images",
  BOOKTITLE = ICIP18,
  YEAR      = "2018",
  PAGES     = "1283--1287",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228397"
}

@inproceedings{bb233422,
  AUTHOR    = "Desta, M. T. and Chen, L. and Kornuta, T.",
  TITLE     = "Object-Based Reasoning in {VQA}",
  BOOKTITLE = WACV18,
  YEAR      = "2018",
  PAGES     = "1814--1823",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228398"
}

@inproceedings{bb233423,
  AUTHOR    = "Zhao, H. and Fan, Q. and Gutfreund, D. and Fu, Y.",
  TITLE     = "Semantically Guided Visual Question Answering",
  BOOKTITLE = WACV18,
  YEAR      = "2018",
  PAGES     = "1852--1860",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228399"
}

@inproceedings{bb233424,
  AUTHOR    = "Wang, Z. and Liu, X. and Wang, L. and Qiao, Y. and Xie, X. and Fowlkes, C. C.",
  TITLE     = "Structured Triplet Learning with {POS}-Tag Guided Attention for Visual Question Answering",
  BOOKTITLE = WACV18,
  YEAR      = "2018",
  PAGES     = "1888--1896",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228400"
}

@inproceedings{bb233425,
  AUTHOR    = "Chowdhury, I. and Nguyen, K. and Fookes, C. and Sridharan, S.",
  TITLE     = "A cascaded long short-term memory ({LSTM}) driven generic visual question answering ({VQA})",
  BOOKTITLE = ICIP17,
  YEAR      = "2017",
  PAGES     = "1842--1846",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228401"
}

@inproceedings{bb233426,
  AUTHOR    = "Jain, U. and Zhang, Z. Y. and Schwing, A.",
  TITLE     = "Creativity: Generating Diverse Questions Using Variational Autoencoders",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "5415--5424",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228402"
}

@inproceedings{bb233427,
  AUTHOR    = "Zhu, Y. and Lim, J. J. and Fei Fei, L.",
  TITLE     = "Knowledge Acquisition for Visual Question Answering via Iterative Querying",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "6146--6155",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228403"
}

@inproceedings{bb233428,
  AUTHOR    = "Lin, Y. T. and Pang, Z. Y. and Li, Y. and Wang, D. H.",
  TITLE     = "Simple and effective visual question answering in a single modality",
  BOOKTITLE = ICIP16,
  YEAR      = "2016",
  PAGES     = "2276--2280",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228404"
}

@inproceedings{bb233429,
  AUTHOR    = "Ganju, S. and Russakovsky, O. and Gupta, A.",
  TITLE     = "What's in a Question: Using Visual Questions as a Form of Supervision",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "6422--6431",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228405"
}

@inproceedings{bb233430,
  AUTHOR    = "Xu, H. J. and Saenko, K.",
  TITLE     = "Ask, Attend and Answer: Exploring Question-Guided Spatial Attention for Visual Question Answering",
  BOOKTITLE = ECCV16,
  YEAR      = "2016",
  PAGES     = "VII: 451--466",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228406"
}

@inproceedings{bb233431,
  AUTHOR    = "Jabri, A. and Joulin, A. and van der Maaten, L.",
  TITLE     = "Revisiting Visual Question Answering Baselines",
  BOOKTITLE = ECCV16,
  YEAR      = "2016",
  PAGES     = "VIII: 727--739",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228407"
}

@inproceedings{bb233432,
  AUTHOR    = "Yang, Z. C. and He, X. D. and Gao, J. F. and Deng, L. and Smola, A.",
  TITLE     = "Stacked Attention Networks for Image Question Answering",
  BOOKTITLE = CVPR16,
  YEAR      = "2016",
  PAGES     = "21--29",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228408"
}

@inproceedings{bb233433,
  AUTHOR    = "Sadeghi, F. and Divvala, S. K. and Farhadi, A.",
  TITLE     = "{VisKE}: Visual knowledge extraction and question answering by visual verification of relation phrases",
  BOOKTITLE = CVPR15,
  YEAR      = "2015",
  PAGES     = "1456--1464",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228409"
}

@inproceedings{bb233434,
  AUTHOR    = "Liu, Y. and Liu, J. and Wang, D. and Cheng, J.",
  TITLE     = "A robust multivariate reranking algorithm for Question Answering enrichment",
  BOOKTITLE = ICIP12,
  YEAR      = "2012",
  PAGES     = "1917--1920",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228410"
}

@inproceedings{bb233435,
  AUTHOR    = "Varekamp, C. and van de Walle, P. and de Putter, M.",
  TITLE     = "Question interface for {3D} picture creation on an autostereoscopic digital picture frame",
  BOOKTITLE = "3DTV09",
  YEAR      = "2009",
  PAGES     = "1--4",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT228411"
}

@article{bb233436,
  AUTHOR    = "Osman, A. and Samek, W.",
  TITLE     = "{DRAU}: Dual Recurrent Attention Units for Visual Question Answering",
  JOURNAL   = CVIU,
  VOLUME    = "185",
  YEAR      = "2019",
  PAGES     = "24--30",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228412"
}

@article{bb233437,
  AUTHOR    = "Li, W. and Sun, J. H. and Liu, G. and Zhao, L. L. and Fang, X. Z.",
  TITLE     = "Visual question answering with attention transfer and a cross-modal gating mechanism",
  JOURNAL   = PRL,
  VOLUME    = "133",
  YEAR      = "2020",
  PAGES     = "334--340",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228413"
}

@article{bb233438,
  AUTHOR    = "Yu, J. and Zhu, Z. H. and Wang, Y. J. and Zhang, W. F. and Hu, Y. and Tan, J. L.",
  TITLE     = "Cross-modal knowledge reasoning for knowledge-based visual question answering",
  JOURNAL   = PR,
  VOLUME    = "108",
  YEAR      = "2020",
  PAGES     = "107563",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228414"
}

@inproceedings{bb233439,
  AUTHOR    = "Yang, Z. Q. and Qin, Z. C. and Yu, J. and Wan, T.",
  TITLE     = "Prior Visual Relationship Reasoning For Visual Question Answering",
  BOOKTITLE = ICIP20,
  YEAR      = "2020",
  PAGES     = "1411--1415",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228415"
}

@article{bb233440,
  AUTHOR    = "Yu, J. and Zhang, W. F. and Lu, Y. H. and Qin, Z. C. and Hu, Y. and Tan, J. L. and Wu, Q.",
  TITLE     = "Reasoning on the Relation: Enhancing Visual Representation for Visual Question Answering and Cross-Modal Retrieval",
  JOURNAL   = MultMed,
  VOLUME    = "22",
  YEAR      = "2020",
  NUMBER    = "12",
  MONTH     = dec,
  PAGES     = "3196--3209",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228416"
}

@article{bb233441,
  AUTHOR    = "Wu, Y. R. and Ma, Y. T. and Wan, S. H.",
  TITLE     = "Multi-scale relation reasoning for multi-modal Visual Question Answering",
  JOURNAL   = SP:IC,
  VOLUME    = "96",
  YEAR      = "2021",
  PAGES     = "116319",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228417"
}

@inproceedings{bb233442,
  AUTHOR    = "Ma, Y. T. and Lu, T. and Wu, Y. R.",
  TITLE     = "Multi-scale Relational Reasoning with Regional Attention for Visual Question Answering",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "5642--5649",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228418"
}

@article{bb233443,
  AUTHOR    = "Hu, J. and Qian, S. S. and Fang, Q. and Xu, C. S.",
  TITLE     = "Heterogeneous Community Question Answering via Social-Aware Multi-Modal Co-Attention Convolutional Matching",
  JOURNAL   = MultMed,
  VOLUME    = "23",
  YEAR      = "2021",
  PAGES     = "2321--2334",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228419"
}

@article{bb233444,
  AUTHOR    = "Farazi, M. and Khan, S. and Barnes, N. M.",
  TITLE     = "Accuracy vs. complexity: A trade-off in visual question answering models",
  JOURNAL   = PR,
  VOLUME    = "120",
  YEAR      = "2021",
  PAGES     = "108106",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228420"
}

@article{bb233445,
  AUTHOR    = "Liu, F. and Liu, J. and Fang, Z. W. and Hong, R. C. and Lu, H. Q.",
  TITLE     = "Visual Question Answering With Dense Inter- and Intra-Modality Interactions",
  JOURNAL   = MultMed,
  VOLUME    = "23",
  YEAR      = "2021",
  PAGES     = "3518--3529",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228421"
}

@article{bb233446,
  AUTHOR    = "Wu, J. J. and Du, J. and Wang, F. and Yang, C. and Jiang, X. Z. and Hu, J. and Yin, B. and Zhang, J. S. and Dai, L. R.",
  TITLE     = "A multimodal attention fusion network with a dynamic vocabulary for {TextVQA}",
  JOURNAL   = PR,
  VOLUME    = "122",
  YEAR      = "2022",
  PAGES     = "108214",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228422"
}

@article{bb233447,
  AUTHOR    = "Peng, L. and Yang, Y. and Wang, Z. and Huang, Z. and Shen, H. T.",
  TITLE     = "{MRA-Net}: Improving {VQA} Via Multi-Modal Relation Attention Network",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "1",
  MONTH     = jan,
  PAGES     = "318--329",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228423"
}

@article{bb233448,
  AUTHOR    = "Shuang, K. and Guo, J. and Wang, Z. H.",
  TITLE     = "Comprehensive-perception dynamic reasoning for visual question answering",
  JOURNAL   = PR,
  VOLUME    = "131",
  YEAR      = "2022",
  PAGES     = "108878",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228424"
}

@article{bb233449,
  AUTHOR    = "Xie, J. Y. and Fang, W. H. and Cai, Y. and Huang, Q. B. and Li, Q.",
  TITLE     = "Knowledge-Based Visual Question Generation",
  JOURNAL   = CirSysVideo,
  VOLUME    = "32",
  YEAR      = "2022",
  NUMBER    = "11",
  MONTH     = nov,
  PAGES     = "7547--7558",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228425"
}

@article{bb233450,
  AUTHOR    = "Gao, C. Y. and Zhu, Q. and Wang, P. and Li, H. and Liu, Y. L. and van den Hengel, A. J. and Wu, Q.",
  TITLE     = "Structured Multimodal Attentions for {TextVQA}",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "12",
  MONTH     = dec,
  PAGES     = "9603--9614",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228426"
}

@article{bb233451,
  AUTHOR    = "Xu, F. Z. and Lin, Q. and Liu, J. and Zhang, L. L. and Zhao, T. Z. and Chai, Q. and Pan, Y. and Huang, Y. and Wang, Q. Y.",
  TITLE     = "{MoCA}: Incorporating domain pretraining and cross attention for textbook question answering",
  JOURNAL   = PR,
  VOLUME    = "140",
  YEAR      = "2023",
  PAGES     = "109588",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228427"
}

@article{bb233452,
  AUTHOR    = "Mohamud, S. A. M. and Jalali, A. and Lee, M. H.",
  TITLE     = "Encoder-decoder cycle for visual question answering based on perception-action cycle",
  JOURNAL   = PR,
  VOLUME    = "144",
  YEAR      = "2023",
  PAGES     = "109848",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228428"
}

@article{bb233453,
  AUTHOR    = "Tito, R. and Karatzas, D. and Valveny, E.",
  TITLE     = "Hierarchical multimodal transformers for Multipage {DocVQA}",
  JOURNAL   = PR,
  VOLUME    = "144",
  YEAR      = "2023",
  PAGES     = "109834",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228429"
}

@article{bb233454,
  AUTHOR    = "Biswas, K. and Shivakumara, P. and Pal, U. and Liu, C. L. and Lu, Y.",
  TITLE     = "{VQAPT}: A New visual question answering model for personality traits in social media images",
  JOURNAL   = PRL,
  VOLUME    = "175",
  YEAR      = "2023",
  PAGES     = "66--73",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228430"
}

@article{bb233455,
  AUTHOR    = "Cho, J. W. and Argaw, D. M. and Oh, Y. and Kim, D. J. and Kweon, I. S.",
  TITLE     = "Empirical study on using adapters for debiased Visual Question Answering",
  JOURNAL   = CVIU,
  VOLUME    = "237",
  YEAR      = "2023",
  PAGES     = "103842",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228431"
}

@inproceedings{bb233456,
  AUTHOR    = "Cho, J. W. and Kim, D. J. and Choi, J. and Jung, Y. and Kweon, I. S.",
  TITLE     = "Dealing with Missing Modalities in the Visual Question Answer-Difference Prediction Task through Knowledge Distillation",
  BOOKTITLE = MULA21,
  YEAR      = "2021",
  PAGES     = "1592--1601",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228432"
}

@inproceedings{bb233457,
  AUTHOR    = "Cho, J. W. and Kim, D. J. and Ryu, H. and Kweon, I. S.",
  TITLE     = "Generative Bias for Robust Visual Question Answering",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "11681--11690",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228433"
}

@article{bb233458,
  AUTHOR    = "Mashrur, A. and Luo, W. and Zaidi, N. A. and Robles Kelly, A.",
  TITLE     = "Robust visual question answering via semantic cross modal augmentation",
  JOURNAL   = CVIU,
  VOLUME    = "238",
  YEAR      = "2024",
  PAGES     = "103862",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228434"
}

@article{bb233459,
  AUTHOR    = "Yao, H. B. and Wang, L. P. and Cai, C. T. and Sun, Y. X. and Zhang, Z. and Luo, Y. K.",
  TITLE     = "Multi-modal spatial relational attention networks for visual question answering",
  JOURNAL   = IVC,
  VOLUME    = "140",
  YEAR      = "2023",
  PAGES     = "104840",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228435"
}

@article{bb233460,
  AUTHOR    = "Zheng, W. B. and Yan, L. and Wang, F. Y.",
  TITLE     = "So Many Heads, So Many Wits: Multimodal Graph Reasoning for Text-Based Visual Question Answering",
  JOURNAL   = SMCS,
  VOLUME    = "54",
  YEAR      = "2024",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "854--865",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228436"
}

@article{bb233461,
  AUTHOR    = "Bi, Y. D. and Jiang, H. and Hu, Y. L. and Sun, Y. F. and Yin, B. C.",
  TITLE     = "See and Learn More: Dense Caption-Aware Representation for Visual Question Answering",
  JOURNAL   = CirSysVideo,
  VOLUME    = "34",
  YEAR      = "2024",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "1135--1146",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228437"
}

@article{bb233462,
  AUTHOR    = "Jiang, J. J. and Liu, Z. Y. and Zheng, N. N.",
  TITLE     = "Correlation Information Bottleneck: Towards Adapting Pretrained Multimodal Models for Robust Visual Question Answering",
  JOURNAL   = IJCV,
  VOLUME    = "132",
  YEAR      = "2024",
  NUMBER    = "1",
  MONTH     = jan,
  PAGES     = "185--207",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228438"
}

@article{bb233463,
  AUTHOR    = "Zhang, S. and Chen, Y. and Sun, Y. and Wang, F. and Shi, H. B. and Wang, H. R.",
  TITLE     = "{LOIS}: Looking Out of Instance Semantics for Visual Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "6202--6214",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228439"
}

@article{bb233464,
  AUTHOR    = "Xie, J. Y. and Cai, Y. and Chen, J. L. and Xu, R. H. and Wang, J. X. and Li, Q.",
  TITLE     = "Knowledge-Augmented Visual Question Answering With Natural Language Explanation",
  JOURNAL   = IP,
  VOLUME    = "33",
  YEAR      = "2024",
  PAGES     = "2652--2664",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228440"
}

@article{bb233465,
  AUTHOR    = "Wang, J. J. and Ma, A. L. and Chen, Z. H. and Zheng, Z. and Wan, Y. T. and Zhang, L. P. and Zhong, Y. F.",
  TITLE     = "{EarthVQANet}: Multi-task visual question answering for remote sensing image understanding",
  JOURNAL   = PandRS,
  VOLUME    = "212",
  YEAR      = "2024",
  PAGES     = "422--439",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228441"
}

@article{bb233466,
  AUTHOR    = "Qian, S. and Liu, B. Q. and Sun, C. J. and Xu, Z. and Ma, L. and Wang, B.",
  TITLE     = "{CroMIC-QA}: The Cross-Modal Information Complementation Based Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "8348--8359",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228442"
}

@article{bb233467,
  AUTHOR    = "Uehara, K. and Harada, T.",
  TITLE     = "Learning by Asking Questions for Knowledge-Based Novel Object Recognition",
  JOURNAL   = IJCV,
  VOLUME    = "132",
  YEAR      = "2024",
  NUMBER    = "6",
  MONTH     = jun,
  PAGES     = "2290--2309",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228443"
}

@inproceedings{bb233468,
  AUTHOR    = "Uehara, K. and Harada, T.",
  TITLE     = "{K-VQG}: Knowledge-aware Visual Question Generation for Common-sense Acquisition",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "4390--4398",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228444"
}

@inproceedings{bb233469,
  AUTHOR    = "Uehara, K. and Duan, N. and Harada, T.",
  TITLE     = "Learning to Ask Informative Sub-Questions for Visual Question Answering",
  BOOKTITLE = MULA22,
  YEAR      = "2022",
  PAGES     = "4680--4689",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228445"
}

@inproceedings{bb233470,
  AUTHOR    = "Li, Y. K. and Duan, N. and Zhou, B. L. and Chu, X. and Ouyang, W. L. and Wang, X. G. and Zhou, M.",
  TITLE     = "Visual Question Generation as Dual Task of Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "6116--6124",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228446"
}

@inproceedings{bb233471,
  AUTHOR    = "Gao, P. and Li, H. S. and Li, S. and Lu, P. and Li, Y. K. and Hoi, S. C. H. and Wang, X. G.",
  TITLE     = "Question-Guided Hybrid Convolution for Visual Question Answering",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "I: 485--501",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228447"
}

@inproceedings{bb233472,
  AUTHOR    = "Gao, P. and Jiang, Z. K. and You, H. X. and Lu, P. and Hoi, S. C. H. and Wang, X. G. and Li, H. S.",
  TITLE     = "Dynamic Fusion With Intra- and Inter-Modality Attention Flow for Visual Question Answering",
  BOOKTITLE = CVPR19,
  YEAR      = "2019",
  PAGES     = "6632--6641",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228448"
}

@article{bb233473,
  AUTHOR    = "Vosoughi, A. and Deng, S. J. and Zhang, S. Y. and Tian, Y. P. and Xu, C. L. and Luo, J. B.",
  TITLE     = "Cross Modality Bias in Visual Question Answering: A Causal View With Possible Worlds {VQA}",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "8609--8624",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228449"
}

@article{bb233474,
  AUTHOR    = "Guo, Y. Y. and Jiao, F. and Shen, Z. Q. and Nie, L. Q. and Kankanhalli, M.",
  TITLE     = "{UNK-VQA}: A Dataset and a Probe Into the Abstention Ability of Multi-Modal Large Models",
  JOURNAL   = PAMI,
  VOLUME    = "46",
  YEAR      = "2024",
  NUMBER    = "12",
  MONTH     = dec,
  PAGES     = "10284--10296",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228450"
}

@article{bb233475,
  AUTHOR    = "Chen, F. Y. and Tang, X. S. and Hao, K. R.",
  TITLE     = "{GEXMERT}: Geometrically enhanced cross-modality encoder representations from transformers inspired by higher-order visual percepts",
  JOURNAL   = PR,
  VOLUME    = "158",
  YEAR      = "2025",
  PAGES     = "111047",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228451"
}

@article{bb233476,
  AUTHOR    = "Zhang, B. and Li, J. X. and Shi, Y. C. and Han, Y. and Hu, Q. H.",
  TITLE     = "{VADS}: Visuo-Adaptive {DualStrike} attack on visual question answer",
  JOURNAL   = CVIU,
  VOLUME    = "249",
  YEAR      = "2024",
  PAGES     = "104137",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228452"
}

@article{bb233477,
  AUTHOR    = "Peng, D. and Li, Z. X.",
  TITLE     = "Unbiased {VQA} via modal information interaction and question transformation",
  JOURNAL   = PR,
  VOLUME    = "162",
  YEAR      = "2025",
  PAGES     = "111394",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228453"
}

@article{bb233478,
  AUTHOR    = "Fan, L. and Gong, X. and Zheng, C. Y. and Tan, X. L. and Li, J. and Ou, Y. F.",
  TITLE     = "{Cycle-VQA}: A Cycle-Consistent Framework for Robust Medical Visual Question Answering",
  JOURNAL   = PR,
  VOLUME    = "165",
  YEAR      = "2025",
  PAGES     = "111609",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228454"
}

@article{bb233479,
  AUTHOR    = "Lin, Q. and He, K. and Zhu, Y. F. and Xu, F. Z. and Cambria, E. and Feng, M. L.",
  TITLE     = "Cross-Modal Knowledge Diffusion-Based Generation for Difference-Aware Medical {VQA}",
  JOURNAL   = IP,
  VOLUME    = "34",
  YEAR      = "2025",
  PAGES     = "2421--2434",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228455"
}

% NOTE(review): PAGES in bb233480 holds the placeholder xx-yy, kept verbatim.
% Replace it with the real page range or article number once it is known.
@article{bb233480,
  AUTHOR    = "Kim, B. S. and Kim, J. and Lee, D. and Jang, B.",
  TITLE     = "Visual Question Answering: A Survey of Methods, Datasets, Evaluation, and Challenges",
  JOURNAL   = Surveys,
  VOLUME    = "57",
  YEAR      = "2025",
  NUMBER    = "10",
  MONTH     = may,
  PAGES     = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228456"
}

@inproceedings{bb233481,
  AUTHOR    = "Kim, H. Y. and Jung, I. and Suh, D. and Zhang, Y. and Lee, S. and Hong, S.",
  TITLE     = "Question-Aware {Gaussian} Experts for Audio-Visual Question Answering",
  BOOKTITLE = CVPR25,
  YEAR      = "2025",
  PAGES     = "13681--13690",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228457"
}

@inproceedings{bb233482,
  AUTHOR    = "Huang, C. Y. and Maneechotesuwan, B. and Chopra, S. and Kira, Z.",
  TITLE     = "{FRAMES-VQA}: Benchmarking Fine-Tuning Robustness across Multi-Modal Shifts in Visual Question Answering",
  BOOKTITLE = CVPR25,
  YEAR      = "2025",
  PAGES     = "3909--3918",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228458"
}

@inproceedings{bb233483,
  AUTHOR    = "Wu, K. X. and Li, X. and Li, X. and Hu, C. and Wu, G. L.",
  TITLE     = "{AVQACL}: A Novel Benchmark for Audio-Visual Question Answering Continual Learning",
  BOOKTITLE = CVPR25,
  YEAR      = "2025",
  PAGES     = "3252--3261",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228459"
}

@inproceedings{bb233484,
  AUTHOR    = "Zhao, X. Y. and Bai, Z. W. and Zhou, M. L. and Ren, X. C. and Wang, Y. Q. and Wang, L. C.",
  TITLE     = "Integrating Dynamic Routing with Reinforcement Learning and Multimodal Techniques for Visual Question Answering",
  BOOKTITLE = ICIVC24,
  YEAR      = "2024",
  PAGES     = "295--301",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228460"
}

@inproceedings{bb233485,
  AUTHOR    = "Park, K. R. and Lee, H. J. and Kim, J. U.",
  TITLE     = "Learning Trimodal Relation for Audio-visual Question Answering with Missing Modality",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XV: 42--59",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228461"
}

@inproceedings{bb233486,
  AUTHOR    = "Mishra, A. and Agarwala, A. and Tiwari, U. and Rajendiran, V. N. and Miriyala, S. S.",
  TITLE     = "Efficient Visual Question Answering on Embedded Devices: Cross-Modality Attention with Evolutionary Quantization",
  BOOKTITLE = ICIP24,
  YEAR      = "2024",
  PAGES     = "2142--2148",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228462"
}

@inproceedings{bb233487,
  AUTHOR    = "Jiang, X. and Wang, G. M. and Guo, J. H. and Li, J. C. and Zhang, W. Q. and Lu, R. X. and Tang, S. L.",
  TITLE     = "{DIEM}: Decomposition-Integration Enhancing Multimodal Insights",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "27294--27303",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228463"
}

@inproceedings{bb233488,
  AUTHOR    = "Reichman, B. and Heck, L.",
  TITLE     = "Cross-Modal Dense Passage Retrieval for Outside Knowledge Visual Question Answering",
  BOOKTITLE = CLVL23,
  YEAR      = "2023",
  PAGES     = "2829--2834",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228464"
}

@inproceedings{bb233489,
  AUTHOR    = "Qian, Z. and Wang, X. and Duan, X. G. and Qin, P. and Li, Y. H. and Zhu, W. W.",
  TITLE     = "Decouple Before Interact: Multi-Modal Prompt Learning for Continual Visual Question Answering",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "2941--2950",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228465"
}

@inproceedings{bb233490,
  AUTHOR    = "Li, B. J. and Wang, J. and Zhao, M. and Zhou, S.",
  TITLE     = "Two-stage Multimodality Fusion for High-performance Text-based Visual Question Answering",
  BOOKTITLE = ACCV22,
  YEAR      = "2022",
  PAGES     = "IV: 658--674",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228466"
}

@inproceedings{bb233491,
  AUTHOR    = "Chai, Z. and Wan, X. J. and Han, S. C. and Poon, J.",
  TITLE     = "Visual Question Generation Under Multi-granularity Cross-Modal Interaction",
  BOOKTITLE = MMMod23,
  YEAR      = "2023",
  PAGES     = "I: 255--266",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228467"
}

@inproceedings{bb233492,
  AUTHOR    = "Wang, J. H. and Hu, M. H. and Song, Y. G. and Yang, X. S.",
  TITLE     = "Health-Oriented Multimodal Food Question Answering",
  BOOKTITLE = MMMod23,
  YEAR      = "2023",
  PAGES     = "I: 191--203",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228468"
}

% NOTE(review): BOOKTITLE in bb233493 is the quoted literal ICPR22, whereas the
% other ICPR entries use macros (ICPR18, ICPR21). Left unchanged because no
% ICPR22 macro definition is visible here -- switch to the macro if one exists.
@inproceedings{bb233493,
  AUTHOR    = "Zhang, H. T. and Wu, W.",
  TITLE     = "{CAT}: Re-Conv Attention in Transformer for Visual Question Answering",
  BOOKTITLE = "ICPR22",
  YEAR      = "2022",
  PAGES     = "1471--1477",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228469"
}

@inproceedings{bb233494,
  AUTHOR    = "Dancette, C. and Cadene, R. and Teney, D. and Cord, M.",
  TITLE     = "Beyond Question-Based Biases: Assessing Multimodal Shortcut Learning in Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1554--1563",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228470"
}

% NOTE(review): the title of bb233495 appears to have the conference name
% appended after the first colon; kept as found -- TODO confirm the published
% title before trimming it.
@inproceedings{bb233495,
  AUTHOR    = "Felix, R. and Repasky, B. and Hodge, S. and Zolfaghari, R. and Abbasnejad, E. and Sherrah, J.",
  TITLE     = "Cross-Modal Visual Question Answering for Remote Sensing Data: the International Conference on Digital Image Computing: Techniques and Applications ({DICTA} 2021)",
  BOOKTITLE = DICTA21,
  YEAR      = "2021",
  PAGES     = "1--9",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228471"
}

@inproceedings{bb233496,
  AUTHOR    = "Chen, H. Y. and Liu, R. F. and Peng, B.",
  TITLE     = "Cross-modal Relational Reasoning Network for Visual Question Answering",
  BOOKTITLE = MAIR2-21,
  YEAR      = "2021",
  PAGES     = "3939--3948",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228472"
}

@inproceedings{bb233497,
  AUTHOR    = "Farazi, M. and Khan, S. and Barnes, N. M.",
  TITLE     = "Question-Agnostic Attention for Visual Question Answering",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "3542--3549",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228473"
}

@inproceedings{bb233498,
  AUTHOR    = "Li, Y. and Lin, Y. and Zhao, H. H. and Wang, D. H.",
  TITLE     = "Dual Path Multi-Modal High-Order Features for Textual Content based Visual Question Answering",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "4324--4331",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228474"
}

@inproceedings{bb233499,
  AUTHOR    = "Huang, H. T. and Han, T. and Han, W. and Yap, D. and Chiang, C. M.",
  TITLE     = "Answer-checking in Context: A Multi-modal Fully Attention Network for Visual Question Answering",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "1173--1180",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT228475"
}