% Visual Question Answering references. Each entry's bibsource field records
% the visionbib.com anchor the record was taken from.
%
% journal/booktitle values written without delimiters (MultMed, IP, PR,
% CVPR22, ...) are string macros. Their definitions are not part of this
% section and are assumed to be loaded from a separate macro file -- confirm.
%
% Page values such as "I: 485--501" keep the Roman-numeral prefix exactly as
% given by the source; presumably it is the proceedings volume/part -- confirm
% before moving it into a volume field.

@article{bb229600,
  author    = {Xu, N. and Lu, Z. and Tian, H.S. and Kang, R. and Cao, J. and Zhang, Y.D. and Liu, A.A.},
  title     = {Learning to Supervise Knowledge Retrieval Over a Tree Structure for Visual Question Answering},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {6689--6700},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224585},
}

@article{bb229601,
  author    = {Pan, Y.H. and Liu, J. and Jin, L. and Li, Z.C.},
  title     = {Unbiased Visual Question Answering by Leveraging Instrumental Variable},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {6648--6662},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224586},
}

@article{bb229602,
  author    = {Zhang, S. and Chen, Y. and Sun, Y. and Wang, F. and Shi, H.B. and Wang, H.R.},
  title     = {{LOIS}: Looking Out of Instance Semantics for Visual Question Answering},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {6202--6214},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224587},
}

@article{bb229603,
  author    = {Xie, J.Y. and Cai, Y. and Chen, J.L. and Xu, R.H. and Wang, J.X. and Li, Q.},
  title     = {Knowledge-Augmented Visual Question Answering With Natural Language Explanation},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {2652--2664},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224588},
}

@article{bb229604,
  author    = {Wang, Q. and Liu, J. and Wu, W.J.},
  title     = {Coordinating explicit and implicit knowledge for knowledge-based {VQA}},
  journal   = PR,
  volume    = {151},
  year      = {2024},
  pages     = {110368},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224589},
}

@article{bb229605,
  author    = {Wei, M. and Chen, L. and Ji, W. and Yue, X.Y. and Zimmermann, R.},
  title     = {In Defense of Clip-Based Video Relation Detection},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {2759--2769},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224590},
}

@article{bb229606,
  author    = {Ma, J. and Liu, J. and Chai, Q. and Wang, P.H. and Tao, J.},
  title     = {Diagram Perception Networks for Textbook Question Answering via Joint Optimization},
  journal   = IJCV,
  volume    = {132},
  year      = {2024},
  number    = {5},
  month     = may,
  pages     = {1578--1591},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224591},
}

@article{bb229607,
  author    = {Wang, J. and Ma, A. and Chen, Z.H. and Zheng, Z. and Wan, Y.T. and Zhang, L.P. and Zhong, Y.F.},
  title     = {{EarthVQANet}: Multi-task visual question answering for remote sensing image understanding},
  journal   = PandRS,
  volume    = {212},
  year      = {2024},
  pages     = {422--439},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224592},
}

@article{bb229608,
  author    = {Uehara, K. and Harada, T.},
  title     = {Learning by Asking Questions for Knowledge-Based Novel Object Recognition},
  journal   = IJCV,
  volume    = {132},
  year      = {2024},
  number    = {6},
  month     = jun,
  pages     = {2290--2309},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224593},
}

@inproceedings{bb229609,
  author    = {Uehara, K. and Harada, T.},
  title     = {{K-VQG}: Knowledge-aware Visual Question Generation for Common-sense Acquisition},
  booktitle = WACV23,
  year      = {2023},
  pages     = {4390--4398},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224594},
}

@inproceedings{bb229610,
  author    = {Uehara, K. and Duan, N. and Harada, T.},
  title     = {Learning to Ask Informative Sub-Questions for Visual Question Answering},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4680--4689},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224595},
}

@inproceedings{bb229611,
  author    = {Li, Y.K. and Duan, N. and Zhou, B.L. and Chu, X. and Ouyang, W.L. and Wang, X.G. and Zhou, M.},
  title     = {Visual Question Generation as Dual Task of Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6116--6124},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224596},
}

@inproceedings{bb229612,
  author    = {Gao, P. and Li, H.S. and Li, S. and Lu, P. and Li, Y.K. and Hoi, S.C.H. and Wang, X.G.},
  title     = {Question-Guided Hybrid Convolution for Visual Question Answering},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {I: 485--501},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224597},
}

@inproceedings{bb229613,
  author    = {Gao, P. and Jiang, Z.K. and You, H.X. and Lu, P. and Hoi, S.C.H. and Wang, X.G. and Li, H.S.},
  title     = {Dynamic Fusion With Intra- and Inter-Modality Attention Flow for Visual Question Answering},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6632--6641},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224598},
}

@article{bb229614,
  author    = {Qian, S. and Liu, B.Q. and Sun, C.J. and Xu, Z. and Ma, L. and Wang, B.},
  title     = {{CroMIC-QA}: The Cross-Modal Information Complementation Based Question Answering},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {8348--8359},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224599},
}

@article{bb229615,
  author    = {Li, L.J. and Jin, T. and Lin, W. and Jiang, H. and Pan, W.W. and Wang, J. and Xiao, S.W. and Xia, Y. and Jiang, W.H. and Zhao, Z.},
  title     = {Multi-Granularity Relational Attention Network for Audio-Visual Question Answering},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {8},
  month     = aug,
  pages     = {7080--7094},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224600},
}

@article{bb229616,
  author    = {Vosoughi, A. and Deng, S.J. and Zhang, S.Y. and Tian, Y. and Xu, C.L. and Luo, J.B.},
  title     = {Cross Modality Bias in Visual Question Answering: A Causal View With Possible Worlds {VQA}},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {8609--8624},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224601},
}

@article{bb229617,
  author    = {Bi, Y.D. and Jiang, H.J. and Hu, Y.L. and Sun, Y.F. and Yin, B.C.},
  title     = {Fair Attention Network for Robust Visual Question Answering},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {9},
  month     = sep,
  pages     = {7870--7881},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224602},
}

@article{bb229618,
  author    = {Xue, D. and Qian, S.S. and Xu, C.S.},
  title     = {Integrating Neural-Symbolic Reasoning With Variational Causal Inference Network for Explanatory Visual Question Answering},
  journal   = PAMI,
  volume    = {46},
  year      = {2024},
  number    = {12},
  month     = dec,
  pages     = {7893--7908},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224603},
}

@inproceedings{bb229619,
  author    = {Xue, D. and Qian, S.S. and Xu, C.S.},
  title     = {Variational Causal Inference Network for Explanatory Visual Question Answering},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2515--2525},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224604},
}

@article{bb229620,
  author    = {Xue, D. and Qian, S.S. and Fang, Q. and Xu, C.S.},
  title     = {{LININ}: Logic Integrated Neural Inference Network for Explanatory Visual Question Answering},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {16--27},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224605},
}

@article{bb229621,
  author    = {Shen, K. and Wu, L.F. and Tang, S.L. and Xu, F. and Long, B. and Zhuang, Y.T. and Pei, J.},
  title     = {Ask Questions With Double Hints: Visual Question Generation With Answer-Awareness and Region-Reference},
  journal   = PAMI,
  volume    = {46},
  year      = {2024},
  number    = {12},
  month     = dec,
  pages     = {9648--9660},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224606},
}

@article{bb229622,
  author    = {Yamada, M. and d'Amario, V. and Takemoto, K. and Boix, X. and Sasaki, T.},
  title     = {Transformer Module Networks for Systematic Generalization in Visual Question Answering},
  journal   = PAMI,
  volume    = {46},
  year      = {2024},
  number    = {12},
  month     = dec,
  pages     = {10096--10105},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224607},
}

@article{bb229623,
  author    = {Guo, Y.Y. and Jiao, F. and Shen, Z.Q. and Nie, L.Q. and Kankanhalli, M.},
  title     = {{UNK-VQA}: A Dataset and a Probe Into the Abstention Ability of Multi-Modal Large Models},
  journal   = PAMI,
  volume    = {46},
  year      = {2024},
  number    = {12},
  month     = dec,
  pages     = {10284--10296},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224608},
}

@article{bb229624,
  author    = {Chen, F.Y. and Tang, X.S. and Hao, K.R.},
  title     = {{GEXMERT}: Geometrically enhanced cross-modality encoder representations from transformers inspired by higher-order visual percepts},
  journal   = PR,
  volume    = {158},
  year      = {2025},
  pages     = {111047},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224609},
}

@article{bb229625,
  author    = {Zhang, B. and Li, J.X. and Shi, Y.C. and Han, Y. and Hu, Q.H.},
  title     = {{VADS}: Visuo-Adaptive {DualStrike} attack on visual question answer},
  journal   = CVIU,
  volume    = {249},
  year      = {2024},
  pages     = {104137},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224610},
}

@article{bb229626,
  author    = {Liu, J. and Xie, J. and Zhou, F.Y. and He, S.F.},
  title     = {Question Type-Aware Debiasing for Test-Time Visual Question Answering Model Adaptation},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {11},
  month     = nov,
  pages     = {10805--10816},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224611},
}

@article{bb229627,
  author    = {Cao, R.L. and Li, Z.X. and Tang, Z.J. and Zhang, C.L. and Ma, H.F.},
  title     = {Enhancing robust {VQA} via contrastive and self-supervised learning},
  journal   = PR,
  volume    = {159},
  year      = {2025},
  pages     = {111129},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224612},
}

@article{bb229628,
  author    = {Liu, F. and Dai, W.W. and Zhang, C.Y. and Zhu, J. and Yao, L. and Li, X.},
  title     = {{Co-LLaVA}: Efficient Remote Sensing Visual Question Answering via Model Collaboration},
  journal   = RS,
  volume    = {17},
  year      = {2025},
  number    = {3},
  pages     = {466},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224613},
}

@article{bb229629,
  author    = {Peng, D. and Li, Z.X.},
  title     = {Unbiased {VQA} via modal information interaction and question transformation},
  journal   = PR,
  volume    = {162},
  year      = {2025},
  pages     = {111394},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224614},
}

@article{bb229630,
  author    = {Gao, Y.Q. and Bai, Z.W. and Zhou, M.L. and Jia, B.L. and Gao, P.Q. and Zhu, R.},
  title     = {Adaptive Conditional Reasoning for Remote Sensing Visual Question Answering},
  journal   = RS,
  volume    = {17},
  year      = {2025},
  number    = {8},
  pages     = {1338},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224615},
}

@article{bb229631,
  author    = {Fan, L. and Gong, X. and Zheng, C.Y. and Tan, X.L. and Li, J. and Ou, Y.F.},
  title     = {{Cycle-VQA}: A Cycle-Consistent Framework for Robust Medical Visual Question Answering},
  journal   = PR,
  volume    = {165},
  year      = {2025},
  pages     = {111609},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224616},
}

@article{bb229632,
  author    = {Lin, Q. and He, K. and Zhu, Y.F. and Xu, F.Z. and Cambria, E. and Feng, M.L.},
  title     = {Cross-Modal Knowledge Diffusion-Based Generation for Difference-Aware Medical {VQA}},
  journal   = IP,
  volume    = {34},
  year      = {2025},
  pages     = {2421--2434},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224617},
}

@inproceedings{bb229633,
  author    = {Choudhary, N. and Goyal, P. and Siwatch, D. and Chandak, A. and Mahajan, H. and Khurana, V. and Kumar, Y.},
  title     = {{AdQuestA}: Knowledge-Guided Visual Question Answer Framework for Advertisements},
  booktitle = WACV25,
  year      = {2025},
  pages     = {5812--5821},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224618},
}

@inproceedings{bb229634,
  author    = {Zhao, X.Y. and Bai, Z.W. and Zhou, M.L. and Ren, X.C. and Wang, Y.Q. and Wang, L.C.},
  title     = {Integrating Dynamic Routing with Reinforcement Learning and Multimodal Techniques for Visual Question Answering},
  booktitle = ICIVC24,
  year      = {2024},
  pages     = {295--301},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224619},
}

@inproceedings{bb229635,
  author    = {Ge, J.X. and Subramanian, S. and Shi, B. and Herzig, R. and Darrell, T.J.},
  title     = {Recursive Visual Programming},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLIII: 1--18},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224620},
}

@inproceedings{bb229636,
  author    = {Lu, C. and Lu, Q. and Luo, J.},
  title     = {An Explainable Vision Question Answer Model via Diffusion Chain-of-thought},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXVII: 146--162},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224621},
}

@inproceedings{bb229637,
  author    = {Wang, H.B. and Ge, W.F.},
  title     = {{Q\&A} Prompts: Discovering Rich Visual Clues through Mining Question-answer Prompts for {VQA} requiring Diverse World Knowledge},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLII: 274--292},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224622},
}

@inproceedings{bb229638,
  author    = {Liu, H. and Ma, X. and Zhong, C. and Zhang, Y. and Lin, W.Y.},
  title     = {Timecraft: Navigate Weakly-supervised Temporal Grounded Video Question Answering via Bi-directional Reasoning},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {V: 92--107},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224623},
}

@inproceedings{bb229639,
  author    = {Park, K.R. and Lee, H.J. and Kim, J.U.},
  title     = {Learning Trimodal Relation for Audio-visual Question Answering with Missing Modality},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XV: 42--59},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224624},
}

@inproceedings{bb229640,
  author    = {Zhang, Y.F. and Jiang, M. and Zhao, Q.},
  title     = {Grace: Graph-based Contextual Debiasing for Fair Visual Question Answering},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XVII: 176--194},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224625},
}

@inproceedings{bb229641,
  author    = {Mishra, A. and Agarwala, A. and Tiwari, U. and Rajendiran, V.N. and Miriyala, S.S.},
  title     = {Efficient Visual Question Answering on Embedded Devices: Cross-Modality Attention with Evolutionary Quantization},
  booktitle = ICIP24,
  year      = {2024},
  pages     = {2142--2148},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224626},
}

@inproceedings{bb229642,
  author    = {Jiang, X. and Wang, G.M. and Guo, J.H. and Li, J.C. and Zhang, W.Q. and Lu, R.X. and Tang, S.L.},
  title     = {{DIEM}: Decomposition-Integration Enhancing Multimodal Insights},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {27294--27303},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224627},
}

@inproceedings{bb229643,
  author    = {Liu, X.L. and Dong, Z.K. and Zhang, P.},
  title     = {Tackling Data Bias in {MUSIC-AVQA}: Crafting a Balanced Dataset for Unbiased Question-Answering},
  booktitle = WACV24,
  year      = {2024},
  pages     = {4466--4475},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224628},
}

@inproceedings{bb229644,
  author    = {Shi, X.X. and Lee, S.},
  title     = {Benchmarking Out-of-Distribution Detection in Visual Question Answering},
  booktitle = WACV24,
  year      = {2024},
  pages     = {5473--5483},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224629},
}

@inproceedings{bb229645,
  author    = {Venkataraman, S.R. and Rao, R.S. and Balasubramanian, S. and Sarma, R.R. and Vorugunti, C.S.},
  title     = {Can you even tell left from right? Presenting a new challenge for {VQA}},
  booktitle = WACV24,
  year      = {2024},
  pages     = {4486--4495},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224630},
}

@inproceedings{bb229646,
  author    = {Sahu, P.P. and Raut, A. and Samant, J.S. and Gorijala, M. and Lakshminarayanan, V. and Bhaskar, P.},
  title     = {{POP-VQA}: Privacy preserving, On-device, Personalized Visual Question Answering},
  booktitle = WACV24,
  year      = {2024},
  pages     = {8455--8464},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224631},
}

@inproceedings{bb229647,
  author    = {Li, J.P. and Wei, P. and Han, W.J. and Fan, L.F.},
  title     = {{IntentQA}: Context-aware Video Intent Reasoning},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {11929--11940},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224632},
}

@inproceedings{bb229648,
  author    = {Hu, Y.S. and Hua, H. and Yang, Z.Y. and Shi, W.J. and Smith, N.A. and Luo, J.B.},
  title     = {{PromptCap}: Prompt-Guided Image Captioning for {VQA} with {GPT-3}},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2951--2963},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224633},
}

@inproceedings{bb229649,
  author    = {Reichman, B. and Heck, L.},
  title     = {Cross-Modal Dense Passage Retrieval for Outside Knowledge Visual Question Answering},
  booktitle = CLVL23,
  year      = {2023},
  pages     = {2829--2834},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224634},
}

@inproceedings{bb229650,
  author    = {Naik, N. and Potts, C. and Kreiss, E.},
  title     = {{Context-VQA}: Towards Context-Aware and Purposeful Visual Question Answering},
  booktitle = CLVL23,
  year      = {2023},
  pages     = {2813--2817},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224635},
}

@inproceedings{bb229651,
  author    = {Hu, Y.S. and Liu, B. and Kasai, J. and Wang, Y.Z. and Ostendorf, M. and Krishna, R. and Smith, N.A.},
  title     = {{TIFA}: Accurate and Interpretable Text-to-Image Faithfulness Evaluation with Question Answering},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {20349--20360},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224636},
}

@inproceedings{bb229652,
  author    = {Zhang, Y.W. and Ho, C.H. and Vasconcelos, N.M.},
  title     = {Toward Unsupervised Realistic Visual Question Answering},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15567--15578},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224637},
}

@inproceedings{bb229653,
  author    = {Liang, K. and Albanie, S.},
  title     = {Simple Baselines for Interactive Video Retrieval with Questions and Answers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {11057--11067},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224638},
}

@inproceedings{bb229654,
  author    = {Mensink, T. and Uijlings, J. and Castrejon, L. and Goel, A. and Cadar, F. and Zhou, H. and Sha, F. and Araujo, A. and Ferrari, V.},
  title     = {Encyclopedic {VQA}: Visual questions about detailed properties of fine-grained categories},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {3090--3101},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224639},
}

@inproceedings{bb229655,
  author    = {Qian, Z. and Wang, X. and Duan, X.G. and Qin, P. and Li, Y.H. and Zhu, W.W.},
  title     = {Decouple Before Interact: Multi-Modal Prompt Learning for Continual Visual Question Answering},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2941--2950},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224640},
}

@inproceedings{bb229656,
  author    = {Bruni, P. and Falcon, A. and Radeva, P.},
  title     = {Time-aware Circulant Matrices for Question-based Temporal Localization},
  booktitle = CIAP23,
  year      = {2023},
  pages     = {II:182--195},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224641},
}

@inproceedings{bb229657,
  author    = {Ferreira, B.C.L. and Oliveira, H.G. and Silva, C.},
  title     = {Leveraging Question Answering for Domain-Agnostic Information Extraction},
  booktitle = CIARP23,
  year      = {2023},
  pages     = {I:244--256},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224642},
}

@inproceedings{bb229658,
  author    = {Wu, Z.H. and Shu, X.Y. and Yan, S.Y. and Lu, Z.Y.},
  title     = {{FGCVQA}: Fine-Grained Cross-Attention for Medical {VQA}},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {975--979},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224643},
}

@inproceedings{bb229659,
  author    = {Zhu, H. and Togo, R. and Ogawa, T. and Haseyama, M.},
  title     = {Interpretable Visual Question Answering Referring to Outside Knowledge},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {2140--2144},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224644},
}

@inproceedings{bb229660,
  author    = {Parelli, M. and Mallis, D. and Diomataris, M. and Pitsikalis, V.},
  title     = {Interpretable Visual Question Answering Via Reasoning Supervision},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {2525--2529},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224645},
}

@inproceedings{bb229661,
  author    = {Hegde, S. and Jahagirdar, S. and Gangisetty, S.},
  title     = {Making the {V} in {Text-VQA} Matter},
  booktitle = ODRUM23,
  year      = {2023},
  pages     = {5580--5588},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224646},
}

@inproceedings{bb229662,
  author    = {Suo, W. and Sun, M.Y. and Liu, W.S. and Gao, Y.Q. and Wang, P. and Zhang, Y.N. and Wu, Q.},
  title     = {{S3C}: Semi-Supervised {VQA} Natural Language Explanation via Self-Critical Learning},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {2646--2656},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224647},
}

@inproceedings{bb229663,
  author    = {Alampalle, C. and Hegde, S. and Jahagirdar, S. and Gangisetty, S.},
  title     = {Weakly Supervised Visual Question Answer Generation},
  booktitle = ODRUM23,
  year      = {2023},
  pages     = {5589--5597},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224648},
}

@inproceedings{bb229664,
  author    = {Jiang, J.J. and Zheng, N.N.},
  title     = {{MixPHM}: Redundancy-Aware Parameter-Efficient Tuning for Low-Resource Visual Question Answering},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {24203--24213},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224649},
}

@inproceedings{bb229665,
  author    = {Wang, Y. and Pfeiffer, J. and Carion, N. and Le Cun, Y.L. and Kamath, A.},
  title     = {Adapting Grounded Visual Question Answering Models to Low Resource Languages},
  booktitle = MULA23,
  year      = {2023},
  pages     = {2596--2605},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224650},
}

@inproceedings{bb229666,
  author    = {Wang, M. and Mahjoubfar, A. and Joshi, A.},
  title     = {{FashionVQA}: A Domain-Specific Visual Question Answering System},
  booktitle = CVFAD23,
  year      = {2023},
  pages     = {3514--3519},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224651},
}

@inproceedings{bb229667,
  author    = {Tascon Morales, S. and Marquez Neila, P. and Sznitman, R.},
  title     = {Logical Implications for Visual Question Answering Consistency},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {6725--6735},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224652},
}

@inproceedings{bb229668,
  author    = {Chen, S. and Zhao, Q.},
  title     = {Divide and Conquer: Answering Questions with Object Factorization and Compositional Reasoning},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {6736--6745},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224653},
}

@inproceedings{bb229669,
  author    = {Basu, A. and Addepalli, S. and Babu, R.V.},
  title     = {{RMLVQA}: A Margin Loss Approach For Visual Question Answering with Language Biases},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {11671--11680},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224654},
}

@inproceedings{bb229670,
  author    = {Li, B.J. and Wang, J. and Zhao, M. and Zhou, S.},
  title     = {Two-stage Multimodality Fusion for High-performance Text-based Visual Question Answering},
  booktitle = ACCV22,
  year      = {2022},
  pages     = {IV:658--674},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224655},
}

@inproceedings{bb229671,
  author    = {Vivoli, E. and Biten, A.F. and Mafla, A. and Karatzas, D. and Gomez, L.},
  title     = {{MUST-VQA}: Multilingual Scene-Text {VQA}},
  booktitle = TextEvery22,
  year      = {2022},
  pages     = {345--358},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224656},
}

@inproceedings{bb229672,
  author    = {Chai, Z. and Wan, X.J. and Han, S.C. and Poon, J.},
  title     = {Visual Question Generation Under Multi-granularity Cross-Modal Interaction},
  booktitle = MMMod23,
  year      = {2023},
  pages     = {I: 255--266},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224657},
}

@inproceedings{bb229673,
  author    = {Wang, J.H. and Hu, M.H. and Song, Y.G. and Yang, X.S.},
  title     = {Health-Oriented Multimodal Food Question Answering},
  booktitle = MMMod23,
  year      = {2023},
  pages     = {I: 191--203},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224658},
}

@inproceedings{bb229674,
  author    = {Bongini, P. and Becattini, F. and del Bimbo, A.},
  title     = {Is {GPT-3} All You Need for Visual Question Answering in Cultural Heritage?},
  booktitle = VisArt22,
  year      = {2022},
  pages     = {268--281},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224659},
}

@inproceedings{bb229675,
  author    = {Jha, A. and Patro, B. and Van Gool, L.J. and Tuytelaars, T.},
  title     = {Barlow constrained optimization for Visual Question Answering},
  booktitle = WACV23,
  year      = {2023},
  pages     = {1084--1093},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224660},
}

@inproceedings{bb229676,
  author    = {Ravi, S. and Chinchure, A. and Sigal, L. and Liao, R.J. and Shwartz, V.},
  title     = {{VLC-BERT}: Visual Question Answering with Contextualized Commonsense Knowledge},
  booktitle = WACV23,
  year      = {2023},
  pages     = {1155--1165},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224661},
}

@inproceedings{bb229677,
  author    = {Etesam, Y. and Kochiev, L. and Chang, A.X.},
  title     = {{3DVQA}: Visual Question Answering for {3D} Environments},
  booktitle = CRV22,
  year      = {2022},
  pages     = {233--240},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224662},
}

% NOTE(review): the next three entries (bb229678, bb229679, bb229680) give
% booktitle as the literal string "ICPR22", whereas every other booktitle in
% this section is a bare macro name. Left unchanged; confirm whether an ICPR22
% string macro exists and should be referenced instead.

@inproceedings{bb229678,
  author    = {Ramamurthy, P. and Aakur, S.N.},
  title     = {{ISD-QA}: Iterative Distillation of Commonsense Knowledge from General Language Models for Unsupervised Question Answering},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {1229--1235},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224663},
}

@inproceedings{bb229679,
  author    = {Zhang, H.T. and Wu, W.},
  title     = {{CAT}: {Re-Conv} Attention in Transformer for Visual Question Answering},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {1471--1477},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224664},
}

@inproceedings{bb229680,
  author    = {Liu, L. and Su, X.D. and Guo, H. and Zhu, D.},
  title     = {A Transformer-based Medical Visual Question Answering Model},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {1712--1718},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224665},
}

@inproceedings{bb229681,
  author    = {Wu, X.Y. and Lu, J.F. and Li, Z.F. and Xiong, F.C.},
  title     = {Ques-to-Visual Guided Visual Question Answering},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {4193--4197},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224666},
}

@inproceedings{bb229682,
  author    = {Sarkar, A. and Rahnemoonfar, M.},
  title     = {{Grad-Cam} Aware Supervised Attention for Visual Question Answering for Post-Disaster Damage Assessment},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {3783--3787},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224667},
}

@inproceedings{bb229683,
  author    = {Whitehead, S. and Petryk, S. and Shakib, V. and Gonzalez, J. and Darrell, T.J. and Rohrbach, A. and Rohrbach, M.},
  title     = {Reliable Visual Question Answering: Abstain Rather Than Answer Incorrectly},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:148--166},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224668},
}

@inproceedings{bb229684,
  author    = {Chen, L. and Zheng, Y.H. and Xiao, J.},
  title     = {Rethinking Data Augmentation for Robust Visual Question Answering},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:95--112},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224669},
}

@inproceedings{bb229685,
  author    = {Zhang, H.T. and Wu, W.},
  title     = {Context Relation Fusion Model for Visual Question Answering},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {2112--2116},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224670},
}

@inproceedings{bb229686,
  author    = {Biten, A.F. and Litman, R. and Xie, Y.S. and Appalaraju, S. and Manmatha, R.},
  title     = {{LaTr}: Layout-Aware Transformer for Scene-Text {VQA}},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {16527--16537},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224671},
}

@inproceedings{bb229687,
  author    = {Lu, J.Y. and Ye, X. and Ren, Y. and Yang, Y.Z.},
  title     = {Good, Better, Best: Textual Distractors Generation for Multiple-Choice Visual Question Answering via Reinforcement Learning},
  booktitle = ODRUM22,
  year      = {2022},
  pages     = {4917--4926},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224672},
}

@inproceedings{bb229688,
  author    = {Ding, Y.H. and Huang, Z. and Wang, R. and Zhang, Y.H. and Chen, X. and Ma, Y.Z. and Chung, H. and Han, S.C.},
  title     = {{V-Doc}: Visual questions answers with Documents},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {21460--21466},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224673},
}

@inproceedings{bb229689,
  author    = {Azuma, D. and Miyanishi, T. and Kurita, S.H. and Kawanabe, M.},
  title     = {{ScanQA}: {3D} Question Answering for Spatial Scene Understanding},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {19107--19117},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224674},
}

@inproceedings{bb229690,
  author    = {Li, G.Y. and Wei, Y. and Tian, Y. and Xu, C.L. and Wen, J.R. and Hu, D.},
  title     = {Learning to Answer Questions in Dynamic Audio-Visual Scenarios},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {19086--19096},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224675},
}

@inproceedings{bb229691,
  author    = {Chen, C. and Anjum, S. and Gurari, D.},
  title     = {Grounding Answers for Visual Questions Asked by Visually Impaired People},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {19076--19085},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224676},
}

@inproceedings{bb229692,
  author    = {Li, C.H. and Li, Z. and Jing, C.C. and Wu, Y.W. and Zhai, M.L. and Jia, Y.D.},
  title     = {Compositional Substitutivity of Visual Reasoning for Visual Question Answering},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLVIII: 143--160},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224677},
}

@inproceedings{bb229693,
  author    = {Jing, C.C. and Jia, Y.D. and Wu, Y.W. and Liu, X.Y. and Wu, Q.},
  title     = {Maintaining Reasoning Consistency in Compositional Visual Question Answering},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {5089--5098},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224678},
}

@inproceedings{bb229694,
  author    = {Cascante Bonilla, P. and Wu, H. and Wang, L. and Feris, R.S. and Ordonez, V.},
  title     = {Sim {VQA}: Exploring Simulated Environments for Visual Question Answering},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {5046--5056},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224679},
}

@inproceedings{bb229695,
  author    = {Gupta, V. and Li, Z.W. and Kortylewski, A. and Zhang, C.Y. and Li, Y.W. and Yuille, A.L.},
  title     = {{SwapMix}: Diagnosing and Regularizing the Over-Reliance on Visual Context in Visual Question Answering},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {5068--5078},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224680},
}

@inproceedings{bb229696,
  author    = {Burghouts, G.J. and Huizinga, W.},
  title     = {Coarse-to-Fine Visual Question Answering by Iterative, Conditional Refinement},
  booktitle = CIAP22,
  year      = {2022},
  pages     = {II:418--428},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224681},
}

@inproceedings{bb229697,
  author    = {Kant, Y. and Moudgil, A. and Batra, D. and Parikh, D. and Agrawal, H.},
  title     = {Contrast and Classify: Training Robust {VQA} Models},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1584--1593},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224682},
}

@inproceedings{bb229698,
  author    = {Han, X.Z. and Wang, S.H. and Su, C. and Huang, Q.M. and Tian, Q.},
  title     = {Greedy Gradient Ensemble for Robust Visual Question Answering},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1564--1573},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224683},
}

@inproceedings{bb229699,
  author    = {Dancette, C. and Cadene, R. and Teney, D. and Cord, M.},
  title     = {Beyond Question-Based Biases: Assessing Multimodal Shortcut Learning in Visual Question Answering},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1554--1563},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224684},
}