@article{bb240500,
        AUTHOR = {Wang, Y.X. and Wei, B. and Liu, J. and Zhang, L.L. and Wang, J.X. and Wang, Q.Y.},
        TITLE = {{DisAVR}: Disentangled Adaptive Visual Reasoning Network for Diagram Question Answering},
        JOURNAL = IP,
        VOLUME = {32},
        YEAR = {2023},
        PAGES = {4812--4827},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235415},
}

@article{bb240501,
        AUTHOR = {Han, Y.D. and Yin, J.H. and Wu, J.L. and Wei, Y.W. and Nie, L.Q.},
        TITLE = {Semantic-Aware Modular Capsule Routing for Visual Question Answering},
        JOURNAL = IP,
        VOLUME = {32},
        YEAR = {2023},
        PAGES = {5537--5549},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235416},
}

@article{bb240502,
        AUTHOR = {Qian, T.W. and Chen, J.J. and Chen, S.X. and Wu, B. and Jiang, Y.G.},
        TITLE = {Scene Graph Refinement Network for Visual Question Answering},
        JOURNAL = MultMed,
        VOLUME = {25},
        YEAR = {2023},
        PAGES = {3950--3961},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235417},
}

@article{bb240503,
        AUTHOR = {Qin, B.S. and Hu, H.J. and Zhuang, Y.T.},
        TITLE = {Deep Residual Weight-Sharing Attention Network With Low-Rank Attention for Visual Question Answering},
        JOURNAL = MultMed,
        VOLUME = {25},
        YEAR = {2023},
        PAGES = {4282--4295},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235418},
}

@article{bb240504,
        AUTHOR = {Zhou, S. and Guo, D. and Li, J. and Yang, X. and Wang, M.},
        TITLE = {Exploring Sparse Spatial Relation in Graph Inference for Text-Based {VQA}},
        JOURNAL = IP,
        VOLUME = {32},
        YEAR = {2023},
        PAGES = {5060--5074},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235419},
}

@article{bb240505,
        AUTHOR = {Liu, Y.H. and Wei, W. and Peng, D. and Mao, X.L. and He, Z.Y. and Zhou, P.},
        TITLE = {Depth-Aware and Semantic Guided Relational Attention Network for Visual Question Answering},
        JOURNAL = MultMed,
        VOLUME = {25},
        YEAR = {2023},
        PAGES = {5344--5357},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235420},
}

@article{bb240506,
        AUTHOR = {Mao, A. and Yang, Z. and Lin, K. and Xuan, J. and Liu, Y.J.},
        TITLE = {Positional Attention Guided Transformer-Like Architecture for Visual Question Answering},
        JOURNAL = MultMed,
        VOLUME = {25},
        YEAR = {2023},
        PAGES = {6997--7009},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235421},
}

@article{bb240507,
        AUTHOR = {Sun, H. and Wang, S. and Zhu, Y.Q. and Yuan, W. and Zou, Z.Q.},
        TITLE = {Question Classification for Intelligent Question Answering: A Comprehensive Survey},
        JOURNAL = IJGI,
        VOLUME = {12},
        YEAR = {2023},
        NUMBER = {10},
        PAGES = {415},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235422},
}

@article{bb240508,
        AUTHOR = {Cao, B.W. and Cao, J.X. and Gui, J. and Shen, J.Y. and Liu, B. and He, L. and Tang, Y.Y. and Kwok, J.T.Y.},
        TITLE = {{AlignVE}: Visual Entailment Recognition Based on Alignment Relations},
        JOURNAL = MultMed,
        VOLUME = {25},
        YEAR = {2023},
        PAGES = {7378--7387},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235423},
}

@article{bb240509,
        AUTHOR = {Yu, Z. and Jin, Z. and Yu, J. and Xu, M.L. and Wang, H.B. and Fan, J.P.},
        TITLE = {Bilaterally Slimmable Transformer for Elastic and Efficient Visual Question Answering},
        JOURNAL = MultMed,
        VOLUME = {25},
        YEAR = {2023},
        PAGES = {9543--9556},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235424},
}

@article{bb240510,
        AUTHOR = {Huang, X.F. and Gong, H.F.},
        TITLE = {A Dual-Attention Learning Network With Word and Sentence Embedding for Medical Visual Question Answering},
        JOURNAL = MedImg,
        VOLUME = {43},
        YEAR = {2024},
        NUMBER = {2},
        MONTH = feb,
        PAGES = {832--845},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235425},
}

@article{bb240511,
        AUTHOR = {Song, Y.G. and Yang, X.S. and Wang, Y.W. and Xu, C.S.},
        TITLE = {Recovering Generalization via Pre-Training-Like Knowledge Distillation for Out-of-Distribution Visual Question Answering},
        JOURNAL = MultMed,
        VOLUME = {26},
        YEAR = {2024},
        PAGES = {837--851},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235426},
}

@article{bb240512,
        AUTHOR = {Wu, S. and Zhao, G.S. and Qian, X.M.},
        TITLE = {Resolving Zero-Shot and Fact-Based Visual Question Answering via Enhanced Fact Retrieval},
        JOURNAL = MultMed,
        VOLUME = {26},
        YEAR = {2024},
        PAGES = {1790--1800},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235427},
}

@article{bb240513,
        AUTHOR = {Wen, Z.Q. and Niu, S.C. and Li, G. and Wu, Q.Y. and Tan, M.K. and Wu, Q.},
        TITLE = {Test-Time Model Adaptation for Visual Question Answering With Debiased Self-Supervisions},
        JOURNAL = MultMed,
        VOLUME = {26},
        YEAR = {2024},
        PAGES = {2137--2147},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235428},
}

@article{bb240514,
        AUTHOR = {Huai, T.Y. and Yang, S.W. and Zhang, J.H. and Zhao, J.B. and He, L.},
        TITLE = {Debiased Visual Question Answering via the perspective of question types},
        JOURNAL = PRL,
        VOLUME = {178},
        YEAR = {2024},
        PAGES = {181--187},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235429},
}

@article{bb240515,
        AUTHOR = {Xu, N. and Lu, Z. and Tian, H.S. and Kang, R.B. and Cao, J. and Zhang, Y.D. and Liu, A.A.},
        TITLE = {Learning to Supervise Knowledge Retrieval Over a Tree Structure for Visual Question Answering},
        JOURNAL = MultMed,
        VOLUME = {26},
        YEAR = {2024},
        PAGES = {6689--6700},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235430},
}

@article{bb240516,
        AUTHOR = {Pan, Y.H. and Liu, J. and Jin, L. and Li, Z.C.},
        TITLE = {Unbiased Visual Question Answering by Leveraging Instrumental Variable},
        JOURNAL = MultMed,
        VOLUME = {26},
        YEAR = {2024},
        PAGES = {6648--6662},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235431},
}

@article{bb240517,
        AUTHOR = {Wang, Q. and Liu, J. and Wu, W.J.},
        TITLE = {Coordinating explicit and implicit knowledge for knowledge-based {VQA}},
        JOURNAL = PR,
        VOLUME = {151},
        YEAR = {2024},
        PAGES = {110368},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235432},
}

@article{bb240518,
        AUTHOR = {Wei, M. and Chen, L. and Ji, W. and Yue, X.Y. and Zimmermann, R.},
        TITLE = {In Defense of Clip-Based Video Relation Detection},
        JOURNAL = IP,
        VOLUME = {33},
        YEAR = {2024},
        PAGES = {2759--2769},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235433},
}

@article{bb240519,
        AUTHOR = {Ma, J. and Liu, J. and Chai, Q. and Wang, P.H. and Tao, J.},
        TITLE = {Diagram Perception Networks for Textbook Question Answering via Joint Optimization},
        JOURNAL = IJCV,
        VOLUME = {132},
        YEAR = {2024},
        NUMBER = {5},
        MONTH = may,
        PAGES = {1578--1591},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235434},
}

@article{bb240520,
        AUTHOR = {Li, L.J. and Jin, T. and Lin, W. and Jiang, H. and Pan, W.W. and Wang, J. and Xiao, S.W. and Xia, Y. and Jiang, W.H. and Zhao, Z.},
        TITLE = {Multi-Granularity Relational Attention Network for Audio-Visual Question Answering},
        JOURNAL = CirSysVideo,
        VOLUME = {34},
        YEAR = {2024},
        NUMBER = {8},
        MONTH = aug,
        PAGES = {7080--7094},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235435},
}

@article{bb240521,
        AUTHOR = {Bi, Y.D. and Jiang, H.J. and Hu, Y.L. and Sun, Y.F. and Yin, B.C.},
        TITLE = {Fair Attention Network for Robust Visual Question Answering},
        JOURNAL = CirSysVideo,
        VOLUME = {34},
        YEAR = {2024},
        NUMBER = {9},
        MONTH = sep,
        PAGES = {7870--7881},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235436},
}

@article{bb240522,
        AUTHOR = {Xue, D. and Qian, S.S. and Xu, C.S.},
        TITLE = {Integrating Neural-Symbolic Reasoning With Variational Causal Inference Network for Explanatory Visual Question Answering},
        JOURNAL = PAMI,
        VOLUME = {46},
        YEAR = {2024},
        NUMBER = {12},
        MONTH = dec,
        PAGES = {7893--7908},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235437},
}

@inproceedings{bb240523,
        AUTHOR = {Xue, D. and Qian, S.S. and Xu, C.S.},
        TITLE = {Variational Causal Inference Network for Explanatory Visual Question Answering},
        BOOKTITLE = ICCV23,
        YEAR = {2023},
        PAGES = {2515--2525},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235438},
}

@article{bb240524,
        AUTHOR = {Xue, D. and Qian, S.S. and Fang, Q. and Xu, C.S.},
        TITLE = {{LININ}: Logic Integrated Neural Inference Network for Explanatory Visual Question Answering},
        JOURNAL = MultMed,
        VOLUME = {27},
        YEAR = {2025},
        PAGES = {16--27},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235439},
}

@article{bb240525,
        AUTHOR = {Shen, K. and Wu, L.F. and Tang, S.L. and Xu, F.L. and Long, B. and Zhuang, Y.T. and Pei, J.},
        TITLE = {Ask Questions With Double Hints: Visual Question Generation With Answer-Awareness and Region-Reference},
        JOURNAL = PAMI,
        VOLUME = {46},
        YEAR = {2024},
        NUMBER = {12},
        MONTH = dec,
        PAGES = {9648--9660},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235440},
}

@article{bb240526,
        AUTHOR = {Yamada, M. and d'Amario, V. and Takemoto, K. and Boix, X. and Sasaki, T.},
        TITLE = {Transformer Module Networks for Systematic Generalization in Visual Question Answering},
        JOURNAL = PAMI,
        VOLUME = {46},
        YEAR = {2024},
        NUMBER = {12},
        MONTH = dec,
        PAGES = {10096--10105},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235441},
}

@article{bb240527,
        AUTHOR = {Liu, J. and Xie, J. and Zhou, F.Y. and He, S.F.},
        TITLE = {Question Type-Aware Debiasing for Test-Time Visual Question Answering Model Adaptation},
        JOURNAL = CirSysVideo,
        VOLUME = {34},
        YEAR = {2024},
        NUMBER = {11},
        MONTH = nov,
        PAGES = {10805--10816},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235442},
}

@article{bb240528,
        AUTHOR = {Cao, R.L. and Li, Z.X. and Tang, Z.J. and Zhang, C.L. and Ma, H.F.},
        TITLE = {Enhancing robust {VQA} via contrastive and self-supervised learning},
        JOURNAL = PR,
        VOLUME = {159},
        YEAR = {2025},
        PAGES = {111129},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235443},
}

@article{bb240529,
        AUTHOR = {Liu, F. and Dai, W.W. and Zhang, C.Y. and Zhu, J. and Yao, L. and Li, X.},
        TITLE = {{Co-LLaVA}: Efficient Remote Sensing Visual Question Answering via Model Collaboration},
        JOURNAL = RS,
        VOLUME = {17},
        YEAR = {2025},
        NUMBER = {3},
        PAGES = {466},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235444},
}

@article{bb240530,
        AUTHOR = {Gao, Y.Q. and Bai, Z.W. and Zhou, M.L. and Jia, B.L. and Gao, P.Q. and Zhu, R.},
        TITLE = {Adaptive Conditional Reasoning for Remote Sensing Visual Question Answering},
        JOURNAL = RS,
        VOLUME = {17},
        YEAR = {2025},
        NUMBER = {8},
        PAGES = {1338},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235445},
}

@article{bb240531,
        AUTHOR = {Lan, N. and Ou, B.S. and Xie, X.M. and Shi, G.M.},
        TITLE = {Visual Environment-Interactive Planning for Embodied Complex-Question Answering},
        JOURNAL = CirSysVideo,
        VOLUME = {35},
        YEAR = {2025},
        NUMBER = {7},
        MONTH = jul,
        PAGES = {6481--6493},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235446},
}

@article{bb240532,
        AUTHOR = {Welde, T.M. and Liao, L.},
        TITLE = {Counting in Visual Question Answering: Methods, Datasets, and Future Work},
        JOURNAL = IJIG,
        VOLUME = {25},
        YEAR = {2025},
        NUMBER = {5},
        MONTH = sep,
        PAGES = {2550044},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235447},
}

@article{bb240533,
        AUTHOR = {Bi, C. and Wang, S.H. and Li, N. and Huang, Q.M.},
        TITLE = {Inferential and Commonsense Visual Question Generation},
        JOURNAL = MultMed,
        VOLUME = {27},
        YEAR = {2025},
        PAGES = {7796--7809},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235448},
}

@article{bb240534,
        AUTHOR = {Zhao, Z.C. and Zhou, C.F. and Zhang, Y. and Li, C.L. and Ma, X.L. and Tang, J.},
        TITLE = {Text-Guided Coarse-to-Fine Fusion Network for robust remote sensing visual question answering},
        JOURNAL = PandRS,
        VOLUME = {230},
        YEAR = {2025},
        PAGES = {1--17},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235449},
}

@article{bb240535,
        AUTHOR = {Lu, Z. and Zeng, Q.J. and Lu, M.K. and Chen, G. and Xia, Y.},
        TITLE = {Bridging the Semantic Gap in Medical Visual Question Answering with Prompt Learning},
        JOURNAL = MedImg,
        VOLUME = {44},
        YEAR = {2025},
        NUMBER = {11},
        MONTH = nov,
        PAGES = {4605--4616},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235450},
}

@article{bb240536,
        AUTHOR = {Du, Z. and Yuan, Z.Q. and Wu, X. and Xu, C.S.},
        TITLE = {Disentanglement-Based Equivariant Learning for Compositional {VQA}},
        JOURNAL = MultMed,
        VOLUME = {27},
        YEAR = {2025},
        PAGES = {8160--8173},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235451},
}

@article{bb240537,
        AUTHOR = {Jin, Z.X. and Qin, P. and Lin, S. and Qin, J. and Zhai, S.J. and Zeng, J.C. and Yin, X.C.},
        TITLE = {Robust scene text understanding with {OCR} token and word alignment for {Text-VQA} and text-caption},
        JOURNAL = PR,
        VOLUME = {172},
        YEAR = {2026},
        PAGES = {112362},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235452},
}

@article{bb240538,
        AUTHOR = {Mao, A. and Wen, S.Y. and Chen, F. and Yi, R. and Liu, Y.J.},
        TITLE = {Robust {3D} Visual Question Answering via Bias Learning},
        JOURNAL = CirSysVideo,
        VOLUME = {35},
        YEAR = {2025},
        NUMBER = {12},
        MONTH = dec,
        PAGES = {12492--12507},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235453},
}

@article{bb240539,
        AUTHOR = {Liu, J. and Zhang, L.Z. and Mu, C. and Lu, G.X. and Zhang, B. and Li, J.S.},
        TITLE = {Question-Guided Multigranular Visual Augmentation for Knowledge-Based Visual Question Answering},
        JOURNAL = CVIU,
        VOLUME = {263},
        YEAR = {2026},
        PAGES = {104569},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235454},
}

@article{bb240540,
        AUTHOR = {Zhang, X.Y. and Zhang, L.L. and Wu, Y.R. and Wang, S.W. and Wu, W.J. and Huang, M. and Wang, Q.Y. and Liu, J.},
        TITLE = {Memory-enriched thought-by-thought framework for complex Diagram Question Answering},
        JOURNAL = CVIU,
        VOLUME = {264},
        YEAR = {2026},
        PAGES = {104608},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235455},
}

@article{bb240541,
        AUTHOR = {Chen, T.Y. and Liu, H.B. and Wang, Y. and Chen, Y.H. and He, T.Y. and Gan, C.F. and He, H.Y. and Lin, W.Y.},
        TITLE = {{MECD+}: Unlocking Event-Level Causal Graph Discovery for Video Reasoning},
        JOURNAL = PAMI,
        VOLUME = {48},
        YEAR = {2026},
        NUMBER = {3},
        MONTH = mar,
        PAGES = {2628--2645},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235456},
}

@article{bb240542,
        AUTHOR = {Liang, J.F. and Jiang, S.X. and Tang, W. and Wang, N. and Wang, Z.K. and Mao, X. and Lv, K. and Liu, M. and Qin, B.},
        TITLE = {{APSam}: An Aggregating-Then-Pruning Sampler for Question-Conditional Denoising},
        JOURNAL = CirSysVideo,
        VOLUME = {36},
        YEAR = {2026},
        NUMBER = {2},
        MONTH = feb,
        PAGES = {1754--1765},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235457},
}

@article{bb240543,
        AUTHOR = {Wang, F.J. and Liu, J.Y. and Zhang, R.N. and Li, Z.X. and Zhang, F. and An, G.Y.},
        TITLE = {{CKCR}: Context-aware knowledge construction and retrieval for knowledge-based visual question answering},
        JOURNAL = JVCIR,
        VOLUME = {116},
        YEAR = {2026},
        PAGES = {104711},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235458},
}

@inproceedings{bb240544,
        AUTHOR = {Ren, H.Y. and Chen, W. and Liu, C.P. and Ji, Y. and Li, Y.},
        TITLE = {{MedKI}: Knowledge Dual Injections for Medical Visual Question Answering},
        BOOKTITLE = ICIP25,
        YEAR = {2025},
        PAGES = {79--84},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235459},
}

@inproceedings{bb240545,
        AUTHOR = {Shah, K. and J, S. and Bhutani, G. and Singh, K. and Singh, S.},
        TITLE = {Visual Prompting Through Image Mines},
        BOOKTITLE = ICIP25,
        YEAR = {2025},
        PAGES = {713--718},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235460},
}

@inproceedings{bb240546,
        AUTHOR = {He, Z.L. and Pan, Y.J. and Li, H. and Ma, F.P. and Peng, Y.S. and Wu, S.Y. and Sun, X.Y.},
        TITLE = {Enhancing Visual Question Answering Via Clustered In-Context Sequence Configuration},
        BOOKTITLE = ICIP25,
        YEAR = {2025},
        PAGES = {935--940},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235461},
}

@inproceedings{bb240547,
        AUTHOR = {Mijiyawa, A. and Sadat, F.},
        TITLE = {Visual Question Answering Using Multimodal Data Augmentation for {Hausa}},
        BOOKTITLE = IVCNZ25,
        YEAR = {2025},
        PAGES = {1--6},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235462},
}

@inproceedings{bb240548,
        AUTHOR = {Cheng, Y. and Goel, A. and Bilen, H.},
        TITLE = {Visually Interpretable Subtask Reasoning for Visual Question Answering},
        BOOKTITLE = XAI4CV25,
        YEAR = {2025},
        PAGES = {2751--2771},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235463},
}

@inproceedings{bb240549,
        AUTHOR = {Jang, Y. and Kong, H. and Kim, G. and Lee, Y. and Choi, J. and Bae, K.},
        TITLE = {{ICT-QA}: Question Answering Over Multi-Modal Contexts Including Image, Chart, and Text Modalities},
        BOOKTITLE = {MULA25},
        YEAR = {2025},
        PAGES = {138--148},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235464},
}

@inproceedings{bb240550,
        AUTHOR = {Li, K. and Vosselman, G. and Yang, M.Y.},
        TITLE = {Multimodal Rationales for Explainable Visual Question Answering},
        BOOKTITLE = {MULA25},
        YEAR = {2025},
        PAGES = {191--201},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235465},
}

@inproceedings{bb240551,
        AUTHOR = {Zou, Y.H. and Yin, Z.Z.},
        TITLE = {{MVCM}: Enhancing Multi-View and Cross-Modality Alignment for Medical Visual Question Answering and Medical Image-Text Retrieval},
        BOOKTITLE = {MULA25},
        YEAR = {2025},
        PAGES = {180--190},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235466},
}

@inproceedings{bb240552,
        AUTHOR = {Luo, J.Z. and Liu, Y. and Chen, W.X. and Li, Z. and Wang, Y.W. and Li, G.B. and Lin, L.},
        TITLE = {{DSPNet}: Dual-vision Scene Perception for Robust {3D} Question Answering},
        BOOKTITLE = CVPR25,
        YEAR = {2025},
        PAGES = {14169--14178},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235467},
}

@inproceedings{bb240553,
        AUTHOR = {Zou, Y.H. and Yin, Z.Z.},
        TITLE = {Alignment, Mining and Fusion: Representation Alignment with Hard Negative Mining and Selective Knowledge Fusion for Medical Visual Question Answering},
        BOOKTITLE = CVPR25,
        YEAR = {2025},
        PAGES = {29623--29633},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235468},
}

@inproceedings{bb240554,
        AUTHOR = {Liu, H. and Ilievski, F. and Snoek, C.G.M.},
        TITLE = {Commonsense Video Question Answering through Video-Grounded Entailment Tree Reasoning},
        BOOKTITLE = CVPR25,
        YEAR = {2025},
        PAGES = {3262--3271},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235469},
}

@inproceedings{bb240555,
        AUTHOR = {Choudhary, N. and Goyal, P. and Siwatch, D. and Chandak, A. and Mahajan, H. and Khurana, V. and Kumar, Y.},
        TITLE = {{AdQuestA}: Knowledge-Guided Visual Question Answer Framework for Advertisements},
        BOOKTITLE = WACV25,
        YEAR = {2025},
        PAGES = {5812--5821},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235470},
}

@inproceedings{bb240556,
        AUTHOR = {Ge, J.X. and Subramanian, S. and Shi, B.F. and Herzig, R. and Darrell, T.J.},
        TITLE = {Recursive Visual Programming},
        BOOKTITLE = ECCV24,
        YEAR = {2024},
        PAGES = {XLIII: 1--18},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235471},
}

@inproceedings{bb240557,
        AUTHOR = {Lu, C. and Lu, Q. and Luo, J.},
        TITLE = {An Explainable Vision Question Answer Model via Diffusion Chain-of-thought},
        BOOKTITLE = ECCV24,
        YEAR = {2024},
        PAGES = {LXVII: 146--162},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235472},
}

@inproceedings{bb240558,
        AUTHOR = {Wang, H.B. and Ge, W.F.},
        TITLE = {{Q\&A} Prompts: Discovering Rich Visual Clues through Mining Question-answer Prompts for {VQA} requiring Diverse World Knowledge},
        BOOKTITLE = ECCV24,
        YEAR = {2024},
        PAGES = {XLII: 274--292},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235473},
}

@inproceedings{bb240559,
        AUTHOR = {Liu, H. and Ma, X. and Zhong, C. and Zhang, Y. and Lin, W.Y.},
        TITLE = {Timecraft: Navigate Weakly-supervised Temporal Grounded Video Question Answering via Bi-directional Reasoning},
        BOOKTITLE = ECCV24,
        YEAR = {2024},
        PAGES = {V: 92--107},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235474},
}

@inproceedings{bb240560,
        AUTHOR = {Zhang, Y.F. and Jiang, M. and Zhao, Q.},
        TITLE = {Grace: Graph-based Contextual Debiasing for Fair Visual Question Answering},
        BOOKTITLE = ECCV24,
        YEAR = {2024},
        PAGES = {XVII: 176--194},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235475},
}

@inproceedings{bb240561,
        AUTHOR = {Liu, X.L. and Dong, Z.K. and Zhang, P.},
        TITLE = {Tackling Data Bias in {MUSIC-AVQA}: Crafting a Balanced Dataset for Unbiased Question-Answering},
        BOOKTITLE = WACV24,
        YEAR = {2024},
        PAGES = {4466--4475},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235476},
}

@inproceedings{bb240562,
        AUTHOR = {Shi, X.X. and Lee, S.},
        TITLE = {Benchmarking Out-of-Distribution Detection in Visual Question Answering},
        BOOKTITLE = WACV24,
        YEAR = {2024},
        PAGES = {5473--5483},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235477},
}

@inproceedings{bb240563,
        AUTHOR = {Venkataraman, S.R. and Rao, R.S. and Balasubramanian, S. and Sarma, R.R. and Vorugunti, C.S.},
        TITLE = {Can you even tell left from right? Presenting a new challenge for {VQA}},
        BOOKTITLE = WACV24,
        YEAR = {2024},
        PAGES = {4486--4495},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235478},
}

@inproceedings{bb240564,
        AUTHOR = {Sahu, P.P. and Raut, A. and Samant, J.S. and Gorijala, M. and Lakshminarayanan, V. and Bhaskar, P.},
        TITLE = {{POP-VQA}: Privacy preserving, On-device, Personalized Visual Question Answering},
        BOOKTITLE = WACV24,
        YEAR = {2024},
        PAGES = {8455--8464},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235479},
}

@inproceedings{bb240565,
        AUTHOR = {Li, J.P. and Wei, P. and Han, W.J. and Fan, L.F.},
        TITLE = {{IntentQA}: Context-aware Video Intent Reasoning},
        BOOKTITLE = ICCV23,
        YEAR = {2023},
        PAGES = {11929--11940},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235480},
}

@inproceedings{bb240566,
        AUTHOR = {Hu, Y.S. and Hua, H. and Yang, Z.Y. and Shi, W.J. and Smith, N.A. and Luo, J.B.},
        TITLE = {{PromptCap}: Prompt-Guided Image Captioning for {VQA} with {GPT-3}},
        BOOKTITLE = ICCV23,
        YEAR = {2023},
        PAGES = {2951--2963},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235481},
}

@inproceedings{bb240567,
        AUTHOR = {Naik, N. and Potts, C. and Kreiss, E.},
        TITLE = {{Context-VQA}: Towards Context-Aware and Purposeful Visual Question Answering},
        BOOKTITLE = CLVL23,
        YEAR = {2023},
        PAGES = {2813--2817},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235482},
}

@inproceedings{bb240568,
        AUTHOR = {Hu, Y.S. and Liu, B. and Kasai, J. and Wang, Y.Z. and Ostendorf, M. and Krishna, R. and Smith, N.A.},
        TITLE = {{TIFA}: Accurate and Interpretable Text-to-Image Faithfulness Evaluation with Question Answering},
        BOOKTITLE = ICCV23,
        YEAR = {2023},
        PAGES = {20349--20360},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235483},
}

@inproceedings{bb240569,
        AUTHOR = {Zhang, Y.W. and Ho, C.H. and Vasconcelos, N.M.},
        TITLE = {Toward Unsupervised Realistic Visual Question Answering},
        BOOKTITLE = ICCV23,
        YEAR = {2023},
        PAGES = {15567--15578},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235484},
}

@inproceedings{bb240570,
        AUTHOR = {Liang, K. and Albanie, S.},
        TITLE = {Simple Baselines for Interactive Video Retrieval with Questions and Answers},
        BOOKTITLE = ICCV23,
        YEAR = {2023},
        PAGES = {11057--11067},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235485},
}

@inproceedings{bb240571,
        AUTHOR = {Mensink, T. and Uijlings, J. and Castrejon, L. and Goel, A. and Cadar, F. and Zhou, H. and Sha, F. and Araujo, A. and Ferrari, V.},
        TITLE = {Encyclopedic {VQA}: Visual questions about detailed properties of fine-grained categories},
        BOOKTITLE = ICCV23,
        YEAR = {2023},
        PAGES = {3090--3101},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235486},
}

@inproceedings{bb240572,
        AUTHOR = {Bruni, P. and Falcon, A. and Radeva, P.},
        TITLE = {Time-aware Circulant Matrices for Question-based Temporal Localization},
        BOOKTITLE = CIAP23,
        YEAR = {2023},
        PAGES = {II:182--195},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235487},
}

@inproceedings{bb240573,
        AUTHOR = {Ferreira, B.C.L. and Oliveira, H.G. and Silva, C.},
        TITLE = {Leveraging Question Answering for Domain-Agnostic Information Extraction},
        BOOKTITLE = CIARP23,
        YEAR = {2023},
        PAGES = {I:244--256},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235488},
}

@inproceedings{bb240574,
        AUTHOR = {Wu, Z.H. and Shu, X.Y. and Yan, S.Y. and Lu, Z.Y.},
        TITLE = {{FGCVQA}: Fine-Grained Cross-Attention for Medical {VQA}},
        BOOKTITLE = ICIP23,
        YEAR = {2023},
        PAGES = {975--979},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235489},
}

@inproceedings{bb240575,
        AUTHOR = {Zhu, H. and Togo, R. and Ogawa, T. and Haseyama, M.},
        TITLE = {Interpretable Visual Question Answering Referring to Outside Knowledge},
        BOOKTITLE = ICIP23,
        YEAR = {2023},
        PAGES = {2140--2144},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235490},
}

@inproceedings{bb240576,
        AUTHOR = {Parelli, M. and Mallis, D. and Diomataris, M. and Pitsikalis, V.},
        TITLE = {Interpretable Visual Question Answering Via Reasoning Supervision},
        BOOKTITLE = ICIP23,
        YEAR = {2023},
        PAGES = {2525--2529},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235491},
}

@inproceedings{bb240577,
        AUTHOR = {Hegde, S. and Jahagirdar, S. and Gangisetty, S.},
        TITLE = {Making the {V} in {Text-VQA} Matter},
        BOOKTITLE = ODRUM23,
        YEAR = {2023},
        PAGES = {5580--5588},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235492},
}

@inproceedings{bb240578,
        AUTHOR = {Suo, W. and Sun, M.Y. and Liu, W.S. and Gao, Y.Q. and Wang, P. and Zhang, Y.N. and Wu, Q.},
        TITLE = {{S3C}: Semi-Supervised {VQA} Natural Language Explanation via Self-Critical Learning},
        BOOKTITLE = CVPR23,
        YEAR = {2023},
        PAGES = {2646--2656},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235493},
}

@inproceedings{bb240579,
        AUTHOR = {Alampalle, C. and Hegde, S. and Jahagirdar, S. and Gangisetty, S.},
        TITLE = {Weakly Supervised Visual Question Answer Generation},
        BOOKTITLE = ODRUM23,
        YEAR = {2023},
        PAGES = {5589--5597},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235494},
}

@inproceedings{bb240580,
        AUTHOR = {Jiang, J.J. and Zheng, N.N.},
        TITLE = {{MixPHM}: Redundancy-Aware Parameter-Efficient Tuning for Low-Resource Visual Question Answering},
        BOOKTITLE = CVPR23,
        YEAR = {2023},
        PAGES = {24203--24213},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235495},
}

@inproceedings{bb240581,
  author    = {Wang, Y. and Pfeiffer, J. and Carion, N. and Le Cun, Y.L. and Kamath, A.},
  title     = {Adapting Grounded Visual Question Answering Models to Low Resource
Languages},
  booktitle = MULA23,
  year      = {2023},
  pages     = {2596-2605},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235496},
}

@inproceedings{bb240582,
  author    = {Wang, M. and Mahjoubfar, A. and Joshi, A.},
  title     = {FashionVQA: A Domain-Specific Visual Question Answering System},
  booktitle = CVFAD23,
  year      = {2023},
  pages     = {3514-3519},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235497},
}

@inproceedings{bb240583,
  author    = {Tascon Morales, S. and Marquez Neila, P. and Sznitman, R.},
  title     = {Logical Implications for Visual Question Answering Consistency},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {6725-6735},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235498},
}

@inproceedings{bb240584,
  author    = {Chen, S. and Zhao, Q.},
  title     = {Divide and Conquer: Answering Questions with Object Factorization and
Compositional Reasoning},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {6736-6745},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235499},
}

@inproceedings{bb240585,
  author    = {Basu, A. and Addepalli, S. and Babu, R.V.},
  title     = {RMLVQA: A Margin Loss Approach For Visual Question Answering with
Language Biases},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {11671-11680},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235500},
}

@inproceedings{bb240586,
  author    = {Vivoli, E. and Biten, A.F. and Mafla, A. and Karatzas, D. and Gomez, L.},
  title     = {MUST-VQA: Multilingual Scene-Text VQA},
  booktitle = TextEvery22,
  year      = {2022},
  pages     = {345-358},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235501},
}

@inproceedings{bb240587,
  author    = {Bongini, P. and Becattini, F. and del Bimbo, A.},
  title     = {Is GPT-3 All You Need for Visual Question Answering in Cultural
Heritage?},
  booktitle = VisArt22,
  year      = {2022},
  pages     = {268-281},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235502},
}

@inproceedings{bb240588,
  author    = {Jha, A. and Patro, B. and Van Gool, L.J. and Tuytelaars, T.},
  title     = {Barlow constrained optimization for Visual Question Answering},
  booktitle = WACV23,
  year      = {2023},
  pages     = {1084-1093},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235503},
}

@inproceedings{bb240589,
  author    = {Ravi, S. and Chinchure, A. and Sigal, L. and Liao, R.J. and Shwartz, V.},
  title     = {VLC-BERT: Visual Question Answering with Contextualized Commonsense
Knowledge},
  booktitle = WACV23,
  year      = {2023},
  pages     = {1155-1165},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235504},
}

@inproceedings{bb240590,
  author    = {Etesam, Y. and Kochiev, L. and Chang, A.X.},
  title     = {3DVQA: Visual Question Answering for 3D Environments},
  booktitle = CRV22,
  year      = {2022},
  pages     = {233-240},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235505},
}

% ICPR 2022 paper on unsupervised question answering (Ramamurthy and Aakur).
% BOOKTITLE is spelled out in full: the previous value was the quoted literal
% "ICPR22", which styles print verbatim, whereas neighbouring entries use bare
% string-macro names (e.g. ICIP22) that expand to a proceedings title.
% NOTE(review): if an ICPR22 string macro is defined elsewhere in this
% bibliography, prefer the bare macro name here instead -- confirm.
@inproceedings{bb240591,
        AUTHOR = "Ramamurthy, P. and Aakur, S.N.",
        TITLE = "ISD-QA: Iterative Distillation of Commonsense Knowledge from General
Language Models for Unsupervised Question Answering",
        BOOKTITLE = "International Conference on Pattern Recognition (ICPR 2022)",
        YEAR = "2022",
        PAGES = "1229-1235",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235506"}

% ICPR 2022 paper on transformer-based medical VQA (Liu, Su, Guo and Zhu).
% BOOKTITLE is spelled out in full: the previous value was the quoted literal
% "ICPR22", which styles print verbatim, whereas neighbouring entries use bare
% string-macro names (e.g. ICIP22) that expand to a proceedings title.
% NOTE(review): if an ICPR22 string macro is defined elsewhere in this
% bibliography, prefer the bare macro name here instead -- confirm.
@inproceedings{bb240592,
        AUTHOR = "Liu, L. and Su, X.D. and Guo, H. and Zhu, D.",
        TITLE = "A Transformer-based Medical Visual Question Answering Model",
        BOOKTITLE = "International Conference on Pattern Recognition (ICPR 2022)",
        YEAR = "2022",
        PAGES = "1712-1718",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235507"}

@inproceedings{bb240593,
  author    = {Wu, X.Y. and Lu, J.F. and Li, Z.F. and Xiong, F.C.},
  title     = {Ques-to-Visual Guided Visual Question Answering},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {4193-4197},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235508},
}

@inproceedings{bb240594,
  author    = {Sarkar, A. and Rahnemoonfar, M.},
  title     = {Grad-Cam Aware Supervised Attention for Visual Question Answering for
Post-Disaster Damage Assessment},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {3783-3787},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235509},
}

@inproceedings{bb240595,
  author    = {Whitehead, S. and Petryk, S. and Shakib, V. and Gonzalez, J. and Darrell, T.J. and Rohrbach, A. and Rohrbach, M.},
  title     = {Reliable Visual Question Answering: Abstain Rather Than Answer
Incorrectly},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:148-166},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235510},
}

@inproceedings{bb240596,
  author    = {Chen, L. and Zheng, Y.H. and Xiao, J.},
  title     = {Rethinking Data Augmentation for Robust Visual Question Answering},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:95-112},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235511},
}

@inproceedings{bb240597,
  author    = {Zhang, H.T. and Wu, W.},
  title     = {Context Relation Fusion Model for Visual Question Answering},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {2112-2116},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235512},
}

@inproceedings{bb240598,
  author    = {Biten, A.F. and Litman, R. and Xie, Y.S. and Appalaraju, S. and Manmatha, R.},
  title     = {LaTr: Layout-Aware Transformer for Scene-Text VQA},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {16527-16537},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235513},
}

@inproceedings{bb240599,
  author    = {Lu, J.Y. and Ye, X. and Ren, Y. and Yang, Y.Z.},
  title     = {Good, Better, Best: Textual Distractors Generation for
Multiple-Choice Visual Question Answering via Reinforcement Learning},
  booktitle = ODRUM22,
  year      = {2022},
  pages     = {4917-4926},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT235514},
}

Last update: Apr 6, 2026 at 11:28:57