% Visual question answering bibliography entries, keys bb229500 through bb229599.
% Each BIBSOURCE field records the visionbib.com anchor the entry was taken from.
% JOURNAL/BOOKTITLE values written without quotes (IJCV, PRL, CVPR17, ...) are
% string macros; their definitions are not part of this section and must be
% supplied elsewhere in the bibliography. MONTH uses the predefined three-letter
% month macros. Page ranges use "--"; acronyms in titles are brace-protected so
% sentence-casing styles keep their capitalisation.
% NOTE(review): PAGES = "xx-yy" in bb229508 looks like a placeholder for unknown
% pages and is kept verbatim -- confirm against the publisher record.
% NOTE(review): BOOKTITLE = "ICPR22" in bb229522 is a quoted literal, unlike the
% other venues, which are macros -- kept as found; confirm whether a macro exists.

@article{bb229500,
  AUTHOR    = "Agrawal, A. and Lu, J. and Antol, S. and Mitchell, M. and Zitnick, C. L. and Parikh, D. and Batra, D.",
  TITLE     = "{VQA}: Visual Question Answering",
  JOURNAL   = IJCV,
  VOLUME    = "123",
  YEAR      = "2017",
  NUMBER    = "1",
  MONTH     = may,
  PAGES     = "4--31",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224486"
}

@article{bb229501,
  AUTHOR    = "Lioutas, V. and Passalis, N. and Tefas, A.",
  TITLE     = "Explicit ensemble attention learning for improving visual question answering",
  JOURNAL   = PRL,
  VOLUME    = "111",
  YEAR      = "2018",
  PAGES     = "51--57",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224487"
}

@article{bb229502,
  AUTHOR    = "Garg, S. and Srivastava, R.",
  TITLE     = "Object sequences: encoding categorical and spatial information for a yes/no visual question answering task",
  JOURNAL   = IET-CV,
  VOLUME    = "12",
  YEAR      = "2018",
  NUMBER    = "8",
  MONTH     = dec,
  PAGES     = "1141--1150",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224488"
}

@article{bb229503,
  AUTHOR    = "Goyal, Y. and Khot, T. and Agrawal, A. and Summers Stay, D. and Batra, D. and Parikh, D.",
  TITLE     = "Making the {V} in {VQA} Matter: Elevating the Role of Image Understanding in Visual Question Answering",
  JOURNAL   = IJCV,
  VOLUME    = "127",
  YEAR      = "2019",
  NUMBER    = "4",
  MONTH     = apr,
  PAGES     = "398--414",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224489"
}

@inproceedings{bb229504,
  AUTHOR    = "Goyal, Y. and Khot, T. and Summers Stay, D. and Batra, D. and Parikh, D.",
  TITLE     = "Making the {V} in {VQA} Matter: Elevating the Role of Image Understanding in Visual Question Answering",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "6325--6334",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224489"
}

@article{bb229505,
  AUTHOR    = "Fang, Z. W. and Liu, J. and Li, Y. and Qiao, Y. Y. and Lu, H. Q.",
  TITLE     = "Improving visual question answering using dropout and enhanced question encoder",
  JOURNAL   = PR,
  VOLUME    = "90",
  YEAR      = "2019",
  PAGES     = "404--414",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224490"
}

@article{bb229506,
  AUTHOR    = "Osman, A. and Samek, W.",
  TITLE     = "{DRAU}: Dual Recurrent Attention Units for Visual Question Answering",
  JOURNAL   = CVIU,
  VOLUME    = "185",
  YEAR      = "2019",
  PAGES     = "24--30",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224491"
}

@article{bb229507,
  AUTHOR    = "Toor, A. S. and Wechsler, H. and Nappi, M.",
  TITLE     = "Biometric surveillance using visual question answering",
  JOURNAL   = PRL,
  VOLUME    = "126",
  YEAR      = "2019",
  PAGES     = "111--118",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224492"
}

@article{bb229508,
  AUTHOR    = "Li, W. W. and Song, M. M. and Tian, Y. Y.",
  TITLE     = "An Ontology-Driven Cyberinfrastructure for Intelligent Spatiotemporal Question Answering and Open Knowledge Discovery",
  JOURNAL   = IJGI,
  VOLUME    = "8",
  YEAR      = "2019",
  NUMBER    = "11",
  PAGES     = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224493"
}

@article{bb229509,
  AUTHOR    = "Xi, Y. L. and Zhang, Y. N. and Ding, S. T. and Wan, S. H.",
  TITLE     = "Visual Question Answering Model Based on Visual Relationship Detection",
  JOURNAL   = SP:IC,
  VOLUME    = "80",
  YEAR      = "2020",
  PAGES     = "115648",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224494"
}

@article{bb229510,
  AUTHOR    = "Wu, Y. and Jiang, L. and Yang, Y.",
  TITLE     = "Revisiting {EmbodiedQA}: A Simple Baseline and Beyond",
  JOURNAL   = IP,
  VOLUME    = "29",
  YEAR      = "2020",
  PAGES     = "3984--3992",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224495"
}

@article{bb229511,
  AUTHOR    = "Huang, C. R. and Yao, L. and Wang, X. Z. and Benatallah, B. and Zhang, X.",
  TITLE     = "Software expert discovery via knowledge domain embeddings in a collaborative network",
  JOURNAL   = PRL,
  VOLUME    = "130",
  YEAR      = "2020",
  PAGES     = "46--53",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224496"
}

@article{bb229512,
  AUTHOR    = "Li, W. and Sun, J. H. and Liu, G. and Zhao, L. and Fang, X. Z.",
  TITLE     = "Visual question answering with attention transfer and a cross-modal gating mechanism",
  JOURNAL   = PRL,
  VOLUME    = "133",
  YEAR      = "2020",
  PAGES     = "334--340",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224497"
}

@article{bb229513,
  AUTHOR    = "Messina, N. and Amato, G. and Carrara, F. and Falchi, F. and Gennaro, C.",
  TITLE     = "Learning visual features for relational {CBIR}",
  JOURNAL   = MultInfoRetr,
  VOLUME    = "9",
  YEAR      = "2020",
  NUMBER    = "2",
  MONTH     = jun,
  PAGES     = "113--124",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224498"
}

@inproceedings{bb229514,
  AUTHOR    = "Messina, N. and Amato, G. and Carrara, F. and Falchi, F. and Gennaro, C.",
  TITLE     = "Learning Relationship-Aware Visual Features",
  BOOKTITLE = CEFR-LCV18,
  YEAR      = "2018",
  PAGES     = "IV:486--501",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224499"
}

@inproceedings{bb229515,
  AUTHOR    = "Methani, N. and Ganguly, P. and Khapra, M. M. and Kumar, P.",
  TITLE     = "{PlotQA}: Reasoning over Scientific Plots",
  BOOKTITLE = WACV20,
  YEAR      = "2020",
  PAGES     = "1516--1525",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224500"
}

@article{bb229516,
  AUTHOR    = "Yu, J. and Zhu, Z. H. and Wang, Y. J. and Zhang, W. F. and Hu, Y. and Tan, J. L.",
  TITLE     = "Cross-modal knowledge reasoning for knowledge-based visual question answering",
  JOURNAL   = PR,
  VOLUME    = "108",
  YEAR      = "2020",
  PAGES     = "107563",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224501"
}

@inproceedings{bb229517,
  AUTHOR    = "Yang, Z. Q. and Qin, Z. C. and Yu, J. and Wan, T.",
  TITLE     = "Prior Visual Relationship Reasoning For Visual Question Answering",
  BOOKTITLE = ICIP20,
  YEAR      = "2020",
  PAGES     = "1411--1415",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224502"
}

@article{bb229518,
  AUTHOR    = "Farazi, M. R. and Khan, S. H. and Barnes, N. M.",
  TITLE     = "From known to the unknown: Transferring knowledge to answer questions about novel visual and semantic concepts",
  JOURNAL   = IVC,
  VOLUME    = "103",
  YEAR      = "2020",
  PAGES     = "103985",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224503"
}

@article{bb229519,
  AUTHOR    = "Terao, K. and Tamaki, T. and Raytchev, B. and Kaneda, K. and Satoh, S.",
  TITLE     = "Rephrasing Visual Questions by Specifying the Entropy of the Answer Distribution",
  JOURNAL   = IEICE,
  VOLUME    = "E103-D",
  YEAR      = "2020",
  NUMBER    = "11",
  MONTH     = nov,
  PAGES     = "2362--2370",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224504"
}

@article{bb229520,
  AUTHOR    = "Yu, J. and Zhang, W. F. and Lu, Y. H. and Qin, Z. C. and Hu, Y. and Tan, J. L. and Wu, Q.",
  TITLE     = "Reasoning on the Relation: Enhancing Visual Representation for Visual Question Answering and Cross-Modal Retrieval",
  JOURNAL   = MultMed,
  VOLUME    = "22",
  YEAR      = "2020",
  NUMBER    = "12",
  MONTH     = dec,
  PAGES     = "3196--3209",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224505"
}

@article{bb229521,
  AUTHOR    = "Lobry, S. and Marcos, D. and Murray, J. and Tuia, D.",
  TITLE     = "{RSVQA}: Visual Question Answering for Remote Sensing Data",
  JOURNAL   = GeoRS,
  VOLUME    = "58",
  YEAR      = "2020",
  NUMBER    = "12",
  MONTH     = dec,
  PAGES     = "8555--8566",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224506"
}

@inproceedings{bb229522,
  AUTHOR    = "Faure, M. and Lobry, S. and Kurtz, C. and Wendling, L.",
  TITLE     = "Embedding Spatial Relations in Visual Question Answering for Remote Sensing",
  BOOKTITLE = "ICPR22",
  YEAR      = "2022",
  PAGES     = "310--316",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224507"
}

@inproceedings{bb229523,
  AUTHOR    = "Chappuis, C. and Zermatten, V. and Lobry, S. and Le Saux, B. and Tuia, D.",
  TITLE     = "{Prompt-RSVQA}: Prompting visual context to a language model for Remote Sensing Visual Question Answering",
  BOOKTITLE = EarthVision22,
  YEAR      = "2022",
  PAGES     = "1371--1380",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224508"
}

@article{bb229524,
  AUTHOR    = "Sun, B. and Yao, Z. and Zhang, Y. H. and Yu, L. J.",
  TITLE     = "Local relation network with multilevel attention for visual question answering",
  JOURNAL   = JVCIR,
  VOLUME    = "73",
  YEAR      = "2020",
  PAGES     = "102762",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224509"
}

@article{bb229525,
  AUTHOR    = "Li, X. and Yuan, A. and Lu, X.",
  TITLE     = "Vision-to-Language Tasks Based on Attributes and Attention Mechanism",
  JOURNAL   = Cyber,
  VOLUME    = "51",
  YEAR      = "2021",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "913--926",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224510"
}

@article{bb229526,
  AUTHOR    = "Shao, Y. and Lin, J. C. W. and Srivastava, G. and Jolfaei, A. and Guo, D. D. and Hu, Y.",
  TITLE     = "Self-attention-based conditional random fields latent variables model for sequence labeling",
  JOURNAL   = PRL,
  VOLUME    = "145",
  YEAR      = "2021",
  PAGES     = "157--164",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224511"
}

@article{bb229527,
  AUTHOR    = "Wu, Y. and Ma, Y. T. and Wan, S. H.",
  TITLE     = "Multi-scale relation reasoning for multi-modal Visual Question Answering",
  JOURNAL   = SP:IC,
  VOLUME    = "96",
  YEAR      = "2021",
  PAGES     = "116319",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224512"
}

@inproceedings{bb229528,
  AUTHOR    = "Ma, Y. T. and Lu, T. and Wu, Y.",
  TITLE     = "Multi-scale Relational Reasoning with Regional Attention for Visual Question Answering",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "5642--5649",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224513"
}

@article{bb229529,
  AUTHOR    = "dos S Silva, F. H. and Bezerra, G. M. and Holanda, G. B. and de Souza, J. W. M. and Rego, P. A. L. and Lira Neto, A. V. and de Albuquerque, V. H. C. and Reboucas Filho, P. P.",
  TITLE     = "A novel feature extractor for human action recognition in visual question answering",
  JOURNAL   = PRL,
  VOLUME    = "147",
  YEAR      = "2021",
  PAGES     = "41--47",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224514"
}

@article{bb229530,
  AUTHOR    = "Guo, W. and Zhang, Y. and Yang, J. F. and Yuan, X. J.",
  TITLE     = "Re-Attention for Visual Question Answering",
  JOURNAL   = IP,
  VOLUME    = "30",
  YEAR      = "2021",
  PAGES     = "6730--6743",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224515"
}

@article{bb229531,
  AUTHOR    = "Hu, J. and Qian, S. S. and Fang, Q. and Xu, C. S.",
  TITLE     = "Heterogeneous Community Question Answering via Social-Aware Multi-Modal Co-Attention Convolutional Matching",
  JOURNAL   = MultMed,
  VOLUME    = "23",
  YEAR      = "2021",
  PAGES     = "2321--2334",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224516"
}

@article{bb229532,
  AUTHOR    = "Zhang, X. and Zhang, F. F. and Xu, C. S.",
  TITLE     = "{NExT-OOD}: Overcoming Dual Multiple-Choice {VQA} Biases",
  JOURNAL   = PAMI,
  VOLUME    = "46",
  YEAR      = "2024",
  NUMBER    = "4",
  MONTH     = apr,
  PAGES     = "1913--1931",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224517"
}

@article{bb229533,
  AUTHOR    = "Farazi, M. and Khan, S. and Barnes, N. M.",
  TITLE     = "Accuracy vs. complexity: A trade-off in visual question answering models",
  JOURNAL   = PR,
  VOLUME    = "120",
  YEAR      = "2021",
  PAGES     = "108106",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224518"
}

@article{bb229534,
  AUTHOR    = "Barra, S. and Bisogni, C. and de Marsico, M. and Ricciardi, S.",
  TITLE     = "Visual question answering: Which investigated applications?",
  JOURNAL   = PRL,
  VOLUME    = "151",
  YEAR      = "2021",
  PAGES     = "325--331",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224519"
}

@article{bb229535,
  AUTHOR    = "Manmadhan, S. and Kovoor, B. C.",
  TITLE     = "Multi-Tier Attention Network using Term-weighted Question Features for Visual Question Answering",
  JOURNAL   = IVC,
  VOLUME    = "115",
  YEAR      = "2021",
  PAGES     = "104291",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224520"
}

@article{bb229536,
  AUTHOR    = "Liu, A. A. and Lu, Z. and Xu, N. and Nie, W. Z. and Li, W. H.",
  TITLE     = "Multi-type decision fusion network for visual {Q\&A}",
  JOURNAL   = IVC,
  VOLUME    = "115",
  YEAR      = "2021",
  PAGES     = "104281",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224521"
}

@article{bb229537,
  AUTHOR    = "Patro, B. N. and Kurmi, V. K. and Kumar, S. and Namboodiri, V. P.",
  TITLE     = "{MUMC}: Minimizing uncertainty of mixture of cues",
  JOURNAL   = IVC,
  VOLUME    = "115",
  YEAR      = "2021",
  PAGES     = "104280",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224522"
}

@article{bb229538,
  AUTHOR    = "Liu, F. and Liu, J. and Fang, Z. W. and Hong, R. C. and Lu, H. Q.",
  TITLE     = "Visual Question Answering With Dense Inter- and Intra-Modality Interactions",
  JOURNAL   = MultMed,
  VOLUME    = "23",
  YEAR      = "2021",
  PAGES     = "3518--3529",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224523"
}

@article{bb229539,
  AUTHOR    = "Wu, J. J. and Du, J. and Wang, F. and Yang, C. and Jiang, X. Z. and Hu, J. and Yin, B. and Zhang, J. S. and Dai, L. R.",
  TITLE     = "A multimodal attention fusion network with a dynamic vocabulary for {TextVQA}",
  JOURNAL   = PR,
  VOLUME    = "122",
  YEAR      = "2022",
  PAGES     = "108214",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224524"
}

@article{bb229540,
  AUTHOR    = "Narayanan, A. and Rao, A. and Prasad, A. and Natarajan, S.",
  TITLE     = "{VQA} as a factoid question answering problem: A novel approach for knowledge-aware and explainable visual question answering",
  JOURNAL   = IVC,
  VOLUME    = "116",
  YEAR      = "2021",
  PAGES     = "104328",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224525"
}

@article{bb229541,
  AUTHOR    = "Guo, Y. Y. and Nie, L. Q. and Cheng, Z. Y. and Tian, Q. and Zhang, M.",
  TITLE     = "Loss Re-Scaling {VQA}: Revisiting the Language Prior Problem From a Class-Imbalance View",
  JOURNAL   = IP,
  VOLUME    = "31",
  YEAR      = "2022",
  PAGES     = "227--238",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224526"
}

@article{bb229542,
  AUTHOR    = "Peng, L. and Yang, Y. and Wang, Z. and Huang, Z. and Shen, H. T.",
  TITLE     = "{MRA-Net}: Improving {VQA} Via Multi-Modal Relation Attention Network",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "1",
  MONTH     = jan,
  PAGES     = "318--329",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224527"
}

@article{bb229543,
  AUTHOR    = "Manogaran, G. and Shakeel, P. M. and Burhanuddin, M. A. and Baskar, S. and Saravanan, V. and Crespo, R. G. and Martinez, O. S.",
  TITLE     = "{ADCCF}: Adaptive deep concatenation coder framework for visual question answering",
  JOURNAL   = PRL,
  VOLUME    = "152",
  YEAR      = "2021",
  PAGES     = "348--355",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224528"
}

@article{bb229544,
  AUTHOR    = "Zhou, Y. and Ji, R. R. and Sun, X. S. and Su, J. S. and Meng, D. Y. and Gao, Y. and Shen, C. H.",
  TITLE     = "Plenty is Plague: Fine-Grained Learning for Visual Question Answering",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "697--709",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224529"
}

@article{bb229545,
  AUTHOR    = "E, W. N. and Zhou, Y. J.",
  TITLE     = "A Mathematical Model for Universal Semantics",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "3",
  MONTH     = mar,
  PAGES     = "1124--1132",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224530"
}

@article{bb229546,
  AUTHOR    = "Li, X. P. and Wu, B. and Song, J. K. and Gao, L. L. and Zeng, P. P. and Gan, C.",
  TITLE     = "Text-instance graph: Exploring the relational semantics for text-based visual question answering",
  JOURNAL   = PR,
  VOLUME    = "124",
  YEAR      = "2022",
  PAGES     = "108455",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224531"
}

@article{bb229547,
  AUTHOR    = "Shao, X. J. and Xiang, Z. L. and Li, Y. X.",
  TITLE     = "Visual question answering with gated relation-aware auxiliary",
  JOURNAL   = IET-IPR,
  VOLUME    = "16",
  YEAR      = "2022",
  NUMBER    = "5",
  PAGES     = "1424--1432",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224532"
}

@article{bb229548,
  AUTHOR    = "Liu, Y. and Zhang, X. M. and Zhao, Z. Y. and Zhang, B. and Cheng, L. and Li, Z. J.",
  TITLE     = "{ALSA}: Adversarial Learning of Supervised Attentions for Visual Question Answering",
  JOURNAL   = Cyber,
  VOLUME    = "52",
  YEAR      = "2022",
  NUMBER    = "6",
  MONTH     = jun,
  PAGES     = "4520--4533",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224533"
}

@article{bb229549,
  AUTHOR    = "Ouyang, N. L. and Huang, Q. B. and Li, P. J. and Cai, Y. and Liu, B. and Leung, H. F. and Li, Q.",
  TITLE     = "Suppressing Biased Samples for Robust {VQA}",
  JOURNAL   = MultMed,
  VOLUME    = "24",
  YEAR      = "2022",
  PAGES     = "3405--3415",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224534"
}

@article{bb229550,
  AUTHOR    = "Shuang, K. and Guo, J. and Wang, Z. H.",
  TITLE     = "Comprehensive-perception dynamic reasoning for visual question answering",
  JOURNAL   = PR,
  VOLUME    = "131",
  YEAR      = "2022",
  PAGES     = "108878",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224535"
}

@article{bb229551,
  AUTHOR    = "Gouthaman, K. V. and Mittal, A.",
  TITLE     = "On the role of question encoder sequence model in robust visual question answering",
  JOURNAL   = PR,
  VOLUME    = "131",
  YEAR      = "2022",
  PAGES     = "108883",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224536"
}

@article{bb229552,
  AUTHOR    = "Chen, C. Q. and Han, D. Z. and Chang, C. C.",
  TITLE     = "{CAAN}: Context-Aware attention network for visual question answering",
  JOURNAL   = PR,
  VOLUME    = "132",
  YEAR      = "2022",
  PAGES     = "108980",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224537"
}

@article{bb229553,
  AUTHOR    = "Xie, J. Y. and Fang, W. H. and Cai, Y. and Huang, Q. B. and Li, Q.",
  TITLE     = "Knowledge-Based Visual Question Generation",
  JOURNAL   = CirSysVideo,
  VOLUME    = "32",
  YEAR      = "2022",
  NUMBER    = "11",
  MONTH     = nov,
  PAGES     = "7547--7558",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224538"
}

@article{bb229554,
  AUTHOR    = "Gao, C. Y. and Zhu, Q. and Wang, P. and Li, H. and Liu, Y. L. and van den Hengel, A. J. and Wu, Q.",
  TITLE     = "Structured Multimodal Attentions for {TextVQA}",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "12",
  MONTH     = dec,
  PAGES     = "9603--9614",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224539"
}

@article{bb229555,
  AUTHOR    = "Jin, Z. X. and Wu, H. and Yang, C. and Zhou, F. and Qin, J. Y. and Xiao, L. and Yin, X. C.",
  TITLE     = "{RUArt}: A Novel Text-Centered Solution for Text-Based Visual Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "25",
  YEAR      = "2023",
  PAGES     = "1--12",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224540"
}

@article{bb229556,
  AUTHOR    = "Beckham, C. and Weiss, M. and Golemo, F. and Honari, S. and Nowrouzezahrai, D. and Pal, C.",
  TITLE     = "Visual question answering from another perspective: {CLEVR} mental rotation tests",
  JOURNAL   = PR,
  VOLUME    = "136",
  YEAR      = "2023",
  PAGES     = "109209",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224541"
}

@article{bb229557,
  AUTHOR    = "Zhang, H. N. and Zeng, P. P. and Hu, Y. X. and Qian, J. and Song, J. K. and Gao, L.",
  TITLE     = "Learning visual question answering on controlled semantic noisy labels",
  JOURNAL   = PR,
  VOLUME    = "138",
  YEAR      = "2023",
  PAGES     = "109339",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224542"
}

@article{bb229558,
  AUTHOR    = "Zeng, G. and Zhang, Y. and Zhou, Y. and Yang, X. M. and Jiang, N. and Zhao, G. Q. and Wang, W. P. and Yin, X. C.",
  TITLE     = "Beyond {OCR} + {VQA}: Towards end-to-end reading and reasoning for robust and accurate textvqa",
  JOURNAL   = PR,
  VOLUME    = "138",
  YEAR      = "2023",
  PAGES     = "109337",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224543"
}

@article{bb229559,
  AUTHOR    = "Gao, D. F. and Wang, R. P. and Shan, S. G. and Chen, X. L.",
  TITLE     = "{CRIC}: A {VQA} Dataset for Compositional Reasoning on Vision and Commonsense",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "5",
  MONTH     = may,
  PAGES     = "5561--5578",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224544"
}

@article{bb229560,
  AUTHOR    = "Xu, F. Z. and Lin, Q. and Liu, J. and Zhang, L. L. and Zhao, T. Z. and Chai, Q. and Pan, Y. and Huang, Y. and Wang, Q. Y.",
  TITLE     = "{MoCA}: Incorporating domain pretraining and cross attention for textbook question answering",
  JOURNAL   = PR,
  VOLUME    = "140",
  YEAR      = "2023",
  PAGES     = "109588",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224545"
}

@article{bb229561,
  AUTHOR    = "Li, P. J. and Tan, Z. Y. and Bao, B. K.",
  TITLE     = "Multiview Language Bias Reduction for Visual Question Answering",
  JOURNAL   = MultMedMag,
  VOLUME    = "30",
  YEAR      = "2023",
  NUMBER    = "1",
  MONTH     = jan,
  PAGES     = "91--99",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224546"
}

@article{bb229562,
  AUTHOR    = "Li, H. M. and Han, D. Z. and Chen, C. Q. and Chang, C. C. and Li, K. C. and Li, D.",
  TITLE     = "A Visual Question Answering Network Merging High- and Low-Level Semantic Information",
  JOURNAL   = IEICE,
  VOLUME    = "E106-D",
  YEAR      = "2023",
  NUMBER    = "5",
  MONTH     = may,
  PAGES     = "581--589",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224547"
}

@article{bb229563,
  AUTHOR    = "Liu, B. and Zhan, L. M. and Xu, L. and Wu, X. M.",
  TITLE     = "Medical Visual Question Answering via Conditional Reasoning and Contrastive Learning",
  JOURNAL   = MedImg,
  VOLUME    = "42",
  YEAR      = "2023",
  NUMBER    = "5",
  MONTH     = may,
  PAGES     = "1532--1545",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224548"
}

@article{bb229564,
  AUTHOR    = "Wu, J. M. and Ge, F. and Hong, H. Y. and Shi, Y. and Hao, Y. B. and Ma, L.",
  TITLE     = "Question-aware dynamic scene graph of local semantic representation learning for visual question answering",
  JOURNAL   = PRL,
  VOLUME    = "170",
  YEAR      = "2023",
  PAGES     = "93--99",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224549"
}

@article{bb229565,
  AUTHOR    = "Li, H. and Huang, J. and Jin, P. and Song, G. and Wu, Q. and Chen, J.",
  TITLE     = "Weakly-Supervised {3D} Spatial Reasoning for Text-Based Visual Question Answering",
  JOURNAL   = IP,
  VOLUME    = "32",
  YEAR      = "2023",
  PAGES     = "3367--3382",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224550"
}

@article{bb229566,
  AUTHOR    = "Li, Z. Y. and Guo, Y. Y. and Wang, K. and Wei, Y. W. and Nie, L. Q. and Kankanhalli, M.",
  TITLE     = "Joint Answering and Explanation for Visual Commonsense Reasoning",
  JOURNAL   = IP,
  VOLUME    = "32",
  YEAR      = "2023",
  PAGES     = "3836--3846",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224551"
}

@article{bb229567,
  AUTHOR    = "Yang, X. F. and Lv, F. and Liu, F. and Lin, G. S.",
  TITLE     = "Self-Training Vision Language {BERTs} With a Unified Conditional Model",
  JOURNAL   = CirSysVideo,
  VOLUME    = "33",
  YEAR      = "2023",
  NUMBER    = "8",
  MONTH     = aug,
  PAGES     = "3560--3569",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224552"
}

@article{bb229568,
  AUTHOR    = "Chen, L. and Zheng, Y. H. and Niu, Y. and Zhang, H. W. and Xiao, J.",
  TITLE     = "Counterfactual Samples Synthesizing and Training for Robust Visual Question Answering",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "11",
  MONTH     = nov,
  PAGES     = "13218--13234",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224553"
}

@inproceedings{bb229569,
  AUTHOR    = "Chen, L. and Yan, X. and Xiao, J. and Zhang, H. W. and Pu, S. and Zhuang, Y.",
  TITLE     = "Counterfactual Samples Synthesizing for Robust Visual Question Answering",
  BOOKTITLE = CVPR20,
  YEAR      = "2020",
  PAGES     = "10797--10806",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224554"
}

@article{bb229570,
  AUTHOR    = "Wang, B. Y. and Ma, Y. J. and Li, X. Y. and Liu, H. and Hu, Y. L. and Yin, B. C.",
  TITLE     = "{DSGEM}: Dual scene graph enhancement module-based visual question answering",
  JOURNAL   = IET-CV,
  VOLUME    = "17",
  YEAR      = "2023",
  NUMBER    = "6",
  PAGES     = "638--651",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224555"
}

@article{bb229571,
  AUTHOR    = "Bi, Y. D. and Jiang, H. and Zhang, H. and Hu, Y. L. and Yin, B. C.",
  TITLE     = "Self-supervised knowledge distillation in counterfactual learning for {VQA}",
  JOURNAL   = PRL,
  VOLUME    = "177",
  YEAR      = "2024",
  PAGES     = "33--39",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224556"
}

@article{bb229572,
  AUTHOR    = "Tan, S. and Ge, M. M. and Guo, D. and Liu, H. P. and Sun, F. C.",
  TITLE     = "Knowledge-Based Embodied Question Answering",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "10",
  MONTH     = oct,
  PAGES     = "11948--11960",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224557"
}

@inproceedings{bb229573,
  AUTHOR    = "Tan, S. and Xiang, W. L. and Liu, H. P. and Guo, D. and Sun, F. C.",
  TITLE     = "Multi-agent Embodied Question Answering in Interactive Environments",
  BOOKTITLE = ECCV20,
  YEAR      = "2020",
  PAGES     = "XIII:663--678",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224558"
}

@article{bb229574,
  AUTHOR    = "Mohamud, S. A. M. and Jalali, A. and Lee, M. H.",
  TITLE     = "Encoder-decoder cycle for visual question answering based on perception-action cycle",
  JOURNAL   = PR,
  VOLUME    = "144",
  YEAR      = "2023",
  PAGES     = "109848",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224559"
}

@article{bb229575,
  AUTHOR    = "Tito, R. and Karatzas, D. and Valveny, E.",
  TITLE     = "Hierarchical multimodal transformers for Multipage {DocVQA}",
  JOURNAL   = PR,
  VOLUME    = "144",
  YEAR      = "2023",
  PAGES     = "109834",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224560"
}

@article{bb229576,
  AUTHOR    = "Wang, Y. X. and Wei, B. and Liu, J. and Zhang, L. L. and Wang, J. X. and Wang, Q. Y.",
  TITLE     = "{DisAVR}: Disentangled Adaptive Visual Reasoning Network for Diagram Question Answering",
  JOURNAL   = IP,
  VOLUME    = "32",
  YEAR      = "2023",
  PAGES     = "4812--4827",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224561"
}

@article{bb229577,
  AUTHOR    = "Han, Y. D. and Yin, J. H. and Wu, J. L. and Wei, Y. W. and Nie, L. Q.",
  TITLE     = "Semantic-Aware Modular Capsule Routing for Visual Question Answering",
  JOURNAL   = IP,
  VOLUME    = "32",
  YEAR      = "2023",
  PAGES     = "5537--5549",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224562"
}

@article{bb229578,
  AUTHOR    = "Qian, T. W. and Chen, J. J. and Chen, S. X. and Wu, B. and Jiang, Y. G.",
  TITLE     = "Scene Graph Refinement Network for Visual Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "25",
  YEAR      = "2023",
  PAGES     = "3950--3961",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224563"
}

@article{bb229579,
  AUTHOR    = "Qin, B. S. and Hu, H. J. and Zhuang, Y. T.",
  TITLE     = "Deep Residual Weight-Sharing Attention Network With Low-Rank Attention for Visual Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "25",
  YEAR      = "2023",
  PAGES     = "4282--4295",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224564"
}

@article{bb229580,
  AUTHOR    = "Zhou, S. and Guo, D. and Li, J. and Yang, X. and Wang, M.",
  TITLE     = "Exploring Sparse Spatial Relation in Graph Inference for Text-Based {VQA}",
  JOURNAL   = IP,
  VOLUME    = "32",
  YEAR      = "2023",
  PAGES     = "5060--5074",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224565"
}

@article{bb229581,
  AUTHOR    = "Biswas, K. and Shivakumara, P. and Pal, U. and Liu, C. L. and Lu, Y.",
  TITLE     = "{VQAPT}: A New visual question answering model for personality traits in social media images",
  JOURNAL   = PRL,
  VOLUME    = "175",
  YEAR      = "2023",
  PAGES     = "66--73",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224566"
}

@article{bb229582,
  AUTHOR    = "Cho, J. W. and Argaw, D. M. and Oh, Y. and Kim, D. J. and Kweon, I. S.",
  TITLE     = "Empirical study on using adapters for debiased Visual Question Answering",
  JOURNAL   = CVIU,
  VOLUME    = "237",
  YEAR      = "2023",
  PAGES     = "103842",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224567"
}

@inproceedings{bb229583,
  AUTHOR    = "Cho, J. W. and Kim, D. J. and Choi, J. and Jung, Y. and Kweon, I. S.",
  TITLE     = "Dealing with Missing Modalities in the Visual Question Answer-Difference Prediction Task through Knowledge Distillation",
  BOOKTITLE = MULA21,
  YEAR      = "2021",
  PAGES     = "1592--1601",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224568"
}

@inproceedings{bb229584,
  AUTHOR    = "Cho, J. W. and Kim, D. J. and Ryu, H. and Kweon, I. S.",
  TITLE     = "Generative Bias for Robust Visual Question Answering",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "11681--11690",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224569"
}

@article{bb229585,
  AUTHOR    = "Liu, Y. H. and Wei, W. and Peng, D. and Mao, X. L. and He, Z. Y. and Zhou, P.",
  TITLE     = "Depth-Aware and Semantic Guided Relational Attention Network for Visual Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "25",
  YEAR      = "2023",
  PAGES     = "5344--5357",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224570"
}

@article{bb229586,
  AUTHOR    = "Mao, A. and Yang, Z. and Lin, K. and Xuan, J. and Liu, Y. J.",
  TITLE     = "Positional Attention Guided Transformer-Like Architecture for Visual Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "25",
  YEAR      = "2023",
  PAGES     = "6997--7009",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224571"
}

@article{bb229587,
  AUTHOR    = "Sun, H. and Wang, S. and Zhu, Y. Q. and Yuan, W. and Zou, Z. Q.",
  TITLE     = "Question Classification for Intelligent Question Answering: A Comprehensive Survey",
  JOURNAL   = IJGI,
  VOLUME    = "12",
  YEAR      = "2023",
  NUMBER    = "10",
  PAGES     = "415",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224572"
}

@article{bb229588,
  AUTHOR    = "Cao, B. W. and Cao, J. X. and Gui, J. and Shen, J. and Liu, B. and He, L. and Tang, Y. Y. and Kwok, J. T. Y.",
  TITLE     = "{AlignVE}: Visual Entailment Recognition Based on Alignment Relations",
  JOURNAL   = MultMed,
  VOLUME    = "25",
  YEAR      = "2023",
  PAGES     = "7378--7387",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224573"
}

@article{bb229589,
  AUTHOR    = "Mashrur, A. and Luo, W. and Zaidi, N. A. and Robles Kelly, A.",
  TITLE     = "Robust visual question answering via semantic cross modal augmentation",
  JOURNAL   = CVIU,
  VOLUME    = "238",
  YEAR      = "2024",
  PAGES     = "103862",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224574"
}

@article{bb229590,
  AUTHOR    = "Yu, Z. and Jin, Z. and Yu, J. and Xu, M. L. and Wang, H. B. and Fan, J. P.",
  TITLE     = "Bilaterally Slimmable Transformer for Elastic and Efficient Visual Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "25",
  YEAR      = "2023",
  PAGES     = "9543--9556",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224575"
}

@article{bb229591,
  AUTHOR    = "Yao, H. B. and Wang, L. P. and Cai, C. T. and Sun, Y. X. and Zhang, Z. and Luo, Y. K.",
  TITLE     = "Multi-modal spatial relational attention networks for visual question answering",
  JOURNAL   = IVC,
  VOLUME    = "140",
  YEAR      = "2023",
  PAGES     = "104840",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224576"
}

@article{bb229592,
  AUTHOR    = "Huang, X. F. and Gong, H. F.",
  TITLE     = "A Dual-Attention Learning Network With Word and Sentence Embedding for Medical Visual Question Answering",
  JOURNAL   = MedImg,
  VOLUME    = "43",
  YEAR      = "2024",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "832--845",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224577"
}

@article{bb229593,
  AUTHOR    = "Zheng, W. B. and Yan, L. and Wang, F. Y.",
  TITLE     = "So Many Heads, So Many Wits: Multimodal Graph Reasoning for Text-Based Visual Question Answering",
  JOURNAL   = SMCS,
  VOLUME    = "54",
  YEAR      = "2024",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "854--865",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224578"
}

@article{bb229594,
  AUTHOR    = "Bi, Y. D. and Jiang, H. and Hu, Y. L. and Sun, Y. F. and Yin, B. C.",
  TITLE     = "See and Learn More: Dense Caption-Aware Representation for Visual Question Answering",
  JOURNAL   = CirSysVideo,
  VOLUME    = "34",
  YEAR      = "2024",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "1135--1146",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224579"
}

@article{bb229595,
  AUTHOR    = "Song, Y. and Yang, X. S. and Wang, Y. and Xu, C. S.",
  TITLE     = "Recovering Generalization via Pre-Training-Like Knowledge Distillation for Out-of-Distribution Visual Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "837--851",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224580"
}

@article{bb229596,
  AUTHOR    = "Wu, S. and Zhao, G. S. and Qian, X. M.",
  TITLE     = "Resolving Zero-Shot and Fact-Based Visual Question Answering via Enhanced Fact Retrieval",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "1790--1800",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224581"
}

@article{bb229597,
  AUTHOR    = "Wen, Z. Q. and Niu, S. C. and Li, G. and Wu, Q. Y. and Tan, M. K. and Wu, Q.",
  TITLE     = "Test-Time Model Adaptation for Visual Question Answering With Debiased Self-Supervisions",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "2137--2147",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224582"
}

@article{bb229598,
  AUTHOR    = "Huai, T. Y. and Yang, S. W. and Zhang, J. H. and Zhao, J. B. and He, L.",
  TITLE     = "Debiased Visual Question Answering via the perspective of question types",
  JOURNAL   = PRL,
  VOLUME    = "178",
  YEAR      = "2024",
  PAGES     = "181--187",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224583"
}

@article{bb229599,
  AUTHOR    = "Jiang, J. J. and Liu, Z. Y. and Zheng, N. N.",
  TITLE     = "Correlation Information Bottleneck: Towards Adapting Pretrained Multimodal Models for Robust Visual Question Answering",
  JOURNAL   = IJCV,
  VOLUME    = "132",
  YEAR      = "2024",
  NUMBER    = "1",
  MONTH     = jan,
  PAGES     = "185--207",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT224584"
}