@article{bb187700,
        AUTHOR = "Sharma, H. and Jalal, A. S.",
        TITLE = "Visual question answering model based on graph neural network and
contextual attention",
        JOURNAL = IVC,
        VOLUME = "110",
        YEAR = "2021",
        PAGES = "104165",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182983"}

@article{bb187701,
        AUTHOR = "Yuan, Z. and Sun, S. Y. and Duan, L. X. and Li, C. S. and Wu, X. and Xu, C. S.",
        TITLE = "Adversarial Multimodal Network for Movie Story Question Answering",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "1744--1756",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182984"}

@article{bb187702,
        AUTHOR = "Wu, Y. and Ma, Y. T. and Wan, S. H.",
        TITLE = "Multi-scale relation reasoning for multi-modal Visual Question
Answering",
        JOURNAL = SP:IC,
        VOLUME = "96",
        YEAR = "2021",
        PAGES = "116319",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182985"}

@inproceedings{bb187703,
        AUTHOR = "Ma, Y. T. and Lu, T. and Wu, Y.",
        TITLE = "Multi-scale Relational Reasoning with Regional Attention for Visual
Question Answering",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "5642--5649",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182986"}

@article{bb187704,
        AUTHOR = "dos S Silva, F. H. and Bezerra, G. M. and Holanda, G. B. and de Souza, J. W. M. and Rego, P. A. L. and Lira Neto, A. V. and de Albuquerque, V. H. C. and Reboucas Filho, P. P.",
        TITLE = "A novel feature extractor for human action recognition in visual
question answering",
        JOURNAL = PRL,
        VOLUME = "147",
        YEAR = "2021",
        PAGES = "41--47",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182987"}

@article{bb187705,
        AUTHOR = "Liu, Y. and Zhang, X. M. and Zhang, Q. Y. and Li, C. Z. and Huang, F. and Tang, X. H. and Li, Z. J.",
        TITLE = "Dual self-attention with co-attention networks for visual question
answering",
        JOURNAL = PR,
        VOLUME = "117",
        YEAR = "2021",
        PAGES = "107956",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182988"}

@article{bb187706,
        AUTHOR = "Liu, Y. and Zhang, X. M. and Huang, F. and Zhang, B. and Li, Z. J.",
        TITLE = "Cross-Attentional Spatio-Temporal Semantic Graph Networks for Video
Question Answering",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "1684--1696",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182989"}

@article{bb187707,
        AUTHOR = "Jin, W. and Zhao, Z. and Cao, X. C. and Zhu, J. M. and He, X. Q. and Zhuang, Y. T.",
        TITLE = "Adaptive Spatio-Temporal Graph Enhanced Vision-Language
Representation for Video {QA}",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "5477--5489",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182990"}

@article{bb187708,
        AUTHOR = "He, F. J. and Wang, Y. X. and Miao, X. L. and Sun, X.",
        TITLE = "Interpretable visual reasoning: A survey",
        JOURNAL = IVC,
        VOLUME = "112",
        YEAR = "2021",
        PAGES = "104194",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182991"}

@article{bb187709,
        AUTHOR = "Guo, W. and Zhang, Y. and Yang, J. F. and Yuan, X. J.",
        TITLE = "Re-Attention for Visual Question Answering",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "6730--6743",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182992"}

@article{bb187710,
        AUTHOR = "Hu, J. and Qian, S. and Fang, Q. and Xu, C. S.",
        TITLE = "Heterogeneous Community Question Answering via Social-Aware
Multi-Modal Co-Attention Convolutional Matching",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "2321--2334",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182993"}

@article{bb187711,
        AUTHOR = "Gao, L. and Chen, T. M. and Li, X. P. and Zeng, P. P. and Zhao, L. and Li, Y. F.",
        TITLE = "Generalized pyramid co-attention with learnable aggregation net for
video question answering",
        JOURNAL = PR,
        VOLUME = "120",
        YEAR = "2021",
        PAGES = "108145",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182994"}

@article{bb187712,
        AUTHOR = {Farazi, M. and Khan, S. and Barnes, N.},
        TITLE = {Accuracy vs. complexity: A trade-off in visual question answering models},
        JOURNAL = PR,
        VOLUME = {120},
        YEAR = {2021},
        PAGES = {108106},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182995}}

@article{bb187713,
        AUTHOR = "Zheng, W. F. and Yin, L. R. and Chen, X. B. and Ma, Z. and Liu, S. and Yang, B.",
        TITLE = "Knowledge base graph embedding module design for Visual question
answering model",
        JOURNAL = PR,
        VOLUME = "120",
        YEAR = "2021",
        PAGES = "108153",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182996"}

@article{bb187714,
        AUTHOR = "Al Sadi, A. and Al Ayyoub, M. and Jararweh, Y. and Costen, F.",
        TITLE = "Visual question answering in the medical domain based on deep
learning approaches: A comprehensive study",
        JOURNAL = PRL,
        VOLUME = "150",
        YEAR = "2021",
        PAGES = "57--75",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182997"}

@article{bb187715,
        AUTHOR = "Barra, S. and Bisogni, C. and de Marsico, M. and Ricciardi, S.",
        TITLE = "Visual question answering: Which investigated applications?",
        JOURNAL = PRL,
        VOLUME = "151",
        YEAR = "2021",
        PAGES = "325--331",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182998"}

@article{bb187716,
        AUTHOR = "Le, T. M. and Le, V. and Venkatesh, S. and Tran, T.",
        TITLE = "Hierarchical Conditional Relation Networks for Multimodal Video
Question Answering",
        JOURNAL = IJCV,
        VOLUME = "129",
        YEAR = "2021",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "3027--3050",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182999"}

@inproceedings{bb187717,
        AUTHOR = "Le, T. M. and Le, V. and Venkatesh, S. and Tran, T.",
        TITLE = "Hierarchical Conditional Relation Networks for Video Question
Answering",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "9969--9978",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183000"}

@article{bb187718,
        AUTHOR = "Manmadhan, S. and Kovoor, B. C.",
        TITLE = "Multi-Tier Attention Network using Term-weighted Question Features
for Visual Question Answering",
        JOURNAL = IVC,
        VOLUME = "115",
        YEAR = "2021",
        PAGES = "104291",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183001"}

@article{bb187719,
        AUTHOR = "Liu, A. A. and Lu, Z. and Xu, N. and Nie, W. Z. and Li, W. H.",
        TITLE = "Multi-type decision fusion network for visual {Q\&A}",
        JOURNAL = IVC,
        VOLUME = "115",
        YEAR = "2021",
        PAGES = "104281",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183002"}

@article{bb187720,
        AUTHOR = "Patro, B. N. and Kurmi, V. K. and Kumar, S. and Namboodiri, V. P.",
        TITLE = "{MUMC}: Minimizing uncertainty of mixture of cues",
        JOURNAL = IVC,
        VOLUME = "115",
        YEAR = "2021",
        PAGES = "104280",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183003"}

@article{bb187721,
        AUTHOR = "Liu, F. and Liu, J. and Fang, Z. W. and Hong, R. C. and Lu, H. Q.",
        TITLE = "Visual Question Answering With Dense Inter- and Intra-Modality
Interactions",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "3518--3529",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183004"}

@article{bb187722,
        AUTHOR = "Wu, J. J. and Du, J. and Wang, F. and Yang, C. and Jiang, X. Z. and Hu, J. and Yin, B. and Zhang, J. S. and Dai, L. R.",
        TITLE = "A multimodal attention fusion network with a dynamic vocabulary for
{TextVQA}",
        JOURNAL = PR,
        VOLUME = "122",
        YEAR = "2022",
        PAGES = "108214",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183005"}

@article{bb187723,
        AUTHOR = "Su, H. T. and Chang, C. H. and Shen, P. W. and Wang, Y. S. and Chang, Y. L. and Chang, Y. C. and Cheng, P. J. and Hsu, W. H.",
        TITLE = "End-to-End Video Question-Answer Generation With Generator-Pretester
Network",
        JOURNAL = CirSysVideo,
        VOLUME = "31",
        YEAR = "2021",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "4497--4507",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183006"}

@article{bb187724,
        AUTHOR = "Narayanan, A. and Rao, A. and Prasad, A. and Natarajan, S.",
        TITLE = "{VQA} as a factoid question answering problem: A novel approach for
knowledge-aware and explainable visual question answering",
        JOURNAL = IVC,
        VOLUME = "116",
        YEAR = "2021",
        PAGES = "104328",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183007"}

@article{bb187725,
        AUTHOR = "Guo, Y. Y. and Nie, L. Q. and Cheng, Z. Y. and Tian, Q. and Zhang, M.",
        TITLE = "Loss Re-Scaling {VQA}: Revisiting the Language Prior Problem From a
Class-Imbalance View",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "227--238",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183008"}

@article{bb187726,
        AUTHOR = "Peng, L. and Yang, Y. and Wang, Z. and Huang, Z. and Shen, H. T.",
        TITLE = "{MRA-Net}: Improving {VQA} Via Multi-Modal Relation Attention Network",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "318--329",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183009"}

@inproceedings{bb187727,
        AUTHOR = "Zhang, M. X. and Yang, Y. and Chen, X. and Ji, Y. L. and Xu, X. and Li, J. J. and Shen, H. T.",
        TITLE = "Multi-stage Aggregated Transformer Network for Temporal Language
Localization in Videos",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "12664--12673",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183010"}

@article{bb187728,
        AUTHOR = "Gao, L. L. and Lei, Y. and Zeng, P. P. and Song, J. K. and Wang, M. and Shen, H. T.",
        TITLE = "Hierarchical Representation Network With Auxiliary Tasks for Video
Captioning and Video Question Answering",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "202--215",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183011"}

@article{bb187729,
        AUTHOR = "Sharma, H. and Jalal, A. S.",
        TITLE = "A survey of methods, datasets and evaluation metrics for visual
question answering",
        JOURNAL = IVC,
        VOLUME = "116",
        YEAR = "2021",
        PAGES = "104327",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183012"}

@article{bb187730,
        AUTHOR = "Wu, Z. K. and Liu, Z. and Wang, T. and Wang, D. L.",
        TITLE = "Improved Speaker and Navigator for Vision-and-Language Navigation",
        JOURNAL = MultMedMag,
        VOLUME = "28",
        YEAR = "2021",
        NUMBER = "4",
        MONTH = oct,
        PAGES = "55--63",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183013"}

@article{bb187731,
        AUTHOR = "Jiang, T. L. and Shao, H. L. and Tian, X. and Ji, Y. and Liu, C. P.",
        TITLE = "Aligning vision-language for graph inference in visual dialog",
        JOURNAL = IVC,
        VOLUME = "116",
        YEAR = "2021",
        PAGES = "104316",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183014"}

@article{bb187732,
        AUTHOR = "Manogaran, G. and Shakeel, P. M. and Burhanuddin, M. A. and Baskar, S. and Saravanan, V. and Crespo, R. G. and Martinez, O. S.",
        TITLE = "{ADCCF}: Adaptive deep concatenation coder framework for visual
question answering",
        JOURNAL = PRL,
        VOLUME = "152",
        YEAR = "2021",
        PAGES = "348--355",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183015"}

@article{bb187733,
        AUTHOR = "Zhang, J. and Shao, J. and Cao, R. and Gao, L. and Xu, X. and Shen, H. T.",
        TITLE = "Action-Centric Relation Transformer Network for Video Question
Answering",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "63--74",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183016"}

@article{bb187734,
        AUTHOR = "Zhou, Y. and Ji, R. R. and Sun, X. S. and Su, J. S. and Meng, D. Y. and Gao, Y. and Shen, C. H.",
        TITLE = "Plenty is Plague: Fine-Grained Learning for Visual Question Answering",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "697--709",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183017"}

@article{bb187735,
        AUTHOR = "E, W. N. and Zhou, Y.",
        TITLE = "A Mathematical Model for Universal Semantics",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "1124--1132",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183018"}

@article{bb187736,
        AUTHOR = "Li, X. P. and Wu, B. and Song, J. K. and Gao, L. L. and Zeng, P. P. and Gan, C.",
        TITLE = "Text-instance graph: Exploring the relational semantics for
text-based visual question answering",
        JOURNAL = PR,
        VOLUME = "124",
        YEAR = "2022",
        PAGES = "108455",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183019"}

@article{bb187737,
        AUTHOR = "Shao, X. J. and Xiang, Z. L. and Li, Y. X.",
        TITLE = "Visual question answering with gated relation-aware auxiliary",
        JOURNAL = IET-IPR,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "5",
        PAGES = "1424--1432",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183020"}

@article{bb187738,
        AUTHOR = "Zhang, H. and Sun, A. and Jing, W. and Zhen, L. L. and Zhou, J. T. Y. and Goh, R. S. M.",
        TITLE = "Natural Language Video Localization: A Revisit in Span-Based Question
Answering Framework",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "4252--4266",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183021"}

@article{bb187739,
        AUTHOR = "Liu, Y. and Zhang, X. M. and Zhao, Z. Y. and Zhang, B. and Cheng, L. and Li, Z.",
        TITLE = "{ALSA}: Adversarial Learning of Supervised Attentions for Visual
Question Answering",
        JOURNAL = Cyber,
        VOLUME = "52",
        YEAR = "2022",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "4520--4533",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183022"}

@article{bb187740,
        AUTHOR = "Wang, J. Y. and Bao, B. K. and Xu, C. S.",
        TITLE = "{DualVGR}: A Dual-Visual Graph Reasoning Unit for Video Question
Answering",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "3369--3380",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183023"}

@article{bb187741,
        AUTHOR = "Ouyang, N. L. and Huang, Q. B. and Li, P. J. and Cai, Y. and Liu, B. and Leung, H. F. and Li, Q.",
        TITLE = "Suppressing Biased Samples for Robust {VQA}",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "3405--3415",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183024"}

@article{bb187742,
        AUTHOR = "Yang, L. and Jiang, H. and Song, Q. and Guo, J.",
        TITLE = "A Survey on Long-Tailed Visual Recognition",
        JOURNAL = IJCV,
        VOLUME = "130",
        YEAR = "2022",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "1837--1872",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183025"}

@article{bb187743,
        AUTHOR = {Shuang, K. and Guo, J. and Wang, Z.},
        TITLE = {Comprehensive-perception dynamic reasoning for visual question answering},
        JOURNAL = PR,
        VOLUME = {131},
        YEAR = {2022},
        PAGES = {108878},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183026}}

@article{bb187744,
        AUTHOR = "Gouthaman, K. V. and Mittal, A.",
        TITLE = "On the role of question encoder sequence model in robust visual
question answering",
        JOURNAL = PR,
        VOLUME = "131",
        YEAR = "2022",
        PAGES = "108883",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183027"}

@inproceedings{bb187745,
        AUTHOR = "Burghouts, G. J. and Huizinga, W.",
        TITLE = "Coarse-to-Fine Visual Question Answering by Iterative, Conditional
Refinement",
        BOOKTITLE = CIAP22,
        VOLUME = "II",
        YEAR = "2022",
        PAGES = "418--428",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183028"}

@inproceedings{bb187746,
        AUTHOR = "Li, Z. W. and Stengel Eskin, E. and Zhang, Y. X. and Xie, C. and Tran, Q. and van Durme, B. and Yuille, A. L.",
        TITLE = "Calibrating Concepts and Operations:
Towards Symbolic Reasoning on Real Images",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "14890--14899",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183029"}

@inproceedings{bb187747,
        AUTHOR = "Li, J. C. and Tang, S. L. and Zhu, L. C. and Shi, H. and Huang, X. and Wu, F. and Yang, Y. and Zhuang, Y. T.",
        TITLE = "Adaptive Hierarchical Graph Reasoning with Semantic Coherence for
Video-and-Language Inference",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1847--1857",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183030"}

@inproceedings{bb187748,
        AUTHOR = "Salewski, L. and Emde, C. and Do, V. and Akata, Z. and Lukasiewicz, T.",
        TITLE = "{e-ViL}: A Dataset and Benchmark for Natural Language Explanations in
Vision-Language Tasks",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1224--1234",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183031"}

@inproceedings{bb187749,
        AUTHOR = "Teney, D. and Abbasnejad, E. and van den Hengel, A. J.",
        TITLE = "Unshuffling Data for Improved Generalization in Visual Question
Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1397--1407",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183032"}

@inproceedings{bb187750,
        AUTHOR = "Kant, Y. and Moudgil, A. and Batra, D. and Parikh, D. and Agrawal, H.",
        TITLE = "Contrast and Classify: Training Robust {VQA} Models",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1584--1593",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183033"}

@inproceedings{bb187751,
        AUTHOR = "Han, X. Z. and Wang, S. and Su, C. and Huang, Q. M. and Tian, Q.",
        TITLE = "Greedy Gradient Ensemble for Robust Visual Question Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1564--1573",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183034"}

@inproceedings{bb187752,
        AUTHOR = "Zhu, Y. and Weng, Y. and Zhu, F. and Liang, X. D. and Ye, Q. X. and Lu, Y. and Jiao, J. B.",
        TITLE = "Self-Motivated Communication Agent for Real-World Vision-Dialog
Navigation",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1574--1583",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183035"}

@inproceedings{bb187753,
        AUTHOR = "Dancette, C. and Cadene, R. and Teney, D. and Cord, M.",
        TITLE = "Beyond Question-Based Biases:
Assessing Multimodal Shortcut Learning in Visual Question Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1554--1563",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183036"}

@inproceedings{bb187754,
        AUTHOR = "Engin, D. and Schnitzler, F. and Duong, N. Q. K. and Avrithis, Y.",
        TITLE = "On the hidden treasure of dialog in video question answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2044--2053",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183037"}

@inproceedings{bb187755,
        AUTHOR = "Zhou, Y. and Ren, T. and Zhu, C. Y. and Sun, X. S. and Liu, J. Z. and Ding, X. H. and Xu, M. L. and Ji, R. R.",
        TITLE = "{TRAR}: Routing the Attention Spans in Transformer for Visual Question
Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2054--2064",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183038"}

@inproceedings{bb187756,
        AUTHOR = "Yang, X. and Gao, C. and Zhang, H. and Cai, J. F.",
        TITLE = "Auto-Parsing Network for Image Captioning and Visual Question
Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2177--2187",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183039"}

@inproceedings{bb187757,
        AUTHOR = "Banerjee, P. and Gokhale, T. and Yang, Y. Z. and Baral, C.",
        TITLE = "Weakly Supervised Relative Spatial Reasoning for Visual Question
Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1888--1898",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183040"}

@inproceedings{bb187758,
        AUTHOR = "Matsumori, S. and Shingyouchi, K. and Abe, Y. and Fukuchi, Y. and Sugiura, K. and Imai, M.",
        TITLE = "Unified Questioner Transformer for Descriptive Question Generation in
Goal-Oriented Visual Dialogue",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1878--1887",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183041"}

@inproceedings{bb187759,
        AUTHOR = "Kim, N. and Ha, S. J. and Kang, J. W.",
        TITLE = "Video Question Answering Using Language-Guided Deep Compressed-Domain
Video Feature",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1688--1697",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183042"}

@inproceedings{bb187760,
        AUTHOR = "Liu, F. and Liu, J. and Wang, W. N. and Lu, H. Q.",
        TITLE = "{HAIR}: Hierarchical Visual-Semantic Relational Reasoning for Video
Question Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1678--1687",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183043"}

@inproceedings{bb187761,
        AUTHOR = "Yang, A. and Miech, A. and Sivic, J. and Laptev, I. and Schmid, C.",
        TITLE = "Just Ask:
Learning to Answer Questions from Millions of Narrated Videos",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1666--1677",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183044"}

@inproceedings{bb187762,
        AUTHOR = "Gao, D. F. and Wang, R. P. and Bai, Z. and Chen, X. L.",
        TITLE = "{Env-QA}: A Video Question Answering Benchmark for Comprehensive
Understanding of Dynamic Environments",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1655--1665",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183045"}

@inproceedings{bb187763,
        AUTHOR = "Cao, Q. X. and Wan, W. T. and Wang, K. and Liang, X. D. and Lin, L.",
        TITLE = "Linguistically Routing Capsule Network for Out-of-distribution Visual
Question Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1594--1603",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183046"}

@inproceedings{bb187764,
        AUTHOR = "Li, L. J. and Lei, J. and Gan, Z. and Liu, J. J.",
        TITLE = "Adversarial {VQA}:
A New Benchmark for Evaluating the Robustness of {VQA} Models",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2022--2031",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183047"}

@inproceedings{bb187765,
        AUTHOR = "Yun, H. and Yu, Y. and Yang, W. and Lee, K. and Kim, G.",
        TITLE = "{Pano-AVQA}: Grounded Audio-Visual Question Answering on {$360^{\circ}$} Videos",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2011--2021",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183048"}

@inproceedings{bb187766,
        AUTHOR = "Askarian, N. and Abbasnejad, E. and Zukerman, I. and Buntine, W. and Haffari, G.",
        TITLE = "Inductive Biases for Low Data {VQA}: A Data Augmentation Approach",
        BOOKTITLE = Novelty22,
        YEAR = "2022",
        PAGES = "231--240",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183049"}

@inproceedings{bb187767,
        AUTHOR = "Mathew, M. and Bagal, V. and Tito, R. and Karatzas, D. and Valveny, E. and Jawahar, C. V.",
        TITLE = "{InfographicVQA}",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2582--2591",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183050"}

@inproceedings{bb187768,
        AUTHOR = "Kumar, S. and Patro, B. N. and Namboodiri, V. P.",
        TITLE = "Auto {QA}: The Question Is Not Only What, but Also Where",
        BOOKTITLE = Novelty22,
        YEAR = "2022",
        PAGES = "272--281",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183051"}

@inproceedings{bb187769,
        AUTHOR = "Gupta, V. and Patro, B. N. and Parihar, H. and Namboodiri, V. P.",
        TITLE = "{VQuAD}: Video Question Answering Diagnostic Dataset",
        BOOKTITLE = Novelty22,
        YEAR = "2022",
        PAGES = "282--291",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183052"}

@inproceedings{bb187770,
        AUTHOR = "Kolling, C. and More, M. and Gavenski, N. and Pooch, E. and Parraga, O. and Barros, R. C.",
        TITLE = "Efficient Counterfactual Debiasing for Visual Question Answering",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2572--2581",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183053"}

@inproceedings{bb187771,
        AUTHOR = "Jung, S. J. and Byun, J. and Shim, K. and Hwang, S. Y. and Kim, C.",
        TITLE = "Understanding {VQA} for Negative Answers Through Visual and Linguistic
Inference",
        BOOKTITLE = ICIP21,
        YEAR = "2021",
        PAGES = "2873--2877",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183054"}

@inproceedings{bb187772,
        AUTHOR = "Felix, R. and Repasky, B. and Hodge, S. and Zolfaghari, R. and Abbasnejad, E. and Sherrah, J.",
        TITLE = "Cross-Modal Visual Question Answering for Remote Sensing Data",
        BOOKTITLE = DICTA21,
        YEAR = "2021",
        PAGES = "1--9",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183055"}

@inproceedings{bb187773,
        AUTHOR = "Le, T. and Nguyen, H. T. and Nguyen, M. L.",
        TITLE = "Vision and Text Transformer for Predicting Answerability on Visual
Question Answering",
        BOOKTITLE = ICIP21,
        YEAR = "2021",
        PAGES = "934--938",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183056"}

@inproceedings{bb187774,
        AUTHOR = "Huang, Z. Q. and Zhu, H. Y. and Sun, Y. and Choi, D. and Tan, C. and Lim, J. H.",
        TITLE = "A Diagnostic Study of Visual Question Answering With Analogical
Reasoning",
        BOOKTITLE = ICIP21,
        YEAR = "2021",
        PAGES = "2463--2467",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183057"}

@inproceedings{bb187775,
        AUTHOR = "Chen, H. Y. and Liu, R. and Peng, B.",
        TITLE = "Cross-modal Relational Reasoning Network for Visual Question
Answering",
        BOOKTITLE = MAIR2-21,
        YEAR = "2021",
        PAGES = "3939--3948",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183058"}

@inproceedings{bb187776,
        AUTHOR = "Wang, Z. X. and Miao, Y. and Specia, L.",
        TITLE = "Latent Variable Models for Visual Question Answering",
        BOOKTITLE = CLVL21,
        YEAR = "2021",
        PAGES = "3137--3141",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183059"}

@inproceedings{bb187777,
        AUTHOR = "Hirota, Y. and Garcia, N. and Otani, M. and Chu, C. and Nakashima, Y. and Taniguchi, I. and Onoye, T.",
        TITLE = "Visual Question Answering with Textual Representations for Images",
        BOOKTITLE = CLVL21,
        YEAR = "2021",
        PAGES = "3147--3150",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183060"}

@inproceedings{bb187778,
        AUTHOR = "Nishimura, T. and Sakoda, K. and Hashimoto, A. and Ushiku, Y. and Tanaka, N. and Ono, F. and Kameko, H. and Mori, S.",
        TITLE = "Egocentric Biochemical Video-and-Language Dataset",
        BOOKTITLE = CLVL21,
        YEAR = "2021",
        PAGES = "3122--3126",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183061"}

@inproceedings{bb187779,
        AUTHOR = "Ye, K. and Kovashka, A.",
        TITLE = "Linguistic Structures as Weak Supervision for Visual Scene Graph
Generation",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "8285--8295",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183062"}

@inproceedings{bb187780,
        AUTHOR = "Lei, J. and Li, L. J. and Zhou, L. and Gan, Z. and Berg, T. L. and Bansal, M. and Liu, J. J.",
        TITLE = "Less is More:
{CLIPBERT} for Video-and-Language Learning via Sparse Sampling",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "7327--7337",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183063"}

@inproceedings{bb187781,
        AUTHOR = "Yang, X. and Zhang, H. and Qi, G. J. and Cai, J. F.",
        TITLE = "Causal Attention for Vision-Language Tasks",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "9842--9852",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183064"}

@inproceedings{bb187782,
  author    = {Tu, T. and Ping, Q. and Thattai, G. and Tur, G. and Natarajan, P.},
  title     = {Learning Better Visual Dialog Agents with Pretrained Visual-Linguistic Representation},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {5618-5627},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183065},
}

% The mixed-case benchmark name is brace-protected so sentence-casing styles keep it intact.
@inproceedings{bb187783,
        AUTHOR = "Xiao, J.B. and Shang, X. and Yao, A. and Chua, T.S.",
        TITLE = "{NExT-QA}: Next Phase of Question-Answering to Explaining Temporal
Actions",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "9772-9781",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183066"}

% The benchmark name is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{bb187784,
        AUTHOR = "Xu, L. and Huang, H. and Liu, J.",
        TITLE = "{SUTD-TrafficQA}: A Question Answering Benchmark and an Efficient
Network for Video Reasoning over Traffic Events",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "9873-9883",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183067"}

@inproceedings{bb187785,
  author    = {Chen, X.Y. and Jiang, M. and Zhao, Q.},
  title     = {Predicting Human Scanpaths in Visual Question Answering},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {10871-10880},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183068},
}

% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{bb187786,
        AUTHOR = "Qi, Y.G. and Zhang, K. and Sain, A. and Song, Y.Z.",
        TITLE = "{PQA}: Perceptual Question Answering",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "12051-12059",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183069"}

@inproceedings{bb187787,
  author    = {Park, J. and Lee, J.Y. and Sohn, K.H.},
  title     = {Bridge to Answer: Structure-aware Graph Interaction Network for Video Question Answering},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {15521-15530},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183070},
}

% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{bb187788,
        AUTHOR = "Yuan, Y.Y. and Wang, S. and Jiang, M.Y. and Chen, T.Y.",
        TITLE = "Perception Matters: Detecting Perception Failures of {VQA} Models Using
Metamorphic Testing",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "16903-16912",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183071"}

% Both acronyms are brace-protected so sentence-casing styles do not lowercase them.
@inproceedings{bb187789,
        AUTHOR = "Marino, K. and Chen, X.L. and Parikh, D. and Gupta, A. and Rohrbach, M.",
        TITLE = "{KRISP}: Integrating Implicit and Symbolic Knowledge for Open-Domain
Knowledge-Based {VQA}",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "14106-14116",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183072"}

% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{bb187790,
        AUTHOR = "Niu, Y. and Tang, K. and Zhang, H. and Lu, Z.W. and Hua, X.S. and Wen, J.R.",
        TITLE = "Counterfactual {VQA}: A Cause-Effect Look at Language Bias",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "12695-12705",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183073"}

% The method acronym and the two hyphenated task names are brace-protected so
% sentence-casing styles keep their capitals.
@inproceedings{bb187791,
        AUTHOR = "Yang, Z.Y. and Lu, Y. and Wang, J.F. and Yin, X. and Florencio, D. and Wang, L. and Zhang, C. and Zhang, L. and Luo, J.B.",
        TITLE = "{TAP}: Text-Aware Pre-training for {Text-VQA} and {Text-Caption}",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "8747-8757",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183074"}

% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{bb187792,
        AUTHOR = "Zhang, M. and Maidment, T. and Diab, A. and Kovashka, A. and Hwa, R.",
        TITLE = "Domain-robust {VQA} with diverse datasets and methods but no target
labels",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "7042-7052",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183075"}

% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{bb187793,
        AUTHOR = "Kervadec, C. and Jaunet, T. and Antipov, G. and Baccouche, M. and Vuillemot, R. and Wolf, C.",
        TITLE = "How Transferable are Reasoning Patterns in {VQA}?",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "4205-4214",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183076"}

% The ellipsis is written as three ASCII dots, since a raw Unicode character is
% not safe under 8-bit BibTeX. The acronym is brace-protected against sentence-casing.
@inproceedings{bb187794,
        AUTHOR = "Kervadec, C. and Antipov, G. and Baccouche, M. and Wolf, C.",
        TITLE = "Roses are Red, Violets are Blue... But Should {VQA} expect Them To?",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "2775-2784",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183077"}

@inproceedings{bb187795,
  author    = {Cho, J.W. and Kim, D.J. and Choi, J. and Jung, Y. and Kweon, I.S.},
  title     = {Dealing with Missing Modalities in the Visual Question Answer-Difference Prediction Task through Knowledge Distillation},
  booktitle = MULA21,
  year      = {2021},
  pages     = {1592-1601},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183078},
}

% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{bb187796,
        AUTHOR = "Dua, R. and Kancheti, S.S. and Balasubramanian, V.N.",
        TITLE = "Beyond {VQA}: Generating Multi-word Answers and Rationales to Visual
Questions",
        BOOKTITLE = MULA21,
        YEAR = "2021",
        PAGES = "1623-1632",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183079"}

@inproceedings{bb187797,
  author    = {Rahman, T. and Chou, S.H. and Sigal, L. and Carenini, G.},
  title     = {An Improved Attention for Visual Question Answering},
  booktitle = MULA21,
  year      = {2021},
  pages     = {1653-1662},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183080},
}

% The dataset name and the acronym are brace-protected so sentence-casing styles keep their capitals.
@inproceedings{bb187798,
        AUTHOR = "Mathew, M. and Karatzas, D. and Jawahar, C.V.",
        TITLE = "{DocVQA}: A Dataset for {VQA} on Document Images",
        BOOKTITLE = WACV21,
        YEAR = "2021",
        PAGES = "2199-2208",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183081"}

@inproceedings{bb187799,
  author    = {Chen, X.W. and Liu, R. and Song, X.M. and Han, Y.H.},
  title     = {Locating Visual Explanations for Video Question Answering},
  booktitle = MMMod21,
  year      = {2021},
  pages     = {I:290-302},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183082},
}

Last update: Aug 14, 2022 at 21:20:19