% Bibliography records bb187700 through bb187799.
%
% JOURNAL and BOOKTITLE values (IVC, PR, MultMed, ICCV21, ...) are bare macro
% names, not literal strings. Their string definitions are not visible in this
% part of the file; presumably they are declared elsewhere -- TODO confirm,
% since an undefined macro yields empty text with only a warning.
%
% BIBSOURCE is a non-standard field (ignored by standard styles); it appears to
% hold the visionbib.com page a record was taken from.
%
% Conventions used below: one field per line; page ranges use "--"; MONTH uses
% the predefined three-letter macros; author initials are space-separated so
% abbreviating styles keep all of them; acronyms and other case-sensitive words
% in titles are brace-protected; LaTeX specials and non-ASCII characters in
% titles are written as LaTeX escapes.

@article{bb187700,
  AUTHOR    = "Sharma, H. and Jalal, A. S.",
  TITLE     = "Visual question answering model based on graph neural network and contextual attention",
  JOURNAL   = IVC,
  VOLUME    = "110",
  YEAR      = "2021",
  PAGES     = "104165",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182983"
}

@article{bb187701,
  AUTHOR    = "Yuan, Z. and Sun, S. Y. and Duan, L. X. and Li, C. S. and Wu, X. and Xu, C. S.",
  TITLE     = "Adversarial Multimodal Network for Movie Story Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "23",
  YEAR      = "2021",
  PAGES     = "1744--1756",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182984"
}

@article{bb187702,
  AUTHOR    = "Wu, Y. and Ma, Y. T. and Wan, S. H.",
  TITLE     = "Multi-scale relation reasoning for multi-modal Visual Question Answering",
  JOURNAL   = SP:IC,
  VOLUME    = "96",
  YEAR      = "2021",
  PAGES     = "116319",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182985"
}

@inproceedings{bb187703,
  AUTHOR    = "Ma, Y. T. and Lu, T. and Wu, Y.",
  TITLE     = "Multi-scale Relational Reasoning with Regional Attention for Visual Question Answering",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "5642--5649",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182986"
}

@article{bb187704,
  AUTHOR    = "dos S Silva, F. H. and Bezerra, G. M. and Holanda, G. B. and de Souza, J. W. M. and Rego, P. A. L. and Lira Neto, A. V. and de Albuquerque, V. H. C. and Reboucas Filho, P. P.",
  TITLE     = "A novel feature extractor for human action recognition in visual question answering",
  JOURNAL   = PRL,
  VOLUME    = "147",
  YEAR      = "2021",
  PAGES     = "41--47",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182987"
}

@article{bb187705,
  AUTHOR    = "Liu, Y. and Zhang, X. M. and Zhang, Q. Y. and Li, C. Z. and Huang, F. and Tang, X. H. and Li, Z. J.",
  TITLE     = "Dual self-attention with co-attention networks for visual question answering",
  JOURNAL   = PR,
  VOLUME    = "117",
  YEAR      = "2021",
  PAGES     = "107956",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182988"
}

@article{bb187706,
  AUTHOR    = "Liu, Y. and Zhang, X. M. and Huang, F. and Zhang, B. and Li, Z. J.",
  TITLE     = "Cross-Attentional Spatio-Temporal Semantic Graph Networks for Video Question Answering",
  JOURNAL   = IP,
  VOLUME    = "31",
  YEAR      = "2022",
  PAGES     = "1684--1696",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182989"
}

@article{bb187707,
  AUTHOR    = "Jin, W. and Zhao, Z. and Cao, X. C. and Zhu, J. M. and He, X. Q. and Zhuang, Y. T.",
  TITLE     = "Adaptive Spatio-Temporal Graph Enhanced Vision-Language Representation for Video {QA}",
  JOURNAL   = IP,
  VOLUME    = "30",
  YEAR      = "2021",
  PAGES     = "5477--5489",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182990"
}

@article{bb187708,
  AUTHOR    = "He, F. J. and Wang, Y. X. and Miao, X. L. and Sun, X.",
  TITLE     = "Interpretable visual reasoning: A survey",
  JOURNAL   = IVC,
  VOLUME    = "112",
  YEAR      = "2021",
  PAGES     = "104194",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182991"
}

@article{bb187709,
  AUTHOR    = "Guo, W. and Zhang, Y. and Yang, J. F. and Yuan, X. J.",
  TITLE     = "Re-Attention for Visual Question Answering",
  JOURNAL   = IP,
  VOLUME    = "30",
  YEAR      = "2021",
  PAGES     = "6730--6743",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182992"
}

@article{bb187710,
  AUTHOR    = "Hu, J. and Qian, S. and Fang, Q. and Xu, C. S.",
  TITLE     = "Heterogeneous Community Question Answering via Social-Aware Multi-Modal Co-Attention Convolutional Matching",
  JOURNAL   = MultMed,
  VOLUME    = "23",
  YEAR      = "2021",
  PAGES     = "2321--2334",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182993"
}

@article{bb187711,
  AUTHOR    = "Gao, L. and Chen, T. M. and Li, X. P. and Zeng, P. P. and Zhao, L. and Li, Y. F.",
  TITLE     = "Generalized pyramid co-attention with learnable aggregation net for video question answering",
  JOURNAL   = PR,
  VOLUME    = "120",
  YEAR      = "2021",
  PAGES     = "108145",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182994"
}

@article{bb187712,
  AUTHOR    = "Farazi, M. and Khan, S. and Barnes, N.",
  TITLE     = "Accuracy vs. complexity: A trade-off in visual question answering models",
  JOURNAL   = PR,
  VOLUME    = "120",
  YEAR      = "2021",
  PAGES     = "108106",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182995"
}

@article{bb187713,
  AUTHOR    = "Zheng, W. F. and Yin, L. R. and Chen, X. B. and Ma, Z. and Liu, S. and Yang, B.",
  TITLE     = "Knowledge base graph embedding module design for Visual question answering model",
  JOURNAL   = PR,
  VOLUME    = "120",
  YEAR      = "2021",
  PAGES     = "108153",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182996"
}

@article{bb187714,
  AUTHOR    = "Al Sadi, A. and Al Ayyoub, M. and Jararweh, Y. and Costen, F.",
  TITLE     = "Visual question answering in the medical domain based on deep learning approaches: A comprehensive study",
  JOURNAL   = PRL,
  VOLUME    = "150",
  YEAR      = "2021",
  PAGES     = "57--75",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182997"
}

@article{bb187715,
  AUTHOR    = "Barra, S. and Bisogni, C. and de Marsico, M. and Ricciardi, S.",
  TITLE     = "Visual question answering: Which investigated applications?",
  JOURNAL   = PRL,
  VOLUME    = "151",
  YEAR      = "2021",
  PAGES     = "325--331",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182998"
}

@article{bb187716,
  AUTHOR    = "Le, T. M. and Le, V. and Venkatesh, S. and Tran, T.",
  TITLE     = "Hierarchical Conditional Relation Networks for Multimodal Video Question Answering",
  JOURNAL   = IJCV,
  VOLUME    = "129",
  YEAR      = "2021",
  NUMBER    = "11",
  MONTH     = nov,
  PAGES     = "3027--3050",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT182999"
}

@inproceedings{bb187717,
  AUTHOR    = "Le, T. M. and Le, V. and Venkatesh, S. and Tran, T.",
  TITLE     = "Hierarchical Conditional Relation Networks for Video Question Answering",
  BOOKTITLE = CVPR20,
  YEAR      = "2020",
  PAGES     = "9969--9978",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183000"
}

@article{bb187718,
  AUTHOR    = "Manmadhan, S. and Kovoor, B. C.",
  TITLE     = "Multi-Tier Attention Network using Term-weighted Question Features for Visual Question Answering",
  JOURNAL   = IVC,
  VOLUME    = "115",
  YEAR      = "2021",
  PAGES     = "104291",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183001"
}

@article{bb187719,
  AUTHOR    = "Liu, A. A. and Lu, Z. and Xu, N. and Nie, W. Z. and Li, W. H.",
  TITLE     = "Multi-type decision fusion network for visual {Q\&A}",
  JOURNAL   = IVC,
  VOLUME    = "115",
  YEAR      = "2021",
  PAGES     = "104281",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183002"
}

@article{bb187720,
  AUTHOR    = "Patro, B. N. and Kurmi, V. K. and Kumar, S. and Namboodiri, V. P.",
  TITLE     = "{MUMC}: Minimizing uncertainty of mixture of cues",
  JOURNAL   = IVC,
  VOLUME    = "115",
  YEAR      = "2021",
  PAGES     = "104280",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183003"
}

@article{bb187721,
  AUTHOR    = "Liu, F. and Liu, J. and Fang, Z. W. and Hong, R. C. and Lu, H. Q.",
  TITLE     = "Visual Question Answering With Dense Inter- and Intra-Modality Interactions",
  JOURNAL   = MultMed,
  VOLUME    = "23",
  YEAR      = "2021",
  PAGES     = "3518--3529",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183004"
}

@article{bb187722,
  AUTHOR    = "Wu, J. J. and Du, J. and Wang, F. and Yang, C. and Jiang, X. Z. and Hu, J. and Yin, B. and Zhang, J. S. and Dai, L. R.",
  TITLE     = "A multimodal attention fusion network with a dynamic vocabulary for {TextVQA}",
  JOURNAL   = PR,
  VOLUME    = "122",
  YEAR      = "2022",
  PAGES     = "108214",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183005"
}

@article{bb187723,
  AUTHOR    = "Su, H. T. and Chang, C. H. and Shen, P. W. and Wang, Y. S. and Chang, Y. L. and Chang, Y. C. and Cheng, P. J. and Hsu, W. H.",
  TITLE     = "End-to-End Video Question-Answer Generation With Generator-Pretester Network",
  JOURNAL   = CirSysVideo,
  VOLUME    = "31",
  YEAR      = "2021",
  NUMBER    = "11",
  MONTH     = nov,
  PAGES     = "4497--4507",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183006"
}

@article{bb187724,
  AUTHOR    = "Narayanan, A. and Rao, A. and Prasad, A. and Natarajan, S.",
  TITLE     = "{VQA} as a factoid question answering problem: A novel approach for knowledge-aware and explainable visual question answering",
  JOURNAL   = IVC,
  VOLUME    = "116",
  YEAR      = "2021",
  PAGES     = "104328",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183007"
}

@article{bb187725,
  AUTHOR    = "Guo, Y. Y. and Nie, L. Q. and Cheng, Z. Y. and Tian, Q. and Zhang, M.",
  TITLE     = "Loss Re-Scaling {VQA}: Revisiting the Language Prior Problem From a Class-Imbalance View",
  JOURNAL   = IP,
  VOLUME    = "31",
  YEAR      = "2022",
  PAGES     = "227--238",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183008"
}

@article{bb187726,
  AUTHOR    = "Peng, L. and Yang, Y. and Wang, Z. and Huang, Z. and Shen, H. T.",
  TITLE     = "{MRA-Net}: Improving {VQA} Via Multi-Modal Relation Attention Network",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "1",
  MONTH     = jan,
  PAGES     = "318--329",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183009"
}

@inproceedings{bb187727,
  AUTHOR    = "Zhang, M. X. and Yang, Y. and Chen, X. and Ji, Y. L. and Xu, X. and Li, J. J. and Shen, H. T.",
  TITLE     = "Multi-stage Aggregated Transformer Network for Temporal Language Localization in Videos",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "12664--12673",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183010"
}

@article{bb187728,
  AUTHOR    = "Gao, L. L. and Lei, Y. and Zeng, P. P. and Song, J. K. and Wang, M. and Shen, H. T.",
  TITLE     = "Hierarchical Representation Network With Auxiliary Tasks for Video Captioning and Video Question Answering",
  JOURNAL   = IP,
  VOLUME    = "31",
  YEAR      = "2022",
  PAGES     = "202--215",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183011"
}

@article{bb187729,
  AUTHOR    = "Sharma, H. and Jalal, A. S.",
  TITLE     = "A survey of methods, datasets and evaluation metrics for visual question answering",
  JOURNAL   = IVC,
  VOLUME    = "116",
  YEAR      = "2021",
  PAGES     = "104327",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183012"
}

@article{bb187730,
  AUTHOR    = "Wu, Z. K. and Liu, Z. and Wang, T. and Wang, D. L.",
  TITLE     = "Improved Speaker and Navigator for Vision-and-Language Navigation",
  JOURNAL   = MultMedMag,
  VOLUME    = "28",
  YEAR      = "2021",
  NUMBER    = "4",
  MONTH     = oct,
  PAGES     = "55--63",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183013"
}

@article{bb187731,
  AUTHOR    = "Jiang, T. L. and Shao, H. L. and Tian, X. and Ji, Y. and Liu, C. P.",
  TITLE     = "Aligning vision-language for graph inference in visual dialog",
  JOURNAL   = IVC,
  VOLUME    = "116",
  YEAR      = "2021",
  PAGES     = "104316",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183014"
}

@article{bb187732,
  AUTHOR    = "Manogaran, G. and Shakeel, P. M. and Burhanuddin, M. A. and Baskar, S. and Saravanan, V. and Crespo, R. G. and Martinez, O. S.",
  TITLE     = "{ADCCF}: Adaptive deep concatenation coder framework for visual question answering",
  JOURNAL   = PRL,
  VOLUME    = "152",
  YEAR      = "2021",
  PAGES     = "348--355",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183015"
}

@article{bb187733,
  AUTHOR    = "Zhang, J. and Shao, J. and Cao, R. and Gao, L. and Xu, X. and Shen, H. T.",
  TITLE     = "Action-Centric Relation Transformer Network for Video Question Answering",
  JOURNAL   = CirSysVideo,
  VOLUME    = "32",
  YEAR      = "2022",
  NUMBER    = "1",
  MONTH     = jan,
  PAGES     = "63--74",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183016"
}

@article{bb187734,
  AUTHOR    = "Zhou, Y. and Ji, R. R. and Sun, X. S. and Su, J. S. and Meng, D. Y. and Gao, Y. and Shen, C. H.",
  TITLE     = "Plenty is Plague: Fine-Grained Learning for Visual Question Answering",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "697--709",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183017"
}

@article{bb187735,
  AUTHOR    = "E, W. N. and Zhou, Y.",
  TITLE     = "A Mathematical Model for Universal Semantics",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "3",
  MONTH     = mar,
  PAGES     = "1124--1132",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183018"
}

@article{bb187736,
  AUTHOR    = "Li, X. P. and Wu, B. and Song, J. K. and Gao, L. L. and Zeng, P. P. and Gan, C.",
  TITLE     = "Text-instance graph: Exploring the relational semantics for text-based visual question answering",
  JOURNAL   = PR,
  VOLUME    = "124",
  YEAR      = "2022",
  PAGES     = "108455",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183019"
}

@article{bb187737,
  AUTHOR    = "Shao, X. J. and Xiang, Z. L. and Li, Y. X.",
  TITLE     = "Visual question answering with gated relation-aware auxiliary",
  JOURNAL   = IET-IPR,
  VOLUME    = "16",
  YEAR      = "2022",
  NUMBER    = "5",
  PAGES     = "1424--1432",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183020"
}

@article{bb187738,
  AUTHOR    = "Zhang, H. and Sun, A. and Jing, W. and Zhen, L. L. and Zhou, J. T. Y. and Goh, R. S. M.",
  TITLE     = "Natural Language Video Localization: A Revisit in Span-Based Question Answering Framework",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "8",
  MONTH     = aug,
  PAGES     = "4252--4266",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183021"
}

@article{bb187739,
  AUTHOR    = "Liu, Y. and Zhang, X. M. and Zhao, Z. Y. and Zhang, B. and Cheng, L. and Li, Z.",
  TITLE     = "{ALSA}: Adversarial Learning of Supervised Attentions for Visual Question Answering",
  JOURNAL   = Cyber,
  VOLUME    = "52",
  YEAR      = "2022",
  NUMBER    = "6",
  MONTH     = jun,
  PAGES     = "4520--4533",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183022"
}

@article{bb187740,
  AUTHOR    = "Wang, J. Y. and Bao, B. K. and Xu, C. S.",
  TITLE     = "{DualVGR}: A Dual-Visual Graph Reasoning Unit for Video Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "24",
  YEAR      = "2022",
  PAGES     = "3369--3380",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183023"
}

@article{bb187741,
  AUTHOR    = "Ouyang, N. L. and Huang, Q. B. and Li, P. J. and Cai, Y. and Liu, B. and Leung, H. F. and Li, Q.",
  TITLE     = "Suppressing Biased Samples for Robust {VQA}",
  JOURNAL   = MultMed,
  VOLUME    = "24",
  YEAR      = "2022",
  PAGES     = "3405--3415",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183024"
}

@article{bb187742,
  AUTHOR    = "Yang, L. and Jiang, H. and Song, Q. and Guo, J.",
  TITLE     = "A Survey on Long-Tailed Visual Recognition",
  JOURNAL   = IJCV,
  VOLUME    = "130",
  YEAR      = "2022",
  NUMBER    = "7",
  MONTH     = jul,
  PAGES     = "1837--1872",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183025"
}

@article{bb187743,
  AUTHOR    = "Shuang, K. and Guo, J. and Wang, Z.",
  TITLE     = "Comprehensive-perception dynamic reasoning for visual question answering",
  JOURNAL   = PR,
  VOLUME    = "131",
  YEAR      = "2022",
  PAGES     = "108878",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183026"
}

@article{bb187744,
  AUTHOR    = "Gouthaman, K. V. and Mittal, A.",
  TITLE     = "On the role of question encoder sequence model in robust visual question answering",
  JOURNAL   = PR,
  VOLUME    = "131",
  YEAR      = "2022",
  PAGES     = "108883",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183027"
}

@inproceedings{bb187745,
  AUTHOR    = "Burghouts, G. J. and Huizinga, W.",
  TITLE     = "Coarse-to-Fine Visual Question Answering by Iterative, Conditional Refinement",
  BOOKTITLE = CIAP22,
  YEAR      = "2022",
  PAGES     = "II:418--428",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183028"
}

@inproceedings{bb187746,
  AUTHOR    = "Li, Z. W. and Stengel Eskin, E. and Zhang, Y. X. and Xie, C. and Tran, Q. and van Durme, B. and Yuille, A. L.",
  TITLE     = "Calibrating Concepts and Operations: Towards Symbolic Reasoning on Real Images",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "14890--14899",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183029"
}

@inproceedings{bb187747,
  AUTHOR    = "Li, J. C. and Tang, S. L. and Zhu, L. C. and Shi, H. and Huang, X. and Wu, F. and Yang, Y. and Zhuang, Y. T.",
  TITLE     = "Adaptive Hierarchical Graph Reasoning with Semantic Coherence for Video-and-Language Inference",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1847--1857",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183030"
}

@inproceedings{bb187748,
  AUTHOR    = "Salewski, L. and Emde, C. and Do, V. and Akata, Z. and Lukasiewicz, T.",
  TITLE     = "{e-ViL}: A Dataset and Benchmark for Natural Language Explanations in Vision-Language Tasks",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1224--1234",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183031"
}

@inproceedings{bb187749,
  AUTHOR    = "Teney, D. and Abbasnejad, E. and van den Hengel, A. J.",
  TITLE     = "Unshuffling Data for Improved Generalization in Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1397--1407",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183032"
}

@inproceedings{bb187750,
  AUTHOR    = "Kant, Y. and Moudgil, A. and Batra, D. and Parikh, D. and Agrawal, H.",
  TITLE     = "Contrast and Classify: Training Robust {VQA} Models",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1584--1593",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183033"
}

@inproceedings{bb187751,
  AUTHOR    = "Han, X. Z. and Wang, S. and Su, C. and Huang, Q. M. and Tian, Q.",
  TITLE     = "Greedy Gradient Ensemble for Robust Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1564--1573",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183034"
}

@inproceedings{bb187752,
  AUTHOR    = "Zhu, Y. and Weng, Y. and Zhu, F. and Liang, X. D. and Ye, Q. X. and Lu, Y. and Jiao, J. B.",
  TITLE     = "Self-Motivated Communication Agent for Real-World Vision-Dialog Navigation",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1574--1583",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183035"
}

@inproceedings{bb187753,
  AUTHOR    = "Dancette, C. and Cadene, R. and Teney, D. and Cord, M.",
  TITLE     = "Beyond Question-Based Biases: Assessing Multimodal Shortcut Learning in Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1554--1563",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183036"
}

@inproceedings{bb187754,
  AUTHOR    = "Engin, D. and Schnitzler, F. and Duong, N. Q. K. and Avrithis, Y.",
  TITLE     = "On the hidden treasure of dialog in video question answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2044--2053",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183037"
}

@inproceedings{bb187755,
  AUTHOR    = "Zhou, Y. and Ren, T. and Zhu, C. Y. and Sun, X. S. and Liu, J. Z. and Ding, X. H. and Xu, M. L. and Ji, R. R.",
  TITLE     = "{TRAR}: Routing the Attention Spans in Transformer for Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2054--2064",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183038"
}

@inproceedings{bb187756,
  AUTHOR    = "Yang, X. and Gao, C. and Zhang, H. and Cai, J. F.",
  TITLE     = "Auto-Parsing Network for Image Captioning and Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2177--2187",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183039"
}

@inproceedings{bb187757,
  AUTHOR    = "Banerjee, P. and Gokhale, T. and Yang, Y. Z. and Baral, C.",
  TITLE     = "Weakly Supervised Relative Spatial Reasoning for Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1888--1898",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183040"
}

@inproceedings{bb187758,
  AUTHOR    = "Matsumori, S. and Shingyouchi, K. and Abe, Y. and Fukuchi, Y. and Sugiura, K. and Imai, M.",
  TITLE     = "Unified Questioner Transformer for Descriptive Question Generation in Goal-Oriented Visual Dialogue",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1878--1887",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183041"
}

@inproceedings{bb187759,
  AUTHOR    = "Kim, N. and Ha, S. J. and Kang, J. W.",
  TITLE     = "Video Question Answering Using Language-Guided Deep Compressed-Domain Video Feature",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1688--1697",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183042"
}

@inproceedings{bb187760,
  AUTHOR    = "Liu, F. and Liu, J. and Wang, W. N. and Lu, H. Q.",
  TITLE     = "{HAIR}: Hierarchical Visual-Semantic Relational Reasoning for Video Question Answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1678--1687",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183043"
}

@inproceedings{bb187761,
  AUTHOR    = "Yang, A. and Miech, A. and Sivic, J. and Laptev, I. and Schmid, C.",
  TITLE     = "Just Ask: Learning to Answer Questions from Millions of Narrated Videos",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1666--1677",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183044"
}

@inproceedings{bb187762,
  AUTHOR    = "Gao, D. F. and Wang, R. P. and Bai, Z. and Chen, X. L.",
  TITLE     = "{Env-QA}: A Video Question Answering Benchmark for Comprehensive Understanding of Dynamic Environments",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1655--1665",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183045"
}

@inproceedings{bb187763,
  AUTHOR    = "Cao, Q. X. and Wan, W. T. and Wang, K. and Liang, X. D. and Lin, L.",
  TITLE     = "Linguistically Routing Capsule Network for Out-of-distribution Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1594--1603",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183046"
}

@inproceedings{bb187764,
  AUTHOR    = "Li, L. J. and Lei, J. and Gan, Z. and Liu, J. J.",
  TITLE     = "Adversarial {VQA}: A New Benchmark for Evaluating the Robustness of {VQA} Models",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2022--2031",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183047"
}

@inproceedings{bb187765,
  AUTHOR    = "Yun, H. and Yu, Y. and Yang, W. and Lee, K. and Kim, G.",
  TITLE     = "{Pano-AVQA}: Grounded Audio-Visual Question Answering on 360{$^\circ$} Videos",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2011--2021",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183048"
}

@inproceedings{bb187766,
  AUTHOR    = "Askarian, N. and Abbasnejad, E. and Zukerman, I. and Buntine, W. and Haffari, G.",
  TITLE     = "Inductive Biases for Low Data {VQA}: A Data Augmentation Approach",
  BOOKTITLE = Novelty22,
  YEAR      = "2022",
  PAGES     = "231--240",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183049"
}

@inproceedings{bb187767,
  AUTHOR    = "Mathew, M. and Bagal, V. and Tito, R. and Karatzas, D. and Valveny, E. and Jawahar, C. V.",
  TITLE     = "{InfographicVQA}",
  BOOKTITLE = WACV22,
  YEAR      = "2022",
  PAGES     = "2582--2591",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183050"
}

@inproceedings{bb187768,
  AUTHOR    = "Kumar, S. and Patro, B. N. and Namboodiri, V. P.",
  TITLE     = "Auto {QA}: The Question Is Not Only What, but Also Where",
  BOOKTITLE = Novelty22,
  YEAR      = "2022",
  PAGES     = "272--281",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183051"
}

@inproceedings{bb187769,
  AUTHOR    = "Gupta, V. and Patro, B. N. and Parihar, H. and Namboodiri, V. P.",
  TITLE     = "{VQuAD}: Video Question Answering Diagnostic Dataset",
  BOOKTITLE = Novelty22,
  YEAR      = "2022",
  PAGES     = "282--291",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183052"
}

@inproceedings{bb187770,
  AUTHOR    = "Kolling, C. and More, M. and Gavenski, N. and Pooch, E. and Parraga, O. and Barros, R. C.",
  TITLE     = "Efficient Counterfactual Debiasing for Visual Question Answering",
  BOOKTITLE = WACV22,
  YEAR      = "2022",
  PAGES     = "2572--2581",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183053"
}

@inproceedings{bb187771,
  AUTHOR    = "Jung, S. J. and Byun, J. and Shim, K. and Hwang, S. Y. and Kim, C.",
  TITLE     = "Understanding {VQA} for Negative Answers Through Visual and Linguistic Inference",
  BOOKTITLE = ICIP21,
  YEAR      = "2021",
  PAGES     = "2873--2877",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183054"
}

@inproceedings{bb187772,
  AUTHOR    = "Felix, R. and Repasky, B. and Hodge, S. and Zolfaghari, R. and Abbasnejad, E. and Sherrah, J.",
  TITLE     = "Cross-Modal Visual Question Answering for Remote Sensing Data: the {International Conference on Digital Image Computing: Techniques and Applications} ({DICTA} 2021)",
  BOOKTITLE = DICTA21,
  YEAR      = "2021",
  PAGES     = "1--9",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183055"
}

@inproceedings{bb187773,
  AUTHOR    = "Le, T. and Nguyen, H. T. and Nguyen, M. L.",
  TITLE     = "Vision and Text Transformer for Predicting Answerability on Visual Question Answering",
  BOOKTITLE = ICIP21,
  YEAR      = "2021",
  PAGES     = "934--938",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183056"
}

@inproceedings{bb187774,
  AUTHOR    = "Huang, Z. Q. and Zhu, H. Y. and Sun, Y. and Choi, D. and Tan, C. and Lim, J. H.",
  TITLE     = "A Diagnostic Study of Visual Question Answering With Analogical Reasoning",
  BOOKTITLE = ICIP21,
  YEAR      = "2021",
  PAGES     = "2463--2467",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183057"
}

@inproceedings{bb187775,
  AUTHOR    = "Chen, H. Y. and Liu, R. and Peng, B.",
  TITLE     = "Cross-modal Relational Reasoning Network for Visual Question Answering",
  BOOKTITLE = MAIR2-21,
  YEAR      = "2021",
  PAGES     = "3939--3948",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183058"
}

@inproceedings{bb187776,
  AUTHOR    = "Wang, Z. X. and Miao, Y. and Specia, L.",
  TITLE     = "Latent Variable Models for Visual Question Answering",
  BOOKTITLE = CLVL21,
  YEAR      = "2021",
  PAGES     = "3137--3141",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183059"
}

@inproceedings{bb187777,
  AUTHOR    = "Hirota, Y. and Garcia, N. and Otani, M. and Chu, C. and Nakashima, Y. and Taniguchi, I. and Onoye, T.",
  TITLE     = "Visual Question Answering with Textual Representations for Images",
  BOOKTITLE = CLVL21,
  YEAR      = "2021",
  PAGES     = "3147--3150",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183060"
}

@inproceedings{bb187778,
  AUTHOR    = "Nishimura, T. and Sakoda, K. and Hashimoto, A. and Ushiku, Y. and Tanaka, N. and Ono, F. and Kameko, H. and Mori, S.",
  TITLE     = "Egocentric Biochemical Video-and-Language Dataset",
  BOOKTITLE = CLVL21,
  YEAR      = "2021",
  PAGES     = "3122--3126",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183061"
}

@inproceedings{bb187779,
  AUTHOR    = "Ye, K. and Kovashka, A.",
  TITLE     = "Linguistic Structures as Weak Supervision for Visual Scene Graph Generation",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "8285--8295",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183062"
}

@inproceedings{bb187780,
  AUTHOR    = "Lei, J. and Li, L. J. and Zhou, L. and Gan, Z. and Berg, T. L. and Bansal, M. and Liu, J. J.",
  TITLE     = "Less is More: {CLIPBERT} for Video-and-Language Learning via Sparse Sampling",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "7327--7337",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183063"
}

@inproceedings{bb187781,
  AUTHOR    = "Yang, X. and Zhang, H. and Qi, G. J. and Cai, J. F.",
  TITLE     = "Causal Attention for Vision-Language Tasks",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "9842--9852",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183064"
}

@inproceedings{bb187782,
  AUTHOR    = "Tu, T. and Ping, Q. and Thattai, G. and Tur, G. and Natarajan, P.",
  TITLE     = "Learning Better Visual Dialog Agents with Pretrained Visual-Linguistic Representation",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "5618--5627",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183065"
}

@inproceedings{bb187783,
  AUTHOR    = "Xiao, J. B. and Shang, X. and Yao, A. and Chua, T. S.",
  TITLE     = "{NExT-QA}: Next Phase of Question-Answering to Explaining Temporal Actions",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "9772--9781",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183066"
}

@inproceedings{bb187784,
  AUTHOR    = "Xu, L. and Huang, H. and Liu, J.",
  TITLE     = "{SUTD-TrafficQA}: A Question Answering Benchmark and an Efficient Network for Video Reasoning over Traffic Events",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "9873--9883",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183067"
}

@inproceedings{bb187785,
  AUTHOR    = "Chen, X. Y. and Jiang, M. and Zhao, Q.",
  TITLE     = "Predicting Human Scanpaths in Visual Question Answering",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "10871--10880",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183068"
}

@inproceedings{bb187786,
  AUTHOR    = "Qi, Y. G. and Zhang, K. and Sain, A. and Song, Y. Z.",
  TITLE     = "{PQA}: Perceptual Question Answering",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "12051--12059",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183069"
}

@inproceedings{bb187787,
  AUTHOR    = "Park, J. and Lee, J. Y. and Sohn, K. H.",
  TITLE     = "Bridge to Answer: Structure-aware Graph Interaction Network for Video Question Answering",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "15521--15530",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183070"
}

@inproceedings{bb187788,
  AUTHOR    = "Yuan, Y. Y. and Wang, S. and Jiang, M. Y. and Chen, T. Y.",
  TITLE     = "Perception Matters: Detecting Perception Failures of {VQA} Models Using Metamorphic Testing",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "16903--16912",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183071"
}

@inproceedings{bb187789,
  AUTHOR    = "Marino, K. and Chen, X. L. and Parikh, D. and Gupta, A. and Rohrbach, M.",
  TITLE     = "{KRISP}: Integrating Implicit and Symbolic Knowledge for Open-Domain Knowledge-Based {VQA}",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "14106--14116",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183072"
}

@inproceedings{bb187790,
  AUTHOR    = "Niu, Y. and Tang, K. and Zhang, H. and Lu, Z. W. and Hua, X. S. and Wen, J. R.",
  TITLE     = "Counterfactual {VQA}: A Cause-Effect Look at Language Bias",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "12695--12705",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183073"
}

@inproceedings{bb187791,
  AUTHOR    = "Yang, Z. Y. and Lu, Y. and Wang, J. F. and Yin, X. and Florencio, D. and Wang, L. and Zhang, C. and Zhang, L. and Luo, J. B.",
  TITLE     = "{TAP}: Text-Aware Pre-training for {Text-VQA} and Text-Caption",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "8747--8757",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183074"
}

@inproceedings{bb187792,
  AUTHOR    = "Zhang, M. and Maidment, T. and Diab, A. and Kovashka, A. and Hwa, R.",
  TITLE     = "Domain-robust {VQA} with diverse datasets and methods but no target labels",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "7042--7052",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183075"
}

@inproceedings{bb187793,
  AUTHOR    = "Kervadec, C. and Jaunet, T. and Antipov, G. and Baccouche, M. and Vuillemot, R. and Wolf, C.",
  TITLE     = "How Transferable are Reasoning Patterns in {VQA}?",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "4205--4214",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183076"
}

@inproceedings{bb187794,
  AUTHOR    = "Kervadec, C. and Antipov, G. and Baccouche, M. and Wolf, C.",
  TITLE     = "Roses are Red, Violets are Blue{\ldots} But Should {VQA} expect Them To?",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "2775--2784",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183077"
}

@inproceedings{bb187795,
  AUTHOR    = "Cho, J. W. and Kim, D. J. and Choi, J. and Jung, Y. and Kweon, I. S.",
  TITLE     = "Dealing with Missing Modalities in the Visual Question Answer-Difference Prediction Task through Knowledge Distillation",
  BOOKTITLE = MULA21,
  YEAR      = "2021",
  PAGES     = "1592--1601",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183078"
}

@inproceedings{bb187796,
  AUTHOR    = "Dua, R. and Kancheti, S. S. and Balasubramanian, V. N.",
  TITLE     = "Beyond {VQA}: Generating Multi-word Answers and Rationales to Visual Questions",
  BOOKTITLE = MULA21,
  YEAR      = "2021",
  PAGES     = "1623--1632",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183079"
}

@inproceedings{bb187797,
  AUTHOR    = "Rahman, T. and Chou, S. H. and Sigal, L. and Carenini, G.",
  TITLE     = "An Improved Attention for Visual Question Answering",
  BOOKTITLE = MULA21,
  YEAR      = "2021",
  PAGES     = "1653--1662",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183080"
}

@inproceedings{bb187798,
  AUTHOR    = "Mathew, M. and Karatzas, D. and Jawahar, C. V.",
  TITLE     = "{DocVQA}: A Dataset for {VQA} on Document Images",
  BOOKTITLE = WACV21,
  YEAR      = "2021",
  PAGES     = "2199--2208",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183081"
}

@inproceedings{bb187799,
  AUTHOR    = "Chen, X. W. and Liu, R. and Song, X. M. and Han, Y. H.",
  TITLE     = "Locating Visual Explanations for Video Question Answering",
  BOOKTITLE = MMMod21,
  YEAR      = "2021",
  PAGES     = "I:290--302",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT183082"
}