@article{bb241100,
AUTHOR = "Wu, Q. and Shen, C. H. and Wang, P. and Dick, A. and van den Hengel, A. J.",
TITLE = "Image Captioning and Visual Question Answering Based on Attributes
and External Knowledge",
JOURNAL = PAMI,
VOLUME = "40",
YEAR = "2018",
NUMBER = "6",
MONTH = jun,
PAGES = "1367--1381",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236017"}
@inproceedings{bb241101,
AUTHOR = "Wu, Q. and Wang, P. and Shen, C. H. and Dick, A. and van den Hengel, A. J.",
TITLE = "Ask Me Anything: Free-Form Visual Question Answering Based on
Knowledge from External Sources",
BOOKTITLE = CVPR16,
YEAR = "2016",
PAGES = "4622--4630",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236018"}
@article{bb241102,
AUTHOR = "Tommasi, T. and Mallya, A. and Plummer, B. A. and Lazebnik, S. and Berg, A. C. and Berg, T. L.",
TITLE = "Combining Multiple Cues for {Visual Madlibs} Question Answering",
JOURNAL = IJCV,
VOLUME = "127",
YEAR = "2019",
NUMBER = "1",
MONTH = jan,
PAGES = "38--60",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236019"}
% PAGES removed: the source carried the placeholder "xx-yy", not a real page
% range. Add the real pages once known.
@inproceedings{bb241103,
AUTHOR = "Tommasi, T. and Mallya, A. and Plummer, B. A. and Lazebnik, S. and Berg, A. C. and Berg, T. L.",
TITLE = "Solving {Visual Madlibs} with Multiple Cues",
BOOKTITLE = BMVC16,
YEAR = "2016",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236020"}
@inproceedings{bb241104,
AUTHOR = "Yu, L. C. and Park, E. and Berg, A. C. and Berg, T. L.",
TITLE = "{Visual Madlibs}:
Fill in the Blank Description Generation and Question Answering",
BOOKTITLE = ICCV15,
YEAR = "2015",
PAGES = "2461--2469",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236021"}
@article{bb241105,
AUTHOR = "Liu, F. and Xiang, T. and Hospedales, T. M. and Yang, W. K. and Sun, C. Y.",
TITLE = "Inverse Visual Question Answering:
A New Benchmark and {VQA} Diagnosis Tool",
JOURNAL = PAMI,
VOLUME = "42",
YEAR = "2020",
NUMBER = "2",
MONTH = feb,
PAGES = "460--474",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236022"}
@inproceedings{bb241106,
AUTHOR = "Liu, F. and Xiang, T. and Hospedales, T. M. and Yang, W. K. and Sun, C. Y.",
TITLE = "{iVQA}: Inverse Visual Question Answering",
BOOKTITLE = CVPR18,
YEAR = "2018",
PAGES = "8611--8619",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236023"}
% PAGES removed: the source carried the placeholder "xx-yy", not a real page
% range. Add the real pages (or article number) once known.
@article{bb241107,
AUTHOR = "Patil, C. and Patwardhan, M.",
TITLE = "Visual Question Generation: The State of the Art",
JOURNAL = Surveys,
VOLUME = "53",
YEAR = "2020",
NUMBER = "3",
MONTH = may,
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236024"}
% PAGES holds a single number, not a range (presumably an article number;
% confirm against the publisher record).
@article{bb241108,
AUTHOR = "He, F. J. and Wang, Y. X. and Miao, X. L. and Sun, X.",
TITLE = "Interpretable visual reasoning: A survey",
JOURNAL = IVC,
VOLUME = "112",
YEAR = "2021",
PAGES = "104194",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236025"}
% PAGES holds a single number, not a range (presumably an article number;
% confirm against the publisher record).
@article{bb241109,
AUTHOR = "Sharma, H. and Jalal, A. S.",
TITLE = "A survey of methods, datasets and evaluation metrics for visual
question answering",
JOURNAL = IVC,
VOLUME = "116",
YEAR = "2021",
PAGES = "104327",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236026"}
@article{bb241110,
AUTHOR = "Yang, L. and Jiang, H. and Song, Q. and Guo, J.",
TITLE = "A Survey on Long-Tailed Visual Recognition",
JOURNAL = IJCV,
VOLUME = "130",
YEAR = "2022",
NUMBER = "7",
MONTH = jul,
PAGES = "1837--1872",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236027"}
@article{bb241111,
AUTHOR = "Zhao, W. L. and Rao, Y. M. and Tang, Y. S. and Zhou, J. and Lu, J. W.",
TITLE = "{VideoABC}: A Real-World Video Dataset for Abductive Visual Reasoning",
JOURNAL = IP,
VOLUME = "31",
YEAR = "2022",
PAGES = "6048--6061",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236028"}
@article{bb241112,
AUTHOR = "Lahouti, F. and Kostina, V. and Hassibi, B.",
TITLE = "How to Query an Oracle? Efficient Strategies to Label Data",
JOURNAL = PAMI,
VOLUME = "44",
YEAR = "2022",
NUMBER = "11",
MONTH = nov,
PAGES = "7597--7609",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236029"}
@article{bb241113,
AUTHOR = "Ma, J. and Wang, P. H. and Kong, D. C. and Wang, Z. W. and Liu, J. and Pei, H. B. and Zhao, J. Z.",
TITLE = "Robust Visual Question Answering: Datasets, Methods, and Future
Challenges",
JOURNAL = PAMI,
VOLUME = "46",
YEAR = "2024",
NUMBER = "8",
MONTH = aug,
PAGES = "5575--5594",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236030"}
@article{bb241114,
AUTHOR = "Li, K. and Vosselman, G. and Yang, M. Y.",
TITLE = "{HRVQA}: A Visual Question Answering benchmark for high-resolution
aerial images",
JOURNAL = PandRS,
VOLUME = "214",
YEAR = "2024",
PAGES = "65--81",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236031"}
% PAGES is written as "<roman numeral>: <range>"; the numeral is presumably
% the proceedings volume (TODO confirm).
@inproceedings{bb241115,
AUTHOR = "Chen, C. Y. and Liu, M. C. and Codella, N. and Li, Y. S. and Yuan, L. and Gurari, D.",
TITLE = "Fully Authentic Visual Question Answering Dataset from Online
Communities",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XLVIII: 252--269",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236032"}
@inproceedings{bb241116,
AUTHOR = "Singh, M. and Patvardhan, C. and Lakshmi, C. V.",
TITLE = "Does {ChatGPT} Spell the End of Automatic Question Generation Research?",
BOOKTITLE = ICCVMI23,
YEAR = "2023",
PAGES = "1--6",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236033"}
% PAGES is written as "<roman numeral>: <range>"; the numeral is presumably
% the proceedings volume (TODO confirm).
@inproceedings{bb241117,
AUTHOR = "Zhu, L. and Ning, R. and Li, J. and Xin, C. S. and Wu, H. Y.",
TITLE = "Most and Least Retrievable Images in Visual-Language Query Systems",
BOOKTITLE = ECCV22,
YEAR = "2022",
PAGES = "XXXVII: 1--18",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236034"}
@inproceedings{bb241118,
AUTHOR = "Salewski, L. and Emde, C. and Do, V. and Akata, Z. and Lukasiewicz, T.",
TITLE = "{e-ViL}: A Dataset and Benchmark for Natural Language Explanations in
Vision-Language Tasks",
BOOKTITLE = ICCV21,
YEAR = "2021",
PAGES = "1224--1234",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236035"}
@inproceedings{bb241119,
AUTHOR = "Gupta, V. and Patro, B. N. and Parihar, H. and Namboodiri, V. P.",
TITLE = "{VQuAD}: Video Question Answering Diagnostic Dataset",
BOOKTITLE = Novelty22,
YEAR = "2022",
PAGES = "282--291",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236036"}
@inproceedings{bb241120,
AUTHOR = "Nishimura, T. and Sakoda, K. and Hashimoto, A. and Ushiku, Y. and Tanaka, N. and Ono, F. and Kameko, H. and Mori, S.",
TITLE = "Egocentric Biochemical Video-and-Language Dataset",
BOOKTITLE = CLVL21,
YEAR = "2021",
PAGES = "3122--3126",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236037"}
@inproceedings{bb241121,
AUTHOR = "Zhang, M. and Maidment, T. and Diab, A. and Kovashka, A. and Hwa, R.",
TITLE = "Domain-robust {VQA} with diverse datasets and methods but no target
labels",
BOOKTITLE = CVPR21,
YEAR = "2021",
PAGES = "7042--7052",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236038"}
@inproceedings{bb241122,
AUTHOR = "Mathew, M. and Karatzas, D. and Jawahar, C. V.",
TITLE = "{DocVQA}: A Dataset for {VQA} on Document Images",
BOOKTITLE = WACV21,
YEAR = "2021",
PAGES = "2199--2208",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236039"}
@inproceedings{bb241123,
AUTHOR = "Patel, D. and Parikh, R. and Shastri, Y.",
TITLE = "Recent Advances in Video Question Answering:
A Review of Datasets and Methods",
BOOKTITLE = VTIUR20,
YEAR = "2020",
PAGES = "339--356",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236040"}
@inproceedings{bb241124,
AUTHOR = "Fan, C.",
TITLE = "{EgoVQA}: An Egocentric Video Question Answering Benchmark Dataset",
BOOKTITLE = EPIC19,
YEAR = "2019",
PAGES = "4359--4366",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236041"}
@inproceedings{bb241125,
AUTHOR = "Hudson, D. A. and Manning, C. D.",
TITLE = "{GQA}: A New Dataset for Real-World Visual Reasoning and Compositional
Question Answering",
BOOKTITLE = CVPR19,
YEAR = "2019",
PAGES = "6693--6702",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236042"}
% PAGES is written as "<roman numeral>: <range>"; the numeral is presumably
% the proceedings volume (TODO confirm).
@inproceedings{bb241126,
AUTHOR = "Yang, G. Y. R. and Ganichev, I. and Wang, X. J. and Shlens, J. and Sussillo, D.",
TITLE = "A Dataset and Architecture for Visual Reasoning with a Working Memory",
BOOKTITLE = ECCV18,
YEAR = "2018",
PAGES = "X: 729--745",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236043"}
@inproceedings{bb241127,
AUTHOR = "Gan, C. and Li, Y. and Li, H. and Sun, C. and Gong, B.",
TITLE = "{VQS}: Linking Segmentations to Questions and Answers for Supervised
Attention in {VQA} and Question-Focused Semantic Segmentation",
BOOKTITLE = ICCV17,
YEAR = "2017",
PAGES = "1829--1838",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236044"}
@inproceedings{bb241128,
AUTHOR = "Maharaj, T. and Ballas, N. and Rohrbach, A. and Courville, A. and Pal, C.",
TITLE = "A Dataset and Exploration of Models for Understanding Video Data
through Fill-in-the-Blank Question-Answering",
BOOKTITLE = CVPR17,
YEAR = "2017",
PAGES = "7359--7368",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT236045"}
@article{bb241129,
AUTHOR = "Das, A. and Kottur, S. and Gupta, K. and Singh, A. and Yadav, D. and Lee, S. and Moura, J. M. F. and Parikh, D. and Batra, D.",
TITLE = "Visual Dialog",
JOURNAL = PAMI,
VOLUME = "41",
YEAR = "2019",
NUMBER = "5",
MONTH = may,
PAGES = "1242--1256",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236046"}
@article{bb241130,
AUTHOR = "Zhao, Z. and Zhang, Z. and Jiang, X. H. and Cai, D.",
TITLE = "Multi-Turn Video Question Answering via Hierarchical Attention
Context Reinforced Networks",
JOURNAL = IP,
VOLUME = "28",
YEAR = "2019",
NUMBER = "8",
MONTH = aug,
PAGES = "3860--3872",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236047"}
@article{bb241131,
AUTHOR = "Gu, M. and Zhao, Z. and Jin, W. and Cai, D. and Wu, F.",
TITLE = "Video Dialog via Multi-Grained Convolutional Self-Attention Context
Multi-Modal Networks",
JOURNAL = CirSysVideo,
VOLUME = "30",
YEAR = "2020",
NUMBER = "12",
MONTH = dec,
PAGES = "4453--4466",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236048"}
@article{bb241132,
AUTHOR = "Guo, D. and Wang, H. and Wang, S. and Wang, M.",
TITLE = "Textual-Visual Reference-Aware Attention Network for Visual Dialog",
JOURNAL = IP,
VOLUME = "29",
YEAR = "2020",
PAGES = "6655--6666",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236049"}
% PAGES holds a single number, not a range (presumably an article number;
% confirm against the publisher record).
@article{bb241133,
AUTHOR = "Patro, B. N. and Anupriy and Namboodiri, V. P.",
TITLE = "Probabilistic framework for solving visual dialog",
JOURNAL = PR,
VOLUME = "110",
YEAR = "2021",
PAGES = "107586",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236050"}
% PAGES holds a single number, not a range (presumably an article number;
% confirm against the publisher record).
@article{bb241134,
AUTHOR = "Zhao, L. and Lyu, X. Y. and Song, J. K. and Gao, L. L.",
TITLE = "{GuessWhich}? Visual dialog with attentive memory network",
JOURNAL = PR,
VOLUME = "114",
YEAR = "2021",
PAGES = "107823",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236051"}
% PAGES holds a single number, not a range (presumably an article number;
% confirm against the publisher record).
@article{bb241135,
AUTHOR = "Jiang, T. L. and Shao, H. L. and Tian, X. and Ji, Y. and Liu, C. P.",
TITLE = "Aligning vision-language for graph inference in visual dialog",
JOURNAL = IVC,
VOLUME = "116",
YEAR = "2021",
PAGES = "104316",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236052"}
@article{bb241136,
AUTHOR = "Guo, D. and Wang, H. and Wang, M.",
TITLE = "Context-Aware Graph Inference With Knowledge Distillation for Visual
Dialog",
JOURNAL = PAMI,
VOLUME = "44",
YEAR = "2022",
NUMBER = "10",
MONTH = oct,
PAGES = "6056--6073",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236053"}
@inproceedings{bb241137,
AUTHOR = "Guo, D. and Wang, H. and Zhang, H. W. and Zha, Z. J. and Wang, M.",
TITLE = "Iterative Context-Aware Graph Inference for Visual Dialog",
BOOKTITLE = CVPR20,
YEAR = "2020",
PAGES = "10052--10061",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236054"}
% PAGES holds a single number, not a range (presumably an article number;
% confirm against the publisher record).
@article{bb241138,
AUTHOR = "Patro, B. N. and Anupriy and Namboodiri, V. P.",
TITLE = "Explanation vs. attention: A two-player game to obtain attention for
{VQA} and visual dialog",
JOURNAL = PR,
VOLUME = "132",
YEAR = "2022",
PAGES = "108898",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236055"}
@article{bb241139,
AUTHOR = "Zhu, Y. and Wu, Y. and Yang, Y. and Yan, Y.",
TITLE = "Saying the Unseen: Video Descriptions via Dialog Agents",
JOURNAL = PAMI,
VOLUME = "44",
YEAR = "2022",
NUMBER = "10",
MONTH = oct,
PAGES = "7190--7204",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236056"}
@article{bb241140,
AUTHOR = "Huang, Y. and Wang, Y. M. and Wang, L.",
TITLE = "Efficient Image and Sentence Matching",
JOURNAL = PAMI,
VOLUME = "45",
YEAR = "2023",
NUMBER = "3",
MONTH = mar,
PAGES = "2970--2983",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236057"}
@article{bb241141,
AUTHOR = "Zhao, L. and Li, J. L. and Gao, L. L. and Rao, Y. and Song, J. K. and Shen, H. T.",
TITLE = "Heterogeneous Knowledge Network for Visual Dialog",
JOURNAL = CirSysVideo,
VOLUME = "33",
YEAR = "2023",
NUMBER = "2",
MONTH = feb,
PAGES = "861--871",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236058"}
@article{bb241142,
AUTHOR = "Bucinca, Z. and Yemez, Y. and Erzin, E. and Sezgin, M.",
TITLE = "{AffectON}: Incorporating Affect Into Dialog Generation",
JOURNAL = AffCom,
VOLUME = "14",
YEAR = "2023",
NUMBER = "1",
MONTH = jan,
PAGES = "823--835",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236059"}
% Title typo fixed: "a asyntactic" -> "a syntactic"; verify against the
% publisher record.
@article{bb241143,
AUTHOR = "Yu, H. and Ko, Y. J.",
TITLE = "Enriching the dialogue state tracking model with a syntactic
discourse graph",
JOURNAL = PRL,
VOLUME = "169",
YEAR = "2023",
PAGES = "81--86",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236060"}
@article{bb241144,
AUTHOR = "Wu, Y. X. and Liao, L. and Zhang, G. Y. and Lei, W. Q. and Zhao, G. S. and Qian, X. M. and Chua, T. S.",
TITLE = "State Graph Reasoning for Multimodal Conversational Recommendation",
JOURNAL = MultMed,
VOLUME = "25",
YEAR = "2023",
PAGES = "3113--3124",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236061"}
@article{bb241145,
AUTHOR = "Firdaus, M. and Thangavelu, N. and Ekbal, A. and Bhattacharyya, P.",
TITLE = "I Enjoy Writing and Playing, Do You?: A Personalized and Emotion
Grounded Dialogue Agent Using Generative Adversarial Network",
JOURNAL = AffCom,
VOLUME = "14",
YEAR = "2023",
NUMBER = "3",
MONTH = jul,
PAGES = "2127--2138",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236062"}
% PAGES holds a single number, not a range (presumably an article number;
% confirm against the publisher record).
@article{bb241146,
AUTHOR = "Zhang, Z. and Li, S. and Ji, Y. and Liu, C. P.",
TITLE = "Infer unseen from seen: Relation regularized zero-shot visual dialog",
JOURNAL = JVCIR,
VOLUME = "97",
YEAR = "2023",
PAGES = "103961",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236063"}
@article{bb241147,
AUTHOR = "Qi, Q. S. and Zhang, A. and Liao, Y. and Sun, W. Y. and Wang, Y. L. and Li, X. B. and Liu, S.",
TITLE = "Simultaneously Training and Compressing Vision-and-Language
Pre-Training Model",
JOURNAL = MultMed,
VOLUME = "25",
YEAR = "2023",
PAGES = "8194--8203",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236064"}
@article{bb241148,
AUTHOR = "Liu, A. A. and Huang, C. X. and Xu, N. and Tian, H. S. and Liu, J. and Zhang, Y. D.",
TITLE = "Counterfactual Visual Dialog: Robust Commonsense Knowledge Learning
From Unbiased Training",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "1639--1651",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236065"}
% PAGES holds a single number, not a range (presumably an article number;
% confirm against the publisher record).
@article{bb241149,
AUTHOR = "Ricci, R. and Bazi, Y. and Melgani, F.",
TITLE = "Machine-to-Machine Visual Dialoguing with {ChatGPT} for Enriched
Textual Image Description",
JOURNAL = RS,
VOLUME = "16",
YEAR = "2024",
NUMBER = "3",
PAGES = "441",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236066"}
% The ampersand in the title is escaped: a bare ampersand is a LaTeX
% alignment character and breaks typesetting of the bibliography.
@article{bb241150,
AUTHOR = "Bulat, A. and Tzimiropoulos, G.",
TITLE = "Language-Aware Soft Prompting: Text-to-Text Optimization for Few- and
Zero-Shot Adaptation of {V\&L} Models",
JOURNAL = IJCV,
VOLUME = "132",
YEAR = "2024",
NUMBER = "4",
MONTH = apr,
PAGES = "1108--1125",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236067"}
@inproceedings{bb241151,
AUTHOR = "Bulat, A. and Tzimiropoulos, G.",
TITLE = "{LASP}: Text-to-Text Optimization for Language-Aware Soft Prompting of
Vision and Language Models",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "23232--23241",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236068"}
@article{bb241152,
AUTHOR = "Wang, A. J. P. and Zhou, P. and Shou, M. Z. and Yan, S. C.",
TITLE = "Enhancing Visual Grounding in Vision-Language Pre-Training With
Position-Guided Text Prompts",
JOURNAL = PAMI,
VOLUME = "46",
YEAR = "2024",
NUMBER = "5",
MONTH = may,
PAGES = "3406--3421",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236069"}
@inproceedings{bb241153,
AUTHOR = "Wang, A. J. P. and Zhou, P. and Shou, M. Z. and Yan, S. C.",
TITLE = "Position-Guided Text Prompt for Vision-Language Pre-Training",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "23242--23251",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236070"}
@article{bb241154,
AUTHOR = "Du, S. S. and Wang, H. and Li, T. P. and Chen, C. W.",
TITLE = "Hybrid Graph Reasoning With Dynamic Interaction for Visual Dialog",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "9095--9108",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236071"}
% PAGES holds a single number, not a range (presumably an article number;
% confirm against the publisher record).
@article{bb241155,
AUTHOR = "Sun, J. T. and Kou, J. Y. and Hou, W. and Bai, Y.",
TITLE = "A multi-agent curiosity reward model for task-oriented dialogue
systems",
JOURNAL = PR,
VOLUME = "157",
YEAR = "2025",
PAGES = "110884",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236072"}
@article{bb241156,
AUTHOR = "Kane, B. and Giugno, C. and Schubert, L. and Haut, K. and Wohn, C. and Hoque, E.",
TITLE = "Managing Emotional Dialogue for a Virtual Cancer Patient:
A Schema-Guided Approach",
JOURNAL = AffCom,
VOLUME = "15",
YEAR = "2024",
NUMBER = "3",
MONTH = jul,
PAGES = "1041--1052",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236073"}
@article{bb241157,
AUTHOR = "Xie, J. Y. and Chen, J. L. and Liu, Z. H. and Cai, Y. and Huang, Q. and Li, Q.",
TITLE = "Video Question Generation for Dynamic Changes",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "9",
MONTH = sep,
PAGES = "8710--8721",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236074"}
@article{bb241158,
AUTHOR = "Liu, Y. T. and Li, L. and Tu, Y. and Zhang, B. C. and Zha, Z. J. and Huang, Q. M.",
TITLE = "Dynamic Strategy Prompt Reasoning for Emotional Support Conversation",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "108--119",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236075"}
@article{bb241159,
AUTHOR = "Janssens, R. and Wolfert, P. and Demeester, T. and Belpaeme, T.",
TITLE = "Integrating Visual Context Into Language Models for Situated Social
Conversation Starters",
JOURNAL = AffCom,
VOLUME = "16",
YEAR = "2025",
NUMBER = "1",
MONTH = jan,
PAGES = "223--236",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236076"}
@article{bb241160,
AUTHOR = "Ju, X. C. and Zhang, D. and Li, J. H. and Li, S. and Zhou, G. D.",
TITLE = "Enhanced Generative Framework With {LLMs} for Multimodal Emotion-Cause
Pair Extraction in Conversations",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "4924--4935",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236077"}
% PAGES holds a single number, not a range (presumably an article number;
% confirm against the publisher record).
@article{bb241161,
AUTHOR = "Liao, N. and Shi, B. and Zhang, X. P. and Cao, M. and Yan, J. C. and Tian, Q.",
TITLE = "Rethinking visual prompt learning as masked visual token modeling",
JOURNAL = AI,
VOLUME = "348",
YEAR = "2025",
PAGES = "104417",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236078"}
@article{bb241162,
AUTHOR = "Hu, Z. and Deng, J. and Nakagawa, S. and Zhuang, Y. and Zhang, X. Y. and Cai, S. and Ren, F.",
TITLE = "Enhanced Emotion Recognition in Conversations Through Hybrid Context
Encoding and Latent Dependency Mining",
JOURNAL = AffCom,
VOLUME = "16",
YEAR = "2025",
NUMBER = "4",
MONTH = oct,
PAGES = "3329--3341",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236079"}
@inproceedings{bb241163,
AUTHOR = "Nedunuri, U. and Hamelin, N. and Gupta, A. D. and Guha, D.",
TITLE = "Exploring Emotional Engagement with Responsible {AI} Constructs:
A Video-Based Cognitive Experiment",
BOOKTITLE = ICIVC25,
YEAR = "2025",
PAGES = "597--604",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236080"}
% The leading title token was flattened to "V2 Dial" in the source; it is
% restored here with a superscript 2. Verify against the proceedings.
@inproceedings{bb241164,
AUTHOR = "Abdessaied, A. and Rohrbach, A. and Rohrbach, M. and Bulling, A.",
TITLE = "{V$^2$Dial}: Unification of Video and Visual Dialog via Multimodal
Experts",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "8637--8647",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236081"}
@inproceedings{bb241165,
AUTHOR = "Lin, J. and Feng, Y. and Liu, W. and Black, M. J.",
TITLE = "{ChatHuman}: Chatting about {3D} Humans with Tools",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "8150--8161",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236082"}
@inproceedings{bb241166,
AUTHOR = "Bai, Y. and Ji, Y. C. and Cao, M. and Wang, J. Q. and Ye, M.",
TITLE = "Chat-based Person Retrieval via Dialogue-Refined Cross-Modal
Alignment",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "3952--3962",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236083"}
@inproceedings{bb241167,
AUTHOR = "Chou, C. and Dunlap, L. and Mashita, K. and Mandal, K. and Darrell, T. J. and Stoica, I. and Gonzalez, J. E. and Chiang, W. L.",
TITLE = "{VisionArena}: {230K} Real World {User-VLM} Conversations with Preference
Labels",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "3877--3887",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236084"}
@inproceedings{bb241168,
AUTHOR = "Soni, S. and Dudhane, A. and Debary, H. and Fiaz, M. and Munir, M. A. and Danish, M. S. and Fraccaro, P. and Watson, C. D. and Klein, L. J. and Khan, F. S. and Khan, S.",
TITLE = "{EarthDial}: Turning Multi-sensory {Earth} Observations to Interactive
Dialogues",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "14303--14313",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236085"}
@inproceedings{bb241169,
AUTHOR = "Wang, H. Y. and Wang, L. and Zhou, S. P. and Tian, J. Y. and Qin, Z. and Wang, Y. B. and Hua, G. and Tang, W.",
TITLE = "Towards Precise Embodied Dialogue Localization via Causality Guided
Diffusion",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "13350--13360",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236086"}
% PAGES is written as "<roman numeral>: <range>"; the numeral is presumably
% the proceedings volume (TODO confirm).
% The method name is written as MotionChain (source had "Motionchain");
% verify the capitalisation against the proceedings.
@inproceedings{bb241170,
AUTHOR = "Jiang, B. and Chen, X. and Zhang, C. and Yin, F. and Li, Z. Y. and Yu, G. and Fan, J. Y.",
TITLE = "{MotionChain}: Conversational Motion Controllers via Multimodal Prompts",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XXVI: 54--74",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236087"}
% PAGES is written as "<roman numeral>: <range>"; the numeral is presumably
% the proceedings volume (TODO confirm).
@inproceedings{bb241171,
AUTHOR = "Haydarov, K. and Shen, X. Q. and Madasu, A. and Salem, M. and Li, L. J. and Elsayed, G. and Elhoseiny, M.",
TITLE = "Affective Visual Dialog: A Large-scale Benchmark for Emotional
Reasoning Based on Visually Grounded Conversations",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LXXV: 18--36",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236088"}
% PAGES is written as "<roman numeral>: <range>"; the numeral is presumably
% the proceedings volume (TODO confirm).
@inproceedings{bb241172,
AUTHOR = "Abdessaied, A. and Shi, L. and Bulling, A.",
TITLE = "Multi-modal Video Dialog State Tracking in the Wild",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LVII: 348--365",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236089"}
% PAGES is written as "<roman numeral>: <range>"; the numeral is presumably
% the proceedings volume (TODO confirm).
@inproceedings{bb241173,
AUTHOR = "Yoon, H. S. and Yoon, E. and Tee, J. T. J. and Zhang, K. and Heo, Y. J. and Chang, D. S. and Yoo, C. D.",
TITLE = "{BI-MDRG}: Bridging Image History in Multimodal Dialogue Response
Generation",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XXXI: 378--396",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236090"}
@inproceedings{bb241174,
AUTHOR = "He, Q. Q. and Zhang, J. and Qian, S. W. and Wang, C. J.",
TITLE = "Some Can Be Better than All:
Multimodal Star Transformer for Visual Dialog",
BOOKTITLE = ICIP24,
YEAR = "2024",
PAGES = "2022--2026",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236091"}
@inproceedings{bb241175,
AUTHOR = "Chen, Y. and Sikka, K. and Cogswell, M. and Ji, H. and Divakaran, A.",
TITLE = "{DRESS}: Instructing Large Vision-Language Models to Align and
Interact with Humans via Natural Language Feedback",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14239--14250",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236092"}
@inproceedings{bb241176,
AUTHOR = "Abdessaied, A. and Shi, L. and Bulling, A.",
TITLE = "{VD-GR}: Boosting Visual Dialog with Cascaded Spatial-Temporal
Multi-Modal {GRaphs}",
BOOKTITLE = WACV24,
YEAR = "2024",
PAGES = "5793--5802",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236093"}
@inproceedings{bb241177,
AUTHOR = "Han, S. J. and Hessel, J. and Dziri, N. and Choi, Y. and Yu, Y. J.",
TITLE = "Champagne: Learning Real-world Conversation from Large-Scale Web
Videos",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "15452--15463",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236094"}
@inproceedings{bb241178,
AUTHOR = "Oshima, R. and Shinagawa, S. and Tsunashima, H. and Feng, Q. and Morishima, S.",
TITLE = "Pointing out Human Answer Mistakes in a Goal-Oriented Visual Dialogue",
BOOKTITLE = VLAR23,
YEAR = "2023",
PAGES = "4665--4670",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236095"}
@inproceedings{bb241179,
AUTHOR = "Ishii, T. and Miura, J. and Hayashi, K.",
TITLE = "Enhancing Human-Robot Collaborative Object Search through Human
Behavior Observation and Dialog",
BOOKTITLE = ACVR23,
YEAR = "2023",
PAGES = "1841--1848",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236096"}
@inproceedings{bb241180,
AUTHOR = "Madasu, A. and Lal, V.",
TITLE = "Is Multimodal Vision Supervision Beneficial to Language?",
BOOKTITLE = NFVLR23,
YEAR = "2023",
PAGES = "2637--2642",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236097"}
@inproceedings{bb241181,
AUTHOR = "Ashutosh, K. and Girdhar, R. and Torresani, L. and Grauman, K.",
TITLE = "{HierVL}: Learning Hierarchical Video-Language Embeddings",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "23066--23078",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236098"}
% A trailing "*" (apparently a footnote marker carried over from the paper's
% title line) was dropped from the title.
@inproceedings{bb241182,
AUTHOR = "Smith, J. S. and Cascante Bonilla, P. and Arbelle, A. and Kim, D. H. and Panda, R. and Cox, D. and Yang, D. and Kira, Z. and Feris, R. S. and Karlinsky, L.",
TITLE = "{ConStruct-VL}: Data-Free Continual Structured {VL} Concepts Learning",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "14994--15004",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236099"}
@inproceedings{bb241183,
AUTHOR = "Chen, Y. X. and Ma, Z. Y. and Zhang, Z. Q. and Qi, Z. A. and Yuan, C. F. and Shan, Y. and Li, B. and Hu, W. M. and Qie, X. and Wu, J. P.",
TITLE = "{ViLEM}: Visual-Language Error Modeling for Image-Text Retrieval",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "11018--11027",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236100"}
@inproceedings{bb241184,
AUTHOR = "Huang, J. J. and Li, Y. and Feng, J. S. and Wu, X. L. and Sun, X. S. and Ji, R. R.",
TITLE = "Clover: Towards A Unified Video-Language Alignment and Fusion Model",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "14856--14866",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236101"}
@inproceedings{bb241185,
AUTHOR = "Li, C. H. and Li, Z. and Jing, C. C. and Jia, Y. D. and Wu, Y. W.",
TITLE = "Exploring the Effect of Primitives for Compositional Generalization
in Vision-and-Language",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "19092--19101",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236102"}
@inproceedings{bb241186,
AUTHOR = "Yao, H. T. and Zhang, R. and Xu, C. S.",
TITLE = "Visual-Language Prompt Tuning with Knowledge-Guided Context
Optimization",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "6757--6767",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236103"}
@inproceedings{bb241187,
AUTHOR = "Kwon, H. and Song, T. and Jeong, S. and Kim, J. and Jang, J. and Sohn, K. H.",
TITLE = "Probabilistic Prompt Learning for Dense Prediction",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "6768--6777",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236104"}
@inproceedings{bb241188,
AUTHOR = "Luo, H. C. and Zhai, W. and Zhang, J. and Cao, Y. and Tao, D. C.",
TITLE = "Leverage Interactive Affinity for Affordance Learning",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "6809--6819",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236105"}
@inproceedings{bb241189,
AUTHOR = "Bagad, P. and Tapaswi, M. and Snoek, C. G. M.",
TITLE = "Test of Time: Instilling Video-Language Models with a Sense of Time",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "2503--2516",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236106"}
@inproceedings{bb241190,
AUTHOR = "Kang, G. C. and Kim, S. and Kim, J. H. and Kwak, D. H. and Zhang, B. T.",
TITLE = "The Dialog Must Go On: Improving Visual Dialog via Generative
Self-Training",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "6746--6756",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236107"}
@inproceedings{bb241191,
AUTHOR = "Bannur, S. and Hyland, S. and Liu, Q. and Perez Garcia, F. and Ilse, M. and Castro, D. C. and Boecking, B. and Sharma, H. and Bouzid, K. and Thieme, A. and Schwaighofer, A. and Wetscherek, M. and Lungren, M. P. and Nori, A. and Alvarez Valle, J. and Oktay, O.",
TITLE = "Learning to Exploit Temporal Structure for Biomedical Vision-Language
Processing",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "15016--15027",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236108"}
@inproceedings{bb241192,
AUTHOR = "Srinivasan, T. and Ren, X. and Thomason, J.",
TITLE = "Curriculum Learning for Data-Efficient Vision-Language Alignment",
BOOKTITLE = ODRUM23,
YEAR = "2023",
PAGES = "5619--5624",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236109"}
@inproceedings{bb241193,
AUTHOR = "Ibing, M. and Lim, I. and Kobbelt, L.",
TITLE = "Localized Latent Updates for Fine-Tuning Vision-Language Models",
BOOKTITLE = ECV23,
YEAR = "2023",
PAGES = "4509--4518",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236110"}
@inproceedings{bb241194,
AUTHOR = "Zhou, Y. T. and Shimada, N.",
TITLE = "Vision + Language Applications: A Survey",
BOOKTITLE = GCV23,
YEAR = "2023",
PAGES = "826--842",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236111"}
@inproceedings{bb241195,
AUTHOR = "Parisot, S. and Yang, Y. X. and McDonagh, S.",
TITLE = "Learning to Name Classes for Vision and Language Models",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "23477--23486",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236112"}
@inproceedings{bb241196,
AUTHOR = "Kim, S. and Jo, D. and Lee, D. and Kim, J.",
TITLE = "{MAGVLT}: Masked Generative Vision-and-Language Transformer",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "23338--23348",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236113"}
@inproceedings{bb241197,
AUTHOR = "Ji, Y. and Wang, J. J. and Gong, Y. and Zhang, L. and Zhu, Y. and Wang, H. F. and Zhang, J. X. and Sakai, T. and Yang, Y.",
TITLE = "{MAP}: Multimodal Uncertainty-Aware Vision-Language Pre-training Model",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "23262--23271",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236114"}
@inproceedings{bb241198,
AUTHOR = "Zhang, X. and Wang, W. and Chen, Z. and Xu, Y. F. and Zhang, J. and Tao, D. C.",
TITLE = "{CLAMP}: Prompt-based Contrastive Learning for Connecting Language and
Animal Pose",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "23272--23281",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236115"}
@inproceedings{bb241199,
AUTHOR = "Wang, T. and Ge, Y. X. and Zheng, F. and Cheng, R. and Shan, Y. and Qie, X. and Luo, P.",
TITLE = "Accelerating Vision-Language Pretraining with Free Language Modeling",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "23161--23170",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236116"}
Last update: Apr 6, 2026 at 11:28:57