@inproceedings{bb235300,
  author    = {Renz, K. and Chen, L. and Arani, E. and Sinavski, O.},
  title     = {{SimLingo}: Vision-Only Closed-Loop Autonomous Driving with Language-Action Alignment},
  booktitle = CVPR25,
  pages     = {11993-12003},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230275}
}

@inproceedings{bb235301,
  author    = {Zhang, Z.Y. and Li, X.F. and Xu, Z.H. and Peng, W.J. and Zhou, Z.J. and Shi, M.J. and Huang, S.P.},
  title     = {{MPDrive}: Improving Spatial Understanding with Marker-Based Prompt Learning for Autonomous Driving},
  booktitle = CVPR25,
  pages     = {12089-12099},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230276}
}

@inproceedings{bb235302,
  author    = {Xu, Z.H. and Bai, Y. and Zhang, Y.J. and Li, Z.L. and Xia, F. and Wong, K.Y.K. and Wang, J.Q. and Zhao, H.S.},
  title     = {{DriveGPT4-V2}: Harnessing Large Language Model Capabilities for Enhanced Closed-Loop Autonomous Driving},
  booktitle = CVPR25,
  pages     = {17261-17270},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230277}
}

@inproceedings{bb235303,
  author    = {Hegde, D. and Yasarla, R. and Cai, H. and Han, S.Z. and Bhattacharyya, A. and Mahajan, S. and Liu, L.T. and Garrepalli, R. and Patel, V.M. and Porikli, F.M.},
  title     = {Distilling Multi-Modal Large Language Models for Autonomous Driving},
  booktitle = CVPR25,
  pages     = {27575-27585},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230278}
}

@inproceedings{bb235304,
  author    = {Chen, Y. and Ding, Z.H. and Wang, Z.Q. and Wang, Y. and Zhang, L.J. and Liu, S.},
  title     = {Asynchronous Large Language Model Enhanced Planner for Autonomous Driving},
  booktitle = ECCV24,
  pages     = {XXXVI: 22-38},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230279}
}

@inproceedings{bb235305,
  author    = {Li, B. and Wang, Y. and Mao, J. and Ivanovic, B. and Veer, S. and Leung, K. and Pavone, M.},
  title     = {Driving Everywhere with Large Language Model Policy Adaptation},
  booktitle = CVPR24,
  pages     = {14948-14957},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230280}
}

@inproceedings{bb235306,
  author    = {Wei, Y.X. and Wang, Z. and Lu, Y.F. and Xu, C.X. and Liu, C.X. and Zhao, H. and Chen, S. and Wang, Y.F.},
  title     = {Editable Scene Simulation for Autonomous Driving via Collaborative {LLM}-Agents},
  booktitle = CVPR24,
  pages     = {15077-15087},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230281}
}

@inproceedings{bb235307,
  author    = {Shao, H. and Hu, Y.X. and Wang, L. and Song, G.L. and Waslander, S.L. and Liu, Y. and Li, H.S.},
  title     = {{LMDrive}: Closed-Loop End-to-End Driving with Large Language Models},
  booktitle = CVPR24,
  pages     = {15120-15130},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230282}
}

@inproceedings{bb235308,
  author    = {Ma, Y.S. and Cui, C. and Cao, X. and Ye, W.Q. and Liu, P.R. and Lu, J. and Abdelraouf, A. and Gupta, R. and Han, K.T. and Bera, A. and Rehg, J.M. and Wang, Z.},
  title     = {{LaMPilot}: An Open Benchmark Dataset for Autonomous Driving with Language Model Programs},
  booktitle = CVPR24,
  pages     = {15141-15151},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230283}
}

@inproceedings{bb235309,
  author    = {Zhang, J.W. and Xu, C. and Li, B.},
  title     = {{ChatScene}: Knowledge-Enabled Safety-Critical Scenario Generation for Autonomous Vehicles},
  booktitle = CVPR24,
  pages     = {15459-15469},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230284}
}

@inproceedings{bb235310,
  author    = {Sirnam, S. and Yang, J. and Neiman, T. and Rizve, M.N. and Tran, S. and Yao, B. and Chilimbi, T. and Shah, M.},
  title     = {X-former: Unifying Contrastive and Reconstruction Learning for {MLLMs}},
  booktitle = ECCV24,
  pages     = {VI: 146-162},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230285}
}

@inproceedings{bb235311,
  author    = {Qiao, Y.Y. and Liu, Q.Y. and Liu, J.J. and Liu, J. and Wu, Q.},
  title     = {{LLM} as Copilot for Coarse-grained Vision-and-language Navigation},
  booktitle = ECCV24,
  pages     = {V: 459-476},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230286}
}

@inproceedings{bb235312,
  author    = {Zhang, J.Y. and Huang, Z.M. and Ray, A. and Ohn Bar, E.},
  title     = {Feedback-Guided Autonomous Driving},
  booktitle = CVPR24,
  pages     = {15000-15011},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230287}
}

@inproceedings{bb235313,
  author    = {Yang, Y. and Zhang, Q.W. and Li, C. and Marta, D.S. and Batool, N. and Folkesson, J.},
  title     = {Human-Centric Autonomous Systems With {LLMs} for User Command Reasoning},
  booktitle = LLVMCrive24,
  pages     = {988-994},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230288}
}

@inproceedings{bb235314,
  author    = {Cui, C. and Ma, Y.S. and Cao, X. and Ye, W.Q. and Zhou, Y. and Liang, K. and Chen, J. and Lu, J. and Yang, Z. and Liao, K.D. and Gao, T. and Li, E. and Tang, K. and Cao, Z.P. and Zhou, T. and Liu, A. and Yan, X.R. and Mei, S.Q. and Cao, J.G. and Wang, Z. and Zheng, C.},
  title     = {A Survey on Multimodal Large Language Models for Autonomous Driving},
  booktitle = LLVMCrive24,
  pages     = {958-979},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230289}
}

@inproceedings{bb235315,
  author    = {Fu, D.C. and Li, X. and Wen, L.C. and Dou, M. and Cai, P.L. and Shi, B. and Qiao, Y.},
  title     = {Drive Like a Human: Rethinking Autonomous Driving with Large Language Models},
  booktitle = LLVMCrive24,
  pages     = {910-919},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmdr5.html#TT230290}
}

@article{bb235316,
  author    = {Wang, J. and Zhu, M. and Li, Y. and Li, H.L. and Yang, L.Z. and Woo, W.L.},
  title     = {{Detect2Interact}: Localizing Object Key Field in Visual Question Answering with {LLMs}},
  journal   = IEEE_Int_Sys,
  volume    = {39},
  number    = {3},
  pages     = {35-44},
  month     = {May},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230291}
}

@article{bb235317,
  author    = {Hu, Z.J. and Yang, P. and Jiang, Y.S. and Bai, Z.J.},
  title     = {Prompting large language model with context and pre-answer for knowledge-based {VQA}},
  journal   = PR,
  volume    = {151},
  pages     = {110399},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230292}
}

@comment{bb235318: the pages field is intentionally omitted. The upstream record
carried only the placeholder xx-yy, which would be printed literally in a
bibliography. Add the real page range or article number once it is verified.}
@article{bb235318,
  author    = {Kuang, J.Y. and Shen, Y. and Xie, J. and Luo, H. and Xu, Z. and Li, R.H. and Li, Y.H. and Cheng, X.F. and Lin, X. and Han, Y.},
  title     = {Natural Language Understanding and Inference with {MLLM} in Visual Question Answering: A Survey},
  journal   = Surveys,
  volume    = {57},
  number    = {8},
  month     = {March},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230293}
}

@article{bb235319,
  author    = {Xiong, H.M. and Zhuge, Y.Z. and Zhu, J. and Zhang, L. and Lu, H.C.},
  title     = {{3UR-LLM}: An End-to-End Multimodal Large Language Model for {3D} Scene Understanding},
  journal   = MultMed,
  volume    = {27},
  pages     = {2899-2911},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230294}
}

@article{bb235320,
  author    = {Yu, Z. and Ouyang, X.C. and Shao, Z.W. and Wang, M. and Yu, J.},
  title     = {Prophet: Prompting Large Language Models With Complementary Answer Heuristics for Knowledge-Based Visual Question Answering},
  journal   = PAMI,
  volume    = {47},
  number    = {8},
  pages     = {6797-6808},
  month     = {August},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230295}
}

@inproceedings{bb235321,
  author    = {Shao, Z.W. and Yu, Z. and Wang, M. and Yu, J.},
  title     = {Prompting Large Language Models with Answer Heuristics for Knowledge-Based Visual Question Answering},
  booktitle = CVPR23,
  pages     = {14974-14983},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230296}
}

@article{bb235322,
  author    = {Xu, Z. and Li, Q. and Nie, W.Z. and Wang, W.J. and Liu, A.},
  title     = {Structure Causal Models and {LLMs} Integration in Medical Visual Question Answering},
  journal   = MedImg,
  volume    = {44},
  number    = {8},
  pages     = {3476-3489},
  month     = {August},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230297}
}

@inproceedings{bb235323,
  author    = {Fang, W.L. and Wu, Q. and Chen, J. and Xue, Y.},
  title     = {Notes-guided {MLLM} Reasoning: Enhancing {MLLM} with Knowledge and Visual Notes for Visual Question Answering},
  booktitle = CVPR25,
  pages     = {19597-19607},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230298}
}

@inproceedings{bb235324,
  author    = {Huai, T.Y. and Zhou, J. and Wu, X.J. and Chen, Q. and Bai, Q.C. and Zhou, Z. and He, L.},
  title     = {{CL-MoE}: Enhancing Multimodal Large Language Model with Dual Momentum Mixture-of-Experts for Continual Visual Question Answering},
  booktitle = CVPR25,
  pages     = {19608-19617},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230299}
}

@inproceedings{bb235325,
  author    = {Zhi, H.Y. and Chen, P.H. and Li, J. and Ma, S. and Sun, X.Y. and Xiang, T.H. and Lei, Y.J. and Tan, M.K. and Gan, C.},
  title     = {{LSceneLLM}: Enhancing Large {3D} Scene Understanding Using Adaptive Visual Preferences},
  booktitle = CVPR25,
  pages     = {3761-3771},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230300}
}

@inproceedings{bb235326,
  author    = {Cocchi, F. and Moratelli, N. and Cornia, M. and Baraldi, L. and Cucchiara, R.},
  title     = {Augmenting Multimodal {LLMs} with Self-Reflective Tokens for Knowledge-based Visual Question Answering},
  booktitle = CVPR25,
  pages     = {9199-9209},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230301}
}

@inproceedings{bb235327,
  author    = {Yang, Z. and Tao, Z. and Chen, Q. and Li, L. and Qi, Y.K. and van den Hengel, A.J. and Huang, Q.M.},
  title     = {Separation of powers: On segregating knowledge from observation in {LLM}-enabled knowledge-based visual question answering},
  booktitle = CVPR25,
  pages     = {24753-24762},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230302}
}

@inproceedings{bb235328,
  author    = {Cai, M. and Huang, Z.Y. and Li, Y.H. and Ojha, U. and Wang, H.H. and Lee, Y.J.},
  title     = {An Investigation on {LLMs}' Visual Understanding Ability Using {SVG} for Image-Text Bridging},
  booktitle = WACV25,
  pages     = {5377-5386},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230303}
}

@inproceedings{bb235329,
  author    = {Amoroso, R. and Zhang, G. and Koner, R. and Baraldi, L. and Cucchiara, R. and Tresp, V.},
  title     = {Perceive, Query \& Reason: Enhancing Video {QA} with Question-Guided Temporal Queries},
  booktitle = WACV25,
  pages     = {8853-8862},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230304}
}

@inproceedings{bb235330,
  author    = {Weng, W.X. and Zhang, R. and Meng, X.J. and Zhu, J. and Liu, Q. and Yuan, C.},
  title     = {Unsupervised Domain Adaptive Visual Question Answering in the Era of Multi-Modal Large Language Models},
  booktitle = WACV25,
  pages     = {6248-6258},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230305}
}

@inproceedings{bb235331,
  author    = {Sun, G.H. and Qin, C. and Wang, J.M. and Chen, Z.Y. and Xu, R. and Tao, Z.Q.},
  title     = {{SQ-LLAVA}: Self-questioning for Large Vision-language Assistant},
  booktitle = ECCV24,
  pages     = {IX: 156-172},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230306}
}

@inproceedings{bb235332,
  author    = {Ye, Q. and Yu, Z.T. and Shao, R. and Xie, X.Y. and Torr, P.H.S. and Cao, X.C.},
  title     = {{CAT}: Enhancing Multimodal Large Language Model to Answer Questions in Dynamic Audio-visual Scenarios},
  booktitle = ECCV24,
  pages     = {X: 146-164},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230307}
}

@inproceedings{bb235333,
  author    = {Hu, Y.T. and Li, T. and Lu, Q. and Shao, W.Q. and He, J.J. and Qiao, Y. and Luo, P.},
  title     = {{OmniMedVQA}: A New Large-Scale Comprehensive Evaluation Benchmark for Medical {LVLM}},
  booktitle = CVPR24,
  pages     = {22170-22183},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230308}
}

@inproceedings{bb235334,
  author    = {Li, Z. and Jasani, B. and Tang, P. and Ghadar, S.},
  title     = {Synthesize Step-by-Step: Tools, Templates and {LLMs} as Data Generators for Reasoning-Based Chart {VQA}},
  booktitle = CVPR24,
  pages     = {13613-13623},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230309}
}

@inproceedings{bb235335,
  author    = {Ozdemir, O. and Akagunduz, E.},
  title     = {Enhancing Visual Question Answering through Question-Driven Image Captions as Prompts},
  booktitle = Prompting24,
  pages     = {1562-1571},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230310}
}

@inproceedings{bb235336,
  author    = {Ranasinghe, K. and Shukla, S.N. and Poursaeed, O. and Ryoo, M.S. and Lin, T.Y.},
  title     = {Learning to Localize Objects Improves Spatial Reasoning in Visual-{LLMs}},
  booktitle = CVPR24,
  pages     = {12977-12987},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230311}
}

@inproceedings{bb235337,
  author    = {Blau, T. and Fogel, S. and Ronen, R. and Golts, A. and Tsiper, S. and Avraham, E.B. and Aberdam, A. and Ganz, R. and Litman, R.},
  title     = {{GRAM}: Global Reasoning for Multi-Page {VQA}},
  booktitle = CVPR24,
  pages     = {15598-15607},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230312}
}

@inproceedings{bb235338,
  author    = {Li, L. and Peng, J.W. and Chen, H. and Gao, C.Y. and Yang, X.},
  title     = {How to Configure Good In-Context Sequence for Visual Question Answering},
  booktitle = CVPR24,
  pages     = {26700-26710},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230313}
}

@inproceedings{bb235339,
  author    = {Agrawal, A. and Lezcano, C.M.S. and Heredia Marin, I.B. and Sethi, P.S.},
  title     = {Listen Then See: Video Alignment with Speaker Attention},
  booktitle = MULA24,
  pages     = {2018-2027},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230314}
}

@inproceedings{bb235340,
  author    = {Tan, R. and Sun, X. and Hu, P. and Wang, J.H. and Deilamsalehy, H. and Plummer, B.A. and Russell, B. and Saenko, K.},
  title     = {Koala: Key Frame-Conditioned Long Video-{LLM}},
  booktitle = CVPR24,
  pages     = {13581-13591},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230315}
}

@inproceedings{bb235341,
  author    = {Ganz, R. and Kittenplon, Y. and Aberdam, A. and Avraham, E.B. and Nuriel, O. and Mazor, S. and Litman, R.},
  title     = {Question Aware Vision Transformer for Multimodal Reasoning},
  booktitle = CVPR24,
  pages     = {13861-13871},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230316}
}

@inproceedings{bb235342,
  author    = {Bansal, H. and Bitton, Y. and Szpektor, I. and Chang, K.W. and Grover, A.},
  title     = {{VideoCon}: Robust Video-Language Alignment via Contrast Captions},
  booktitle = CVPR24,
  pages     = {13927-13937},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230317}
}

@inproceedings{bb235343,
  author    = {Wang, S.W. and Zhang, L.L. and Zhu, L.J. and Qin, T. and Yap, K.H. and Zhang, X.Y. and Liu, J.},
  title     = {{CoG-DQA}: Chain-of-Guiding Learning with Large Language Models for Diagram Question Answering},
  booktitle = CVPR24,
  pages     = {13969-13979},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230318}
}

@inproceedings{bb235344,
  author    = {Khan, Z. and BG, V.K. and Schulter, S. and Fu, Y. and Chandraker, M.},
  title     = {Self-Training Large Language Models for Improved Visual Program Synthesis With Visual Reinforcement},
  booktitle = CVPR24,
  pages     = {14344-14353},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230319}
}

@inproceedings{bb235345,
  author    = {Liao, Z. and Li, J.T. and Niu, L. and Zhang, L.Q.},
  title     = {Align and Aggregate: Compositional Reasoning with Video Alignment and Answer Aggregation for Video Question-Answering},
  booktitle = CVPR24,
  pages     = {13395-13404},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230320}
}

@inproceedings{bb235346,
  author    = {Pan, J.T. and Lin, Z. and Ge, Y.Y. and Zhu, X.T. and Zhang, R.R. and Wang, Y. and Qiao, Y. and Li, H.S.},
  title     = {Retrieving-to-Answer: Zero-Shot Video Question Answering with Frozen Large Language Models},
  booktitle = MMFM23,
  pages     = {272-283},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230321}
}

@inproceedings{bb235347,
  author    = {Guo, J.X. and Li, J. and Li, D.X. and Tiong, A.M.H. and Li, B.Y. and Tao, D.C. and Hoi, S.},
  title     = {From Images to Textual Prompts: Zero-shot Visual Question Answering with Frozen Large Language Models},
  booktitle = CVPR23,
  pages     = {10867-10877},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT230322}
}

@article{bb235348,
  author    = {Zhou, N. and Fan, J.P.},
  title     = {Automatic image-text alignment for large-scale web image indexing and retrieval},
  journal   = PR,
  volume    = {48},
  number    = {1},
  pages     = {205-219},
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230323}
}

@article{bb235349,
  author    = {Huang, F.R. and Zhang, X.M. and Zhao, Z.H. and Li, Z.J.},
  title     = {Bi-Directional Spatial-Semantic Attention Networks for Image-Text Matching},
  journal   = IP,
  volume    = {28},
  number    = {4},
  pages     = {2008-2020},
  month     = {April},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230324}
}

@article{bb235350,
  author    = {Otto, C. and Springstein, M. and Anand, A. and Ewerth, R.},
  title     = {Characterization and classification of semantic image-text relations},
  journal   = MultInfoRetr,
  volume    = {9},
  number    = {1},
  pages     = {31-45},
  month     = {March},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230325}
}

@article{bb235351,
  author    = {Niu, K. and Huang, Y. and Wang, L.},
  title     = {Re-ranking image-text matching by adaptive metric fusion},
  journal   = PR,
  volume    = {104},
  pages     = {107351},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230326}
}

@article{bb235352,
  author    = {Huang, Y. and Wang, Y.M. and Zeng, Y. and Huang, J.S. and Chai, Z.H. and Wang, L.},
  title     = {Unpaired Image-Text Matching via Multimodal Aligned Conceptual Knowledge},
  journal   = PAMI,
  volume    = {47},
  number    = {7},
  pages     = {5160-5176},
  month     = {July},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230327}
}

@article{bb235353,
  author    = {Wen, K.Y. and Gu, X.D. and Cheng, Q.R.},
  title     = {Learning Dual Semantic Relations With Graph Attention for Image-Text Matching},
  journal   = CirSysVideo,
  volume    = {31},
  number    = {7},
  pages     = {2866-2879},
  month     = {July},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230328}
}

@article{bb235354,
  author    = {Yang, S. and Li, Q. and Li, W.H. and Li, X.Y. and Liu, A.A.},
  title     = {Dual-Level Representation Enhancement on Characteristic and Context for Image-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {11},
  pages     = {8037-8050},
  month     = {November},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230329}
}

@article{bb235355,
  author    = {Jing, Y. and Wang, W. and Wang, L. and Tan, T.N.},
  title     = {Learning Aligned Image-Text Representations Using Graph Attentive Relational Network},
  journal   = IP,
  volume    = {30},
  pages     = {1840-1852},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230330}
}

@inproceedings{bb235356,
  author    = {Zhao, F. and Huang, Y.Z. and Wang, L. and Tan, T.N.},
  title     = {Deep Semantic Ranking Based Hashing for Multi-Label Image Retrieval},
  booktitle = CVPR15,
  pages     = {1556-1564},
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230331}
}

@article{bb235357,
  author    = {Lan, H. and Zhang, P.},
  title     = {Learning and Integrating Multi-Level Matching Features for Image-Text Retrieval},
  journal   = SPLetters,
  volume    = {29},
  pages     = {374-378},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230332}
}

@article{bb235358,
  author    = {Wu, J. and Wu, C.L. and Lu, J. and Wang, L.Q. and Cui, X.R.},
  title     = {Region Reinforcement Network With Topic Constraint for Image-Text Matching},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {1},
  pages     = {388-397},
  month     = {January},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230333}
}

@article{bb235359,
  author    = {Malali, N. and Keller, Y.},
  title     = {Learning to Embed Semantic Similarity for Joint Image-Text Retrieval},
  journal   = PAMI,
  volume    = {44},
  number    = {12},
  pages     = {10252-10260},
  month     = {December},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230334}
}

@article{bb235360,
  author    = {Tian, M.X. and Wu, X.X. and Jia, Y.D.},
  title     = {Adaptive Latent Graph Representation Learning for Image-Text Matching},
  journal   = IP,
  volume    = {32},
  pages     = {471-482},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230335}
}

@article{bb235361,
  author    = {Li, K.P. and Zhang, Y.L. and Li, K. and Li, Y.Y. and Fu, Y.},
  title     = {Image-Text Embedding Learning via Visual and Textual Semantic Reasoning},
  journal   = PAMI,
  volume    = {45},
  number    = {1},
  pages     = {641-656},
  month     = {January},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230336}
}

@inproceedings{bb235362,
  author    = {Li, K.P. and Zhang, Y.L. and Li, K. and Li, Y.Y. and Fu, Y.},
  title     = {Visual Semantic Reasoning for Image-Text Matching},
  booktitle = ICCV19,
  pages     = {4653-4661},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230337}
}

@article{bb235363,
  author    = {Diao, H.W. and Zhang, Y. and Liu, W. and Ruan, X. and Lu, H.C.},
  title     = {Plug-and-Play Regulators for Image-Text Matching},
  journal   = IP,
  volume    = {32},
  pages     = {2322-2334},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230338}
}

@article{bb235364,
  author    = {Tian, Y.M. and Ding, A. and Wang, D. and Luo, X.M. and Wan, B. and Wang, Y.F.},
  title     = {Bi-Attention enhanced representation learning for image-text matching},
  journal   = PR,
  volume    = {140},
  pages     = {109548},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230339}
}

@article{bb235365,
  author    = {Zhang, K. and Mao, Z.D. and Liu, A.A. and Zhang, Y.D.},
  title     = {Unified Adaptive Relevance Distinguishable Attention Network for Image-Text Matching},
  journal   = MultMed,
  volume    = {25},
  pages     = {1320-1332},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230340}
}

@article{bb235366,
  author    = {Xiong, G.X. and Meng, M. and Zhang, T.Z. and Zhang, D.M. and Zhang, Y.D.},
  title     = {Reference-Aware Adaptive Network for Image-Text Matching},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {10},
  pages     = {9678-9691},
  month     = {October},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230341}
}

@article{bb235367,
  author    = {Liu, Z.J. and Chen, F.L. and Xu, J. and Pei, W.J. and Lu, G.M.},
  title     = {Image-Text Retrieval With Cross-Modal Semantic Importance Consistency},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {5},
  pages     = {2465-2476},
  month     = {May},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230342}
}

@article{bb235368,
  author    = {Shang, H. and Zhao, G.S. and Shi, J. and Qian, X.M.},
  title     = {A Multiview Text Imagination Network Based on Latent Alignment for Image-Text Matching},
  journal   = IEEE_Int_Sys,
  volume    = {38},
  number    = {3},
  pages     = {41-50},
  month     = {May},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230343}
}

@article{bb235369,
  author    = {Liu, C. and Zhang, Y.Q. and Wang, H. and Chen, W.H. and Wang, F. and Huang, Y. and Shen, Y.D. and Wang, L.},
  title     = {Efficient Token-Guided Image-Text Retrieval With Consistent Multimodal Contrastive Training},
  journal   = IP,
  volume    = {32},
  pages     = {3622-3633},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230344}
}

@article{bb235370,
  author    = {Li, W.R. and Ma, Z.Y. and Deng, L.J. and Fan, X.P. and Tian, Y.H.},
  title     = {Neuron-Based Spiking Transmission and Reasoning Network for Robust Image-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {7},
  pages     = {3516-3528},
  month     = {July},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230345}
}

@article{bb235371,
  author    = {Li, W.R. and Ma, Z.Y. and Shi, J.Q. and Fan, X.P.},
  title     = {The Style Transformer With Common Knowledge Optimization for Image-Text Retrieval},
  journal   = SPLetters,
  volume    = {30},
  pages     = {1197-1201},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230346}
}

@article{bb235372,
  author    = {Zhu, H.G. and Zhang, C.J. and Wei, Y.C. and Huang, S. and Zhao, Y.},
  title     = {{ESA}: External Space Attention Aggregation for Image-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {10},
  pages     = {6131-6143},
  month     = {October},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230347}
}

@article{bb235373,
  author    = {Li, Z. and Guo, C. and Feng, Z. and Hwang, J.N. and Du, Z.T.},
  title     = {Integrating Language Guidance Into Image-Text Matching for Correcting False Negatives},
  journal   = MultMed,
  volume    = {26},
  pages     = {103-116},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230348}
}

@article{bb235374,
  author    = {Zhang, Y. and Ji, Z. and Wang, D. and Pang, Y.W. and Li, X.L.},
  title     = {{USER}: Unified Semantic Enhancement With Momentum Contrast for Image-Text Retrieval},
  journal   = IP,
  volume    = {33},
  pages     = {595-609},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230349}
}

@article{bb235375,
  author    = {Zhuang, J. and Yu, J. and Ding, Y. and Qu, X.Y. and Hu, Y.},
  title     = {Towards Fast and Accurate Image-Text Retrieval With Self-Supervised Fine-Grained Alignment},
  journal   = MultMed,
  volume    = {26},
  pages     = {1361-1372},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230350}
}

@article{bb235376,
  author    = {Liu, X. and He, Y. and Cheung, Y.M. and Xu, X. and Wang, N.N.},
  title     = {Learning Relationship-Enhanced Semantic Graph for Fine-Grained Image-Text Matching},
  journal   = Cyber,
  volume    = {54},
  number    = {2},
  pages     = {948-961},
  month     = {February},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230351}
}

@article{bb235377,
  author    = {Li, W.H. and Yang, S. and Li, Q. and Li, X.Y. and Liu, A.A.},
  title     = {Commonsense-Guided Semantic and Relational Consistencies for Image-Text Retrieval},
  journal   = MultMed,
  volume    = {26},
  pages     = {1867-1880},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230352}
}

@article{bb235378,
        AUTHOR = "Wu, D.Q. and Li, H.H. and Gu, C. and Liu, H. and Xu, C. and Hou, Y.X. and Guo, L.",
        TITLE = "Feature First: Advancing Image-Text Retrieval Through Improved Visual
Features",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3827--3841",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230353"}

@article{bb235379,
        AUTHOR = {Yang, R. and Wang, S. and Gu, Y. and Wang, J.H. and Sun, Y.Z. and Zhang, H. and Liao, Y. and Jiao, L.C.},
        TITLE = {Continual Learning for Cross-Modal Image-Text Retrieval Based on Domain-Selective Attention},
        JOURNAL = PR,
        VOLUME = {149},
        YEAR = {2024},
        PAGES = {110273},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230354}}

@article{bb235380,
        AUTHOR = "Pan, R.J. and Yang, H. and Li, C. and Yang, J.H.",
        TITLE = "Joint Intra \& Inter-Grained Reasoning: A New Look Into Semantic
Consistency of Image-Text Retrieval",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "4912--4925",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230355"}

@article{bb235381,
        AUTHOR = "Zhang, K. and Hu, B. and Zhang, H. and Li, Z. and Mao, Z.D.",
        TITLE = "Enhanced Semantic Similarity Learning Framework for Image-Text
Matching",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "4",
        MONTH = "April",
        PAGES = "2973--2988",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230356"}

@inproceedings{bb235382,
        AUTHOR = "Fu, Z.R. and Mao, Z.D. and Song, Y. and Zhang, Y.D.",
        TITLE = "Learning Semantic Relationship among Instances for Image-Text
Matching",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "15159--15168",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230357"}

@article{bb235383,
        AUTHOR = "Diao, H. and Zhang, Y. and Gao, S. and Ruan, X. and Lu, H.C.",
        TITLE = "Deep Boosting Learning:
A Brand-New Cooperative Approach for Image-Text Matching",
        JOURNAL = IP,
        VOLUME = "33",
        YEAR = "2024",
        PAGES = "3341--3352",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230358"}

@inproceedings{bb235384,
        AUTHOR = "Zhang, Y. and Lu, H.C.",
        TITLE = "Deep Cross-Modal Projection Learning for Image-Text Matching",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "I: 707--723",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230359"}

@article{bb235385,
        AUTHOR = "Cao, M. and Bai, Y. and Cao, Z.Q. and Nie, L.Q. and Zhang, M.",
        TITLE = "Efficient Image-Text Retrieval via Keyword-Guided Pre-Screening",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "6",
        MONTH = "June",
        PAGES = "5132--5145",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230360"}

@article{bb235386,
        AUTHOR = "Li, Z. and Zhang, L. and Zhang, K. and Zhang, Y.D. and Mao, Z.D.",
        TITLE = "Improving Image-Text Matching With Bidirectional Consistency of
Cross-Modal Alignment",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "7",
        MONTH = "July",
        PAGES = "6590--6607",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230361"}

@article{bb235387,
        AUTHOR = "Li, Z. and Zhang, L. and Zhang, K. and Zhang, Y.D. and Mao, Z.D.",
        TITLE = "Fast, Accurate, and Lightweight Memory-Enhanced Embedding Learning
Framework for Image-Text Retrieval",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "7",
        MONTH = "July",
        PAGES = "6542--6558",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230362"}

@article{bb235388,
        AUTHOR = "Cui, Z. and Hu, Y.L. and Sun, Y.F. and Yin, B.C.",
        TITLE = "Context-aware relation enhancement and similarity reasoning for
image-text retrieval",
        JOURNAL = IET-CV,
        VOLUME = "18",
        YEAR = "2024",
        NUMBER = "5",
        PAGES = "652--665",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230363"}

@article{bb235389,
        AUTHOR = "Pan, Z.X. and Mao, Y.C. and Xiong, L. and Pang, T.F. and Ping, P.",
        TITLE = "{MFAE}: Multimodal Fusion and Alignment for Entity-level Disinformation
Detection",
        JOURNAL = PRL,
        VOLUME = "184",
        YEAR = "2024",
        PAGES = "59--65",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230364"}

@article{bb235390,
        AUTHOR = "Pu, X. and Wang, Z.W. and Yuan, L. and Wu, Y. and Jing, L.P. and Gao, X.B.",
        TITLE = "{GADNet}: Improving image-text matching via graph-based aggregation and
disentanglement",
        JOURNAL = PR,
        VOLUME = "157",
        YEAR = "2025",
        PAGES = "110900",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230365"}

@article{bb235391,
        AUTHOR = "Pu, X. and Yang, P. and Yuan, L. and Gao, X.B.",
        TITLE = "Improving Image-Text Matching by Integrating Word Sense
Disambiguation",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2695--2699",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230366"}

@article{bb235392,
        AUTHOR = "Li, W.R. and Xiong, R.Q. and Fan, X.P.",
        TITLE = "Multi-Layer Probabilistic Association Reasoning Network for
Image-Text Retrieval",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "10",
        MONTH = "October",
        PAGES = "9706--9717",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230367"}

@article{bb235393,
        AUTHOR = "Tian, M.X. and Yang, S. and Wu, X.X. and Jia, Y.D.",
        TITLE = "Source-Free Image-Text Matching via Uncertainty-Aware Learning",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "3059--3063",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230368"}

@article{bb235394,
        AUTHOR = {Wang, D. and Tian, J. and Liang, X. and Tian, Y.M. and He, L.H.},
        TITLE = {Global-aware Fragment Representation Aggregation Network for image-text retrieval},
        JOURNAL = PR,
        VOLUME = {159},
        YEAR = {2025},
        PAGES = {111085},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230369}}

@article{bb235395,
        AUTHOR = {Ke, X. and Chen, B.T. and Yang, X. and Cai, Y.H. and Liu, H. and Guo, W.Z.},
        TITLE = {Cross-modal independent matching network for image-text retrieval},
        JOURNAL = PR,
        VOLUME = {159},
        YEAR = {2025},
        PAGES = {111096},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230370}}

@article{bb235396,
        AUTHOR = "Li, Z. and Guo, C. and Wang, X. and Feng, Z. and Du, Z.T.",
        TITLE = "Selectively Hard Negative Mining for Alleviating Gradient Vanishing
in Image-Text Matching",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "2",
        MONTH = "February",
        PAGES = "1921--1935",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230371"}

@article{bb235397,
        AUTHOR = {Yang, X.Y. and Li, C. and Wang, Z.M. and Xie, H. and Mao, J. and Yin, G.Q.},
        TITLE = {Remote Sensing Cross-Modal Text-Image Retrieval Based on Attention Correction and Filtering},
        JOURNAL = RS,
        VOLUME = {17},
        YEAR = {2025},
        NUMBER = {3},
        PAGES = {503},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT230372}}

@article{bb235398,
        AUTHOR = "Wu, D.Q. and Li, H.H. and Gu, C. and Guo, L. and Liu, H.",
        TITLE = "Dual Stream Relation Learning Network for Image-Text Retrieval",
        JOURNAL = MultMed,
        VOLUME = "27",
        YEAR = "2025",
        PAGES = "1551--1565",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230373"}

@article{bb235399,
        AUTHOR = "Liu, Z. and Xu, J.H. and Gao, S.S. and Chen, Z.",
        TITLE = "{CSA}: Cross-scale alignment with adaptive semantic aggregation and
filter for image-text retrieval",
        JOURNAL = PR,
        VOLUME = "165",
        YEAR = "2025",
        PAGES = "111647",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT230374"}

Last update: Sep 27, 2025 at 16:28:57