@article{bb241000,
AUTHOR = "Li, H.D. and Zhang, X.F. and Qu, H.",
TITLE = "{DDFAV}: Remote Sensing Large Vision Language Models Dataset and
Evaluation Benchmark",
JOURNAL = RS,
VOLUME = "17",
YEAR = "2025",
NUMBER = "4",
PAGES = "719",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235917"}
@article{bb241001,
AUTHOR = "Xu, P. and Shao, W.Q. and Zhang, K.P. and Gao, P. and Liu, S. and Lei, M. and Meng, F.Q. and Huang, S.Y. and Qiao, Y. and Luo, P.",
TITLE = "{LVLM-EHub}: A Comprehensive Evaluation Benchmark for Large
Vision-Language Models",
JOURNAL = PAMI,
VOLUME = "47",
YEAR = "2025",
NUMBER = "3",
MONTH = mar,
PAGES = "1877--1893",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235918"}
@inproceedings{bb241002,
AUTHOR = "Wang, J. and Lv, C.H. and Li, X. and Dong, S.C. and Li, H.D. and Yao, K. and Li, C. and Shao, W.Q. and Luo, P.",
TITLE = "{Forensics-Bench}: A Comprehensive Forgery Detection Benchmark Suite
for Large Vision Language Models",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "4233--4245",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235919"}
@article{bb241003,
AUTHOR = "Qin, Z. and Chen, D.Y. and Zhang, W.H. and Yao, L. and Huang, Y.L. and Ding, B.L. and Li, Y.L. and Deng, S.G.",
TITLE = "The Synergy Between Data and Multi-Modal Large Language Models:
A Survey From Co-Development Perspective",
JOURNAL = PAMI,
VOLUME = "47",
YEAR = "2025",
NUMBER = "10",
MONTH = oct,
PAGES = "8415--8434",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235920"}
@inproceedings{bb241004,
AUTHOR = "Zhang, Y.H. and Su, Y.C. and Liu, Y.M. and Wang, X.H. and Burgess, J. and Sui, E. and Wang, C.Y. and Aklilu, J. and Lozano, A. and Wei, A. and Schmidt, L. and Yeung-Levy, S.",
TITLE = "Automated Generation of Challenging Multiple-Choice Questions for
Vision Language Model Evaluation",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "29580--29590",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235921"}
@inproceedings{bb241005,
AUTHOR = "Jiang, X. and Zheng, J.W. and Liu, R.P. and Li, J.H. and Zhang, J.M. and Matthiesen, S. and Stiefelhagen, R.",
TITLE = "{@BENCH}: Benchmarking Vision-Language Models for Human-centered
Assistive Technology",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "3934--3943",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235922"}
@inproceedings{bb241006,
AUTHOR = "Xiong, T.Y. and Wang, X. and Guo, D. and Ye, Q.H. and Fan, H.Q. and Gu, Q.Q. and Huang, H. and Li, C.Y.",
TITLE = "{LLaVA-Critic}: Learning to Evaluate Multimodal Models",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "13618--13628",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235923"}
@inproceedings{bb241007,
AUTHOR = "Zhang, Q.H. and Ning, M. and Liu, Z. and Huang, Y. and Yang, S. and Wang, Y.B. and Ye, J.Y. and Chen, X. and Song, Y.B. and Yuan, L.",
TITLE = "{UPME}: An Unsupervised Peer Review Framework for Multimodal Large
Language Model Evaluation",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "9165--9174",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235924"}
@inproceedings{bb241008,
AUTHOR = "Fu, C. and Dai, Y.H. and Luo, Y.D. and Li, L. and Ren, S. and Zhang, R.R. and Wang, Z. and Zhou, C.Y. and Shen, Y.H. and Zhang, M.D. and Chen, P.X. and Li, Y.W. and Lin, S.H. and Zhao, S. and Li, K. and Xu, T. and Zheng, X. and Chen, E. and Shan, C.F. and He, R. and Sun, X.",
TITLE = "{Video-MME}: The First-Ever Comprehensive Evaluation Benchmark of
Multi-modal {LLMs} in Video Analysis",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "24108--24118",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235925"}
@inproceedings{bb241009,
AUTHOR = "Zhang, J.Y. and Yang, H. and Li, A. and Guo, X. and Wang, P. and Wang, H.M. and Chen, Y.R. and Li, H.",
TITLE = "{MLLM-LLaVA-FL}: Multimodal Large Language Model Assisted Federated
Learning",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "4066--4076",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235926"}
@inproceedings{bb241010,
AUTHOR = "Sn{\ae}bjarnarson, V. and Du, K. and Stoehr, N. and Belongie, S. and Cotterell, R. and Lang, N. and Frank, S.",
TITLE = "Taxonomy-Aware Evaluation of Vision-Language Models",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "9109--9120",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235927"}
@inproceedings{bb241011,
AUTHOR = "Tu, H.Q. and Cui, C. and Wang, Z.J. and Zhou, Y.Y. and Zhao, B.C. and Han, J.L. and Zhou, W.C.S. and Yao, H.X. and Xie, C.",
TITLE = "How Many Unicorns Are in This Image? A Safety Evaluation Benchmark
for Vision {LLMs}",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LI: 37--55",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235928"}
@inproceedings{bb241012,
AUTHOR = "Liu, X. and Zhu, Y.C. and Gu, J.D. and Lan, Y. and Yang, C. and Qiao, Y.",
TITLE = "{MM-SafetyBench}: A Benchmark for Safety Evaluation of Multimodal Large
Language Models",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LVI: 386--403",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235929"}
@inproceedings{bb241013,
AUTHOR = "Roberts, J. and L{\"u}ddecke, T. and Sheikh, R. and Han, K. and Albanie, S.",
TITLE = "Charting New Territories: Exploring the Geographic and Geospatial
Capabilities of Multimodal {LLMs}",
BOOKTITLE = EarthVision24,
YEAR = "2024",
PAGES = "554--563",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235930"}
@inproceedings{bb241014,
AUTHOR = "Verma, A.A. and Saeidi, A. and Hegde, S. and Therala, A. and Bardoliya, F.D. and Machavarapu, N. and Ravindhiran, S.A.K. and Malyala, S. and Chatterjee, A. and Yang, Y.Z. and Baral, C.",
TITLE = "Evaluating Multimodal Large Language Models across Distribution
Shifts and Augmentations",
BOOKTITLE = GenerativeFM24,
YEAR = "2024",
PAGES = "5314--5324",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235931"}
@inproceedings{bb241015,
AUTHOR = "Hu, Y.T. and Li, T. and Lu, Q. and Shao, W.Q. and He, J.J. and Qiao, Y. and Luo, P.",
TITLE = "{OmniMedVQA}: A New Large-Scale Comprehensive Evaluation Benchmark for
Medical {LVLM}",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "22170--22183",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235932"}
@article{bb241016,
AUTHOR = "Zhou, N. and Fan, J.P.",
TITLE = "Automatic image-text alignment for large-scale web image indexing and
retrieval",
JOURNAL = PR,
VOLUME = "48",
YEAR = "2015",
NUMBER = "1",
PAGES = "205--219",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235933"}
@article{bb241017,
AUTHOR = "Huang, F.R. and Zhang, X.M. and Zhao, Z.H. and Li, Z.J.",
TITLE = "Bi-Directional Spatial-Semantic Attention Networks for Image-Text
Matching",
JOURNAL = IP,
VOLUME = "28",
YEAR = "2019",
NUMBER = "4",
MONTH = apr,
PAGES = "2008--2020",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235934"}
@article{bb241018,
AUTHOR = "Otto, C. and Springstein, M. and Anand, A. and Ewerth, R.",
TITLE = "Characterization and classification of semantic image-text relations",
JOURNAL = MultInfoRetr,
VOLUME = "9",
YEAR = "2020",
NUMBER = "1",
MONTH = mar,
PAGES = "31--45",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235935"}
@article{bb241019,
  author    = {Niu, K. and Huang, Y. and Wang, L.},
  title     = {Re-ranking image-text matching by adaptive metric fusion},
  journal   = PR,
  volume    = {104},
  year      = {2020},
  pages     = {107351},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235936}
}
@article{bb241020,
AUTHOR = "Huang, Y. and Wang, Y.M. and Zeng, Y. and Huang, J.S. and Chai, Z.H. and Wang, L.",
TITLE = "Unpaired Image-Text Matching via Multimodal Aligned Conceptual
Knowledge",
JOURNAL = PAMI,
VOLUME = "47",
YEAR = "2025",
NUMBER = "7",
MONTH = jul,
PAGES = "5160--5176",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235937"}
@article{bb241021,
AUTHOR = "Wen, K.Y. and Gu, X.D. and Cheng, Q.R.",
TITLE = "Learning Dual Semantic Relations With Graph Attention for Image-Text
Matching",
JOURNAL = CirSysVideo,
VOLUME = "31",
YEAR = "2021",
NUMBER = "7",
MONTH = jul,
PAGES = "2866--2879",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235938"}
@article{bb241022,
AUTHOR = "Yang, S. and Li, Q. and Li, W.H. and Li, X.Y. and Liu, A.A.",
TITLE = "Dual-Level Representation Enhancement on Characteristic and Context
for Image-Text Retrieval",
JOURNAL = CirSysVideo,
VOLUME = "32",
YEAR = "2022",
NUMBER = "11",
MONTH = nov,
PAGES = "8037--8050",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235939"}
@article{bb241023,
AUTHOR = "Jing, Y. and Wang, W. and Wang, L. and Tan, T.N.",
TITLE = "Learning Aligned Image-Text Representations Using Graph Attentive
Relational Network",
JOURNAL = IP,
VOLUME = "30",
YEAR = "2021",
PAGES = "1840--1852",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235940"}
@inproceedings{bb241024,
AUTHOR = "Zhao, F. and Huang, Y.Z. and Wang, L. and Tan, T.N.",
TITLE = "Deep Semantic Ranking Based Hashing for Multi-Label Image Retrieval",
BOOKTITLE = CVPR15,
YEAR = "2015",
PAGES = "1556--1564",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235941"}
@article{bb241025,
AUTHOR = "Lan, H. and Zhang, P.",
TITLE = "Learning and Integrating Multi-Level Matching Features for Image-Text
Retrieval",
JOURNAL = SPLetters,
VOLUME = "29",
YEAR = "2022",
PAGES = "374--378",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235942"}
@article{bb241026,
AUTHOR = "Wu, J. and Wu, C.L. and Lu, J. and Wang, L.Q. and Cui, X.R.",
TITLE = "Region Reinforcement Network With Topic Constraint for Image-Text
Matching",
JOURNAL = CirSysVideo,
VOLUME = "32",
YEAR = "2022",
NUMBER = "1",
MONTH = jan,
PAGES = "388--397",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235943"}
@article{bb241027,
AUTHOR = "Malali, N. and Keller, Y.",
TITLE = "Learning to Embed Semantic Similarity for Joint Image-Text Retrieval",
JOURNAL = PAMI,
VOLUME = "44",
YEAR = "2022",
NUMBER = "12",
MONTH = dec,
PAGES = "10252--10260",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235944"}
@article{bb241028,
AUTHOR = "Tian, M.X. and Wu, X.X. and Jia, Y.D.",
TITLE = "Adaptive Latent Graph Representation Learning for Image-Text Matching",
JOURNAL = IP,
VOLUME = "32",
YEAR = "2023",
PAGES = "471--482",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235945"}
@article{bb241029,
AUTHOR = "Li, K.P. and Zhang, Y.L. and Li, K. and Li, Y.Y. and Fu, Y.",
TITLE = "Image-Text Embedding Learning via Visual and Textual Semantic
Reasoning",
JOURNAL = PAMI,
VOLUME = "45",
YEAR = "2023",
NUMBER = "1",
MONTH = jan,
PAGES = "641--656",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235946"}
@inproceedings{bb241030,
AUTHOR = "Li, K.P. and Zhang, Y.L. and Li, K. and Li, Y.Y. and Fu, Y.",
TITLE = "Visual Semantic Reasoning for Image-Text Matching",
BOOKTITLE = ICCV19,
YEAR = "2019",
PAGES = "4653--4661",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235947"}
@article{bb241031,
AUTHOR = "Diao, H.W. and Zhang, Y. and Liu, W. and Ruan, X. and Lu, H.C.",
TITLE = "Plug-and-Play Regulators for Image-Text Matching",
JOURNAL = IP,
VOLUME = "32",
YEAR = "2023",
PAGES = "2322--2334",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235948"}
@article{bb241032,
  author    = {Tian, Y.M. and Ding, A. and Wang, D. and Luo, X.M. and Wan, B. and Wang, Y.F.},
  title     = {Bi-Attention enhanced representation learning for image-text matching},
  journal   = PR,
  volume    = {140},
  year      = {2023},
  pages     = {109548},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235949}
}
@article{bb241033,
AUTHOR = "Zhang, K. and Mao, Z.D. and Liu, A.A. and Zhang, Y.D.",
TITLE = "Unified Adaptive Relevance Distinguishable Attention Network for
Image-Text Matching",
JOURNAL = MultMed,
VOLUME = "25",
YEAR = "2023",
PAGES = "1320--1332",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235950"}
@article{bb241034,
AUTHOR = "Xiong, G.X. and Meng, M. and Zhang, T.Z. and Zhang, D.M. and Zhang, Y.D.",
TITLE = "Reference-Aware Adaptive Network for Image-Text Matching",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "10",
MONTH = oct,
PAGES = "9678--9691",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235951"}
@article{bb241035,
AUTHOR = "Liu, Z.J. and Chen, F.L. and Xu, J. and Pei, W.J. and Lu, G.M.",
TITLE = "Image-Text Retrieval With Cross-Modal Semantic Importance Consistency",
JOURNAL = CirSysVideo,
VOLUME = "33",
YEAR = "2023",
NUMBER = "5",
MONTH = may,
PAGES = "2465--2476",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235952"}
@article{bb241036,
AUTHOR = "Shang, H. and Zhao, G.S. and Shi, J. and Qian, X.M.",
TITLE = "A Multiview Text Imagination Network Based on Latent Alignment for
Image-Text Matching",
JOURNAL = IEEE_Int_Sys,
VOLUME = "38",
YEAR = "2023",
NUMBER = "3",
MONTH = may,
PAGES = "41--50",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235953"}
@article{bb241037,
AUTHOR = "Liu, C. and Zhang, Y.Q. and Wang, H.S. and Chen, W.H. and Wang, F. and Huang, Y. and Shen, Y.D. and Wang, L.",
TITLE = "Efficient Token-Guided Image-Text Retrieval With Consistent
Multimodal Contrastive Training",
JOURNAL = IP,
VOLUME = "32",
YEAR = "2023",
PAGES = "3622--3633",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235954"}
@article{bb241038,
AUTHOR = "Li, W.R. and Ma, Z.Y. and Deng, L.J. and Fan, X.P. and Tian, Y.H.",
TITLE = "Neuron-Based Spiking Transmission and Reasoning Network for Robust
Image-Text Retrieval",
JOURNAL = CirSysVideo,
VOLUME = "33",
YEAR = "2023",
NUMBER = "7",
MONTH = jul,
PAGES = "3516--3528",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235955"}
@article{bb241039,
AUTHOR = "Li, W.R. and Ma, Z.Y. and Shi, J.Q. and Fan, X.P.",
TITLE = "The Style Transformer With Common Knowledge Optimization for
Image-Text Retrieval",
JOURNAL = SPLetters,
VOLUME = "30",
YEAR = "2023",
PAGES = "1197--1201",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235956"}
@article{bb241040,
AUTHOR = "Zhu, H.G. and Zhang, C.J. and Wei, Y.C. and Huang, S. and Zhao, Y.",
TITLE = "{ESA}: External Space Attention Aggregation for Image-Text Retrieval",
JOURNAL = CirSysVideo,
VOLUME = "33",
YEAR = "2023",
NUMBER = "10",
MONTH = oct,
PAGES = "6131--6143",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235957"}
@article{bb241041,
AUTHOR = "Li, Z. and Guo, C. and Feng, Z. and Hwang, J.N. and Du, Z.T.",
TITLE = "Integrating Language Guidance Into Image-Text Matching for Correcting
False Negatives",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "103--116",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235958"}
@article{bb241042,
AUTHOR = "Zhang, Y. and Ji, Z. and Wang, D. and Pang, Y.W. and Li, X.L.",
TITLE = "{USER}: Unified Semantic Enhancement With Momentum Contrast for
Image-Text Retrieval",
JOURNAL = IP,
VOLUME = "33",
YEAR = "2024",
PAGES = "595--609",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235959"}
@article{bb241043,
AUTHOR = "Zhuang, J. and Yu, J. and Ding, Y. and Qu, X.Y. and Hu, Y.",
TITLE = "Towards Fast and Accurate Image-Text Retrieval With Self-Supervised
Fine-Grained Alignment",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "1361--1372",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235960"}
@article{bb241044,
AUTHOR = "Liu, X. and He, Y. and Cheung, Y.M. and Xu, X. and Wang, N.N.",
TITLE = "Learning Relationship-Enhanced Semantic Graph for Fine-Grained
Image-Text Matching",
JOURNAL = Cyber,
VOLUME = "54",
YEAR = "2024",
NUMBER = "2",
MONTH = feb,
PAGES = "948--961",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235961"}
@article{bb241045,
AUTHOR = "Li, W.H. and Yang, S. and Li, Q. and Li, X.Y. and Liu, A.A.",
TITLE = "Commonsense-Guided Semantic and Relational Consistencies for
Image-Text Retrieval",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "1867--1880",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235962"}
@article{bb241046,
AUTHOR = "Wu, D.Q. and Li, H.H. and Gu, C. and Liu, H. and Xu, C. and Hou, Y.X. and Guo, L.",
TITLE = "Feature First: Advancing Image-Text Retrieval Through Improved Visual
Features",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "3827--3841",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235963"}
@article{bb241047,
  author    = {Yang, R. and Wang, S. and Gu, Y. and Wang, J.H. and Sun, Y.Z. and Zhang, H. and Liao, Y. and Jiao, L.C.},
  title     = {Continual Learning for Cross-Modal Image-Text Retrieval Based on
Domain-Selective Attention},
  journal   = PR,
  volume    = {149},
  year      = {2024},
  pages     = {110273},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235964}
}
@article{bb241048,
AUTHOR = "Pan, R.J. and Yang, H. and Li, C. and Yang, J.H.",
TITLE = "Joint Intra \& Inter-Grained Reasoning: A New Look Into Semantic
Consistency of Image-Text Retrieval",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "4912--4925",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235965"}
@article{bb241049,
AUTHOR = "Zhang, K. and Hu, B. and Zhang, H. and Li, Z. and Mao, Z.D.",
TITLE = "Enhanced Semantic Similarity Learning Framework for Image-Text
Matching",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "4",
MONTH = apr,
PAGES = "2973--2988",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235966"}
@inproceedings{bb241050,
AUTHOR = "Fu, Z.R. and Mao, Z.D. and Song, Y. and Zhang, Y.D.",
TITLE = "Learning Semantic Relationship among Instances for Image-Text
Matching",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "15159--15168",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235967"}
@article{bb241051,
AUTHOR = "Diao, H. and Zhang, Y. and Gao, S. and Ruan, X. and Lu, H.C.",
TITLE = "Deep Boosting Learning:
A Brand-New Cooperative Approach for Image-Text Matching",
JOURNAL = IP,
VOLUME = "33",
YEAR = "2024",
PAGES = "3341--3352",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235968"}
@inproceedings{bb241052,
AUTHOR = "Zhang, Y. and Lu, H.C.",
TITLE = "Deep Cross-Modal Projection Learning for Image-Text Matching",
BOOKTITLE = ECCV18,
YEAR = "2018",
PAGES = "I: 707--723",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235969"}
@article{bb241053,
AUTHOR = "Cao, M. and Bai, Y. and Cao, Z.Q. and Nie, L.Q. and Zhang, M.",
TITLE = "Efficient Image-Text Retrieval via Keyword-Guided Pre-Screening",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "6",
MONTH = jun,
PAGES = "5132--5145",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235970"}
@article{bb241054,
AUTHOR = "Li, Z. and Zhang, L. and Zhang, K. and Zhang, Y.D. and Mao, Z.D.",
TITLE = "Improving Image-Text Matching With Bidirectional Consistency of
Cross-Modal Alignment",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "7",
MONTH = jul,
PAGES = "6590--6607",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235971"}
@article{bb241055,
AUTHOR = "Li, Z. and Zhang, L. and Zhang, K. and Zhang, Y.D. and Mao, Z.D.",
TITLE = "Fast, Accurate, and Lightweight Memory-Enhanced Embedding Learning
Framework for Image-Text Retrieval",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "7",
MONTH = jul,
PAGES = "6542--6558",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235972"}
@article{bb241056,
AUTHOR = "Cui, Z. and Hu, Y.L. and Sun, Y.F. and Yin, B.C.",
TITLE = "Context-aware relation enhancement and similarity reasoning for
image-text retrieval",
JOURNAL = IET-CV,
VOLUME = "18",
YEAR = "2024",
NUMBER = "5",
PAGES = "652--665",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235973"}
@article{bb241057,
AUTHOR = "Pan, Z.X. and Mao, Y.C. and Xiong, L. and Pang, T.F. and Ping, P.",
TITLE = "{MFAE}: Multimodal Fusion and Alignment for Entity-level Disinformation
Detection",
JOURNAL = PRL,
VOLUME = "184",
YEAR = "2024",
PAGES = "59--65",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235974"}
@article{bb241058,
AUTHOR = "Pu, X. and Wang, Z.W. and Yuan, L. and Wu, Y. and Jing, L.P. and Gao, X.B.",
TITLE = "{GADNet}: Improving image-text matching via graph-based aggregation and
disentanglement",
JOURNAL = PR,
VOLUME = "157",
YEAR = "2025",
PAGES = "110900",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235975"}
@article{bb241059,
AUTHOR = "Pu, X. and Yang, P. and Yuan, L. and Gao, X.B.",
TITLE = "Improving Image-Text Matching by Integrating Word Sense
Disambiguation",
JOURNAL = SPLetters,
VOLUME = "31",
YEAR = "2024",
PAGES = "2695--2699",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235976"}
@article{bb241060,
AUTHOR = "Li, W.R. and Xiong, R.Q. and Fan, X.P.",
TITLE = "Multi-Layer Probabilistic Association Reasoning Network for
Image-Text Retrieval",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "10",
MONTH = oct,
PAGES = "9706--9717",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235977"}
@article{bb241061,
AUTHOR = "Tian, M.X. and Yang, S. and Wu, X.X. and Jia, Y.D.",
TITLE = "Source-Free Image-Text Matching via Uncertainty-Aware Learning",
JOURNAL = SPLetters,
VOLUME = "31",
YEAR = "2024",
PAGES = "3059--3063",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235978"}
@article{bb241062,
  author    = {Wang, D. and Tian, J. and Liang, X. and Tian, Y.M. and He, L.H.},
  title     = {Global-aware Fragment Representation Aggregation Network for
image-text retrieval},
  journal   = PR,
  volume    = {159},
  year      = {2025},
  pages     = {111085},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235979}
}
@article{bb241063,
  author    = {Ke, X. and Chen, B.T. and Yang, X. and Cai, Y.H. and Liu, H. and Guo, W.Z.},
  title     = {Cross-modal independent matching network for image-text retrieval},
  journal   = PR,
  volume    = {159},
  year      = {2025},
  pages     = {111096},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235980}
}
@article{bb241064,
AUTHOR = "Li, Z. and Guo, C. and Wang, X. and Feng, Z. and Du, Z.T.",
TITLE = "Selectively Hard Negative Mining for Alleviating Gradient Vanishing
in Image-Text Matching",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "2",
MONTH = feb,
PAGES = "1921--1935",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235981"}
@article{bb241065,
  author    = {Yang, X.Y. and Li, C. and Wang, Z.M. and Xie, H. and Mao, J. and Yin, G.Q.},
  title     = {Remote Sensing Cross-Modal Text-Image Retrieval Based on Attention
Correction and Filtering},
  journal   = RS,
  volume    = {17},
  number    = {3},
  year      = {2025},
  pages     = {503},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235982}
}
@article{bb241066,
AUTHOR = "Wu, D.Q. and Li, H.H. and Gu, C. and Guo, L. and Liu, H.",
TITLE = "Dual Stream Relation Learning Network for Image-Text Retrieval",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "1551--1565",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235983"}
@article{bb241067,
AUTHOR = "Liu, Z. and Xu, J.H. and Gao, S.S. and Chen, Z.",
TITLE = "{CSA}: Cross-scale alignment with adaptive semantic aggregation and
filter for image-text retrieval",
JOURNAL = PR,
VOLUME = "165",
YEAR = "2025",
PAGES = "111647",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235984"}
@article{bb241068,
AUTHOR = "Zhang, Q. and Wang, D. and Yu, X.",
TITLE = "{RLita}: A Region-Level Image-Text Alignment Method for Remote Sensing
Foundation Model",
JOURNAL = RS,
VOLUME = "17",
YEAR = "2025",
NUMBER = "10",
PAGES = "1661",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235985"}
@article{bb241069,
AUTHOR = "Wen, Z.T. and Liu, J.H. and Zhang, H. and Zuo, F.Y.",
TITLE = "Exploring Fine-Grained Visual-Text Feature Alignment With Prompt
Tuning for Domain-Adaptive Object Detection",
JOURNAL = Cyber,
VOLUME = "55",
YEAR = "2025",
NUMBER = "7",
MONTH = jul,
PAGES = "3220--3233",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235986"}
@article{bb241070,
AUTHOR = "Guan, J.H. and Shu, Y. and Li, W. and Song, Z. and Zhang, Y.C.",
TITLE = "{PR-CLIP}: Cross-Modal Positional Reconstruction for Remote Sensing
Image-Text Retrieval",
JOURNAL = RS,
VOLUME = "17",
YEAR = "2025",
NUMBER = "13",
PAGES = "2117",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235987"}
@article{bb241071,
AUTHOR = "Zhao, H. and Li, Z.Y. and Wang, S. and Zhang, Z.X. and Li, K.Q.",
TITLE = "Robust Hashing With Bilinear Drift for Image-Text Retrieval",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "8",
MONTH = aug,
PAGES = "7642--7654",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235988"}
@article{bb241072,
AUTHOR = "Wang, P.Z. and Zhang, L. and Mao, Z.D. and Lyu, N. and Zhang, Y.D.",
TITLE = "Matryoshka Learning With Metric Transfer for Image-Text Matching",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "9",
MONTH = sep,
PAGES = "9502--9516",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235989"}
@article{bb241073,
AUTHOR = "Koo, H. and Shin, J. and Kim, E.",
TITLE = "Dual-branch scale disentanglement for text-video retrieval",
JOURNAL = PRL,
VOLUME = "196",
YEAR = "2025",
PAGES = "296--302",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235990"}
@article{bb241074,
  author    = {Zhou, J. and Wang, M.},
  title     = {Unified learning for image-text alignment via multi-scale feature
fusion},
  journal   = CVIU,
  volume    = {260},
  year      = {2025},
  pages     = {104468},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235991}
}
@article{bb241075,
AUTHOR = "Wen, J. and Chen, Y.F. and Shi, R.Q. and Ji, W. and Yang, M.L. and Gao, D.F. and Yuan, J.S. and Zimmermann, R.",
TITLE = "{HOVER}: Hyperbolic Video-Text Retrieval",
JOURNAL = IP,
VOLUME = "34",
YEAR = "2025",
PAGES = "6192--6203",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235992"}
@article{bb241076,
AUTHOR = "Fang, J.Y. and Zhu, B. and Yuan, J.L. and Chen, Y.Y. and Tang, M. and Wang, J.Q.",
TITLE = "{AMITA}: Attribute-Guided Masked Image-Text Alignment for Multi-Label
Image Representation",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "11",
MONTH = nov,
PAGES = "11432--11447",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235993"}
@article{bb241077,
AUTHOR = "Ji, L.L. and Liu, L.",
TITLE = "Multi-Scale Feature Fusion Based on Piecewise Polynomial Activation
Function for Image-Text Matching",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "11",
MONTH = nov,
PAGES = "11627--11640",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235994"}
@article{bb241078,
AUTHOR = "Chen, R. and Su, T. and Wang, H. and Ni, Z.K.",
TITLE = "Similarity Shuffled Criss-Cross Transformer With Angle Loss for
Image-Text Matching",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "9723--9734",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235995"}
@article{bb241079,
  author    = {Chen, D. and Wang, Y.T. and Xie, Y.Z. and Chen, S.Y. and Peng, W.L. and Tang, M. and Fang, M. and Chen, C.L.P. and Li, P. and Zhang, W.},
  title     = {Intra-modal consistency for image-text retrieval through soft-label
distillation},
  journal   = PR,
  volume    = {173},
  year      = {2026},
  pages     = {112817},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235996}
}
@article{bb241080,
AUTHOR = "Shi, Z.X. and Ding, Y. and Dong, J.Y. and Zhang, T.Z.",
TITLE = "Beyond One and Two Tower: Cross-Modal Consensus Learning for
Image-Text Retrieval",
JOURNAL = CirSysVideo,
VOLUME = "36",
YEAR = "2026",
NUMBER = "2",
MONTH = feb,
PAGES = "2581--2593",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235997"}
@article{bb241081,
AUTHOR = "Wang, H.C. and Liu, L. and Zhang, H.X. and Zhu, L. and Chang, X.J. and Du, H.",
TITLE = "{VisualRAG}: Knowledge-Guided Retrieval Augmentation for Image-Text
Matching",
JOURNAL = CirSysVideo,
VOLUME = "36",
YEAR = "2026",
NUMBER = "1",
MONTH = jan,
PAGES = "1234--1248",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235998"}
@article{bb241082,
AUTHOR = "Zhang, D.L. and Wang, Z.W. and Wu, X.J. and Kittler, J.V.",
TITLE = "{HACG}: Leveraging Hierarchical Alignment and Caption Generation for
Text-Video Retrieval",
JOURNAL = IJCV,
VOLUME = "134",
YEAR = "2026",
NUMBER = "1",
MONTH = jan,
PAGES = "93",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT235999"}
@inproceedings{bb241083,
AUTHOR = "Vongala, M.R. and Srivastava, S. and Ko{\v{s}}eck{\'a}, J.",
TITLE = "Compositional Image-Text Matching and Retrieval by Grounding Entities",
BOOKTITLE = "MULA25",
YEAR = "2025",
PAGES = "241--250",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236000"}
@inproceedings{bb241084,
AUTHOR = "Zhang, Z.C. and Li, X.Y. and Sun, W. and Zhang, Z.C. and Li, Y.H. and Liu, X.H. and Zhai, G.T.",
TITLE = "Leveraging Multimodal Large Language Models for Joint Discrete and
Continuous Evaluation in Text-to-Image Alignment",
BOOKTITLE = NTIRE25,
YEAR = "2025",
PAGES = "968--977",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236001"}
@inproceedings{bb241085,
AUTHOR = "Zhang, Z.J. and Zheng, X.H. and Wu, X.C. and Peng, C. and Cao, X.Z.",
TITLE = "{TokenFocus-VQA}: Enhancing Text-to-Image Alignment with Position-Aware
Focus and Multi-Perspective Aggregations on {LVLMs}",
BOOKTITLE = NTIRE25,
YEAR = "2025",
PAGES = "1270--1279",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236002"}
@inproceedings{bb241086,
AUTHOR = "Yue, X. and Sun, J. and Lu, J. and Yao, L.C. and Xia, F. and Wang, T.Y. and Rao, F.Y. and Lyu, J. and Deng, Y.",
TITLE = "Instruction-Augmented Multimodal Alignment for Image-Text and Element
Matching",
BOOKTITLE = NTIRE25,
YEAR = "2025",
PAGES = "1370--1379",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236003"}
@inproceedings{bb241087,
AUTHOR = "Lai, H. and Xiong, G.X. and Mai, H.Y. and Liu, X. and Zhang, T.Z.",
TITLE = "Rethinking Noisy Video-Text Retrieval via Relation-aware Alignment",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "9231--9241",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236004"}
@inproceedings{bb241088,
AUTHOR = "Kim, D. and Piergiovanni, A. and Mallya, G. and Angelova, A.",
TITLE = "{VideoComp}: Advancing Fine-Grained Compositional and Temporal
Alignment in Video-Text Models",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "29060--29070",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236005"}
@inproceedings{bb241089,
AUTHOR = "Shen, L. and Gong, G.Q. and Hao, T.X. and He, T. and Zhang, Y.F. and Liu, P.Z. and Zhao, S.C. and Han, J.G. and Ding, G.",
TITLE = "{DiscoVLA}: Discrepancy Reduction in Vision, Language, and Alignment
for Parameter-Efficient Video-Text Retrieval",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "19702--19712",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236006"}
@inproceedings{bb241090,
AUTHOR = "Jin, Z.X. and Xu, X.W. and Wang, X.D.",
TITLE = "{MADA}: Multi-Window Attention and Dual-Alignment for Image-Text
Retrieval",
BOOKTITLE = ICIVC24,
YEAR = "2024",
PAGES = "240--245",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236007"}
@inproceedings{bb241091,
AUTHOR = "Xie, C.W. and Sun, S.Y. and Zhao, L.M. and Li, P. and Ma, S. and Zheng, Y.",
TITLE = "{FuseTeacher}: Modality-fused Encoders are Strong Vision Supervisors",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XLVIII: 287--304",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236008"}
@inproceedings{bb241092,
AUTHOR = "Kim, W. and Chun, S. and Kim, T. and Han, D.Y. and Yun, S.",
TITLE = "{HYPE}: Hyperbolic Entailment Filtering for Underspecified Images and
Texts",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XL: 247--265",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236009"}
@inproceedings{bb241093,
AUTHOR = "Sogi, N. and Shibata, T. and Terao, M.",
TITLE = "Object-aware Query Perturbation for Cross-modal Image-text Retrieval",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LXXIX: 447--464",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236010"}
@inproceedings{bb241094,
AUTHOR = "Alper, M. and Averbuch-Elor, H.",
TITLE = "Emergent Visual-semantic Hierarchies in Image-text Representations",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LII: 220--238",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236011"}
@inproceedings{bb241095,
AUTHOR = "Gordon, B. and Bitton, Y. and Shafir, Y. and Garg, R. and Chen, X. and Lischinski, D. and Cohen-Or, D. and Szpektor, I.",
TITLE = "Mismatch Quest: Visual and Textual Feedback for Image-Text Misalignment",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LVII: 310--328",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236012"}
@inproceedings{bb241096,
AUTHOR = "Hua, H. and Shi, J. and Kafle, K. and Jenni, S. and Zhang, D. and Collomosse, J. and Cohen, S. and Luo, J.B.",
TITLE = "{FineMatch}: Aspect-based Fine-grained Image and Text Mismatch Detection
and Correction",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "IX: 474--491",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236013"}
@inproceedings{bb241097,
AUTHOR = "Li, Y.H. and Liu, H.T. and Cai, M. and Li, Y.J. and Shechtman, E. and Lin, Z. and Lee, Y.J. and Singh, K.K.",
TITLE = "Removing Distributional Discrepancies in Captions Improves Image-Text
Alignment",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XXI: 405--422",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236014"}
@inproceedings{bb241098,
AUTHOR = "Ma, W. and Li, K. and Jiang, Z. and Meshry, M. and Liu, Q.H. and Wang, H.Y. and H{\"a}ne, C. and Yuille, A.L.",
TITLE = "Rethinking Video-text Understanding: Retrieval from Counterfactually
Augmented Data",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XIII: 254--269",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236015"}
@inproceedings{bb241099,
AUTHOR = "Zhang, W. and Xu, X.W. and Tao, Y. and Wang, X.D. and Wang, C.L. and Wei, Z.M.",
TITLE = "Bi-Directional Image-Text Retrieval With Position Attention and
Similarity Filtering",
BOOKTITLE = ICIVC22,
YEAR = "2022",
PAGES = "635--640",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT236016"}
Last update: Feb 26, 2026 at 10:58:24