@article{bb242000,
  author    = {Zhou, W. and Zhou, Z.H.},
  title     = {Unsupervised Domain Adaption Harnessing Vision-Language Pre-Training},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {9},
  month     = sep,
  year      = {2024},
  pages     = {8201--8214},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236916}
}
@article{bb242001,
  author    = {Guo, M.H. and Zhang, Y. and Mu, T.J. and Huang, S.X. and Hu, S.M.},
  title     = {Tuning Vision-Language Models With Multiple Prototypes Clustering},
  journal   = PAMI,
  volume    = {46},
  number    = {12},
  month     = dec,
  year      = {2024},
  pages     = {11186--11199},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236917}
}
@article{bb242002,
  author    = {Sun, B. and Wu, Z.C. and Zhang, H. and He, J.},
  title     = {VTPL: Visual and text prompt learning for visual-language models},
  journal   = JVCIR,
  volume    = {104},
  year      = {2024},
  pages     = {104280},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236918}
}
@article{bb242003,
  author    = {Liu, L.C. and Wang, N.N. and Liu, D. and Yang, X. and Gao, X.B. and Liu, T.L.},
  title     = {Towards Specific Domain Prompt Learning via Improved Text Label Optimization},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {10805--10815},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236919}
}
@article{bb242004,
  author    = {Liu, X. and Wu, J. and Yang, W.F. and Zhou, X. and Zhang, T.Z.},
  title     = {Multi-Modal Attribute Prompting for Vision-Language Models},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {11},
  month     = nov,
  year      = {2024},
  pages     = {11579--11591},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236920}
}
@article{bb242005,
  author    = {Jiang, H.J. and Zhang, J.K. and Huang, R. and Ge, C.J. and Ni, Z. and Song, S. and Huang, G.},
  title     = {Cross-modal adapter for vision-language retrieval},
  journal   = PR,
  volume    = {159},
  year      = {2025},
  pages     = {111144},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236921}
}
@article{bb242006,
  author    = {Yellinek, N. and Karlinsky, L. and Giryes, R.},
  title     = {3VL: Using Trees to Improve Vision-Language Models' Interpretability},
  journal   = IP,
  volume    = {34},
  year      = {2025},
  pages     = {495--509},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236922}
}
@article{bb242007,
  author    = {Yang, L.F. and Li, X. and Wang, Y.Z. and Wang, X.L. and Yang, J.},
  title     = {Fine-Grained Visual Text Prompting},
  journal   = PAMI,
  volume    = {47},
  number    = {3},
  month     = mar,
  year      = {2025},
  pages     = {1594--1609},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236923}
}
@article{bb242008,
  author    = {Wang, F. and Han, Z.Y. and Liu, X. and Yin, Y.L. and Gao, X.},
  title     = {CTPT: Continual Test-time Prompt Tuning for vision-language models},
  journal   = PR,
  volume    = {161},
  year      = {2025},
  pages     = {111300},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236924}
}
@article{bb242009,
  author    = {Liang, N. and Liu, Y.},
  title     = {DPO: Discrete Prompt Optimization for Vision-Language Models},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {671--675},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236925}
}
@article{bb242010,
  author    = {Ondeng, O. and Ouma, H. and Akuon, P.},
  title     = {Enriching visual feature representations for vision-language tasks using spectral transforms},
  journal   = IVC,
  volume    = {154},
  year      = {2025},
  pages     = {105390},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236926}
}
@article{bb242011,
  author    = {Xu, C. and Zhu, Y.H. and Shen, H.C. and Chen, B.H. and Liao, Y.X. and Chen, X.X. and Wang, L.M.},
  title     = {Progressive Visual Prompt Learning with Contrastive Feature Re-formation},
  journal   = IJCV,
  volume    = {133},
  number    = {2},
  month     = feb,
  year      = {2025},
  pages     = {511--526},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236927}
}
@article{bb242012,
  author    = {Long, S. and Zhao, Z. and Yuan, J.K. and Tan, Z.C. and Liu, J.J. and Feng, J.Y. and Wang, S.S. and Wang, J.D.},
  title     = {Mutual Prompt Leaning for Vision Language Models},
  journal   = IJCV,
  volume    = {133},
  number    = {3},
  month     = mar,
  year      = {2025},
  pages     = {1258--1276},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236928}
}
@article{bb242013,
  author    = {Yin, J.H. and Zhang, X.Y. and Wu, L. and Wang, X.J.},
  title     = {Context-aware prompt learning for test-time vision recognition with frozen vision-language model},
  journal   = PR,
  volume    = {162},
  year      = {2025},
  pages     = {111359},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236929}
}
@article{bb242014,
  author    = {Chen, Y. and Zhang, S. and Sun, Y. and Yang, J. and Liang, W.J. and Wang, H.R.},
  title     = {Artificial-Spiking Hierarchical Networks for Vision-Language Representation Learning},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {3},
  month     = mar,
  year      = {2025},
  pages     = {2768--2781},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236930}
}
@article{bb242015,
  author    = {Li, B.Z. and Wang, S.R. and Wang, S.Q. and Ye, Y.},
  title     = {High Efficiency Image Compression for Large Visual-Language Models},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {3},
  month     = mar,
  year      = {2025},
  pages     = {2870--2880},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236931}
}
@article{bb242016,
  author    = {Liu, L.C. and Wang, N.N. and Zhou, D.W. and Liu, D.C. and Yang, X. and Gao, X.B. and Liu, T.L.},
  title     = {Generalizable Prompt Learning via Gradient Constrained Sharpness-Aware Minimization},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {1100--1113},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236932}
}
@article{bb242017,
  author    = {Lu, Z. and Bai, J. and Li, X. and Xiao, Z. and Wang, X.C.},
  title     = {Task-to-Instance Prompt Learning for Vision-Language Models at Test Time},
  journal   = IP,
  volume    = {34},
  year      = {2025},
  pages     = {1908--1920},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236933}
}
@article{bb242018,
  author    = {Fang, Z.Q. and Yuan, Z.H. and Li, Z.Y. and Chen, J.Y. and Kuang, K. and Yao, Y.F. and Wu, F.},
  title     = {Cross-Modality Image Interpretation via Concept Decomposition Vector of Visual-Language Models},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {4},
  month     = apr,
  year      = {2025},
  pages     = {3024--3038},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236934}
}
@article{bb242019,
  author    = {Ramzi, E. and Audebert, N. and Rambour, C. and Araujo, A. and Bitot, X. and Thome, N.},
  title     = {Optimization of Rank Losses for Image Retrieval},
  journal   = PAMI,
  volume    = {47},
  number    = {6},
  month     = jun,
  year      = {2025},
  pages     = {4317--4329},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236935}
}
@inproceedings{bb242020,
  author    = {Lafon, M. and Ramzi, E. and Rambour, C. and Audebert, N. and Thome, N.},
  title     = {Gallop: Learning Global and Local Prompts for Vision-language Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXI: 264--282},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236936}
}
@article{bb242021,
  author    = {Liu, K.C. and Wang, C.Q. and Han, X.D. and Liu, Y.J. and Chen, B.Q.},
  title     = {Generalized Robot Vision-Language Model via Linguistic Foreground-Aware Contrast},
  journal   = IJCV,
  volume    = {133},
  number    = {6},
  month     = jun,
  year      = {2025},
  pages     = {3481--3518},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236937}
}
@article{bb242022,
  author    = {Chen, B.Q. and Liu, Y.J. and Han, X.D. and Wang, C.Q. and Liu, K.C.},
  title     = {Generalized Robot Vision-Language Model via Linguistic Foreground-Aware Contrast},
  journal   = IJCV,
  volume    = {133},
  number    = {7},
  month     = jul,
  year      = {2025},
  pages     = {4971},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236937}
}
@article{bb242023,
  author    = {Yang, L.X. and Zhang, R.Y. and Chen, Q. and Xie, X.H.},
  title     = {Learning with Enriched Inductive Biases for Vision-Language Models},
  journal   = IJCV,
  volume    = {133},
  number    = {6},
  month     = jun,
  year      = {2025},
  pages     = {3746--3761},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236938}
}
@article{bb242024,
  author    = {Yao, H.T. and Zhang, R. and Lyu, H.H. and Zhang, Y.D. and Xu, C.S.},
  title     = {Bi-Modality Individual-Aware Prompt Tuning for Visual-Language Model},
  journal   = PAMI,
  volume    = {47},
  number    = {8},
  month     = aug,
  year      = {2025},
  pages     = {6352--6368},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236939}
}
@inproceedings{bb242025,
  author    = {Yao, H.T. and Zhang, R. and Xu, C.S.},
  title     = {TCP: Textual-Based Class-Aware Prompt Tuning for Visual-Language Model},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {23438--23448},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236940}
}
@article{bb242026,
  author    = {Hao, Z.W. and Guo, J.Y. and Shen, L. and Luo, Y. and Hu, H. and Wen, Y.G.},
  title     = {ADEM-VL: Adaptive and Embedded Fusion for Efficient Vision-Language Tuning},
  journal   = IJCV,
  volume    = {133},
  number    = {8},
  month     = aug,
  year      = {2025},
  pages     = {5527--5543},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236941}
}
@article{bb242027,
  author    = {Zeng, R.F. and Yang, Z.P. and Yu, R.Y. and Zhang, Y.G.},
  title     = {Supplementary Prompt Learning for Vision-Language Models},
  journal   = IJCV,
  volume    = {133},
  number    = {8},
  month     = aug,
  year      = {2025},
  pages     = {5822--5839},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236942}
}
@article{bb242028,
  author    = {Liu, K.C. and Liu, Y.J. and Chen, B.Q.},
  title     = {General 3D Vision-Language Model With Fast Rendering and Pre-Training Vision-Language Alignment},
  journal   = PAMI,
  volume    = {47},
  number    = {9},
  month     = sep,
  year      = {2025},
  pages     = {7352--7368},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236943}
}
@article{bb242029,
  author    = {Gao, Y.S. and Zhu, Z.X. and Wang, S.S.},
  title     = {Mixture of coarse and fine-grained prompt tuning for vision-language model},
  journal   = PR,
  volume    = {170},
  year      = {2026},
  pages     = {112074},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236944}
}
@article{bb242030,
  author    = {Hao, F.S. and Liu, L. and Wu, F.X. and Zhang, Q.S. and Cheng, J.},
  title     = {Textual Embeddings are Good Class-Aware Visual Prompts for Adapting Vision-Language Models},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {2992--2996},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236945}
}
@article{bb242031,
  author    = {Liu, J. and Lu, Z.Q. and Luo, H. and Lu, Z.M. and Zheng, Y.M.},
  title     = {Progressive Multi-Prompt Learning for Vision-Language Models},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {10},
  month     = oct,
  year      = {2025},
  pages     = {9562--9574},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236946}
}
@article{bb242032,
  author    = {Wang, W.X. and He, X.J. and Zhang, Y. and Guo, L.T. and Shen, J.C. and Li, J.Y. and Liu, J.},
  title     = {CM-MaskSD: Cross-Modality Masked Self-Distillation for Referring Image Segmentation},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {6906--6916},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236947}
}
@article{bb242033,
  author    = {Zhang, E. and Zhu, B. and Chen, Y.Y. and Miao, Q.H. and Tang, M. and Wang, J.Q.},
  title     = {Optimization of Prompt Learning via Multi-Knowledge Representation for Vision-Language Models},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {7557--7569},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236948}
}
@article{bb242034,
  author    = {Park, K.Y. and An, S. and Lee, Y.J. and Kim, D.H.},
  title     = {Learning Compositionality from Multifaceted Synthetic Data for Language-based Object Detection},
  journal   = IJCV,
  volume    = {133},
  number    = {11},
  month     = nov,
  year      = {2025},
  pages     = {7873--7896},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236949}
}
@inproceedings{bb242035,
  author    = {Park, K.Y. and Saito, K. and Kim, D.H.},
  title     = {Weak-to-strong Compositional Learning from Generative Models for Language-based Object Detection},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XXIII: 1--19},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236950}
}
@article{bb242036,
  author    = {Sarto, S. and Moratelli, N. and Cornia, M. and Baraldi, L. and Cucchiara, R.},
  title     = {Positive-Augmented Contrastive Learning for Vision-and-Language Evaluation and Training},
  journal   = IJCV,
  volume    = {133},
  number    = {11},
  month     = nov,
  year      = {2025},
  pages     = {7647--7671},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236951}
}
@inproceedings{bb242037,
  author    = {Stefanini, M. and Cornia, M. and Baraldi, L. and Cucchiara, R.},
  title     = {A Novel Attention-based Aggregation Function to Combine Vision and Language},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {1212--1219},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236952}
}
@article{bb242038,
  author    = {Liu, L.C. and Wang, N.N. and Chen, C. and Liu, D. and Yang, X. and Gao, X.B. and Liu, T.L.},
  title     = {Frequency-Based Comprehensive Prompt Learning for Vision-Language Models},
  journal   = PAMI,
  volume    = {47},
  number    = {12},
  month     = dec,
  year      = {2025},
  pages     = {11974--11989},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236953}
}
@article{bb242039,
  author    = {Yang, X. and Zhong, X.Y. and Wang, N.N.},
  title     = {Distribution-Aware Prompt Learning for Vision-Language Models With Dynamic Boundary Prototype},
  journal   = IP,
  volume    = {35},
  year      = {2026},
  pages     = {3537--3549},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236954}
}
@article{bb242040,
  author    = {Li, J.C. and Gao, M. and Tang, S.L. and Wei, L.H. and Xiao, J. and Wu, F. and Hong, R.C. and Wang, M. and Tian, Q.},
  title     = {Structure-Induced Gradient Regulation for Generalizable Vision-Language Models},
  journal   = PAMI,
  volume    = {48},
  number    = {1},
  month     = jan,
  year      = {2026},
  pages     = {219--235},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236955}
}
@inproceedings{bb242041,
  author    = {Li, J.C. and Gao, M. and Wei, L.H. and Tang, S.L. and Zhang, W.Q. and Li, M.Z. and Ji, W. and Tian, Q. and Chua, T.S. and Zhuang, Y.T.},
  title     = {Gradient-Regulated Meta-Prompt Learning for Generalizable Vision-Language Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2551--2562},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236956}
}
@article{bb242042,
  author    = {Xiao, Y.S. and Liu, X.L. and Cheng, Q.J. and Yin, Z.F. and Liang, S.Y. and Li, J.P. and Shao, J. and Liu, A.S. and Tao, D.C.},
  title     = {GenderBias-VL: Benchmarking Gender Bias in Vision Language Models via Counterfactual Probing},
  journal   = IJCV,
  volume    = {133},
  number    = {12},
  month     = dec,
  year      = {2025},
  pages     = {8332--8355},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236957}
}
@article{bb242043,
  author    = {Chen, T.Y. and Ai, J.L.},
  title     = {Hierarchical Prompt Engineering for Remote Sensing Scene Understanding with Large Vision-Language Models},
  journal   = RS,
  volume    = {17},
  number    = {22},
  year      = {2025},
  pages     = {3727},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236958}
}
@article{bb242044,
  author    = {Xu, X. and Qin, L. and Che, W. and Kan, M.Y.},
  title     = {Manager: Aggregating Insights From Unimodal Experts in Two-Tower VLMs and MLLMs},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {12},
  month     = dec,
  year      = {2025},
  pages     = {12278--12291},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236959}
}
@article{bb242045,
  author    = {Kim, G. and Kim, S. and Lee, S.},
  title     = {Decoupling augmentation bias in prompt learning for vision-language models},
  journal   = PR,
  volume    = {172},
  year      = {2026},
  pages     = {112630},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236960}
}
@inproceedings{bb242046,
  author    = {Kim, G. and Kim, S. and Lee, S.},
  title     = {AAPL: Adding Attributes to Prompt Learning for Vision-Language Models},
  booktitle = Prompting24,
  year      = {2024},
  pages     = {1572--1582},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236961}
}
@article{bb242047,
  author    = {Guo, Y.C. and Gu, X.D.},
  title     = {MMRL++: Parameter-Efficient and Interaction-Aware Representation Learning for Vision-Language Models},
  journal   = IJCV,
  volume    = {134},
  number    = {1},
  month     = jan,
  year      = {2026},
  pages     = {11},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236962}
}
@inproceedings{bb242048,
  author    = {Guo, Y.C. and Gu, X.D.},
  title     = {MMRL: Multi-Modal Representation Learning for Vision-Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {25015--25025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236963}
}
@article{bb242049,
  author    = {Ye, W.X. and Wang, W. and Liu, Y.H. and Song, Y. and Ren, B. and Bi, W. and Cucchiara, R. and Sebe, N.},
  title     = {A Unified Masked Jigsaw Puzzle Framework for Vision and Language Models},
  journal   = PAMI,
  volume    = {48},
  number    = {2},
  month     = feb,
  year      = {2026},
  pages     = {1873--1887},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236964}
}
@article{bb242050,
  author    = {Wang, Z.Y. and Liu, L. and Wan, G. and Lu, Y.C. and Zheng, F.J. and Sun, G. and Huang, Y.X. and Guo, S.H. and Li, X. and Yuan, L.},
  title     = {SAREval: A Multi-Dimensional and Multi-Task Benchmark for Evaluating Visual Language Models on SAR Image Understanding},
  journal   = RS,
  volume    = {18},
  number    = {1},
  year      = {2026},
  pages     = {82},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236965}
}
@article{bb242051,
  author    = {Wu, J.F. and Jiang, Y. and Ma, C.F. and Liu, Y.L. and Zhao, H.S. and Yuan, Z.H. and Bai, S. and Bai, X.},
  title     = {Liquid: Language Models are Scalable and Unified Multi-Modal Generators},
  journal   = IJCV,
  volume    = {134},
  number    = {1},
  month     = jan,
  year      = {2026},
  pages     = {39},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236966}
}
@article{bb242052,
  author    = {Su, Y.L. and Liu, X.L. and Huang, Z. and Zhao, Y.W. and Hong, R.C. and Wang, M.},
  title     = {AttriPrompt: Class Attribute-Aware Prompt Tuning for Vision-Language Model},
  journal   = IP,
  volume    = {35},
  year      = {2026},
  pages     = {1395--1407},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236967}
}
@article{bb242053,
  author    = {Li, Y.W. and Zhang, Y.C. and Wang, C.Y. and Zhong, Z.S. and Chen, Y.X. and Chu, R. and Liu, S. and Jia, J.Y.},
  title     = {Mini-Gemini: Mining the Potential of Multi-Modality Vision Language Models},
  journal   = PAMI,
  volume    = {48},
  number    = {3},
  month     = mar,
  year      = {2026},
  pages     = {3530--3543},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236968}
}
@article{bb242054,
  author    = {Xu, N. and Yao, K. and Yang, R. and Li, C.},
  title     = {Visual-language active search for wide-area remote sensing imagery},
  journal   = PR,
  volume    = {175},
  year      = {2026},
  pages     = {113106},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236969}
}
@article{bb242055,
  author    = {Chen, Y. and Fu, S. and Zhang, Y.},
  title     = {MoPD: Mixture-of-Prompts Distillation for Vision-Language Models},
  journal   = MultMed,
  volume    = {28},
  year      = {2026},
  pages     = {1943--1954},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236970}
}
@article{bb242056,
  author    = {Qi, Y. and Li, H.X. and Song, Y.Q. and Wu, X.X. and Luo, J.B.},
  title     = {How Vision-Language Tasks Benefit From Large Pre-Trained Models: A Survey},
  journal   = MultMed,
  volume    = {28},
  year      = {2026},
  pages     = {1188--1210},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236971}
}
@article{bb242057,
  author    = {Lee, J.J.},
  title     = {Language-guided invariance probing of vision-language models},
  journal   = PRL,
  volume    = {202},
  year      = {2026},
  pages     = {108--113},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236972}
}
@article{bb242058,
  author    = {Zhang, Y. and Bian, S. and Liu, J. and Song, R.J. and Zhu, D. and Zhang, C.H. and Hua, C.C.},
  title     = {Robot Active Task Cognition: Situation-Aware Task Planning With Large Language Models},
  journal   = CirSysVideo,
  volume    = {36},
  number    = {4},
  month     = apr,
  year      = {2026},
  pages     = {4722--4733},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236973}
}
@article{bb242059,
  author    = {Zhao, X.Y. and Li, X.T. and Duan, H.D. and Huang, H. and Li, Y. and Chen, K. and Yang, H.},
  title     = {MG-LLaVA: Toward Multi-Granularity Visual Instruction Tuning},
  journal   = CirSysVideo,
  volume    = {36},
  number    = {4},
  month     = apr,
  year      = {2026},
  pages     = {4464--4478},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236974}
}
@inproceedings{bb242060,
  author    = {Lin, X. and Li, W.X. and Guo, S. and Wang, L.H. and Huang, D.},
  title     = {GIP: Gated Interaction Prompt for Parameter Efficient Vision-Language Fine-Tuning},
  booktitle = ICIP25,
  year      = {2025},
  pages     = {617--622},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236975}
}
@inproceedings{bb242061,
  author    = {Valois, P.H.V. and Satav, D. and de Campos, R.A.P. and Pratamasunu, G.Q.O. and Fukui, K.},
  title     = {Vision Language Model Interpretability with Concept Guided Decoding},
  booktitle = ICIP25,
  year      = {2025},
  pages     = {397--402},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236976}
}
@inproceedings{bb242062,
  author    = {Saravanan, D. and Tapaswi, M. and Gandhi, V.},
  title     = {Investigating Mechanisms for In-Context Vision Language Binding},
  booktitle = InterpVis25,
  year      = {2025},
  pages     = {4852--4856},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236977}
}
@inproceedings{bb242063,
  author    = {Selvam, S. and Rajendran, R.K. and Sankaradas, M. and Raghunathan, A. and Chakradhar, S.T.},
  title     = {SimCache: Similarity Caching for Efficient VLM-based Scene Understanding},
  booktitle = LargeVM25,
  year      = {2025},
  pages     = {3318--3327},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236978}
}
@inproceedings{bb242064,
  author    = {Tushar, P. and Pandey, E. and Austria, L.D.B. and Loo, Y.Y. and Lim, J.H. and Atmosukarto, I. and Lock, D.S.C.},
  title     = {MerCulture: A Comprehensive Benchmark to Evaluate Vision-Language Models on Cultural Understanding in Singapore},
  booktitle = {AIBench25},
  year      = {2025},
  pages     = {565--574},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236979}
}
@inproceedings{bb242065,
  author    = {Ma, Z.Y. and Gou, C. and Shi, H. and Sun, B. and Li, S.T. and Rezatofighi, H. and Cai, J.F.},
  title     = {DrVideo: Document Retrieval Based Long Video Understanding},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {18936--18946},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236980}
}
@inproceedings{bb242066,
  author    = {Dhouib, M. and Buscaldi, D. and Vanier, S. and Shabou, A.},
  title     = {PACT: Pruning and Clustering-Based Token Reduction for Faster Visual Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14582--14592},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236981}
}
@inproceedings{bb242067,
  author    = {Yu, C. and Chen, T. and Gan, Z.X.},
  title     = {Once-Tuning-Multiple-Variants: Tuning Once and Expanded as Multiple Vision-Language Model Variants},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14712--14722},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236982}
}
@inproceedings{bb242068,
  author    = {Hao, F.S. and He, F.X. and Wu, F. and Wang, T. and Song, C.Q. and Cheng, J.},
  title     = {Task-Aware Clustering for Prompting Vision-Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14745--14755},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236983}
}
@inproceedings{bb242069,
  author    = {Koleilat, T. and Asgariandehkordi, H. and Rivaz, H. and Xiao, Y.M.},
  title     = {BiomedCoOp: Learning to Prompt for Biomedical Vision-Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14766--14776},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236984}
}
@inproceedings{bb242070,
  author    = {Nath, V. and Li, W.Q. and Yang, D. and Myronenko, A. and Zheng, M.X. and Lu, Y. and Liu, Z.J. and Yin, H.X. and Law, Y.M. and Tang, Y.C. and Guo, P.F. and Zhao, C. and Xu, Z.Y. and He, Y.F. and Harmon, S. and Simon, B. and Heinrich, G. and Aylward, S. and Edgar, M. and Zephyr, M. and Molchanov, P. and Turkbey, B. and Roth, H. and Xu, D.},
  title     = {VILA-M3: Enhancing Vision-Language Models with Medical Expert Knowledge},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14788--14798},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236985}
}
@inproceedings{bb242071,
  author    = {Du, H. and Wu, B. and Lu, Y. and Mao, Z.D.},
  title     = {SVLTA: Benchmarking Vision-Language Temporal Alignment via Synthetic Video Situation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {13798--13809},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236986}
}
@inproceedings{bb242072,
  author    = {Kaduri, O. and Bagon, S. and Dekel, T.},
  title     = {What's in the Image? A Deep-Dive into the Vision of Vision Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14549--14558},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236987}
}
@inproceedings{bb242073,
  author    = {Xing, L. and Huang, Q.D. and Dong, X.Y. and Lu, J.J. and Zhang, P. and Zang, Y.H. and Cao, Y.H. and He, C.H. and Wang, J.Q. and Wu, F. and Lin, D.},
  title     = {Conical Visual Concentration for Efficient Large Vision-Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14593--14603},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236988}
}
@inproceedings{bb242074,
  author    = {Zhang, L. and Yang, Q. and Agrawal, A.},
  title     = {Assessing and Learning Alignment of Unimodal Vision and Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14604--14614},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236989}
}
@inproceedings{bb242075,
  author    = {Sehgal, A. and Yuan, P. and Hu, Z. and Yue, Y.S. and Sun, J.J. and Chaudhuri, S.},
  title     = {Self-Evolving Visual Concept Library using Vision-Language Critics},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {13124--13134},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236990}
}
@inproceedings{bb242076,
  author    = {Wang, W.H. and Wang, L. and Gu, X.T. and Huang, S.Y. and Dong, Y.X. and Tang, J.},
  title     = {MotionBench: Benchmarking and Improving Fine-Grained Video Motion Understanding for Vision Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {8450--8460},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236991}
}
@inproceedings{bb242077,
  author    = {Nacson, M.S. and Aberdam, A. and Ganz, R. and Avraham, E.B. and Golts, A. and Kittenplon, Y. and Mazor, S. and Litman, R.},
  title     = {DocVLM: Make Your VLM an Efficient Reader},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29005--29015},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236992}
}
@inproceedings{bb242078,
  author    = {Alhamoud, K. and Alshammari, S. and Tian, Y.L. and Li, G.H. and Torr, P.H.S. and Kim, Y. and Ghassemi, M.},
  title     = {Vision-Language Models Do Not Understand Negation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29612--29622},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236993}
}
@inproceedings{bb242079,
  author    = {Schmalfuss, J. and Chang, N. and VS, V. and Shen, M. and Bruhn, A. and Alvarez, J.M.},
  title     = {PARC: A Quantitative Framework Uncovering the Symmetries within Vision Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {25081--25091},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236994}
}
@inproceedings{bb242080,
  author    = {Xiao, J.Q. and Sang, S. and Zhi, T.C. and Liu, J. and Yan, Q. and Luo, L.J. and Yuan, B.},
  title     = {COAP: Memory-Efficient Training with Correlation-Aware Gradient Projection},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {30116--30126},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236995}
}
@inproceedings{bb242081,
  author    = {Zhu, Y.Q. and Wang, Z.Y. and Zhang, C. and Li, P. and Liu, Y.},
  title     = {CoSpace: Benchmarking Continuous Space Perception Ability for Vision-Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29569--29579},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236996}
}
@inproceedings{bb242082,
  author    = {Kang, H.Q. and Sachdeva, E. and Gupta, P. and Bae, S.J. and Lee, K.},
  title     = {GFlowVLM: Enhancing Multi-step Reasoning in Vision-Language Models with Generative Flow Networks},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {3815--3825},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236997}
}
@inproceedings{bb242083,
  author    = {Chen, J.H. and Yang, J.W. and Wu, H.P. and Li, D. and Gao, J.F. and Zhou, T.Y. and Xiao, B.},
  title     = {Florence-VL: Enhancing Vision-Language Models with Generative Vision Encoder and Depth-Breadth Fusion},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {24928--24938},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236998}
}
@inproceedings{bb242084,
  author    = {Yang, C.Y. and Dong, X. and Zhu, X.Z. and Su, W.J. and Wang, J.H. and Tian, H. and Chen, Z. and Wang, W.H. and Lu, L.W. and Dai, J.F.},
  title     = {PVC: Progressive Visual Token Compression for Unified Image and Video Processing in Large Vision-Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {24939--24949},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236999}
}
@inproceedings{bb242085,
  author    = {Zhang, K. and Li, J.Y. and Li, Z. and Zhou, S.K.},
  title     = {DH-Set: Improving Vision-Language Alignment with Diverse and Hybrid Set-Embeddings Learning},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {24993--25003},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237000}
}
@inproceedings{bb242086,
  author    = {Zhu, B. and Cui, J. and Zhang, H.W. and Zhang, C.},
  title     = {Project-Probe-Aggregate: Efficient Fine-Tuning for Group Robustness},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {25487--25496},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237001}
}
@inproceedings{bb242087,
  author    = {Li, H.Y. and Wang, L. and Wang, C. and Jiang, J. and Peng, Y. and Long, G.D.},
  title     = {DPC: Dual-Prompt Collaboration for Tuning Vision-Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {25623--25632},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237002}
}
@inproceedings{bb242088,
  author    = {Saravanan, D. and Gupta, V. and Singh, D. and Khan, Z. and Gandhi, V. and Tapaswi, M.},
  title     = {VELOCITI: Benchmarking Video-Language Compositional Reasoning with Strict Entailment},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {18914--18924},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237003}
}
@inproceedings{bb242089,
  author    = {Pan, B. and Li, Q. and Tang, X.Y. and Huang, W. and Fang, Z. and Liu, F. and Wang, J.Y. and Yu, J.Y. and Shi, Y.},
  title     = {NLPrompt: Noise-Label Prompt Learning for Vision-Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {19963--19973},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237004}
}
@inproceedings{bb242090,
  author    = {Zhang, Y.T. and Chen, L. and Zheng, G.D. and Gao, Y.F. and Zheng, R. and Fu, J. and Yin, Z.F. and Jin, S. and Qiao, Y. and Huang, X.J. and Zhao, F. and Gui, T. and Shao, J.},
  title     = {SPA-VL: A Comprehensive Safety Preference Alignment Dataset for Vision Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {19867--19878},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237005}
}
@inproceedings{bb242091,
  author    = {Bhattacharjee, S.S. and Campbell, D. and Shome, R.},
  title     = {Believing is Seeing: Unobserved Object Detection using Generative Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {19366--19377},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237006}
}
@inproceedings{bb242092,
  author    = {Zhou, E. and Su, Q. and Chi, C. and Zhang, Z.Z. and Wang, Z.Y. and Huang, T.J. and Sheng, L. and Wang, H.},
  title     = {Code-as-Monitor: Constraint-aware Visual Programming for Reactive and Proactive Robotic Failure Detection},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {6919--6929},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237007}
}
@inproceedings{bb242093,
  author    = {Zhou, W.J. and Tao, M. and Zhao, C.Y. and Guo, H.Y. and Dong, H.H. and Tang, M. and Wang, J.Q.},
  title     = {PhysVLM: Enabling Visual Language Models to Understand Robotic Physical Reachability},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {6940--6949},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237008}
}
@inproceedings{bb242094,
  author    = {Song, C.H. and Blukis, V. and Tremblay, J. and Tyree, S. and Su, Y. and Birchfield, S.},
  title     = {RoboSpatial: Teaching Spatial Understanding to 2D and 3D Vision-Language Models for Robotics},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {15768--15780},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237009}
}
@inproceedings{bb242095,
  author    = {Lozano, A. and Sun, M.W. and Burgess, J. and Chen, L. and Nirschl, J.J. and Gu, J. and Lopez, I. and Aklilu, J. and Rau, A. and Katzer, A.W. and Zhang, Y.H. and Chiu, C. and Wang, X.H. and Song, A.S. and Tibshirani, R. and Yeung Levy, S.},
  title     = {BIOMEDICA: An Open Biomedical Image-Caption Archive, Dataset, and Vision-Language Models Derived from Scientific Literature},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {19724--19735},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237010}
}
@inproceedings{bb242096,
  author    = {Xiao, R. and Kim, S. and Georgescu, M.I. and Akata, Z. and Alaniz, S.},
  title     = {FLAIR: VLM with Fine-grained Language-informed Image Representations},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {24884--24894},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237011}
}
@inproceedings{bb242097,
  author    = {Wang, X. and Chen, K. and Zhang, J.M. and Chen, J.J. and Ma, X.},
  title     = {TAPT: Test-Time Adversarial Prompt Tuning for Robust Inference in Vision-Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {19910--19920},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237012}
}
@inproceedings{bb242098,
  author    = {Vasu, P.K.A. and Faghri, F. and Li, C.L. and Koc, C. and True, N. and Antony, A. and Santhanam, G. and Gabriel, J. and Grasch, P. and Tuzel, O. and Pouransari, H.},
  title     = {FastVLM: Efficient Vision Encoding for Vision Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {19769--19780},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237013}
}
@inproceedings{bb242099,
  author    = {Chen, Q.Z. and Wang, C. and Wang, D. and Zhang, T. and Li, W. and He, X.F.},
  title     = {Lifelong Knowledge Editing for Vision Language Models with Low-Rank Mixture-of-Experts},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {9455--9466},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT237014}
}
Last update: May 24, 2026 at 14:46:09