% Entries bb236300 through bb236399 (CLIP / vision-language pre-training).
% Venue and journal names (ECCV24, CVPR25, PAMI, ...) are bare string macros;
% their definitions are not part of this section and must be loaded with it.
% A roman-numeral prefix in a pages field (e.g. "XXXV: 55--72") is kept as in
% the source; presumably it is the proceedings part number -- TODO confirm.
% Page ranges use "--", months use the predefined three-letter macros, and
% author initials are space-separated so abbreviating styles keep all of them.

@inproceedings{bb236300,
  author    = {Cao, Y. K. and Zhang, J. N. and Frittoli, L. and Cheng, Y. Q. and Shen, W. M. and Boracchi, G.},
  title     = {AdaCLIP: Adapting CLIP with Hybrid Learnable Prompts for Zero-shot Anomaly Detection},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XXXV: 55--72},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231275},
}

@article{bb236301,
  author    = {Zhang, Z. Q. and Jiang, Y. and Wang, Y. and Xie, B. and Zhang, W. and Li, Y. H. and Chen, Z. and Jin, X. and Zeng, W. J.},
  title     = {Exploring Contrastive Pre-Training for Domain Connections in Medical Image Segmentation},
  journal   = MedImg,
  volume    = {44},
  year      = {2025},
  number    = {4},
  month     = apr,
  pages     = {1686--1698},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231276},
}

@article{bb236302,
  author    = {Zhang, W. B. and Zhang, Y. F. and Lin, J. F. and Huang, B. Q. and Zhang, J. and Yu, W. H.},
  title     = {DC-CLIP: Multilingual CLIP Compression via vision-language distillation and vision-language alignment},
  journal   = PR,
  volume    = {164},
  year      = {2025},
  pages     = {111547},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231277},
}

@article{bb236303,
  author    = {Wang, Y. F. and Kang, G. L.},
  title     = {Attention head purification: A new perspective to harness CLIP for domain generalization},
  journal   = IVC,
  volume    = {157},
  year      = {2025},
  pages     = {105511},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231278},
}

@article{bb236304,
  author    = {He, X. J. and Chen, S. and Ma, F. and Huang, Z. C. and Jin, X. J. and Liu, Z. and Fu, D. M. and Yang, Y. and Liu, J. and Feng, J. S.},
  title     = {VLAB: Enhancing Video Language Pretraining by Feature Adapting and Blending},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {2168--2180},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231279},
}

@article{bb236305,
  author    = {Fu, T. C. and Zhang, J. H. and Li, F. X. and Wei, P. and Zeng, X. L. and Zhou, W.},
  title     = {Multimodal alignment augmentation transferable attack on vision-language pre-training models},
  journal   = PRL,
  volume    = {191},
  year      = {2025},
  pages     = {131--137},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231280},
}

@article{bb236306,
  author    = {Chen, J. and Yuan, H. and Xie, B.},
  title     = {MIF: Multi-source information fusion for few-shot classification with CLIP},
  journal   = PRL,
  volume    = {192},
  year      = {2025},
  pages     = {113--121},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231281},
}

@article{bb236307,
  author    = {Luo, G. and Zhou, Y. Y. and Huang, M. L. and Ren, T. and Sun, X. S. and Ji, R. R.},
  title     = {MoIL: Momentum Imitation Learning for Efficient Vision-Language Adaptation},
  journal   = PAMI,
  volume    = {47},
  year      = {2025},
  number    = {7},
  month     = jul,
  pages     = {5192--5204},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231282},
}

@article{bb236308,
  author    = {Lu, Z. Q. and Liu, M. and Yu, Y. L. and Wang, Z. and Li, X. and Han, J. G.},
  title     = {Variational Adapter: Improving CLIP in Data-Imbalanced Scenarios},
  journal   = CirSysVideo,
  volume    = {35},
  year      = {2025},
  number    = {6},
  month     = jun,
  pages     = {5251--5264},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231283},
}

@article{bb236309,
  author    = {Tu, W. J. and Deng, W. J. and Gedeon, T.},
  title     = {Toward a Holistic Evaluation of Robustness in CLIP Models},
  journal   = PAMI,
  volume    = {47},
  year      = {2025},
  number    = {9},
  month     = sep,
  pages     = {8280--8296},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231284},
}

@article{bb236310,
  author    = {Cao, M. X. and Xie, W. Y. and Zhang, X. and Zhang, J. Q. and Jiang, K. and Lei, J. and Li, Y. S.},
  title     = {{M$^{3}$amba}: CLIP-Driven Mamba Model for Multi-Modal Remote Sensing Classification},
  journal   = CirSysVideo,
  volume    = {35},
  year      = {2025},
  number    = {8},
  month     = aug,
  pages     = {7605--7617},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231285},
}

@article{bb236311,
  author    = {Wei, X. Y. and Kurtz, C. and Cloppet, F.},
  title     = {Enhancing vision-language contrastive representation learning using domain knowledge},
  journal   = CVIU,
  volume    = {259},
  year      = {2025},
  pages     = {104403},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231286},
}

@inproceedings{bb236312,
  author    = {Wei, X. Y. and Kurtz, C. and Cloppet, F.},
  title     = {Relaxing Binary Constraints in Contrastive Vision-Language Medical Representation Learning},
  booktitle = WACV25,
  year      = {2025},
  pages     = {4462--4471},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231287},
}

@article{bb236313,
  author    = {Jia, X. J. and Gao, S. S. and Guo, Q. and Qin, S. and Ma, K. and Huang, Y. H. and Liu, Y. and Tsang, I. W. and Cao, X. C.},
  title     = {Semantic-Aligned Adversarial Evolution Triangle for High-Transferability Vision-Language Attack},
  journal   = PAMI,
  volume    = {47},
  year      = {2025},
  number    = {10},
  month     = oct,
  pages     = {8489--8505},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231288},
}

@article{bb236314,
  author    = {You, S. and Li, J. and Bao, B. K.},
  title     = {Pro-MA: Progressively Margin-Based Attribution in Pretrained Vision-Language Models},
  journal   = MultMedMag,
  volume    = {32},
  year      = {2025},
  number    = {2},
  month     = apr,
  pages     = {53--64},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231289},
}

@article{bb236315,
  author    = {Wang, Y. Z. and Hu, W. B. and Dong, Y. P. and Zhang, H. W. and Su, H. and Hong, R. C.},
  title     = {Exploring Transferability of Multimodal Adversarial Samples for Vision-Language Pre-Training Models with Contrastive Learning},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {6410--6421},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231290},
}

@article{bb236316,
  author    = {Lai, X. and Ke, X. and Xu, H. B. and Wu, S. H. and Guo, W. Z.},
  title     = {MSP: Multimodal Self-Attention Prompt Learning},
  journal   = IP,
  volume    = {34},
  year      = {2025},
  pages     = {5978--5988},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231291},
}

@inproceedings{bb236317,
  author    = {Abbasi, R. and Nazari, A. and Sefid, A. and Banayeeanzade, M. and Rohban, M. H. and Baghshah, M. S.},
  title     = {CLIP Under the Microscope: A Fine-Grained Analysis of Multi-Object Representation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {9308--9317},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231292},
}

@inproceedings{bb236318,
  author    = {Choi, H. and Jang, Y. K. and Eom, C.},
  title     = {GOAL: Global-local Object Alignment Learning},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {4070--4079},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231293},
}

@inproceedings{bb236319,
  author    = {Li, S. and Liu, F. and Hao, Z. and Wang, X. and Li, L. L. and Liu, X. and Chen, P. and Ma, W. P.},
  title     = {Logits DeConfusion with CLIP for Few-Shot Learning},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {25411--25421},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231294},
}

@inproceedings{bb236320,
  author    = {Zhang, Z. Y. and Yu, Y. and Chen, Y. C. and Yang, X. and Yeo, S. Y.},
  title     = {MedUnifier: Unifying Vision-and-Language Pre-training on Medical Data with Vision Generation Task using Discrete Visual Representations},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29744--29755},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231295},
}

@inproceedings{bb236321,
  author    = {Chen, Z. L. and Huang, X. and Fan, X. X. and Wang, K. and Zhou, Y. and Guan, Q. L. and Lin, L.},
  title     = {Reproducible Vision-Language Models Meet Concepts Out of Pre-Training},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14701--14711},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231296},
}

@inproceedings{bb236322,
  author    = {Wu, S. and Zhang, J. and Zeng, P. P. and Gao, L. and Song, J. K. and Shen, H. T.},
  title     = {Skip Tuning: Pre-trained Vision-Language Models are Effective and Efficient Adapters Themselves},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14723--14732},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231297},
}

@inproceedings{bb236323,
  author    = {Rui, S. H. and Chen, L. Z. and Tang, Z. Y. and Wang, L. L. and Liu, M. and Zhang, S. T. and Wang, X. S.},
  title     = {Multi-modal Vision Pre-training for Medical Image Analysis},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {5164--5174},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231298},
}

@inproceedings{bb236324,
  author    = {Wang, H. C. and Ju, C. and Lin, W. X. and Xiao, S. and Chen, M. T. and Huang, Y. X. and Liu, C. and Yao, M. and Lan, J. S. and Chen, Y. and Liu, Q. W. and Wang, Y. F.},
  title     = {Advancing Myopia To Holism: Fully Contrastive Language-Image Pre-training},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29791--29802},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231299},
}

@inproceedings{bb236325,
  author    = {Wang, Z. and Lee, G. H.},
  title     = {g3D-LF: Generalizable 3D-Language Feature Fields for Embodied Tasks},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14191--14202},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231300},
}

@inproceedings{bb236326,
  author    = {Kim, S. and Xiao, R. and Georgescu, M. I. and Alaniz, S. and Akata, Z.},
  title     = {COSMOS: Cross-Modality Self-Distillation for Vision Language Pre-training},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14690--14700},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231301},
}

@inproceedings{bb236327,
  author    = {Gallagher Syed, A. and Senior, H. and Alwazzan, O. and Pontarini, E. and Bombardieri, M. and Pitzalis, C. and Lewis, M. J. and Barnes, M. R. and Rossi, L. and Slabaugh, G.},
  title     = {BioX-CPath: Biologically-driven Explainable Diagnostics for Multistain IHC Computational Pathology},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {10372--10383},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231302},
}

@inproceedings{bb236328,
  author    = {Wen, C. S. and Peng, Z. L. and Huang, Y. and Yang, X. K. and Shen, W.},
  title     = {Domain Generalization in CLIP via Learning with Diverse Text Prompts},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {9559--9569},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231303},
}

@inproceedings{bb236329,
  author    = {Xing, S. and Zhao, Z. Y. and Sebe, N.},
  title     = {CLIP is Strong Enough to Fight Back: Test-time Counterattacks towards Zero-shot Adversarial Robustness of CLIP},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {15172--15182},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231304},
}

@inproceedings{bb236330,
  author    = {Asokan, M. and Wu, K. and Albreiki, F.},
  title     = {FineLIP: Extending CLIP's Reach via Fine-Grained Alignment with Longer Text Inputs},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {14495--14504},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231305},
}

@inproceedings{bb236331,
  author    = {Yang, Y. H. and Deng, J. H. and Li, W. and Duan, L. X.},
  title     = {ResCLIP: Residual Attention for Training-free Dense Vision-language Inference},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29968--29978},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231306},
}

% NOTE(review): the page range below starts and ends on the same page, which
% looks like a truncated end page in the source data -- verify before relying on it.
@inproceedings{bb236332,
  author    = {Xie, S. and Kong, L. and Zheng, Y. J. and Yao, Y. and Tang, Z. and Xing, E. P. and Chen, G. Y. and Zhang, K.},
  title     = {SmartCLIP: Modular Vision-language Alignment with Identification Guarantees},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29780--29780},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231307},
}

@inproceedings{bb236333,
  author    = {Wang, Z. and Zhou, S. and He, S. X. and Huang, H. F. and Yang, L. and Zhang, Z. and Cheng, X. Z. and Ji, S. P. and Jin, T. and Zhao, H. S. and Zhao, Z.},
  title     = {SpatialCLIP: Learning 3D-aware Image Representations from Spatially Discriminative Language},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29656--29666},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231308},
}

@inproceedings{bb236334,
  author    = {Chen, S. X. and Sra, M. and Sen, P.},
  title     = {Instruct-CLIP: Improving Instruction-Guided Image Editing with Automated Data Refinement Using Contrastive Learning},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {28513--28522},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231309},
}

% NOTE(review): "Jiaye, L." may have surname and given name swapped in the
% source data -- left as found; confirm against the published author list.
@inproceedings{bb236335,
  author    = {Feng, Y. H. and Wen, C. S. and Peng, Z. L. and Jiaye, L. and Zhu, S.},
  title     = {Retaining Knowledge and Enhancing Long-Text Representations in CLIP through Dual-Teacher Distillation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {24895--24904},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231310},
}

@inproceedings{bb236336,
  author    = {Pei, G. and Chen, T. and Wang, Y. J. and Cai, X. H. and Shu, X. B. and Zhou, T. F. and Yao, Y. Z.},
  title     = {Seeing What Matters: Empowering CLIP with Patch Generation-to-Selection},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {24862--24872},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231311},
}

@inproceedings{bb236337,
  author    = {Wang, E. and Peng, Z. and Xie, Z. Y. and Yang, F. and Liu, X. L. and Cheng, M. M.},
  title     = {GET: Unlocking the Multi-modal Potential of CLIP for Generalized Category Discovery},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {20296--20306},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231312},
}

@inproceedings{bb236338,
  author    = {Wu, C. E. and Lin, J. H. and Hu, Y. H. and Morgado, P.},
  title     = {Patch Ranking: Token Pruning as Ranking Prediction for Efficient CLIP},
  booktitle = WACV25,
  year      = {2025},
  pages     = {5842--5851},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231313},
}

@inproceedings{bb236339,
  author    = {Kravets, A. and Namboodiri, V. P.},
  title     = {Zero-Shot Class Unlearning in CLIP with Synthetic Samples},
  booktitle = WACV25,
  year      = {2025},
  pages     = {6456--6464},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231314},
}

@inproceedings{bb236340,
  author    = {Hakim, G. A. V. and Osowiechi, D. and Noori, M. and Cheraghalikhani, M. and Bahri, A. and Yazdanpanah, M. and Ben Ayed, I. and Desrosiers, C.},
  title     = {CLIPArTT: Adaptation of CLIP to New Domains at Test Time},
  booktitle = WACV25,
  year      = {2025},
  pages     = {7092--7101},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231315},
}

@inproceedings{bb236341,
  author    = {Cao, A. Q. and Jaritz, M. and Guillaumin, M. and de Charette, R. and Bazzani, L.},
  title     = {LATTECLIP: Unsupervised CLIP Fine-Tuning via LMM-Synthetic Texts},
  booktitle = WACV25,
  year      = {2025},
  pages     = {5030--5040},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231316},
}

@inproceedings{bb236342,
  author    = {Song, Y. and Cho, S.},
  title     = {Leveraging CLIP Encoder for Multimodal Emotion Recognition},
  booktitle = WACV25,
  year      = {2025},
  pages     = {6115--6124},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231317},
}

@inproceedings{bb236343,
  author    = {Jiang, H. Y. and Cheng, Z. Q. and Moreira, G. and Zhu, J. and Sun, J. D. and Ren, B. and He, J. Y. and Dai, Q. and Hua, X. S.},
  title     = {UCDR-Adapter: Exploring Adaptation of Pre-Trained Vision-Language Models for Universal Cross-Domain Retrieval},
  booktitle = WACV25,
  year      = {2025},
  pages     = {5429--5438},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231318},
}

@inproceedings{bb236344,
  author    = {Kim, D. and Jo, Y. J. and Lee, M. and Kim, T.},
  title     = {Retaining and Enhancing Pre-trained Knowledge in Vision-Language Models with Prompt Ensembling},
  booktitle = WACV25,
  year      = {2025},
  pages     = {5550--5559},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231319},
}

@inproceedings{bb236345,
  author    = {Ruan, S. and Dong, Y. P. and Liu, H. Q. and Huang, Y. and Su, H. and Wei, X. X.},
  title     = {Omniview-tuning: Boosting Viewpoint Invariance of Vision-language Pre-training Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XXVI: 309--327},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231320},
}

@inproceedings{bb236346,
  author    = {Bhalgat, Y. and Laina, I. and Henriques, J. F. and Zisserman, A. and Vedaldi, A.},
  title     = {N2F2: Hierarchical Scene Understanding with Nested Neural Feature Fields},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LIX: 197--214},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231321},
}

@inproceedings{bb236347,
  author    = {An, X. and Yang, K. C. and Dai, X. Z. and Feng, Z. and Deng, J. K.},
  title     = {Multi-label Cluster Discrimination for Visual Representation Learning},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XXVII: 428--444},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231322},
}

@inproceedings{bb236348,
  author    = {Chen, W. and Shi, C. Y. and Ma, C. X. and Li, W. H. and Dong, S.},
  title     = {DepthBLIP-2: Leveraging Language to Guide BLIP-2 in Understanding Depth Information},
  booktitle = ACCV24,
  year      = {2024},
  pages     = {VII: 287--302},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231323},
}

@inproceedings{bb236349,
  author    = {Zhou, Y. and Wu, Y. J. and Saiyin, J. and Wei, B. Z. and Lai, M. and Chang, E. and Xu, Y.},
  title     = {SDPT: Synchronous Dual Prompt Tuning for Fusion-based Visual-language Pre-trained Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLIX: 340--356},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231324},
}

@inproceedings{bb236350,
  author    = {Naeem, M. F. and Xian, Y. Q. and Zhai, X. H. and Hoyer, L. and Van Gool, L. J. and Tombari, F.},
  title     = {Silc: Improving Vision Language Pretraining with Self-distillation},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XXI: 38--55},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231325},
}

@inproceedings{bb236351,
  author    = {Chen, Q. and Hong, Y.},
  title     = {Medblip: Bootstrapping Language-image Pretraining from 3d Medical Images and Texts},
  booktitle = ACCV24,
  year      = {2024},
  pages     = {III: 98--113},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231326},
}

@inproceedings{bb236352,
  author    = {Zheng, K. and Zhang, Y. F. and Wu, W. and Lu, F. and Ma, S. and Jin, X. and Chen, W. and Shen, Y. J.},
  title     = {Dreamlip: Language-image Pre-training with Long Captions},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XVIII: 73--90},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231327},
}

@inproceedings{bb236353,
  author    = {Zhuang, J. and Hu, J. Q. and Mu, L. and Hu, R. and Liang, X. Y. and Ye, J. and Hu, H. J.},
  title     = {FALIP: Visual Prompt as Foveal Attention Boosts CLIP Zero-shot Performance},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {X: 236--253},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231328},
}

@inproceedings{bb236354,
  author    = {Yang, Q. and Ye, M. and Tao, D. C.},
  title     = {Synergy of Sight and Semantics: Visual Intention Understanding with CLIP},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XI: 144--160},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231329},
}

@inproceedings{bb236355,
  author    = {Lu, Z. Q. and Shen, F. L. and Liu, M. and Yu, Y. L. and Li, X.},
  title     = {Improving Zero-shot Generalization for CLIP with Variational Adapter},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XX: 328--344},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231330},
}

@inproceedings{bb236356,
  author    = {Wang, F. and Mei, J. and Yuille, A. L.},
  title     = {SCLIP: Rethinking Self-attention for Dense Vision-language Inference},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XXI: 315--332},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231331},
}

@inproceedings{bb236357,
  author    = {Lan, M. C. and Chen, C. F. and Ke, Y. P. and Wang, X. J. and Feng, L. and Zhang, W.},
  title     = {ClearCLIP: Decomposing CLIP Representations for Dense Vision-language Inference},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLVII: 143--160},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231332},
}

@inproceedings{bb236358,
  author    = {Zhang, B. C. and Zhang, P. and Dong, X. Y. and Zang, Y. H. and Wang, J. Q.},
  title     = {Long-CLIP: Unlocking the Long-text Capability of CLIP},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LI: 310--325},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231333},
}

@inproceedings{bb236359,
  author    = {Poppi, S. and Poppi, T. and Cocchi, F. and Cornia, M. and Baraldi, L. and Cucchiara, R.},
  title     = {Safe-CLIP: Removing NSFW Concepts from Vision-and-language Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LIII: 340--356},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231334},
}

@inproceedings{bb236360,
  author    = {Huang, L. and Cao, X. and Lu, H. and Liu, X. L.},
  title     = {Class-incremental Learning with CLIP: Adaptive Representation Adjustment and Parameter Fusion},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LIV: 214--231},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231335},
}

@inproceedings{bb236361,
  author    = {Lai, Z. F. and Zhang, H. T. and Zhang, B. and Wu, W. T. and Bai, H. and Timofeev, A. and Du, X. Z. and Gan, Z. and Shan, J. and Chuah, C. N. and Yang, Y. F. and Cao, M.},
  title     = {VECLIP: Improving CLIP Training via Visual-enriched Captions},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLII: 111--127},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231336},
}

@inproceedings{bb236362,
  author    = {Qian, Q. and Hu, J.},
  title     = {Online Zero-shot Classification with CLIP},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXVII: 462--477},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231337},
}

@inproceedings{bb236363,
  author    = {Balauca, A. A. and Paudel, D. P. and Toutanova, K. and Van Gool, L. J.},
  title     = {Taming CLIP for Fine-grained and Structured Visual Understanding of Museum Exhibits},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXVI: 377--394},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231338},
}

@inproceedings{bb236364,
  author    = {Abbasi, R. and Rohban, M. H. and Baghshah, M. S.},
  title     = {Deciphering the Role of Representation Disentanglement: Investigating Compositional Generalization in CLIP Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXXIX: 35--50},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231339},
}

@inproceedings{bb236365,
  author    = {Li, Y. and Guo, W. W. and Yang, X. and Liao, N. and He, D. and Zhou, J. Q. and Yu, W. X.},
  title     = {Toward Open Vocabulary Aerial Object Detection with CLIP-activated Student-teacher Learning},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXXVI: 431--448},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231340},
}

@inproceedings{bb236366,
  author    = {Du, Y. and Zhai, Q. and Dai, W. and Li, X. M.},
  title     = {Teach CLIP to Develop a Number Sense for Ordinal Regression},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXXV: 1--17},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231341},
}

@inproceedings{bb236367,
  author    = {Jo, S. and Ryu, S. and Kim, S. and Yang, E. and Kim, K.},
  title     = {TTD: Text-tag Self-distillation Enhancing Image-text Alignment in CLIP to Alleviate Single Tag Bias},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXXI: 341--357},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231342},
}

@inproceedings{bb236368,
  author    = {Lu, S. and Jiao, J. and Wang, L. X. and Qiu, H. Q. and Lin, X. T. and Mei, H. and Li, H. L.},
  title     = {Video Class-Incremental Learning with CLIP Based Transformer},
  booktitle = ICIP24,
  year      = {2024},
  pages     = {500--506},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231343},
}

@inproceedings{bb236369,
  author    = {Chen, Y. C. and Li, W. H. and Chen, C. S.},
  title     = {Open-Vocabulary Panoptic Segmentation Using Bert Pre-Training of Vision-Language Multiway Transformer Model},
  booktitle = ICIP24,
  year      = {2024},
  pages     = {2494--2500},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231344},
}

@inproceedings{bb236370,
  author    = {Yi, C. and Ren, L. and Zhan, D. C. and Ye, H. J.},
  title     = {Leveraging Cross-Modal Neighbor Representation for Improved CLIP Classification},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {27392--27401},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231345},
}

@inproceedings{bb236371,
  author    = {Byun, J. and Kim, D. and Moon, T.},
  title     = {MAFA: Managing False Negatives for Vision-Language Pre-Training},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {27304--27314},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231346},
}

@inproceedings{bb236372,
  author    = {Wei, Z. and Pan, Z. X. and Owens, A.},
  title     = {Efficient Vision-Language Pre-Training by Cluster Masking},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26805--26815},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231347},
}

@inproceedings{bb236373,
  author    = {Wang, S. and Zhang, J. and Yuan, Z. and Shan, S. G.},
  title     = {Pre-Trained Model Guided Fine-Tuning for Zero-Shot Adversarial Robustness},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {24502--24511},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231348},
}

@inproceedings{bb236374,
  author    = {Lee, J. H. and Kang, J. W.},
  title     = {SRTube: Video-Language Pre-Training with Action-Centric Video Tube Features and Semantic Role Labeling},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13689--13699},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231349},
}

@inproceedings{bb236375,
  author    = {Gao, Y. and Shi, K. Y. and Zhu, P. and Belval, E. and Nuriel, O. and Appalaraju, S. and Ghadar, S. and Tu, Z. W. and Mahadevan, V. and Soatto, S.},
  title     = {Enhancing Vision-Language Pre-Training with Rich Supervisions},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13480--13491},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231350},
}

@inproceedings{bb236376,
  author    = {Bulat, A. and Ouali, Y. and Tzimiropoulos, G.},
  title     = {FFF: Fixing Flawed Foundations in contrastive pre-training results in very strong Vision-Language models},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {14172--14182},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231351},
}

@inproceedings{bb236377,
  author    = {Shao, S. and Bai, Y. and Wang, Y. and Liu, B. and Zhou, Y. C.},
  title     = {DeIL: Direct-and-Inverse CLIP for Open-World Few-Shot Learning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {28505--28514},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231352},
}

@inproceedings{bb236378,
  author    = {Martin, S. and Huang, Y. and Shakeri, F. and Pesquet, J. C. and Ayed, I. B.},
  title     = {Transductive Zero-Shot and Few-Shot CLIP},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {28816--28826},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231353},
}

@inproceedings{bb236379,
  author    = {Xing, X. and Xiong, Z. and Stylianou, A. and Sastry, S. and Gong, L. and Jacobs, N.},
  title     = {Vision-Language Pseudo-Labels for Single-Positive Multi-Label Learning},
  booktitle = ZeroShot24,
  year      = {2024},
  pages     = {7799--7808},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231354},
}

@inproceedings{bb236380,
  author    = {Xie, J. H. and Deng, S. H. and Li, B. and Liu, H. Z. and Huang, Y. W. and Zheng, Y. F. and Schmidhuber, J. and Ghanem, B. and Shen, L. L. and Shou, M. Z.},
  title     = {Tune-an-Ellipse: CLIP Has Potential to Find what you Want},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13723--13732},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231355},
}

@inproceedings{bb236381,
  author    = {Maniparambil, M. and Akshulakov, R. and Djilali, Y. A. D. and Seddik, M. E. A. and Narayan, S. and Mangalam, K. and O'Connor, N. E.},
  title     = {Do Vision and Language Encoders Represent the World Similarly?},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {14334--14343},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231356},
}

@inproceedings{bb236382,
  author    = {Pan, C. and Yaman, B. and Velipasalar, S. and Ren, L.},
  title     = {CLIP-BEVFormer: Enhancing Multi-View Image-Based BEV Detector with Ground Truth Flow},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {15216--15225},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231357},
}

@inproceedings{bb236383,
  author    = {Yang, C. G. and An, Z. and Huang, L. and Bi, J. Y. and Yu, X. and Yang, H. and Diao, B. and Xu, Y. J.},
  title     = {CLIP-KD: An Empirical Study of CLIP Model Distillation},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {15952--15962},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231358},
}

@inproceedings{bb236384,
  author    = {Vasu, P. K. A. and Pouransari, H. and Faghri, F. and Vemulapalli, R. and Tuzel, O.},
  title     = {MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {15963--15974},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231359},
}

@inproceedings{bb236385,
  author    = {Fan, L. and Zhou, J. X. and Xing, X. Y. and Wu, Y.},
  title     = {Active Open-Vocabulary Recognition: Let Intelligent Moving Mitigate CLIP Limitations},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {16394--16403},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231360},
}

@inproceedings{bb236386,
  author    = {Tang, Y. W. and Lin, Z. and Wang, Q. L. and Zhu, P. F. and Hu, Q. H.},
  title     = {AMU-Tuning: Effective Logit Bias for CLIP-based Few-shot Learning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {23323--23333},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231361},
}

@inproceedings{bb236387,
  author    = {Huang, Y. S. and Shakeri, F. and Dolz, J. and Boudiaf, M. and Bahig, H. and Ben Ayed, I.},
  title     = {LP++: A Surprisingly Strong Linear Probe for Few-Shot CLIP},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {23773--23782},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231362},
}

@inproceedings{bb236388,
  author    = {Bai, J. and Gao, K. and Min, S. B. and Xia, S. T. and Li, Z. F. and Liu, W.},
  title     = {BadCLIP: Trigger-Aware Prompt Learning for Backdoor Attacks on CLIP},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {24239--24250},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231363},
}

@inproceedings{bb236389,
  author    = {Liang, S. Y. and Zhu, M. L. and Liu, A. and Wu, B. Y. and Cao, X. C. and Chang, E. C.},
  title     = {BadCLIP: Dual-Embedding Guided Backdoor Attack on Multimodal Contrastive Learning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {24645--24654},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231364},
}

@inproceedings{bb236390,
  author    = {Cheng, J. and Liang, D. and Tan, S.},
  title     = {Transfer CLIP for Generalizable Image Denoising},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {25974--25984},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231365},
}

@inproceedings{bb236391,
  author    = {Ma, J. W. and Huang, P. Y. and Xie, S. and Li, S. W. and Zettlemoyer, L. and Chang, S. F. and Yih, W. T. and Xu, H.},
  title     = {MoDE: CLIP Data Experts via Clustering},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26344--26353},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231366},
}

@inproceedings{bb236392,
  author    = {Li, X. and Zhang, W. and Liu, Y. N. and Hu, Z. H. and Zhang, B. and Hu, X. L.},
  title     = {Language-Driven Anchors for Zero-Shot Adversarial Robustness},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {24686--24695},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231367},
}

@inproceedings{bb236393,
  author    = {Massiceti, D. and Longden, C. and Slowik, A. and Wills, S. and Grayson, M. and Morrison, C.},
  title     = {Explaining CLIP's Performance Disparities on Data from Blind/Low Vision Users},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {12172--12182},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231368},
}

@inproceedings{bb236394,
  author    = {Wu, S. and Tan, H. and Tian, Z. and Chen, Y. K. and Qi, X. J. and Jia, J. Y.},
  title     = {SaCo Loss: Sample-Wise Affinity Consistency for Vision-Language Pre-Training},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {27348--27359},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231369},
}

@inproceedings{bb236395,
  author    = {Lin, H. and Bai, H. and Liu, Z. and Hou, L. and Sun, M. and Song, L. Q. and Wei, Y. and Surr, Z. A.},
  title     = {MoPE-CLIP: Structured Pruning for Efficient Vision-Language Models with Module-Wise Pruning Error Metric},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {27360--27370},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231370},
}

@inproceedings{bb236396,
  author    = {Gao, Y. P. and Wang, Z. Y. and Zheng, W. S. and Xie, C. and Zhou, Y.},
  title     = {Sculpting Holistic 3D Representation in Contrastive Language-Image-3D Pre-Training},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {22998--23008},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231371},
}

@inproceedings{bb236397,
  author    = {Shen, S. and Zhu, Z. and Fan, L. Q. and Zhang, H. and Wu, X. X.},
  title     = {DiffCLIP: Leveraging Stable Diffusion for Language Grounded 3D Classification},
  booktitle = WACV24,
  year      = {2024},
  pages     = {3584--3593},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231372},
}

@inproceedings{bb236398,
  author    = {Wan, B. and Tuytelaars, T.},
  title     = {Exploiting CLIP for Zero-shot HOI Detection Requires Knowledge Distillation at Multiple Levels},
  booktitle = WACV24,
  year      = {2024},
  pages     = {1794--1804},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231373},
}

@inproceedings{bb236399,
  author    = {Mei, J. and Piergiovanni, A. J. and Hwang, J. N. and Li, W.},
  title     = {SLVP: Self-Supervised Language-Video Pre-Training for Referring Video Object Segmentation},
  booktitle = Pretrain24,
  year      = {2024},
  pages     = {507--517},
  bibsource = {http://www.visionbib.com/bibliography/applicat803clip3.html#TT231374},
}