@comment{
  Bibliography entries bb228800 to bb228899, laid out one field per line.
  BIBSOURCE holds the visionbib.com URL recorded for each entry.
  BOOKTITLE and JOURNAL values are bare macro names such as CVPR24 or PAMI.
  NOTE(review): the string definitions for those macros are not part of this
  section; confirm they are declared before these entries are read.
  PAGES values for ECCV24 and ACCV24 keep a roman-numeral prefix ahead of the
  page range; presumably the proceedings part number -- TODO confirm.
  Page ranges use a double hyphen, MONTH uses the standard three-letter
  macros, and given-name initials are separated by spaces so that each
  initial is parsed as its own name token.
}

@inproceedings{bb228800,
  AUTHOR    = "Yu, Z. H. and Li, R.",
  TITLE     = "Revisiting Counterfactual Problems in Referring Expression Comprehension",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "13438--13448",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223799"
}

@inproceedings{bb228801,
  AUTHOR    = "Wu, Y. X. and Zhang, Z. and Xie, C. and Zhu, F. and Zhao, R.",
  TITLE     = "Advancing Referring Expression Segmentation Beyond Single Image",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "2628--2638",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223800"
}

@inproceedings{bb228802,
  AUTHOR    = "Kurita, S. and Katsura, N. and Onami, E.",
  TITLE     = "RefEgo: Referring Expression Comprehension Dataset from First-Person Perception of Ego4D",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "15168--15178",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223801"
}

@inproceedings{bb228803,
  AUTHOR    = "Qiao, Y. and Qi, Y. and Yu, Z. and Liu, J. and Wu, Q.",
  TITLE     = "March in Chat: Interactive Prompting for Remote Embodied Referring Expression",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "15712--15721",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223802"
}

@inproceedings{bb228804,
  AUTHOR    = "Chen, Y. and Du, R. and Liang, K. and Ma, Z. Y.",
  TITLE     = "Self-Enhanced Training Framework for Referring Expression Grounding",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "3060--3064",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223803"
}

@inproceedings{bb228805,
  AUTHOR    = "Sun, J. and Luo, G. and Zhou, Y. and Sun, X. S. and Jiang, G. N. and Wang, Z. Y. and Ji, R. R.",
  TITLE     = "RefTeacher: A Strong Baseline for Semi-Supervised Referring Expression Comprehension",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "19144--19154",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223804"
}

@inproceedings{bb228806,
  AUTHOR    = "Tang, J. J. and Zheng, G. and Shi, C. and Yang, S.",
  TITLE     = "Contrastive Grouping with Transformer for Referring Image Segmentation",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "23570--23580",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223805"
}

@inproceedings{bb228807,
  AUTHOR    = "Liu, J. and Ding, H. and Cai, Z. W. and Zhang, Y. T. and Satzoda, R. K. and Mahadevan, V. and Manmatha, R.",
  TITLE     = "PolyFormer: Referring Image Segmentation as Sequential Polygon Generation",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "18653--18663",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223806"
}

@inproceedings{bb228808,
  AUTHOR    = "Xu, L. and Huang, M. H. and Shang, X. and Yuan, Z. H. and Sun, Y. and Liu, J.",
  TITLE     = "Meta Compositional Referring Expression Segmentation",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "19478--19487",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223807"
}

@inproceedings{bb228809,
  AUTHOR    = "Liu, C. and Ding, H. H. and Jiang, X. D.",
  TITLE     = "GRES: Generalized Referring Expression Segmentation",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "23592--23601",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223808"
}

@inproceedings{bb228810,
  AUTHOR    = "Song, S. and Lin, X. D. and Liu, J. Y. and Guo, Z. M. and Chang, S. F.",
  TITLE     = "Co-Grounding Networks with Semantic Attention for Referring Expression Comprehension in Videos",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "1346--1355",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223809"
}

@inproceedings{bb228811,
  AUTHOR    = "Sun, M. J. and Xiao, J. and Lim, E. G.",
  TITLE     = "Iterative Shrinking for Referring Expression Grounding Using Deep Reinforcement Learning",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "14055--14064",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223810"
}

@inproceedings{bb228812,
  AUTHOR    = "Wang, P. and Wu, Q. and Cao, J. W. and Shen, C. H. and Gao, L. L. and van den Hengel, A. J.",
  TITLE     = "Neighbourhood Watch: Referring Expression Comprehension via Language-Guided Graph Attention Networks",
  BOOKTITLE = CVPR19,
  YEAR      = "2019",
  PAGES     = "1960--1968",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223811"
}

@inproceedings{bb228813,
  AUTHOR    = "Yang, S. B. and Li, G. B. and Yu, Y. Z.",
  TITLE     = "Dynamic Graph Attention for Referring Expression Comprehension",
  BOOKTITLE = ICCV19,
  YEAR      = "2019",
  PAGES     = "4643--4652",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223812"
}

@inproceedings{bb228814,
  AUTHOR    = "Zhang, H. W. and Niu, Y. L. and Chang, S. F.",
  TITLE     = "Grounding Referring Expressions in Images by Variational Context",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "4158--4166",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223813"
}

@inproceedings{bb228815,
  AUTHOR    = "Yu, L. C. and Lin, Z. and Shen, X. H. and Yang, J. M. and Lu, X. and Bansal, M. and Berg, T. L.",
  TITLE     = "MAttNet: Modular Attention Network for Referring Expression Comprehension",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "1307--1315",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223814"
}

@inproceedings{bb228816,
  AUTHOR    = "Luo, R. and Shakhnarovich, G.",
  TITLE     = "Comprehension-Guided Referring Expressions",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "3125--3134",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT223815"
}

@article{bb228817,
  AUTHOR    = "Tung, F. and Mori, G.",
  TITLE     = "Deep Neural Network Compression by In-Parallel Pruning-Quantization",
  JOURNAL   = PAMI,
  VOLUME    = "42",
  YEAR      = "2020",
  NUMBER    = "3",
  MONTH     = mar,
  PAGES     = "568--579",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223816"
}

@inproceedings{bb228818,
  AUTHOR    = "Tung, F. and Mori, G.",
  TITLE     = "CLIP-Q: Deep Network Compression Learning by In-parallel Pruning-Quantization",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "7873--7882",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223817"
}

@article{bb228819,
  AUTHOR    = "Kwon, G. and Ye, J. C.",
  TITLE     = "One-Shot Adaptation of GAN in Just One CLIP",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "10",
  MONTH     = oct,
  PAGES     = "12179--12191",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223818"
}

@article{bb228820,
  AUTHOR    = "Han, B. H. and Jiang, X. Y. and Fang, Z. J. and Fujita, H. and Gao, Y. B.",
  TITLE     = "F-SCP: An automatic prompt generation method for specific classes based on visual language pre-training models",
  JOURNAL   = PR,
  VOLUME    = "147",
  YEAR      = "2024",
  PAGES     = "110096",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223819"
}

@article{bb228821,
  AUTHOR    = "Liu, B. and Lu, D. H. and Wei, D. and Wu, X. and Wang, Y. and Zhang, Y. and Zheng, Y. F.",
  TITLE     = "Improving Medical Vision-Language Contrastive Pretraining with Semantics-Aware Triage",
  JOURNAL   = MedImg,
  VOLUME    = "42",
  YEAR      = "2023",
  NUMBER    = "12",
  MONTH     = dec,
  PAGES     = "3579--3589",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223820"
}

@article{bb228822,
  AUTHOR    = "Gao, P. and Geng, S. J. and Zhang, R. R. and Ma, T. and Fang, R. Y. and Zhang, Y. F. and Li, H. S. and Qiao, Y.",
  TITLE     = "CLIP-Adapter: Better Vision-Language Models with Feature Adapters",
  JOURNAL   = IJCV,
  VOLUME    = "132",
  YEAR      = "2024",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "581--595",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223821"
}

@inproceedings{bb228823,
  AUTHOR    = "Liu, Y. H. and He, J. W. and Gu, J. J. and Kong, X. T. and Qiao, Y. and Dong, C.",
  TITLE     = "DegAE: A New Pretraining Paradigm for Low-Level Vision",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "23292--23303",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223822"
}

@article{bb228824,
  AUTHOR    = "Dong, S. and Wang, L. and Du, B. and Meng, X. L.",
  TITLE     = "ChangeCLIP: Remote sensing change detection with multimodal vision-language representation learning",
  JOURNAL   = PandRS,
  VOLUME    = "208",
  YEAR      = "2024",
  PAGES     = "53--69",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223823"
}

@article{bb228825,
  AUTHOR    = "Peng, F. and Yang, X. S. and Xiao, L. H. and Wang, Y. and Xu, C. S.",
  TITLE     = "SgVA-CLIP: Semantic-Guided Visual Adapting of Vision-Language Models for Few-Shot Image Classification",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "3469--3480",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223824"
}

@article{bb228826,
  AUTHOR    = "Guo, Y. F. and Chen, Y. P. and Ma, Z.",
  TITLE     = "NeuroCLIP: Neuromorphic Data Understanding by CLIP and SNN",
  JOURNAL   = SPLetters,
  VOLUME    = "31",
  YEAR      = "2024",
  PAGES     = "246--250",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223825"
}

@article{bb228827,
  AUTHOR    = "Xing, Y. H. and Wu, Q. and Cheng, D. and Zhang, S. Z. and Liang, G. Q. and Wang, P. and Zhang, Y. N.",
  TITLE     = "Dual Modality Prompt Tuning for Vision-Language Pre-Trained Model",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "2056--2068",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223826"
}

@article{bb228828,
  AUTHOR    = "Xiao, L. H. and Yang, X. S. and Peng, F. and Yan, M. and Wang, Y. and Xu, C. S.",
  TITLE     = "CLIP-VG: Self-Paced Curriculum Adapting of CLIP for Visual Grounding",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "4334--4347",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223827"
}

@article{bb228829,
  AUTHOR    = "Zhang, K. and Yang, Y. and Yu, J. and Jiang, H. L. and Fan, J. P. and Huang, Q. M. and Han, W. D.",
  TITLE     = "Multi-Task Paired Masking With Alignment Modeling for Medical Vision-Language Pre-Training",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "4706--4721",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223828"
}

@article{bb228830,
  AUTHOR    = "Zara, G. and Turrisi da Costa, V. G. and Roy, S. and Rota, P. and Ricci, E.",
  TITLE     = "Simplifying open-set video domain adaptation with contrastive learning",
  JOURNAL   = CVIU,
  VOLUME    = "241",
  YEAR      = "2024",
  PAGES     = "103953",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223829"
}

@inproceedings{bb228831,
  AUTHOR    = "Zara, G. and Roy, S. and Rota, P. and Ricci, E.",
  TITLE     = "AutoLabel: CLIP-based framework for Open-Set Video Domain Adaptation",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "11504--11513",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223830"
}

@article{bb228832,
  AUTHOR    = "Wang, X. H. and Wang, W. G. and Shao, J. Y. and Yang, Y.",
  TITLE     = "Learning to Follow and Generate Instructions for Language-Capable Navigation",
  JOURNAL   = PAMI,
  VOLUME    = "46",
  YEAR      = "2024",
  NUMBER    = "5",
  MONTH     = may,
  PAGES     = "3334--3350",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223831"
}

@article{bb228833,
  AUTHOR    = "Zhang, W. B. and Zhang, Y. F. and Deng, Y. Y. and Zhang, W. L. and Lin, J. F. and Huang, B. Q. and Zhang, J. and Yu, W. H.",
  TITLE     = "Ta-Adapter: Enhancing few-shot CLIP with task-aware encoders",
  JOURNAL   = PR,
  VOLUME    = "153",
  YEAR      = "2024",
  PAGES     = "110559",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223832"
}

@article{bb228834,
  AUTHOR    = "Wu, Z. X. and Weng, Z. and Peng, W. and Yang, X. T. and Li, A. and Davis, L. S. and Jiang, Y. G.",
  TITLE     = "Building an Open-Vocabulary Video CLIP Model With Better Architectures, Optimization and Data",
  JOURNAL   = PAMI,
  VOLUME    = "46",
  YEAR      = "2024",
  NUMBER    = "7",
  MONTH     = jul,
  PAGES     = "4747--4762",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223833"
}

@article{bb228835,
  AUTHOR    = "Yu, W. W. and Liu, Y. L. and Zhu, X. K. and Cao, H. Y. and Sun, X. and Bai, X.",
  TITLE     = "Turning a CLIP Model Into a Scene Text Spotter",
  JOURNAL   = PAMI,
  VOLUME    = "46",
  YEAR      = "2024",
  NUMBER    = "9",
  MONTH     = sep,
  PAGES     = "6040--6054",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223834"
}

@inproceedings{bb228836,
  AUTHOR    = "Yu, W. W. and Liu, Y. L. and Hua, W. and Jiang, D. Q. and Ren, B. and Bai, X.",
  TITLE     = "Turning a CLIP Model into a Scene Text Detector",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "6978--6988",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223835"
}

@article{bb228837,
  AUTHOR    = "Cheng, H. and Ye, H. H. and Zhou, X. F. and Liu, X. M. and Chen, F. and Wang, M. Q.",
  TITLE     = "Vision-language pre-training via modal interaction",
  JOURNAL   = PR,
  VOLUME    = "156",
  YEAR      = "2024",
  PAGES     = "110809",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223836"
}

@article{bb228838,
  AUTHOR    = "Kong, J. and Wang, J. and Yu, L. C. and Zhang, X. J.",
  TITLE     = "Multimodality Self-distillation for Fast Inference of Vision and Language Pretrained Models",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "8928--8940",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223837"
}

@article{bb228839,
  AUTHOR    = "Yang, Z. Q. and An, G. and Zheng, Z. X. and Cao, S. and Ruan, Q. Q.",
  TITLE     = "GBC: Guided Alignment and Adaptive Boosting CLIP Bridging Vision and Language for Robust Action Recognition",
  JOURNAL   = CirSysVideo,
  VOLUME    = "34",
  YEAR      = "2024",
  NUMBER    = "9",
  MONTH     = sep,
  PAGES     = "8172--8187",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223838"
}

@article{bb228840,
  AUTHOR    = "Lin, X. and Zhu, M. H. and Dang, R. H. and Zhou, G. L. and Shu, S. L. and Lin, F. and Liu, C. J. and Chen, Q. J.",
  TITLE     = "CLIPose: Category-Level Object Pose Estimation With Pre-Trained Vision-Language Knowledge",
  JOURNAL   = CirSysVideo,
  VOLUME    = "34",
  YEAR      = "2024",
  NUMBER    = "10",
  MONTH     = oct,
  PAGES     = "9125--9138",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223839"
}

@article{bb228841,
  AUTHOR    = "Huang, Z. Y. and Yang, M. and Xiao, X. Y. and Hu, P. and Peng, X.",
  TITLE     = "Noise-Robust Vision-Language Pre-Training With Positive-Negative Learning",
  JOURNAL   = PAMI,
  VOLUME    = "47",
  YEAR      = "2025",
  NUMBER    = "1",
  MONTH     = jan,
  PAGES     = "338--350",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223840"
}

@article{bb228842,
  AUTHOR    = "Yao, D. S. and Zhu, M. Q. and Zhu, H. and Cai, W. Q. and Zhou, L.",
  TITLE     = "Integrating synthetic datasets with CLIP semantic insights for single image localization advancements",
  JOURNAL   = PandRS,
  VOLUME    = "218",
  YEAR      = "2024",
  PAGES     = "198--213",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223841"
}

@article{bb228843,
  AUTHOR    = "Jha, A. and Singha, M. and Bhattacharya, A. and Banerjee, B.",
  TITLE     = "RS3Lip: Consistency for remote sensing image classification on part embeddings using self-supervised learning and CLIP",
  JOURNAL   = CVIU,
  VOLUME    = "251",
  YEAR      = "2025",
  PAGES     = "104254",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223842"
}

@article{bb228844,
  AUTHOR    = "Wang, X. and Jin, J. and Li, C. L. and Tang, J. and Zhang, C. and Wang, W.",
  TITLE     = "Pedestrian Attribute Recognition via CLIP-Based Prompt Vision-Language Fusion",
  JOURNAL   = CirSysVideo,
  VOLUME    = "35",
  YEAR      = "2025",
  NUMBER    = "1",
  MONTH     = jan,
  PAGES     = "148--161",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223843"
}

@inproceedings{bb228845,
  AUTHOR    = "Zhu, J. and Jin, J. and Yang, Z. H. and Wu, X. H. and Wang, X.",
  TITLE     = "Learning CLIP Guided Visual-Text Fusion Transformer for Video-based Pedestrian Attribute Recognition",
  BOOKTITLE = NFVLR23,
  YEAR      = "2023",
  PAGES     = "2626--2629",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223844"
}

@article{bb228846,
  AUTHOR    = "Jin, W. Q. and Qu, M. X. and Shi, C. and Zhao, Y. and Wei, Y. C.",
  TITLE     = "RelFormer: Advancing contextual relations for transformer-based dense captioning",
  JOURNAL   = CVIU,
  VOLUME    = "252",
  YEAR      = "2025",
  PAGES     = "104300",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223845"
}

@article{bb228847,
  AUTHOR    = "Wu, Y. J. and Zhou, Y. and Saiyin, J. and Wei, B. Z. and Lai, M. and Shou, J. Z. and Xu, Y.",
  TITLE     = "AttriPrompter: Auto-Prompting With Attribute Semantics for Zero-Shot Nuclei Detection via Visual-Language Pre-Trained Models",
  JOURNAL   = MedImg,
  VOLUME    = "44",
  YEAR      = "2025",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "982--993",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223846"
}

@article{bb228848,
  AUTHOR    = "Ma, Q. J. and Yang, S. Q. and Zhang, L. J. and Lan, Q. and Yang, D. D. and Chen, H. and Tan, Y.",
  TITLE     = "APOVIS: Automated pixel-level open-vocabulary instance segmentation through integration of pre-trained vision-language models and foundational segmentation models",
  JOURNAL   = IVC,
  VOLUME    = "154",
  YEAR      = "2025",
  PAGES     = "105384",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223847"
}

@article{bb228849,
  AUTHOR    = "Zhang, W. Y. and Shen, L. and Foo, C. S.",
  TITLE     = "Source-Free Domain Adaptation Guided by Vision and Vision-Language Pre-training",
  JOURNAL   = IJCV,
  VOLUME    = "133",
  YEAR      = "2025",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "844--866",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223848"
}

@article{bb228850,
  AUTHOR    = "Zhang, Y. W. and Wang, J. and Tang, H. Y. and Qin, R. H.",
  TITLE     = "DALSCLIP: Domain aggregation via learning stronger domain-invariant features for CLIP",
  JOURNAL   = IVC,
  VOLUME    = "154",
  YEAR      = "2025",
  PAGES     = "105359",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223849"
}

@article{bb228851,
  AUTHOR    = "Liu, Y. and Wang, X. L. and Zhu, M. Z. and Cao, Y. and Huang, T. J. and Shen, C. H.",
  TITLE     = "Masked Channel Modeling for Bootstrapping Visual Pre-training",
  JOURNAL   = IJCV,
  VOLUME    = "133",
  YEAR      = "2025",
  NUMBER    = "2",
  MONTH     = feb,
  PAGES     = "760--780",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223850"
}

@article{bb228852,
  AUTHOR    = "Li, Y. and Wang, H. L. and Duan, Y. Q. and Zhang, J. H. and Li, X. M.",
  TITLE     = "A closer look at the explainability of Contrastive language-image pre-training",
  JOURNAL   = PR,
  VOLUME    = "162",
  YEAR      = "2025",
  PAGES     = "111409",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223851"
}

@article{bb228853,
  AUTHOR    = "Zhang, Z. and Gao, B. and Ye, J. R. and Jin, H. and Jiang, L. H. and Yang, W. M.",
  TITLE     = "CLIP prior-guided 3D open-vocabulary occupancy prediction",
  JOURNAL   = PR,
  VOLUME    = "162",
  YEAR      = "2025",
  PAGES     = "111347",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223852"
}

@article{bb228854,
  AUTHOR    = "Cai, S. Q. and Liu, X. and Yuan, J. L. and Zhou, Q. H.",
  TITLE     = "Prompt-Ladder: Memory-efficient prompt tuning for vision-language models on edge devices",
  JOURNAL   = PR,
  VOLUME    = "163",
  YEAR      = "2025",
  PAGES     = "111460",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223853"
}

@article{bb228855,
  AUTHOR    = "Ren, H. R. and Tang, F. and Zheng, H. J. and Zhao, H. and Guo, D. D. and Chang, Y.",
  TITLE     = "Modality-Consistent Prompt Tuning With Optimal Transport",
  JOURNAL   = CirSysVideo,
  VOLUME    = "35",
  YEAR      = "2025",
  NUMBER    = "3",
  MONTH     = mar,
  PAGES     = "2499--2512",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223854"
}

@article{bb228856,
  AUTHOR    = "Cao, Y. K. and Xu, X. H. and Cheng, Y. Q. and Sun, C. and Du, Z. W. and Gao, L. and Shen, W. M.",
  TITLE     = "Personalizing Vision-Language Models With Hybrid Prompts for Zero-Shot Anomaly Detection",
  JOURNAL   = Cyber,
  VOLUME    = "55",
  YEAR      = "2025",
  NUMBER    = "4",
  MONTH     = apr,
  PAGES     = "1917--1929",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223855"
}

@inproceedings{bb228857,
  AUTHOR    = "Cao, Y. K. and Zhang, J. N. and Frittoli, L. and Cheng, Y. Q. and Shen, W. M. and Boracchi, G.",
  TITLE     = "AdaCLIP: Adapting CLIP with Hybrid Learnable Prompts for Zero-shot Anomaly Detection",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XXXV: 55--72",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223856"
}

@inproceedings{bb228858,
  AUTHOR    = "Bhalgat, Y. and Laina, I. and Henriques, J. F. and Zisserman, A. and Vedaldi, A.",
  TITLE     = "N2F2: Hierarchical Scene Understanding with Nested Neural Feature Fields",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LIX: 197--214",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223857"
}

@inproceedings{bb228859,
  AUTHOR    = "An, X. and Yang, K. C. and Dai, X. Z. and Feng, Z. and Deng, J. K.",
  TITLE     = "Multi-label Cluster Discrimination for Visual Representation Learning",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XXVII: 428--444",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223858"
}

@inproceedings{bb228860,
  AUTHOR    = "Chen, W. and Shi, C. Y. and Ma, C. X. and Li, W. H. and Dong, S.",
  TITLE     = "DepthBLIP-2: Leveraging Language to Guide BLIP-2 in Understanding Depth Information",
  BOOKTITLE = ACCV24,
  YEAR      = "2024",
  PAGES     = "VII: 287--302",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223859"
}

@inproceedings{bb228861,
  AUTHOR    = "Zhou, Y. and Wu, Y. J. and Saiyin, J. and Wei, B. Z. and Lai, M. and Chang, E. and Xu, Y.",
  TITLE     = "SDPT: Synchronous Dual Prompt Tuning for Fusion-based Visual-language Pre-trained Models",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XLIX: 340--356",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223860"
}

@inproceedings{bb228862,
  AUTHOR    = "Naeem, M. F. and Xian, Y. Q. and Zhai, X. H. and Hoyer, L. and Van Gool, L. J. and Tombari, F.",
  TITLE     = "Silc: Improving Vision Language Pretraining with Self-distillation",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XXI: 38--55",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223861"
}

@inproceedings{bb228863,
  AUTHOR    = "Chen, Q. and Hong, Y.",
  TITLE     = "Medblip: Bootstrapping Language-image Pretraining from 3d Medical Images and Texts",
  BOOKTITLE = ACCV24,
  YEAR      = "2024",
  PAGES     = "III: 98--113",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223862"
}

@inproceedings{bb228864,
  AUTHOR    = "Zheng, K. and Zhang, Y. F. and Wu, W. and Lu, F. and Ma, S. and Jin, X. and Chen, W. and Shen, Y. J.",
  TITLE     = "Dreamlip: Language-image Pre-training with Long Captions",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XVIII: 73--90",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223863"
}

@inproceedings{bb228865,
  AUTHOR    = "Zhuang, J. and Hu, J. Q. and Mu, L. and Hu, R. and Liang, X. Y. and Ye, J. and Hu, H. J.",
  TITLE     = "FALIP: Visual Prompt as Foveal Attention Boosts CLIP Zero-shot Performance",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "X: 236--253",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223864"
}

@inproceedings{bb228866,
  AUTHOR    = "Yang, Q. and Ye, M. and Tao, D. C.",
  TITLE     = "Synergy of Sight and Semantics: Visual Intention Understanding with CLIP",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XI: 144--160",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223865"
}

@inproceedings{bb228867,
  AUTHOR    = "Lu, Z. Q. and Shen, F. and Liu, M. and Yu, Y. L. and Li, X.",
  TITLE     = "Improving Zero-shot Generalization for CLIP with Variational Adapter",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XX: 328--344",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223866"
}

@inproceedings{bb228868,
  AUTHOR    = "Wang, F. and Mei, J. and Yuille, A. L.",
  TITLE     = "SCLIP: Rethinking Self-attention for Dense Vision-language Inference",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XXI: 315--332",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223867"
}

@inproceedings{bb228869,
  AUTHOR    = "Lan, M. C. and Chen, C. F. and Ke, Y. P. and Wang, X. J. and Feng, L. and Zhang, W.",
  TITLE     = "ClearCLIP: Decomposing CLIP Representations for Dense Vision-language Inference",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XLVII: 143--160",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223868"
}

@inproceedings{bb228870,
  AUTHOR    = "Zhang, B. C. and Zhang, P. and Dong, X. Y. and Zang, Y. H. and Wang, J. Q.",
  TITLE     = "Long-CLIP: Unlocking the Long-text Capability of CLIP",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LI: 310--325",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223869"
}

@inproceedings{bb228871,
  AUTHOR    = "Poppi, S. and Poppi, T. and Cocchi, F. and Cornia, M. and Baraldi, L. and Cucchiara, R.",
  TITLE     = "Safe-CLIP: Removing NSFW Concepts from Vision-and-language Models",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LIII: 340--356",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223870"
}

@inproceedings{bb228872,
  AUTHOR    = "Huang, L. and Cao, X. and Lu, H. and Liu, X. L.",
  TITLE     = "Class-incremental Learning with CLIP: Adaptive Representation Adjustment and Parameter Fusion",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LIV: 214--231",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223871"
}

@inproceedings{bb228873,
  AUTHOR    = "Lai, Z. F. and Zhang, H. T. and Zhang, B. and Wu, W. T. and Bai, H. and Timofeev, A. and Du, X. Z. and Gan, Z. and Shan, J. and Chuah, C. N. and Yang, Y. F. and Cao, M.",
  TITLE     = "VECLIP: Improving CLIP Training via Visual-enriched Captions",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XLII: 111--127",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223872"
}

@inproceedings{bb228874,
  AUTHOR    = "Qian, Q. and Hu, J.",
  TITLE     = "Online Zero-shot Classification with CLIP",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LXXVII: 462--477",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223873"
}

@inproceedings{bb228875,
  AUTHOR    = "Ouali, Y. and Bulat, A. and Martinez, B. and Tzimiropoulos, G.",
  TITLE     = "CLIP-DPO: Vision-language Models as a Source of Preference for Fixing Hallucinations in LVLMS",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LXXVI: 395--413",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223874"
}

@inproceedings{bb228876,
  AUTHOR    = "Balauca, A. A. and Paudel, D. P. and Toutanova, K. and Van Gool, L. J.",
  TITLE     = "Taming CLIP for Fine-grained and Structured Visual Understanding of Museum Exhibits",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LXXVI: 377--394",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223875"
}

@inproceedings{bb228877,
  AUTHOR    = "Abbasi, R. and Rohban, M. H. and Baghshah, M. S.",
  TITLE     = "Deciphering the Role of Representation Disentanglement: Investigating Compositional Generalization in CLIP Models",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LXXXIX: 35--50",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223876"
}

@inproceedings{bb228878,
  AUTHOR    = "Li, Y. and Guo, W. W. and Yang, X. and Liao, N. and He, D. and Zhou, J. Q. and Yu, W. X.",
  TITLE     = "Toward Open Vocabulary Aerial Object Detection with CLIP-activated Student-teacher Learning",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LXXXVI: 431--448",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223877"
}

@inproceedings{bb228879,
  AUTHOR    = "Du, Y. and Zhai, Q. and Dai, W. and Li, X. M.",
  TITLE     = "Teach CLIP to Develop a Number Sense for Ordinal Regression",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LXXXV: 1--17",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223878"
}

@inproceedings{bb228880,
  AUTHOR    = "Jo, S. and Ryu, S. and Kim, S. and Yang, E. and Kim, K.",
  TITLE     = "TTD: Text-tag Self-distillation Enhancing Image-text Alignment in CLIP to Alleviate Single Tag Bias",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LXXXI: 341--357",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223879"
}

@inproceedings{bb228881,
  AUTHOR    = "Lu, S. and Jiao, J. and Wang, L. X. and Qiu, H. Q. and Lin, X. T. and Mei, H. and Li, H. L.",
  TITLE     = "Video Class-Incremental Learning with CLIP Based Transformer",
  BOOKTITLE = ICIP24,
  YEAR      = "2024",
  PAGES     = "500--506",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223880"
}

@inproceedings{bb228882,
  AUTHOR    = "Chen, Y. C. and Li, W. H. and Chen, C. S.",
  TITLE     = "Open-Vocabulary Panoptic Segmentation Using Bert Pre-Training of Vision-Language Multiway Transformer Model",
  BOOKTITLE = ICIP24,
  YEAR      = "2024",
  PAGES     = "2494--2500",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223881"
}

@inproceedings{bb228883,
  AUTHOR    = "Yi, C. and Ren, L. and Zhan, D. C. and Ye, H. J.",
  TITLE     = "Leveraging Cross-Modal Neighbor Representation for Improved CLIP Classification",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "27392--27401",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223882"
}

@inproceedings{bb228884,
  AUTHOR    = "Shao, S. and Bai, Y. and Wang, Y. and Liu, B. and Zhou, Y. C.",
  TITLE     = "DeIL: Direct-and-Inverse CLIP for Open-World Few-Shot Learning",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "28505--28514",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223883"
}

@inproceedings{bb228885,
  AUTHOR    = "Martin, S. and Huang, Y. and Shakeri, F. and Pesquet, J. C. and Ayed, I. B.",
  TITLE     = "Transductive Zero-Shot and Few-Shot CLIP",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "28816--28826",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223884"
}

@inproceedings{bb228886,
  AUTHOR    = "Xing, X. and Xiong, Z. and Stylianou, A. and Sastry, S. and Gong, L. and Jacobs, N.",
  TITLE     = "Vision-Language Pseudo-Labels for Single-Positive Multi-Label Learning",
  BOOKTITLE = ZeroShot24,
  YEAR      = "2024",
  PAGES     = "7799--7808",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223885"
}

@inproceedings{bb228887,
  AUTHOR    = "Xie, J. H. and Deng, S. H. and Li, B. and Liu, H. Z. and Huang, Y. W. and Zheng, Y. F. and Schmidhuber, J. and Ghanem, B. and Shen, L. L. and Shou, M. Z.",
  TITLE     = "Tune-an-Ellipse: CLIP Has Potential to Find what you Want",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "13723--13732",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223886"
}

@inproceedings{bb228888,
  AUTHOR    = "Maniparambil, M. and Akshulakov, R. and Djilali, Y. A. D. and Seddik, M. E. A. and Narayan, S. and Mangalam, K. and O'Connor, N. E.",
  TITLE     = "Do Vision and Language Encoders Represent the World Similarly?",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "14334--14343",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223887"
}

@inproceedings{bb228889,
  AUTHOR    = "Pan, C. and Yaman, B. and Velipasalar, S. and Ren, L.",
  TITLE     = "CLIP-BEVFormer: Enhancing Multi-View Image-Based BEV Detector with Ground Truth Flow",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "15216--15225",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223888"
}

@inproceedings{bb228890,
  AUTHOR    = "Yang, C. G. and An, Z. and Huang, L. and Bi, J. Y. and Yu, X. and Yang, H. and Diao, B. and Xu, Y. J.",
  TITLE     = "CLIP-KD: An Empirical Study of CLIP Model Distillation",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "15952--15962",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223889"
}

@inproceedings{bb228891,
  AUTHOR    = "Vasu, P. K. A. and Pouransari, H. and Faghri, F. and Vemulapalli, R. and Tuzel, O.",
  TITLE     = "MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "15963--15974",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223890"
}

@inproceedings{bb228892,
  AUTHOR    = "Fan, L. and Zhou, J. X. and Xing, X. Y. and Wu, Y.",
  TITLE     = "Active Open-Vocabulary Recognition: Let Intelligent Moving Mitigate CLIP Limitations",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "16394--16403",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223891"
}

@inproceedings{bb228893,
  AUTHOR    = "Stevens, S. and Wu, J. and Thompson, M. J. and Campolongo, E. G. and Song, C. H. and Carlyn, D. E. and Dong, L. and Dahdul, W. M. and Stewart, C. and Berger Wolf, T. and Chao, W. L. and Su, Y.",
  TITLE     = "BioCLIP: A Vision Foundation Model for the Tree of Life",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "19412--19424",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223892"
}

@inproceedings{bb228894,
  AUTHOR    = "Tang, Y. W. and Lin, Z. and Wang, Q. L. and Zhu, P. F. and Hu, Q. H.",
  TITLE     = "AMU-Tuning: Effective Logit Bias for CLIP-based Few-shot Learning",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "23323--23333",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223893"
}

@inproceedings{bb228895,
  AUTHOR    = "Huang, Y. S. and Shakeri, F. and Dolz, J. and Boudiaf, M. and Bahig, H. and Ben Ayed, I.",
  TITLE     = "LP++: A Surprisingly Strong Linear Probe for Few-Shot CLIP",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "23773--23782",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223894"
}

@inproceedings{bb228896,
  AUTHOR    = "Bai, J. and Gao, K. and Min, S. B. and Xia, S. T. and Li, Z. F. and Liu, W.",
  TITLE     = "BadCLIP: Trigger-Aware Prompt Learning for Backdoor Attacks on CLIP",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "24239--24250",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223895"
}

@inproceedings{bb228897,
  AUTHOR    = "Liang, S. Y. and Zhu, M. L. and Liu, A. and Wu, B. Y. and Cao, X. C. and Chang, E. C.",
  TITLE     = "BadCLIP: Dual-Embedding Guided Backdoor Attack on Multimodal Contrastive Learning",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "24645--24654",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223896"
}

@inproceedings{bb228898,
  AUTHOR    = "Cheng, J. and Liang, D. and Tan, S.",
  TITLE     = "Transfer CLIP for Generalizable Image Denoising",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "25974--25984",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223897"
}

@inproceedings{bb228899,
  AUTHOR    = "Ma, J. W. and Huang, P. Y. and Xie, S. and Li, S. W. and Zettlemoyer, L. and Chang, S. F. and Yih, W. T. and Xu, H.",
  TITLE     = "MoDE: CLIP Data Experts via Clustering",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "26344--26353",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT223898"
}