@inproceedings{bb243200,
AUTHOR = "Wang, S. J. and Kim, D. and Taalimi, A. and Sun, C. and Kuo, W. C.",
TITLE = "Learning Visual Grounding from Generative Vision and Language Model",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "8057--8067",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238115"}
@inproceedings{bb243201,
AUTHOR = "Wu, T. Y. and Huang, S. Y. and Wang, Y. C. A. F.",
TITLE = "Data-Efficient {3D} Visual Grounding via Order-Aware Referring",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "3107--3117",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238116"}
@inproceedings{bb243202,
AUTHOR = "Chu, T. Y. and Lin, Y. X. and Huang, C. C. and Hua, K. L.",
TITLE = "Enhancing Anchor-based Weakly Supervised Referring Expression
Comprehension with Cross-modality Attention",
BOOKTITLE = ACCV24,
YEAR = "2024",
PAGES = "III: 131--147",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238117"}
@inproceedings{bb243203,
AUTHOR = "Nag, S. and Goswami, K. and Karanam, S.",
TITLE = "Safari: Adaptive Sequence Transformer for Weakly Supervised Referring
Expression Segmentation",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XLIV: 485--503",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238118"}
@inproceedings{bb243204,
AUTHOR = "Dai, S. Y. and Liu, J. and Cheung, N. M.",
TITLE = "Referring Expression Counting",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "16985--16995",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238119"}
@inproceedings{bb243205,
AUTHOR = "Han, Z. and Zhu, F. and Lao, Q. and Jiang, H.",
TITLE = "Zero-Shot Referring Expression Comprehension via Structural
Similarity Between Images and Captions",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14364--14375",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238120"}
@inproceedings{bb243206,
AUTHOR = "Su, W. and Miao, P. H. and Dou, H. Z. and Li, X.",
TITLE = "{ScanFormer}: Referring Expression Comprehension by Iteratively
Scanning",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13449--13458",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238121"}
@inproceedings{bb243207,
AUTHOR = "Yu, Z. H. and Li, R.",
TITLE = "Revisiting Counterfactual Problems in Referring Expression
Comprehension",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13438--13448",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238122"}
@inproceedings{bb243208,
AUTHOR = "Li, X. and Qiu, K. and Wang, J. L. and Xu, X. H. and Singh, R. and Yamazaki, K. and Chen, H. and Huang, X. N. and Raj, B.",
TITLE = "{R$^2$-Bench}: Benchmarking the Robustness of Referring Perception Models
Under Perturbations",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "IX: 211--230",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238123"}
@inproceedings{bb243209,
AUTHOR = "Chng, Y. X. and Zheng, H. and Han, Y. Z. and Qiu, X. and Huang, G.",
TITLE = "Mask Grounding for Referring Image Segmentation",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "26563--26573",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238124"}
@inproceedings{bb243210,
AUTHOR = "Shah, N. A. and VS, V. and Patel, V. M.",
TITLE = "{LQMFormer}: Language-Aware Query Mask Transformer for Referring Image
Segmentation",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "12903--12913",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238125"}
@inproceedings{bb243211,
AUTHOR = "Wang, W. X. and Yue, T. T. and Zhang, Y. and Guo, L. T. and He, X. J. and Wang, X. L. and Liu, J.",
TITLE = "Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring
Expression Segmentation",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "12998--13008",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238126"}
@inproceedings{bb243212,
AUTHOR = "Wu, Y. X. and Zhang, Z. and Xie, C. and Zhu, F. and Zhao, R.",
TITLE = "Advancing Referring Expression Segmentation Beyond Single Image",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "2628--2638",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238127"}
@inproceedings{bb243213,
AUTHOR = "Kurita, S. and Katsura, N. and Onami, E.",
TITLE = "{RefEgo}: Referring Expression Comprehension Dataset from First-Person
Perception of {Ego4D}",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "15168--15178",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238128"}
@inproceedings{bb243214,
AUTHOR = "Qiao, Y. Y. and Qi, Y. K. and Yu, Z. and Liu, J. and Wu, Q.",
TITLE = "March in Chat: Interactive Prompting for Remote Embodied Referring
Expression",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "15712--15721",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238129"}
@inproceedings{bb243215,
AUTHOR = "Chen, Y. and Du, R. and Liang, K. M. and Ma, Z. Y.",
TITLE = "Self-Enhanced Training Framework for Referring Expression Grounding",
BOOKTITLE = ICIP23,
YEAR = "2023",
PAGES = "3060--3064",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238130"}
@inproceedings{bb243216,
AUTHOR = "Sun, J. and Luo, G. and Zhou, Y. Y. and Sun, X. S. and Jiang, G. N. and Wang, Z. Y. and Ji, R. R.",
TITLE = "{RefTeacher}: A Strong Baseline for Semi-Supervised Referring
Expression Comprehension",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "19144--19154",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238131"}
@inproceedings{bb243217,
AUTHOR = "Tang, J. J. and Zheng, G. and Shi, C. and Yang, S.",
TITLE = "Contrastive Grouping with Transformer for Referring Image
Segmentation",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "23570--23580",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238132"}
@inproceedings{bb243218,
AUTHOR = "Liu, J. and Ding, H. and Cai, Z. W. and Zhang, Y. T. and Satzoda, R. K. and Mahadevan, V. and Manmatha, R.",
TITLE = "{PolyFormer}: Referring Image Segmentation as Sequential Polygon
Generation",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "18653--18663",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238133"}
@inproceedings{bb243219,
AUTHOR = "Xu, L. and Huang, M. H. and Shang, X. and Yuan, Z. H. and Sun, Y. and Liu, J.",
TITLE = "Meta Compositional Referring Expression Segmentation",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "19478--19487",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238134"}
@inproceedings{bb243220,
AUTHOR = "Liu, C. and Ding, H. H. and Jiang, X. D.",
TITLE = "{GRES}: Generalized Referring Expression Segmentation",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "23592--23601",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238135"}
@inproceedings{bb243221,
AUTHOR = "Song, S. and Lin, X. D. and Liu, J. Y. and Guo, Z. M. and Chang, S. F.",
TITLE = "Co-Grounding Networks with Semantic Attention for Referring
Expression Comprehension in Videos",
BOOKTITLE = CVPR21,
YEAR = "2021",
PAGES = "1346--1355",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238136"}
@inproceedings{bb243222,
AUTHOR = "Sun, M. J. and Xiao, J. and Lim, E. G.",
TITLE = "Iterative Shrinking for Referring Expression Grounding Using Deep
Reinforcement Learning",
BOOKTITLE = CVPR21,
YEAR = "2021",
PAGES = "14055--14064",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238137"}
@inproceedings{bb243223,
AUTHOR = "Zhu, H. D. and Sadhu, A. and Zheng, Z. H. and Nevatia, R.",
TITLE = "Utilizing Every Image Object for Semi-supervised Phrase Grounding",
BOOKTITLE = WACV21,
YEAR = "2021",
PAGES = "2209--2218",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238138"}
@inproceedings{bb243224,
AUTHOR = "Wang, P. and Wu, Q. and Cao, J. W. and Shen, C. H. and Gao, L. L. and van den Hengel, A. J.",
TITLE = "Neighbourhood Watch: Referring Expression Comprehension via
Language-Guided Graph Attention Networks",
BOOKTITLE = CVPR19,
YEAR = "2019",
PAGES = "1960--1968",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238139"}
@inproceedings{bb243225,
AUTHOR = "Tanaka, M. and Itamochi, T. and Narioka, K. and Sato, I. and Ushiku, Y. and Harada, T.",
TITLE = "Generating Easy-to-Understand Referring Expressions for Target
Identifications",
BOOKTITLE = ICCV19,
YEAR = "2019",
PAGES = "5793--5802",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238140"}
@inproceedings{bb243226,
AUTHOR = "Yang, S. B. and Li, G. B. and Yu, Y. Z.",
TITLE = "Dynamic Graph Attention for Referring Expression Comprehension",
BOOKTITLE = ICCV19,
YEAR = "2019",
PAGES = "4643--4652",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238141"}
@inproceedings{bb243227,
AUTHOR = "Zhang, H. W. and Niu, Y. L. and Chang, S. F.",
TITLE = "Grounding Referring Expressions in Images by Variational Context",
BOOKTITLE = CVPR18,
YEAR = "2018",
PAGES = "4158--4166",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238142"}
@inproceedings{bb243228,
AUTHOR = "Yu, L. C. and Lin, Z. and Shen, X. H. and Yang, J. M. and Lu, X. and Bansal, M. and Berg, T. L.",
TITLE = "{MAttNet}: Modular Attention Network for Referring Expression
Comprehension",
BOOKTITLE = CVPR18,
YEAR = "2018",
PAGES = "1307--1315",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238143"}
@inproceedings{bb243229,
AUTHOR = "Luo, R. and Shakhnarovich, G.",
TITLE = "Comprehension-Guided Referring Expressions",
BOOKTITLE = CVPR17,
YEAR = "2017",
PAGES = "3125--3134",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT238144"}
@article{bb243230,
AUTHOR = "Tung, F. and Mori, G.",
TITLE = "Deep Neural Network Compression by In-Parallel Pruning-Quantization",
JOURNAL = PAMI,
VOLUME = "42",
YEAR = "2020",
NUMBER = "3",
MONTH = mar,
PAGES = "568--579",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238145"}
@inproceedings{bb243231,
AUTHOR = "Tung, F. and Mori, G.",
TITLE = "{CLIP-Q}: Deep Network Compression Learning by In-parallel
Pruning-Quantization",
BOOKTITLE = CVPR18,
YEAR = "2018",
PAGES = "7873--7882",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238146"}
@article{bb243232,
AUTHOR = "Kwon, G. and Ye, J. C.",
TITLE = "One-Shot Adaptation of {GAN} in Just One {CLIP}",
JOURNAL = PAMI,
VOLUME = "45",
YEAR = "2023",
NUMBER = "10",
MONTH = oct,
PAGES = "12179--12191",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238147"}
@article{bb243233,
AUTHOR = "Han, B. H. and Jiang, X. Y. and Fang, Z. J. and Fujita, H. and Gao, Y. B.",
TITLE = "{F-SCP}: An automatic prompt generation method for specific classes
based on visual language pre-training models",
JOURNAL = PR,
VOLUME = "147",
YEAR = "2024",
PAGES = "110096",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238148"}
@article{bb243234,
AUTHOR = "Liu, B. and Lu, D. H. and Wei, D. and Wu, X. and Wang, Y. and Zhang, Y. and Zheng, Y. F.",
TITLE = "Improving Medical Vision-Language Contrastive Pretraining with
Semantics-Aware Triage",
JOURNAL = MedImg,
VOLUME = "42",
YEAR = "2023",
NUMBER = "12",
MONTH = dec,
PAGES = "3579--3589",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238149"}
@article{bb243235,
AUTHOR = "Gao, P. and Geng, S. J. and Zhang, R. R. and Ma, T. and Fang, R. Y. and Zhang, Y. F. and Li, H. S. and Qiao, Y.",
TITLE = "{CLIP-Adapter}: Better Vision-Language Models with Feature Adapters",
JOURNAL = IJCV,
VOLUME = "132",
YEAR = "2024",
NUMBER = "2",
MONTH = feb,
PAGES = "581--595",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238150"}
@inproceedings{bb243236,
AUTHOR = "Liu, Y. H. and He, J. W. and Gu, J. J. and Kong, X. T. and Qiao, Y. and Dong, C.",
TITLE = "{DegAE}: A New Pretraining Paradigm for Low-Level Vision",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "23292--23303",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238151"}
@article{bb243237,
AUTHOR = "Dong, S. and Wang, L. B. and Du, B. and Meng, X. L.",
TITLE = "{ChangeCLIP}: Remote sensing change detection with multimodal
vision-language representation learning",
JOURNAL = PandRS,
VOLUME = "208",
YEAR = "2024",
PAGES = "53--69",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238152"}
@article{bb243238,
AUTHOR = "Peng, F. and Yang, X. S. and Xiao, L. H. and Wang, Y. W. and Xu, C. S.",
TITLE = "{SgVA-CLIP}: Semantic-Guided Visual Adapting of Vision-Language Models
for Few-Shot Image Classification",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "3469--3480",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238153"}
@article{bb243239,
AUTHOR = "Guo, Y. F. and Chen, Y. P. and Ma, Z.",
TITLE = "{NeuroCLIP}: Neuromorphic Data Understanding by {CLIP} and {SNN}",
JOURNAL = SPLetters,
VOLUME = "31",
YEAR = "2024",
PAGES = "246--250",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238154"}
@article{bb243240,
AUTHOR = "Xing, Y. H. and Wu, Q. and Cheng, D. and Zhang, S. Z. and Liang, G. Q. and Wang, P. and Zhang, Y. N.",
TITLE = "Dual Modality Prompt Tuning for Vision-Language Pre-Trained Model",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "2056--2068",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238155"}
@article{bb243241,
AUTHOR = "Xiao, L. H. and Yang, X. S. and Peng, F. and Yan, M. and Wang, Y. W. and Xu, C. S.",
TITLE = "{CLIP-VG}: Self-Paced Curriculum Adapting of {CLIP} for Visual Grounding",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "4334--4347",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238156"}
@article{bb243242,
AUTHOR = "Zhang, K. and Yang, Y. and Yu, J. and Jiang, H. L. and Fan, J. P. and Huang, Q. M. and Han, W. D.",
TITLE = "Multi-Task Paired Masking With Alignment Modeling for Medical
Vision-Language Pre-Training",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "4706--4721",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238157"}
@article{bb243243,
AUTHOR = "Zara, G. and {Turrisi da Costa}, V. G. and Roy, S. and Rota, P. and Ricci, E.",
TITLE = "Simplifying open-set video domain adaptation with contrastive
learning",
JOURNAL = CVIU,
VOLUME = "241",
YEAR = "2024",
PAGES = "103953",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238158"}
@inproceedings{bb243244,
AUTHOR = "Zara, G. and Roy, S. and Rota, P. and Ricci, E.",
TITLE = "{AutoLabel}: {CLIP}-based framework for Open-Set Video Domain Adaptation",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "11504--11513",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238159"}
@article{bb243245,
AUTHOR = "Wang, X. H. and Wang, W. G. and Shao, J. Y. and Yang, Y.",
TITLE = "Learning to Follow and Generate Instructions for Language-Capable
Navigation",
JOURNAL = PAMI,
VOLUME = "46",
YEAR = "2024",
NUMBER = "5",
MONTH = may,
PAGES = "3334--3350",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238160"}
@article{bb243246,
AUTHOR = "Zhang, W. B. and Zhang, Y. F. and Deng, Y. Y. and Zhang, W. L. and Lin, J. F. and Huang, B. Q. and Zhang, J. and Yu, W. H.",
TITLE = "{Ta-Adapter}: Enhancing few-shot {CLIP} with task-aware encoders",
JOURNAL = PR,
VOLUME = "153",
YEAR = "2024",
PAGES = "110559",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238161"}
@article{bb243247,
AUTHOR = "Wu, Z. X. and Weng, Z. and Peng, W. and Yang, X. T. and Li, A. and Davis, L. S. and Jiang, Y. G.",
TITLE = "Building an Open-Vocabulary Video {CLIP} Model With Better
Architectures, Optimization and Data",
JOURNAL = PAMI,
VOLUME = "46",
YEAR = "2024",
NUMBER = "7",
MONTH = jul,
PAGES = "4747--4762",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238162"}
@article{bb243248,
AUTHOR = "Yu, W. W. and Liu, Y. L. and Zhu, X. K. and Cao, H. Y. and Sun, X. and Bai, X.",
TITLE = "Turning a {CLIP} Model Into a Scene Text Spotter",
JOURNAL = PAMI,
VOLUME = "46",
YEAR = "2024",
NUMBER = "9",
MONTH = sep,
PAGES = "6040--6054",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238163"}
@inproceedings{bb243249,
AUTHOR = "Yu, W. W. and Liu, Y. L. and Hua, W. and Jiang, D. Q. and Ren, B. and Bai, X.",
TITLE = "Turning a {CLIP} Model into a Scene Text Detector",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "6978--6988",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238164"}
@article{bb243250,
AUTHOR = "Cheng, H. and Ye, H. H. and Zhou, X. F. and Liu, X. M. and Chen, F. and Wang, M. Q.",
TITLE = "Vision-language pre-training via modal interaction",
JOURNAL = PR,
VOLUME = "156",
YEAR = "2024",
PAGES = "110809",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238165"}
@article{bb243251,
AUTHOR = "Kong, J. and Wang, J. and Yu, L. C. and Zhang, X. J.",
TITLE = "Multimodality Self-distillation for Fast Inference of Vision and
Language Pretrained Models",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "8928--8940",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238166"}
@article{bb243252,
AUTHOR = "Yang, Z. Q. and An, G. and Zheng, Z. X. and Cao, S. and Ruan, Q. Q.",
TITLE = "{GBC}: Guided Alignment and Adaptive Boosting {CLIP} Bridging Vision and
Language for Robust Action Recognition",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "9",
MONTH = sep,
PAGES = "8172--8187",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238167"}
@article{bb243253,
AUTHOR = "Lin, X. and Zhu, M. H. and Dang, R. H. and Zhou, G. L. and Shu, S. L. and Lin, F. and Liu, C. J. and Chen, Q. J.",
TITLE = "{CLIPose}: Category-Level Object Pose Estimation With Pre-Trained
Vision-Language Knowledge",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "10",
MONTH = oct,
PAGES = "9125--9138",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238168"}
@article{bb243254,
AUTHOR = "Huang, Z. Y. and Yang, M. and Xiao, X. Y. and Hu, P. and Peng, X.",
TITLE = "Noise-Robust Vision-Language Pre-Training With Positive-Negative
Learning",
JOURNAL = PAMI,
VOLUME = "47",
YEAR = "2025",
NUMBER = "1",
MONTH = jan,
PAGES = "338--350",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238169"}
@article{bb243255,
AUTHOR = "Yao, D. S. and Zhu, M. Q. and Zhu, H. and Cai, W. Q. and Zhou, L.",
TITLE = "Integrating synthetic datasets with {CLIP} semantic insights for single
image localization advancements",
JOURNAL = PandRS,
VOLUME = "218",
YEAR = "2024",
PAGES = "198--213",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238170"}
@article{bb243256,
AUTHOR = "Jha, A. and Singha, M. and Bhattacharya, A. and Banerjee, B.",
TITLE = "{RS3Lip}: Consistency for remote sensing image classification on part
embeddings using self-supervised learning and {CLIP}",
JOURNAL = CVIU,
VOLUME = "251",
YEAR = "2025",
PAGES = "104254",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238171"}
@article{bb243257,
AUTHOR = "Wang, X. and Jin, J. D. and Li, C. L. and Tang, J. and Zhang, C. and Wang, W.",
TITLE = "Pedestrian Attribute Recognition via {CLIP}-Based Prompt
Vision-Language Fusion",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "1",
MONTH = jan,
PAGES = "148--161",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238172"}
@inproceedings{bb243258,
AUTHOR = "Zhu, J. and Jin, J. D. and Yang, Z. H. and Wu, X. H. and Wang, X.",
TITLE = "Learning {CLIP} Guided Visual-Text Fusion Transformer for Video-based
Pedestrian Attribute Recognition",
BOOKTITLE = NFVLR23,
YEAR = "2023",
PAGES = "2626--2629",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238173"}
@article{bb243259,
AUTHOR = "Jin, W. Q. and Qu, M. X. and Shi, C. and Zhao, Y. and Wei, Y. C.",
TITLE = "{RelFormer}: Advancing contextual relations for transformer-based dense
captioning",
JOURNAL = CVIU,
VOLUME = "252",
YEAR = "2025",
PAGES = "104300",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238174"}
@article{bb243260,
AUTHOR = "Wu, Y. J. and Zhou, Y. and Saiyin, J. and Wei, B. Z. and Lai, M. and Shou, J. Z. and Xu, Y.",
TITLE = "{AttriPrompter}: Auto-Prompting With Attribute Semantics for Zero-Shot
Nuclei Detection via Visual-Language Pre-Trained Models",
JOURNAL = MedImg,
VOLUME = "44",
YEAR = "2025",
NUMBER = "2",
MONTH = feb,
PAGES = "982--993",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238175"}
@article{bb243261,
AUTHOR = "Ma, Q. J. and Yang, S. Q. and Zhang, L. J. and Lan, Q. and Yang, D. D. and Chen, H. and Tan, Y.",
TITLE = "{APOVIS}: Automated pixel-level open-vocabulary instance segmentation
through integration of pre-trained vision-language models and
foundational segmentation models",
JOURNAL = IVC,
VOLUME = "154",
YEAR = "2025",
PAGES = "105384",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238176"}
@article{bb243262,
AUTHOR = "Zhang, W. Y. and Shen, L. and Foo, C. S.",
TITLE = "Source-Free Domain Adaptation Guided by Vision and Vision-Language
Pre-training",
JOURNAL = IJCV,
VOLUME = "133",
YEAR = "2025",
NUMBER = "2",
MONTH = feb,
PAGES = "844--866",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238177"}
@article{bb243263,
AUTHOR = "Zhang, Y. W. and Wang, J. and Tang, H. Y. and Qin, R. H.",
TITLE = "{DALSCLIP}: Domain aggregation via learning stronger domain-invariant
features for {CLIP}",
JOURNAL = IVC,
VOLUME = "154",
YEAR = "2025",
PAGES = "105359",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238178"}
@article{bb243264,
AUTHOR = "Liu, Y. and Wang, X. L. and Zhu, M. Z. and Cao, Y. and Huang, T. J. and Shen, C. H.",
TITLE = "Masked Channel Modeling for Bootstrapping Visual Pre-training",
JOURNAL = IJCV,
VOLUME = "133",
YEAR = "2025",
NUMBER = "2",
MONTH = feb,
PAGES = "760--780",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238179"}
@article{bb243265,
AUTHOR = "Li, Y. and Wang, H. L. and Duan, Y. Q. and Zhang, J. H. and Li, X. M.",
TITLE = "A closer look at the explainability of Contrastive language-image
pre-training",
JOURNAL = PR,
VOLUME = "162",
YEAR = "2025",
PAGES = "111409",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238180"}
@article{bb243266,
AUTHOR = "Zhang, Z. K. and Gao, B. and Ye, J. R. and Jin, H. and Jiang, L. H. and Yang, W. M.",
TITLE = "{CLIP} prior-guided {3D} open-vocabulary occupancy prediction",
JOURNAL = PR,
VOLUME = "162",
YEAR = "2025",
PAGES = "111347",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238181"}
@article{bb243267,
AUTHOR = "Cai, S. Q. and Liu, X. and Yuan, J. L. and Zhou, Q. H.",
TITLE = "{Prompt-Ladder}: Memory-efficient prompt tuning for vision-language
models on edge devices",
JOURNAL = PR,
VOLUME = "163",
YEAR = "2025",
PAGES = "111460",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238182"}
@article{bb243268,
AUTHOR = "Ren, H. R. and Tang, F. and Zheng, H. J. and Zhao, H. and Guo, D. D. and Chang, Y.",
TITLE = "Modality-Consistent Prompt Tuning With Optimal Transport",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "3",
MONTH = mar,
PAGES = "2499--2512",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238183"}
@article{bb243269,
AUTHOR = "Cao, Y. K. and Xu, X. H. and Cheng, Y. Q. and Sun, C. and Du, Z. W. and Gao, L. and Shen, W. M.",
TITLE = "Personalizing Vision-Language Models With Hybrid Prompts for
Zero-Shot Anomaly Detection",
JOURNAL = Cyber,
VOLUME = "55",
YEAR = "2025",
NUMBER = "4",
MONTH = apr,
PAGES = "1917--1929",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238184"}
@inproceedings{bb243270,
AUTHOR = "Cao, Y. K. and Zhang, J. N. and Frittoli, L. and Cheng, Y. Q. and Shen, W. M. and Boracchi, G.",
TITLE = "{AdaCLIP}: Adapting {CLIP} with Hybrid Learnable Prompts for Zero-shot
Anomaly Detection",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XXXV: 55--72",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238185"}
@article{bb243271,
AUTHOR = "Zhang, Z. Q. and Jiang, Y. and Wang, Y. and Xie, B. and Zhang, W. and Li, Y. H. and Chen, Z. and Jin, X. and Zeng, W. J.",
TITLE = "Exploring Contrastive Pre-Training for Domain Connections in Medical
Image Segmentation",
JOURNAL = MedImg,
VOLUME = "44",
YEAR = "2025",
NUMBER = "4",
MONTH = apr,
PAGES = "1686--1698",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238186"}
@article{bb243272,
AUTHOR = "Zhang, W. B. and Zhang, Y. F. and Lin, J. F. and Huang, B. Q. and Zhang, J. and Yu, W. H.",
TITLE = "{DC-CLIP}: Multilingual {CLIP} Compression via vision-language
distillation and vision-language alignment",
JOURNAL = PR,
VOLUME = "164",
YEAR = "2025",
PAGES = "111547",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238187"}
@article{bb243273,
AUTHOR = "Wang, Y. F. and Kang, G. L.",
TITLE = "Attention head purification: A new perspective to harness {CLIP} for
domain generalization",
JOURNAL = IVC,
VOLUME = "157",
YEAR = "2025",
PAGES = "105511",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238188"}
@article{bb243274,
AUTHOR = "He, X. J. and Chen, S. and Ma, F. and Huang, Z. C. and Jin, X. J. and Liu, Z. and Fu, D. M. and Yang, Y. and Liu, J. and Feng, J. S.",
TITLE = "{VLAB}: Enhancing Video Language Pretraining by Feature Adapting and
Blending",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "2168--2180",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238189"}
@article{bb243275,
AUTHOR = "Chen, J. F. and Yuan, H. and Xie, B. J.",
TITLE = "{MIF}: Multi-source information fusion for few-shot classification with
{CLIP}",
JOURNAL = PRL,
VOLUME = "192",
YEAR = "2025",
PAGES = "113--121",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238190"}
@article{bb243276,
AUTHOR = "Luo, G. and Zhou, Y. Y. and Huang, M. L. and Ren, T. and Sun, X. S. and Ji, R. R.",
TITLE = "{MoIL}: Momentum Imitation Learning for Efficient Vision-Language
Adaptation",
JOURNAL = PAMI,
VOLUME = "47",
YEAR = "2025",
NUMBER = "7",
MONTH = jul,
PAGES = "5192--5204",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238191"}
@article{bb243277,
AUTHOR = "Lu, Z. Q. and Liu, M. and Yu, Y. L. and Wang, Z. and Li, X. and Han, J. G.",
TITLE = "Variational Adapter: Improving {CLIP} in Data-Imbalanced Scenarios",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "6",
MONTH = jun,
PAGES = "5251--5264",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238192"}
@article{bb243278,
AUTHOR = "Tu, W. J. and Deng, W. J. and Gedeon, T.",
TITLE = "Toward a Holistic Evaluation of Robustness in {CLIP} Models",
JOURNAL = PAMI,
VOLUME = "47",
YEAR = "2025",
NUMBER = "9",
MONTH = sep,
PAGES = "8280--8296",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238193"}
@article{bb243279,
AUTHOR = "Cao, M. X. and Xie, W. Y. and Zhang, X. and Zhang, J. Q. and Jiang, K. and Lei, J. and Li, Y. S.",
TITLE = "{M$^3$amba}: {CLIP}-Driven {Mamba} Model for Multi-Modal Remote Sensing
Classification",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "8",
MONTH = aug,
PAGES = "7605--7617",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238194"}
@article{bb243280,
AUTHOR = "Wei, X. Y. and Kurtz, C. and Cloppet, F.",
TITLE = "Enhancing vision-language contrastive representation learning using
domain knowledge",
JOURNAL = CVIU,
VOLUME = "259",
YEAR = "2025",
PAGES = "104403",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238195"}
@inproceedings{bb243281,
AUTHOR = "Wei, X. Y. and Kurtz, C. and Cloppet, F.",
TITLE = "Relaxing Binary Constraints in Contrastive Vision-Language Medical
Representation Learning",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "4462--4471",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238196"}
@article{bb243282,
AUTHOR = "You, S. and Li, J. and Bao, B. K.",
TITLE = "{Pro-MA}: Progressively Margin-Based Attribution in Pretrained
Vision-Language Models",
JOURNAL = MultMedMag,
VOLUME = "32",
YEAR = "2025",
NUMBER = "2",
MONTH = apr,
PAGES = "53--64",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238197"}
@article{bb243283,
AUTHOR = "Wang, Y. Z. and Hu, W. B. and Dong, Y. P. and Zhang, H. W. and Su, H. and Hong, R. C.",
TITLE = "Exploring Transferability of Multimodal Adversarial Samples for
Vision-Language Pre-Training Models with Contrastive Learning",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "6410--6421",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238198"}
@article{bb243284,
AUTHOR = "Lai, X. and Ke, X. and Xu, H. B. and Wu, S. H. and Guo, W. Z.",
TITLE = "{MSP}: Multimodal Self-Attention Prompt Learning",
JOURNAL = IP,
VOLUME = "34",
YEAR = "2025",
PAGES = "5978--5988",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238199"}
@article{bb243285,
AUTHOR = "Guo, Z. X. and Wang, T. J. J. and Pehlivan, S. and Radman, A. and Cao, M. and Laaksonen, J.",
TITLE = "Prompt-based Weakly-supervised Vision-language Pre-training",
JOURNAL = PRL,
VOLUME = "197",
YEAR = "2025",
PAGES = "8--15",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238200"}
@article{bb243286,
AUTHOR = "Ding, M. and Ma, Y. and Qin, P. and Wu, J. L. and Li, Y. H. and Nie, L. Q.",
TITLE = "{RA-BLIP}: Multimodal Adaptive Retrieval-Augmented Bootstrapping
Language-Image Pre-Training",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "7522--7532",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238201"}
@article{bb243287,
AUTHOR = "Ye, Z. P. and Jiang, F. and Wang, Q. and Huang, K. and Huang, J. Q.",
TITLE = "{IDEA}: Image description enhanced {CLIP}-adapter for image
classification",
JOURNAL = PR,
VOLUME = "171",
YEAR = "2026",
PAGES = "112224",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238202"}
@article{bb243288,
AUTHOR = "Lai, H. and Wei, X. and Sun, R. and Zhang, T. Z.",
TITLE = "Agent-Based Control Prompt Tuning for Video-Text Retrieval",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "11",
MONTH = nov,
PAGES = "11613--11626",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238203"}
@article{bb243289,
AUTHOR = "Tu, R. C. and Ji, Y. and Jiang, J. and Kong, W. J. and Cai, C. F. and Zhao, W. Z. and Wang, H. F. and Yang, Y. and Liu, W.",
TITLE = "Global and Local Semantic Completion Learning for Vision-Language
Pre-Training",
JOURNAL = PAMI,
VOLUME = "47",
YEAR = "2025",
NUMBER = "12",
MONTH = dec,
PAGES = "11065--11079",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238204"}
@inproceedings{bb243290,
AUTHOR = "Ji, Y. and Tu, R. C. and Jiang, J. and Kong, W. J. and Cai, C. F. and Zhao, W. Z. and Wang, H. F. and Yang, Y. and Liu, W.",
TITLE = "Seeing What You Miss:
Vision-Language Pre-training with Semantic Completion Learning",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "6789--6798",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238205"}
@inproceedings{bb243291,
AUTHOR = "Dahal, A. and Murad, S. A. and Rahimi, N.",
TITLE = "Embedding Shift Dissection on {CLIP}: Effects of Augmentations on {VLM}'s
Representation Learning",
BOOKTITLE = InterpVis25,
YEAR = "2025",
PAGES = "4814--4818",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238206"}
@article{bb243292,
AUTHOR = "Song, T. C. and Liu, Q. and Qin, A. and Liu, Y.",
TITLE = "{RotCLIP}: Tuning {CLIP} with Visual Adapter and Textual Prompts for
Rotation Robust Remote Sensing Image Classification",
JOURNAL = SP:IC,
VOLUME = "140",
YEAR = "2026",
PAGES = "117407",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238207"}
@article{bb243293,
AUTHOR = "Gao, X. and Chang, Z. B. and Kong, D. and Zhou, H. Y. and Lu, Y. G.",
TITLE = "{MIP-CLIP}: Multimodal Independent Prompt {CLIP} for Action Recognition",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "9918--9930",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238208"}
@article{bb243294,
AUTHOR = "Yang, Q. and Ye, M. and Tao, D. C.",
TITLE = "Synergy of Sight and Semantics: Holistic Visual Understanding With
{CLIP}",
JOURNAL = PAMI,
VOLUME = "48",
YEAR = "2026",
NUMBER = "3",
MONTH = mar,
PAGES = "3119--3136",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238209"}
@inproceedings{bb243295,
AUTHOR = "Yang, Q. and Ye, M. and Tao, D. C.",
TITLE = "Synergy of Sight and Semantics: Visual Intention Understanding with
{CLIP}",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XI: 144--160",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238210"}
@article{bb243296,
AUTHOR = "Li, M. W. and Wang, Z. and Zhang, Y. X.",
TITLE = "Improving Zero-Shot Generalization for {CLIP} With Prompt Ensemble
Self-Distillation",
JOURNAL = CirSysVideo,
VOLUME = "36",
YEAR = "2026",
NUMBER = "2",
MONTH = feb,
PAGES = "2304--2317",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238211"}
@article{bb243297,
AUTHOR = "Peng, Z. Q. and Qi, Z. Y. and Cao, Y. and Kang, Y. and Lv, W. J.",
TITLE = "Modeling Cross-Modal Semantic Transformations From Coarse to Fine in
{CLIP}",
JOURNAL = CirSysVideo,
VOLUME = "36",
YEAR = "2026",
NUMBER = "2",
MONTH = feb,
PAGES = "2164--2176",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238212"}
@article{bb243298,
AUTHOR = "Li, Y. and Guo, W. W. and Yang, X. and Liao, N. and Zhang, S. F. and Yu, Y. and Yu, W. X. and Yan, J. C.",
TITLE = "Exploiting Unlabeled Data with Multiple Expert Teachers for Open
Vocabulary Aerial Object Detection and Its Orientation Adaptation",
JOURNAL = IJCV,
VOLUME = "134",
YEAR = "2026",
NUMBER = "4",
MONTH = apr,
PAGES = "156",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238213"}
@inproceedings{bb243299,
AUTHOR = "Li, Y. and Guo, W. W. and Yang, X. and Liao, N. and He, D. and Zhou, J. Q. and Yu, W. X.",
TITLE = "Toward Open Vocabulary Aerial Object Detection with {CLIP}-activated
Student-teacher Learning",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LXXXVI: 431--448",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT238214"}
Last update:May 24, 2026 at 14:46:09