@inproceedings{bb213600,
        AUTHOR = "Feng, G. and Hu, Z. W. and Zhang, L. and Lu, H. C.",
        TITLE = "Encoder Fusion Network with Co-Attention Embedding for Referring
Image Segmentation",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "15501--15510",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208678"}

@article{bb213601,
        AUTHOR = "Liu, D. Z. and Zhou, P. and Xu, Z. and Wang, H. Z. and Li, R. X.",
        TITLE = "Few-Shot Temporal Sentence Grounding via Memory-Guided Semantic
Learning",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "5",
        MONTH = may,
        PAGES = "2491--2505",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208679"}

@article{bb213602,
        AUTHOR = "Sun, M. J. and Xiao, J. and Lim, E. G. and Zhao, Y.",
        TITLE = "Cycle-Free Weakly Referring Expression Grounding With Self-Paced
Learning",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "1611--1621",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208680"}

@article{bb213603,
        AUTHOR = "Sun, M. Y. and Suo, W. and Wang, P. and Zhang, Y. N. and Wu, Q.",
        TITLE = "A Proposal-Free One-Stage Framework for Referring Expression
Comprehension and Generation via Dense Cross-Attention",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "2446--2458",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208681"}

@article{bb213604,
        AUTHOR = "Sun, Y. F. and Zhang, Y. and Jiang, H. and Hu, Y. L. and Yin, B. C.",
        TITLE = "Multi-level attention for referring expression comprehension",
        JOURNAL = PRL,
        VOLUME = "172",
        YEAR = "2023",
        PAGES = "252--258",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208682"}

@article{bb213605,
        AUTHOR = "Wang, R. and Tang, Z. and Zhou, Q. L. and Liu, X. Q. and Hui, T. R. and Tan, Q. and Liu, S.",
        TITLE = "Unified Transformer with Isomorphic Branches for Natural Language
Tracking",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "4529--4541",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208683"}

@article{bb213606,
        AUTHOR = "Li, H. and Sun, M. J. and Xiao, J. and Lim, E. G. and Zhao, Y.",
        TITLE = "Fully and Weakly Supervised Referring Expression Segmentation With
End-to-End Learning",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "5999--6012",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208684"}

@article{bb213607,
        AUTHOR = "Liu, C. and Jiang, X. D. and Ding, H. H.",
        TITLE = "Instance-Specific Feature Propagation for Referring Segmentation",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "3657--3667",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208685"}

@article{bb213608,
        AUTHOR = "Song, Y. Z. and Chen, Y. S. and Shuai, H. H.",
        TITLE = "Decoupling-Cooperative Framework for Referring Expression
Comprehension",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "1542--1546",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208686"}

@article{bb213609,
        AUTHOR = "Hua, G. G. and Liao, M. and Tian, S. and Zhang, Y. H. and Zou, W. B.",
        TITLE = "Multiple Relational Learning Network for Joint Referring Expression
Comprehension and Segmentation",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "8805--8816",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208687"}

@article{bb213610,
        AUTHOR = "Wang, W. B. and Pagnucco, M. and Xu, C. P. and Song, Y.",
        TITLE = "{InterREC}: An Interpretable Method for Referring Expression
Comprehension",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "9330--9342",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208688"}

@article{bb213611,
        AUTHOR = "Ke, J. C. and Wang, J. and Chen, J. C. and Jhuo, I. H. and Lin, C. W. and Lin, Y. Y.",
        TITLE = "{CLIPREC}: Graph-Based Domain Adaptive Network for Zero-Shot Referring
Expression Comprehension",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "2480--2492",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208689"}

@article{bb213612,
        AUTHOR = "Li, X. C. and Fan, B. Y. and Zhang, R. and Zhao, K. and Guo, Z. H. and Zhao, Y. Q. and Li, R.",
        TITLE = "Inexactly Matched Referring Expression Comprehension With Rationale",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3937--3950",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208690"}

@article{bb213613,
        AUTHOR = "Luo, G. and Zhou, Y. and Sun, J. and Sun, X. S. and Ji, R. R.",
        TITLE = "A Survivor in the Era of Large-Scale Pretraining: An Empirical Study
of One-Stage Referring Expression Comprehension",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3689--3700",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208691"}

@article{bb213614,
        AUTHOR = "Miao, P. and Su, W. and Wang, G. and Li, X. and Xi, L.",
        TITLE = "Self-Paced Multi-Grained Cross-Modal Interaction Modeling for
Referring Expression Comprehension",
        JOURNAL = IP,
        VOLUME = "33",
        YEAR = "2024",
        PAGES = "1497--1507",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208692"}

@article{bb213615,
        AUTHOR = "Liu, Z. T. and Xu, T. Y. and Song, X. N. and Wu, X. J.",
        TITLE = "Unified Referring Expression Generation for Bounding Boxes and
Segmentations",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "636--640",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208693"}

@inproceedings{bb213616,
        AUTHOR = "Wu, Y. X. and Zhang, Z. and Xie, C. and Zhu, F. and Zhao, R.",
        TITLE = "Advancing Referring Expression Segmentation Beyond Single Image",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2628--2638",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208694"}

@inproceedings{bb213617,
        AUTHOR = "Kurita, S. and Katsura, N. and Onami, E.",
        TITLE = "{RefEgo}: Referring Expression Comprehension Dataset from First-Person
Perception of {Ego4D}",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15168--15178",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208695"}

@inproceedings{bb213618,
        AUTHOR = "Qiao, Y. and Qi, Y. and Yu, Z. and Liu, J. and Wu, Q.",
        TITLE = "March in Chat: Interactive Prompting for Remote Embodied Referring
Expression",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15712--15721",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208696"}

@inproceedings{bb213619,
        AUTHOR = "Chen, Y. and Du, R. and Liang, K. and Ma, Z. Y.",
        TITLE = "Self-Enhanced Training Framework for Referring Expression Grounding",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "3060--3064",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208697"}

@inproceedings{bb213620,
        AUTHOR = "Sun, J. and Luo, G. and Zhou, Y. and Sun, X. S. and Jiang, G. and Wang, Z. and Ji, R. R.",
        TITLE = "{RefTeacher}: A Strong Baseline for Semi-Supervised Referring
Expression Comprehension",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "19144--19154",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208698"}

@inproceedings{bb213621,
        AUTHOR = "Tang, J. J. and Zheng, G. and Shi, C. and Yang, S.",
        TITLE = "Contrastive Grouping with Transformer for Referring Image
Segmentation",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23570--23580",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208699"}

@inproceedings{bb213622,
        AUTHOR = "Liu, J. and Ding, H. and Cai, Z. W. and Zhang, Y. T. and Satzoda, R. K. and Mahadevan, V. and Manmatha, R.",
        TITLE = "{PolyFormer}: Referring Image Segmentation as Sequential Polygon
Generation",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "18653--18663",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208700"}

@inproceedings{bb213623,
        AUTHOR = "Xu, L. and Huang, M. H. and Shang, X. and Yuan, Z. H. and Sun, Y. and Liu, J.",
        TITLE = "Meta Compositional Referring Expression Segmentation",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "19478--19487",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208701"}

@inproceedings{bb213624,
        AUTHOR = "Liu, C. and Ding, H. H. and Jiang, X. D.",
        TITLE = "{GRES}: Generalized Referring Expression Segmentation",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23592--23601",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208702"}

@inproceedings{bb213625,
        AUTHOR = "Song, S. and Lin, X. D. and Liu, J. Y. and Guo, Z. M. and Chang, S. F.",
        TITLE = "Co-Grounding Networks with Semantic Attention for Referring
Expression Comprehension in Videos",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "1346--1355",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208703"}

@inproceedings{bb213626,
        AUTHOR = "Sun, M. J. and Xiao, J. and Lim, E. G.",
        TITLE = "Iterative Shrinking for Referring Expression Grounding Using Deep
Reinforcement Learning",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "14055--14064",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208704"}

@inproceedings{bb213627,
        AUTHOR = "Wang, P. and Wu, Q. and Cao, J. W. and Shen, C. H. and Gao, L. L. and van den Hengel, A. J.",
        TITLE = "Neighbourhood Watch: Referring Expression Comprehension via
Language-Guided Graph Attention Networks",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "1960--1968",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208705"}

@inproceedings{bb213628,
        AUTHOR = "Yang, S. B. and Li, G. B. and Yu, Y. Z.",
        TITLE = "Dynamic Graph Attention for Referring Expression Comprehension",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4643--4652",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208706"}

@inproceedings{bb213629,
        AUTHOR = "Zhang, H. W. and Niu, Y. L. and Chang, S. F.",
        TITLE = "Grounding Referring Expressions in Images by Variational Context",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "4158--4166",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208707"}

@inproceedings{bb213630,
        AUTHOR = "Yu, L. C. and Lin, Z. and Shen, X. H. and Yang, J. M. and Lu, X. and Bansal, M. and Berg, T. L.",
        TITLE = "{MAttNet}: Modular Attention Network for Referring Expression
Comprehension",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "1307--1315",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208708"}

@inproceedings{bb213631,
        AUTHOR = "Luo, R. and Shakhnarovich, G.",
        TITLE = "Comprehension-Guided Referring Expressions",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "3125--3134",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208709"}

@article{bb213632,
        AUTHOR = "Tung, F. and Mori, G.",
        TITLE = "Deep Neural Network Compression by In-Parallel Pruning-Quantization",
        JOURNAL = PAMI,
        VOLUME = "42",
        YEAR = "2020",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "568--579",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208710"}

@inproceedings{bb213633,
        AUTHOR = "Tung, F. and Mori, G.",
        TITLE = "{CLIP-Q}: Deep Network Compression Learning by In-parallel
Pruning-Quantization",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "7873--7882",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208711"}

@article{bb213634,
        AUTHOR = "Kwon, G. and Ye, J. C.",
        TITLE = "One-Shot Adaptation of {GAN} in Just One {CLIP}",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "12179--12191",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208712"}

@article{bb213635,
        AUTHOR = "Han, B. H. and Jiang, X. Y. and Fang, Z. J. and Fujita, H. and Gao, Y. B.",
        TITLE = "{F-SCP}: An automatic prompt generation method for specific classes
based on visual language pre-training models",
        JOURNAL = PR,
        VOLUME = "147",
        YEAR = "2024",
        PAGES = "110096",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208713"}

@article{bb213636,
        AUTHOR = "Liu, B. and Lu, D. and Wei, D. and Wu, X. and Wang, Y. and Zhang, Y. and Zheng, Y. F.",
        TITLE = "Improving Medical Vision-Language Contrastive Pretraining With
Semantics-Aware Triage",
        JOURNAL = MedImg,
        VOLUME = "42",
        YEAR = "2023",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "3579--3589",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208714"}

@article{bb213637,
        AUTHOR = "Gao, P. and Geng, S. J. and Zhang, R. R. and Ma, T. and Fang, R. Y. and Zhang, Y. F. and Li, H. S. and Qiao, Y.",
        TITLE = "{CLIP-Adapter}: Better Vision-Language Models with Feature Adapters",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "581--595",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208715"}

@inproceedings{bb213638,
        AUTHOR = "Liu, Y. H. and He, J. W. and Gu, J. J. and Kong, X. T. and Qiao, Y. and Dong, C.",
        TITLE = "{DegAE}: A New Pretraining Paradigm for Low-Level Vision",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23292--23303",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208716"}

@article{bb213639,
        AUTHOR = "Dong, S. and Wang, L. and Du, B. and Meng, X. L.",
        TITLE = "{ChangeCLIP}: Remote sensing change detection with multimodal
vision-language representation learning",
        JOURNAL = PandRS,
        VOLUME = "208",
        YEAR = "2024",
        PAGES = "53--69",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208717"}

@article{bb213640,
        AUTHOR = "Peng, F. and Yang, X. S. and Xiao, L. H. and Wang, Y. and Xu, C. S.",
        TITLE = "{SgVA-CLIP}: Semantic-Guided Visual Adapting of Vision-Language Models
for Few-Shot Image Classification",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3469--3480",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208718"}

@article{bb213641,
        AUTHOR = "Guo, Y. F. and Chen, Y. P. and Ma, Z.",
        TITLE = "{NeuroCLIP}: Neuromorphic Data Understanding by {CLIP} and {SNN}",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "246--250",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208719"}

@article{bb213642,
        AUTHOR = "Xing, Y. H. and Wu, Q. and Cheng, D. and Zhang, S. Z. and Liang, G. Q. and Wang, P. and Zhang, Y. N.",
        TITLE = "Dual Modality Prompt Tuning for Vision-Language Pre-Trained Model",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "2056--2068",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208720"}

@article{bb213643,
        AUTHOR = "Xiao, L. H. and Yang, X. S. and Peng, F. and Yan, M. and Wang, Y. and Xu, C. S.",
        TITLE = "{CLIP-VG}: Self-Paced Curriculum Adapting of {CLIP} for Visual Grounding",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "4334--4347",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208721"}

@article{bb213644,
        AUTHOR = "Zhang, K. and Yang, Y. and Yu, J. and Jiang, H. and Fan, J. P. and Huang, Q. M. and Han, W. D.",
        TITLE = "Multi-Task Paired Masking With Alignment Modeling for Medical
Vision-Language Pre-Training",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "4706--4721",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208722"}

% NOTE(review): "Turrisi da Costa" is braced so BibTeX keeps it as one surname
% instead of splitting it into a von part ("Turrisi da") and a last name ("Costa").
@article{bb213645,
        AUTHOR = "Zara, G. and {Turrisi da Costa}, V. G. and Roy, S. and Rota, P. and Ricci, E.",
        TITLE = "Simplifying open-set video domain adaptation with contrastive
learning",
        JOURNAL = CVIU,
        VOLUME = "241",
        YEAR = "2024",
        PAGES = "103953",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208723"}

@inproceedings{bb213646,
        AUTHOR = "Zara, G. and Roy, S. and Rota, P. and Ricci, E.",
        TITLE = "{AutoLabel}: {CLIP}-based framework for Open-Set Video Domain Adaptation",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "11504--11513",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208724"}

@article{bb213647,
        AUTHOR = "Wang, X. H. and Wang, W. G. and Shao, J. Y. and Yang, Y.",
        TITLE = "Learning to Follow and Generate Instructions for Language-Capable
Navigation",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "3334--3350",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208725"}

@inproceedings{bb213648,
        AUTHOR = "Gupta, D. and Kharbanda, S. and Zhou, J. W. and Li, W. and Pfister, H. and Wei, D. L.",
        TITLE = "{CLIPTrans}: Transferring Visual Knowledge with Pre-trained Models for
Multimodal Machine Translation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2863--2874",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208726"}

@inproceedings{bb213649,
        AUTHOR = "Fang, H. and Yang, Z. F. and Wei, Y. H. and Zang, X. H. and Ban, C. and Feng, Z. and He, Z. J. and Li, Y. X. and Sun, H.",
        TITLE = "Alignment and Generation Adapter for Efficient Video-Text
Understanding",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2783--2789",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208727"}

@inproceedings{bb213650,
        AUTHOR = "Zhu, B. and Niu, Y. and Han, Y. C. and Wu, Y. and Zhang, H. W.",
        TITLE = "Prompt-aligned Gradient for Prompt Tuning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15613--15623",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208728"}

@inproceedings{bb213651,
        AUTHOR = "Yuan, H. J. and Zhang, S. W. and Wang, X. and Albanie, S. and Pan, Y. and Feng, T. and Jiang, J. W. and Ni, D. and Zhang, Y. and Zhao, D. L.",
        TITLE = "{RLIPv2}: Fast Scaling of Relational Language-Image Pre-training",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21592--21604",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208729"}

@inproceedings{bb213652,
        AUTHOR = "Wang, Z. and Yu, X. and Rao, Y. M. and Zhou, J. and Lu, J. W.",
        TITLE = "Take-A-Photo: {3D-to-2D} Generative Pre-training of Point Cloud Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "5617--5627",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208730"}

@inproceedings{bb213653,
        AUTHOR = "Li, M. and Wu, J. and Wang, X. and Chen, C. and Qin, J. and Xiao, X. F. and Wang, R. and Zheng, M. and Pan, X.",
        TITLE = "{AlignDet}: Aligning Pre-training and Fine-tuning in Object Detection",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "6843--6853",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208731"}

@inproceedings{bb213654,
        AUTHOR = "Lee, S. and Chung, H. J. and Park, M. Y. and Park, J. and Ryu, W. S. and Ye, J. C.",
        TITLE = "Improving {3D} Imaging with Pre-Trained Perpendicular {2D} Diffusion
Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "10676--10686",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208732"}

@inproceedings{bb213655,
        AUTHOR = "Ye, Q. H. and Xu, G. H. and Yan, M. and Xu, H. Y. and Qian, Q. and Zhang, J. and Huang, F.",
        TITLE = "{HiTeA}: Hierarchical Temporal-Aware Video-Language Pre-training",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15359--15370",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208733"}

@inproceedings{bb213656,
        AUTHOR = "Wu, C. Y. and Zhang, X. M. and Zhang, Y. and Wang, Y. F. and Xie, W.",
        TITLE = "{MedKLIP}: Medical Knowledge Enhanced Language-Image Pre-Training for
{X-ray} Diagnosis",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21315--21326",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208734"}

@inproceedings{bb213657,
        AUTHOR = "Yang, Q. S. and Li, W. Y. and Li, B. and Yuan, Y. X.",
        TITLE = "{MRM}: Masked Relation Modeling for Medical Image Pre-Training with
Genetics",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21395--21405",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208735"}

@inproceedings{bb213658,
        AUTHOR = "Ma, W. X. and Li, S. and Zhang, J. and Liu, C. H. and Kang, J. X. and Wang, Y. L. and Huang, G.",
        TITLE = "Borrowing Knowledge From Pre-trained Language Model:
A New Data-efficient Visual Learning Paradigm",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "18740--18751",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208736"}

@inproceedings{bb213659,
        AUTHOR = "Ganugula, P. and Kumar, Y. S. S. S. S. and Reddy, N. K. S. and Chellingi, P. and Thakur, A. and Kasera, N. and Anand, C. S.",
        TITLE = "{MOSAIC}: Multi-Object Segmented Arbitrary Stylization Using {CLIP}",
        BOOKTITLE = NIVT23,
        YEAR = "2023",
        PAGES = "892--903",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208737"}

@inproceedings{bb213660,
        AUTHOR = "Luo, Z. Y. and Zhao, P. and Xu, C. and Geng, X. and Shen, T. and Tao, C. Y. and Ma, J. and Lin, Q. W. and Jiang, D. X.",
        TITLE = "{LexLIP}: Lexicon-Bottlenecked Language-Image Pre-Training for
Large-Scale Image-Text Sparse Retrieval",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "11172--11183",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208738"}

@inproceedings{bb213661,
        AUTHOR = "Zhai, X. H. and Mustafa, B. and Kolesnikov, A. and Beyer, L.",
        TITLE = "Sigmoid Loss for Language Image Pre-Training",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "11941--11952",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208739"}

@inproceedings{bb213662,
        AUTHOR = "Shtedritski, A. and Rupprecht, C. and Vedaldi, A.",
        TITLE = "What does {CLIP} know about a red circle? Visual prompt engineering for
{VLMs}",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "11953--11963",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208740"}

@inproceedings{bb213663,
        AUTHOR = "Zhu, Z. and Ma, X. J. and Chen, Y. X. and Deng, Z. D. and Huang, S. Y. and Li, Q.",
        TITLE = "{3D-VisTA}: Pre-trained Transformer for {3D} Vision and Text Alignment",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2899--2909",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208741"}

@inproceedings{bb213664,
        AUTHOR = "Yang, K. C. and Deng, J. K. and An, X. and Li, J. W. and Feng, Z. and Guo, J. and Yang, J. and Liu, T. L.",
        TITLE = "{ALIP}: Adaptive Language-Image Pre-training with Synthetic Caption",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2910--2919",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208742"}

@inproceedings{bb213665,
        AUTHOR = "Yang, Y. F. and Huang, W. Q. and Wei, Y. X. and Peng, H. and Jiang, X. Y. and Jiang, H. Q. and Wei, F. and Wang, Y. and Hu, H. and Qiu, L. and Yang, Y. Q.",
        TITLE = "Attentive Mask {CLIP}",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2759--2769",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208743"}

@inproceedings{bb213666,
        AUTHOR = "Vinker, Y. and Alaluf, Y. and Cohen Or, D. and Shamir, A.",
        TITLE = "{CLIPascene}: Scene Sketching with Different Types and Levels of
Abstraction",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "4123--4133",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208744"}

@inproceedings{bb213667,
        AUTHOR = "Wei, Y. X. and Hu, H. and Xie, Z. and Liu, Z. and Zhang, Z. and Cao, Y. and Bao, J. M. and Chen, D. and Guo, B.",
        TITLE = "Improving {CLIP} Fine-tuning Performance",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "5416--5426",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208745"}

@inproceedings{bb213668,
        AUTHOR = "Maniparambil, M. and Vorster, C. and Molloy, D. and Murphy, N. and McGuinness, K. and O'Connor, N. E.",
        TITLE = "Enhancing {CLIP} with {GPT-4}: Harnessing Visual Descriptions as Prompts",
        BOOKTITLE = MMFM23,
        YEAR = "2023",
        PAGES = "262--271",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208746"}

@inproceedings{bb213669,
        AUTHOR = "Huang, T. Y. and Dong, B. and Yang, Y. H. and Huang, X. S. and Lau, R. W. H. and Ouyang, W. L. and Zuo, W. M.",
        TITLE = "{CLIP2Point}: Transfer {CLIP} to Point Cloud Classification with
Image-Depth Pre-Training",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "22100--22110",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208747"}

@inproceedings{bb213670,
        AUTHOR = "Wu, K. and Peng, H. and Zhou, Z. H. and Xiao, B. and Liu, M. and Yuan, L. and Xuan, H. and Valenzuela, M. and Chen, X. S. and Wang, X. G. and Chao, H. Y. and Hu, H.",
        TITLE = "{TinyCLIP}: {CLIP} Distillation via Affinity Mimicking and Weight
Inheritance",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21913--21923",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208748"}

@inproceedings{bb213671,
        AUTHOR = "Deng, X. and Shi, H. and Huang, R. and Li, C. L. and Xu, H. and Han, J. H. and Kwok, J. and Zhao, S. and Zhang, W. and Liang, X. D.",
        TITLE = "{GrowCLIP}: Data-aware Automatic Model Growing for Large-scale
Contrastive Language-Image Pre-training",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "22121--22132",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208749"}

@inproceedings{bb213672,
        AUTHOR = "Ranasinghe, K. and McKinzie, B. and Ravi, S. and Yang, Y. F. and Toshev, A. and Shlens, J.",
        TITLE = "Perceptual Grouping in Contrastive Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "5548--5561",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208750"}

@inproceedings{bb213673,
        AUTHOR = "Shao, B. and Liu, J. Z. and Pei, R. and Xu, S. and Dai, P. and Lu, J. W. and Li, W. and Yan, Y.",
        TITLE = "{HiVLP}: Hierarchical Interactive Video-Language Pre-Training",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "13710--13720",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208751"}

@inproceedings{bb213674,
        AUTHOR = "Ali, M. and Khan, S.",
        TITLE = "{CLIP-Decoder}: ZeroShot Multilabel Classification using Multimodal
{CLIP} Aligned Representations",
        BOOKTITLE = VLAR23,
        YEAR = "2023",
        PAGES = "4677--4681",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208752"}

@inproceedings{bb213675,
        AUTHOR = "Singha, M. and Pal, H. and Jha, A. and Banerjee, B.",
        TITLE = "{AD-CLIP}: Adapting Domains in Prompt Space Using {CLIP}",
        BOOKTITLE = OutDistri23,
        YEAR = "2023",
        PAGES = "4357--4366",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208753"}

% NOTE(review): YEAR was missing (required for inproceedings). 2023 is assumed
% from the ICCV 2023 workshop venue; confirm against the OpenSUN3D macro definition.
@inproceedings{bb213676,
        AUTHOR = "Zhang, J. and Dong, R. and Ma, K.",
        TITLE = "{CLIP-FO3D}:
Learning Free Open-world {3D} Scene Representations from {2D} Dense {CLIP}",
        BOOKTITLE = OpenSUN3D,
        YEAR = "2023",
        PAGES = "2040--2051",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208754"}

% NOTE(review): YEAR was missing (required for inproceedings). 2023 is assumed
% from the ICCV 2023 workshop venue; confirm against the OpenSUN3D macro definition.
% NOTE(review): PAGES previously ended at 2049, overlapping the next paper in the
% same proceedings, which starts at 2040. Corrected to end at 2039; verify upstream.
@inproceedings{bb213677,
        AUTHOR = "Auty, D. and Mikolajczyk, K.",
        TITLE = "Learning to Prompt {CLIP} for Monocular Depth Estimation:
Exploring the Limits of Human Language",
        BOOKTITLE = OpenSUN3D,
        YEAR = "2023",
        PAGES = "2031--2039",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208755"}

% NOTE(review): YEAR was missing (required for inproceedings). 2023 is assumed
% from the ICCV 2023 workshop venue; confirm against the OpenSUN3D macro definition.
@inproceedings{bb213678,
        AUTHOR = "Hegde, D. and Valanarasu, J. M. J. and Patel, V. M.",
        TITLE = "{CLIP} goes {3D}: Leveraging Prompt Tuning for Language Grounded {3D}
Recognition",
        BOOKTITLE = OpenSUN3D,
        YEAR = "2023",
        PAGES = "2020--2030",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208756"}

@inproceedings{bb213679,
        AUTHOR = "Xu, X. and Xiong, T. Y. and Ding, Z. and Tu, Z. W.",
        TITLE = "{MasQCLIP} for Open-Vocabulary Universal Image Segmentation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "887--898",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208757"}

@inproceedings{bb213680,
        AUTHOR = "Wang, H. L. and Li, Y. and Yao, H. and Li, X. M.",
        TITLE = "{CLIPN} for Zero-Shot {OOD} Detection: Teaching {CLIP} to Say No",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "1802--1812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208758"}

@inproceedings{bb213681,
        AUTHOR = "Zhu, X. Y. and Zhang, R. and He, B. and Zhou, A. and Wang, D. and Zhao, B. and Gao, P.",
        TITLE = "Not All Features Matter:
Enhancing Few-shot {CLIP} with Adaptive Prior Refinement",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2605--2615",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208759"}

@inproceedings{bb213682,
        AUTHOR = "Paiss, R. and Ephrat, A. and Tov, O. and Zada, S. and Mosseri, I. and Irani, M. and Dekel, T.",
        TITLE = "Teaching {CLIP} to Count to Ten",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "3147--3157",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208760"}

@inproceedings{bb213683,
        AUTHOR = "Zhu, X. Y. and Zhang, R. R. and He, B. and Guo, Z. and Zeng, Z. and Qin, Z. and Zhang, S. H. and Gao, P.",
        TITLE = "{PointCLIP V2}: Prompting {CLIP} and {GPT} for Powerful {3D} Open-world
Learning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2639--2650",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208761"}

@inproceedings{bb213684,
        AUTHOR = "Yuan, M. and Lv, N. N. and Xie, Y. F. and Lu, F. X. and Zhan, K.",
        TITLE = "{CLIP-FG}: Selecting Discriminative Image Patches by Contrastive
Language-Image Pre-Training for Fine-Grained Image Classification",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "560--564",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208762"}

@inproceedings{bb213685,
        AUTHOR = "Zeng, Z. and Ge, Y. Y. and Liu, X. H. and Chen, B. and Luo, P. and Xia, S. T. and Ge, Y. X.",
        TITLE = "Learning Transferable Spatiotemporal Representations from Natural
Script Knowledge",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23079--23089",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208763"}

@inproceedings{bb213686,
        AUTHOR = "Wang, J. P. and Ge, Y. X. and Yan, R. and Ge, Y. Y. and Lin, K. Q. and Tsutsui, S. and Lin, X. D. and Cai, G. and Wu, J. P. and Shan, Y. and Qie, X. and Shou, M. Z.",
        TITLE = "All in One: Exploring Unified Video-Language Pre-Training",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6598--6608",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208764"}

@inproceedings{bb213687,
        AUTHOR = "Ramrakhya, R. and Batra, D. and Wijmans, E. and Das, A.",
        TITLE = "{PIRLNav}: Pretraining with Imitation and {RL} Finetuning for {OBJECTNAV}",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "17896--17906",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208765"}

@inproceedings{bb213688,
        AUTHOR = "Lin, X. D. and Tiwari, S. and Huang, S. Y. and Li, M. and Shou, M. Z. and Ji, H. and Chang, S. F.",
        TITLE = "Towards Fast Adaptation of Pretrained Contrastive Models for
Multi-channel Video-Language Retrieval",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "14846--14855",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208766"}

@inproceedings{bb213689,
        AUTHOR = "Wang, H. C. and Du, X. D. and Li, J. H. and Yeh, R. A. and Shakhnarovich, G.",
        TITLE = "Score {Jacobian} Chaining: Lifting Pretrained {2D} Diffusion Models for
{3D} Generation",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "12619--12629",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208767"}

% NOTE(review): PAGES was "01-10", a placeholder rather than a real range in the
% CVPR 2023 proceedings; replaced with the range recalled from the published
% record -- verify against the publisher listing before relying on it.
@inproceedings{bb213690,
        AUTHOR = "Jin, L. and Luo, G. and Zhou, Y. and Sun, X. S. and Jiang, G. and Shu, A. and Ji, R. R.",
        TITLE = "{RefCLIP}: A Universal Teacher for Weakly Supervised Referring
Expression Comprehension",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2681--2690",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208768"}

@inproceedings{bb213691,
        AUTHOR = "Saito, K. and Sohn, K. and Zhang, X. and Li, C. L. and Lee, C. Y. and Saenko, K. and Pfister, T.",
        TITLE = "Prefix Conditioning Unifies Language and Label Supervision",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2861--2870",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208769"}

@inproceedings{bb213692,
        AUTHOR = "Park, J. and Han, B. H.",
        TITLE = "Multi-Modal Representation Learning with Text-Driven Soft Masks",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2798--2807",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208770"}

@inproceedings{bb213693,
        AUTHOR = "Jin, Z. and Hayat, M. and Yang, Y. W. and Guo, Y. L. and Lei, Y. J.",
        TITLE = "Context-aware Alignment and Mutual Masking for {3D}-Language
Pre-training",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10984--10994",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208771"}

@inproceedings{bb213694,
        AUTHOR = "Guo, Z. X. and Dong, B. and Ji, Z. L. and Bai, J. F. and Guo, Y. and Zuo, W. M.",
        TITLE = "Texts as Images in Prompt Tuning for Multi-Label Image Recognition",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2808--2817",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208772"}

@inproceedings{bb213695,
        AUTHOR = "Cherti, M. and Beaumont, R. and Wightman, R. and Wortsman, M. and Ilharco, G. and Gordon, C. and Schuhmann, C. and Schmidt, L. and Jitsev, J.",
        TITLE = "Reproducible Scaling Laws for Contrastive Language-Image Learning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2818--2829",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208773"}

@inproceedings{bb213696,
        AUTHOR = "Lei, J. and Li, L. J. and Zhou, L. and Gan, Z. and Berg, T. L. and Bansal, M. and Liu, J. J.",
        TITLE = "Less is More:
{CLIPBERT} for Video-and-Language Learning via Sparse Sampling",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "7327--7337",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208774"}

@inproceedings{bb213697,
        AUTHOR = "Zhou, J. H. and Dong, L. and Gan, Z. and Wang, L. J. and Wei, F.",
        TITLE = "Non-Contrastive Learning Meets Language-Image Pre-Training",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "11028--11038",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208775"}

@inproceedings{bb213698,
        AUTHOR = "Hu, Z. and Iscen, A. and Sun, C. and Wang, Z. and Chang, K. W. and Sun, Y. Z. and Schmid, C. and Ross, D. A. and Fathi, A.",
        TITLE = "Reveal: Retrieval-Augmented Visual-Language Pre-Training with
Multi-Source Multimodal Knowledge Memory",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23369--23379",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208776"}

@inproceedings{bb213699,
        AUTHOR = "Li, Y. H. and Fan, H. Q. and Hu, R. and Feichtenhofer, C. and He, K.",
        TITLE = "Scaling Language-Image Pre-Training via Masking",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23390--23400",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT208777"}

Last update: Apr 18, 2024 at 11:38:49