% Bibliography entries bb221800 to bb221899 (source pages: visionbib
% applicat803clip3.html and applicat803we1.html, see each BIBSOURCE field).
% Layout: one entry per paragraph, one field per line.
% Venue and journal values written without quotes (CVPR23, ECCV22, MultMed,
% PAMI, ...) are string macros; their definitions are not in this part of
% the file -- presumably they are declared elsewhere, confirm before use.
% NOTE(review): PAGES of bb221800 ("01-10") and bb221896 ("xx-yy") look like
% placeholders; verify against the publisher record.
% NOTE(review): bb221863 gives its venue as the quoted literal "ICPR22"
% rather than a macro; left as is because no macro definition is visible.

@inproceedings{bb221800,
  AUTHOR = "Jin, L. and Luo, G. and Zhou, Y. and Sun, X.S. and Jiang, G. and Shu, A. and Ji, R.R.",
  TITLE = "RefCLIP: A Universal Teacher for Weakly Supervised Referring Expression Comprehension",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "01-10",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216800"
}

@inproceedings{bb221801,
  AUTHOR = "Saito, K. and Sohn, K. and Zhang, X. and Li, C.L. and Lee, C.Y. and Saenko, K. and Pfister, T.",
  TITLE = "Prefix Conditioning Unifies Language and Label Supervision",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "2861-2870",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216801"
}

@inproceedings{bb221802,
  AUTHOR = "Park, J. and Han, B.H.",
  TITLE = "Multi-Modal Representation Learning with Text-Driven Soft Masks",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "2798-2807",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216802"
}

@inproceedings{bb221803,
  AUTHOR = "Jin, Z. and Hayat, M. and Yang, Y.W. and Guo, Y.L. and Lei, Y.J.",
  TITLE = "Context-aware Alignment and Mutual Masking for 3D-Language Pre-training",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "10984-10994",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216803"
}

@inproceedings{bb221804,
  AUTHOR = "Guo, Z.X. and Dong, B. and Ji, Z.L. and Bai, J.F. and Guo, Y.W. and Zuo, W.M.",
  TITLE = "Texts as Images in Prompt Tuning for Multi-Label Image Recognition",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "2808-2817",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216804"
}

@inproceedings{bb221805,
  AUTHOR = "Cherti, M. and Beaumont, R. and Wightman, R. and Wortsman, M. and Ilharco, G. and Gordon, C. and Schuhmann, C. and Schmidt, L. and Jitsev, J.",
  TITLE = "Reproducible Scaling Laws for Contrastive Language-Image Learning",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "2818-2829",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216805"
}

@inproceedings{bb221806,
  AUTHOR = "Lei, J. and Li, L.J. and Zhou, L. and Gan, Z. and Berg, T.L. and Bansal, M. and Liu, J.J.",
  TITLE = "Less is More: CLIPBERT for Video-and-Language Learning via Sparse Sampling",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "7327-7337",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216806"
}

@inproceedings{bb221807,
  AUTHOR = "Zhou, J.H. and Dong, L. and Gan, Z. and Wang, L.J. and Wei, F.",
  TITLE = "Non-Contrastive Learning Meets Language-Image Pre-Training",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "11028-11038",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216807"
}

@inproceedings{bb221808,
  AUTHOR = "Hu, Z. and Iscen, A. and Sun, C. and Wang, Z. and Chang, K.W. and Sun, Y.Z. and Schmid, C. and Ross, D.A. and Fathi, A.",
  TITLE = "Reveal: Retrieval-Augmented Visual-Language Pre-Training with Multi-Source Multimodal Knowledge Memory",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23369-23379",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216808"
}

@inproceedings{bb221809,
  AUTHOR = "Li, Y.H. and Fan, H.Q. and Hu, R. and Feichtenhofer, C. and He, K.",
  TITLE = "Scaling Language-Image Pre-Training via Masking",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23390-23400",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216809"
}

@inproceedings{bb221810,
  AUTHOR = "Jin, P. and Huang, J. and Xiong, P.F. and Tian, S.X. and Liu, C. and Ji, X.Y. and Yuan, L. and Chen, J.",
  TITLE = "Video-Text as Game Players: Hierarchical Banzhaf Interaction for Cross-Modal Representation Learning",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "2472-2482",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216810"
}

@inproceedings{bb221811,
  AUTHOR = "Ye, S.Q. and Xie, Y. and Chen, D.D. and Xu, Y. and Yuan, L. and Zhu, C.G. and Liao, J.",
  TITLE = "Improving Commonsense in Vision-Language Models via Knowledge Graph Riddles",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "2634-2645",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216811"
}

@inproceedings{bb221812,
  AUTHOR = "Li, H. and Zhu, J. and Jiang, X. and Zhu, X. and Li, H.S. and Yuan, C. and Wang, X.H. and Qiao, Y. and Wang, X.G. and Wang, W.H. and Dai, J.F.",
  TITLE = "Uni-Perceiver v2: A Generalist Model for Large-Scale Vision and Vision-Language Tasks",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "2691-2700",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216812"
}

@inproceedings{bb221813,
  AUTHOR = "Wu, W.H. and Wang, X.H. and Luo, H.P. and Wang, J.D. and Yang, Y. and Ouyang, W.L.",
  TITLE = "Bidirectional Cross-Modal Knowledge Exploration for Video Recognition with Pre-trained Vision-Language Models",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6620-6630",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216813"
}

@inproceedings{bb221814,
  AUTHOR = "Seth, A. and Hemani, M. and Agarwal, C.",
  TITLE = "DeAR: Debiasing Vision-Language Models with Additive Residuals",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6820-6829",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216814"
}

@inproceedings{bb221815,
  AUTHOR = "Radenovic, F. and Dubey, A. and Kadian, A. and Mihaylov, T. and Vandenhende, S. and Patel, Y. and Wen, Y. and Ramanathan, V. and Mahajan, D.",
  TITLE = "Filtering, Distillation, and Hard Negatives for Vision-Language Pre-Training",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6967-6977",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216815"
}

@inproceedings{bb221816,
  AUTHOR = "Yu, T. and Lu, Z. and Jin, X. and Chen, Z.B. and Wang, X.C.",
  TITLE = "Task Residual for Tuning Vision-Language Models",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "10899-10909",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216816"
}

@inproceedings{bb221817,
  AUTHOR = "Ma, Z.X. and Hong, J. and Gul, M.O. and Gandhi, M. and Gao, I. and Krishna, R.",
  TITLE = "CREPE: Can Vision-Language Foundation Models Reason Compositionally?",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "10910-10921",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216817"
}

@inproceedings{bb221818,
  AUTHOR = "Yin, D. and Gao, F. and Thattai, G. and Johnston, M. and Chang, K.W.",
  TITLE = "GIVL: Improving Geographical Inclusivity of Vision-Language Models with Pre-Training Methods",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "10951-10961",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216818"
}

@inproceedings{bb221819,
  AUTHOR = "Gao, C. and Peng, X.Y. and Yan, M. and Wang, H. and Yang, L.R. and Ren, H.B. and Li, H.S. and Liu, S.",
  TITLE = "Adaptive Zone-aware Hierarchical Planner for Vision-Language Navigation",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "14911-14920",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216819"
}

@inproceedings{bb221820,
  AUTHOR = "Yeh, C.H. and Russell, B. and Sivic, J. and Heilbron, F.C. and Jenni, S.",
  TITLE = "Meta-Personalizing Vision-Language Models to Find Named Instances in Video",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "19123-19132",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216820"
}

@inproceedings{bb221821,
  AUTHOR = "Gou, Y.H. and Ko, T. and Yang, H. and Kwok, J. and Zhang, Y. and Wang, M.X.",
  TITLE = "Leveraging per Image-Token Consistency for Vision-Language Pre-Training",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "19155-19164",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216821"
}

@inproceedings{bb221822,
  AUTHOR = "Wang, S.J. and Chang, J.L. and Li, H.J. and Wang, Z.H. and Ouyang, W.L. and Tian, Q.",
  TITLE = "Open-Set Fine-Grained Retrieval via Prompting Vision-Language Evaluator",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "19381-19391",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216822"
}

@inproceedings{bb221823,
  AUTHOR = "Cheng, F. and Wang, X. and Lei, J. and Crandall, D. and Bansal, M. and Bertasius, G.",
  TITLE = "VindLU: A Recipe for Effective Video-and-Language Pretraining",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "10739-10750",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216823"
}

@inproceedings{bb221824,
  AUTHOR = "Zhou, H. and Martin Martin, R. and Kapadia, M. and Savarese, S. and Niebles, J.C.",
  TITLE = "Procedure-Aware Pretraining for Instructional Video Understanding",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "10727-10738",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216824"
}

@inproceedings{bb221825,
  AUTHOR = "Yang, A. and Nagrani, A. and Seo, P.H. and Miech, A. and Pont Tuset, J. and Laptev, I. and Sivic, J. and Schmid, C.",
  TITLE = "Vid2Seq: Large-Scale Pretraining of a Visual Language Model for Dense Video Captioning",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "10714-10726",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216825"
}

@inproceedings{bb221826,
  AUTHOR = "Ji, Y. and Tu, R.C. and Jiang, J. and Kong, W.J. and Cai, C. and Zhao, W.Z. and Wang, H.F. and Yang, Y. and Liu, W.",
  TITLE = "Seeing What You Miss: Vision-Language Pre-training with Semantic Completion Learning",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6789-6798",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216826"
}

@inproceedings{bb221827,
  AUTHOR = "Alper, M. and Fiman, M. and Averbuch Elor, H.",
  TITLE = "Is BERT Blind? Exploring the Effect of Vision-and-Language Pretraining on Visual Language Understanding",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6778-6788",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216827"
}

@inproceedings{bb221828,
  AUTHOR = "Liu, M.Y. and Jiang, J. and Zhu, C. and Yin, X.C.",
  TITLE = "VLPD: Context-Aware Pedestrian Detection via Vision-Language Semantic Self-Supervision",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6662-6671",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216828"
}

@inproceedings{bb221829,
  AUTHOR = "Wei, Y.X. and Cao, Y. and Zhang, Z. and Peng, H. and Yao, Z.L. and Xie, Z. and Hu, H. and Guo, B.",
  TITLE = "iCLIP: Bridging Image Classification and Contrastive Language-Image Pre-training for Visual Recognition",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "2776-2786",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216829"
}

@inproceedings{bb221830,
  AUTHOR = "Hyung, J. and Hwang, S. and Kim, D. and Lee, H. and Choo, J.",
  TITLE = "Local 3D Editing via 3D Distillation of CLIP Knowledge",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "12674-12684",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216830"
}

@inproceedings{bb221831,
  AUTHOR = "Mu, N. and Kirillov, A. and Wagner, D. and Xie, S.",
  TITLE = "SLIP: Self-supervision Meets Language-Image Pre-training",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXVI:529-544",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216831"
}

@inproceedings{bb221832,
  AUTHOR = "Crowson, K. and Biderman, S. and Kornis, D. and Stander, D. and Hallahan, E. and Castricato, L. and Raff, E.",
  TITLE = "VQGAN-CLIP: Open Domain Image Generation and Editing with Natural Language Guidance",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXXVII:88-105",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216832"
}

@inproceedings{bb221833,
  AUTHOR = "Wu, X.S. and Zhu, F. and Zhao, R. and Li, H.S.",
  TITLE = "CORA: Adapting CLIP for Open-Vocabulary Detection with Region Prompting and Anchor Pre-Matching",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "7031-7040",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216833"
}

@inproceedings{bb221834,
  AUTHOR = "Dong, X.Y. and Bao, J.M. and Zheng, Y. and Zhang, T. and Chen, D.D. and Yang, H. and Zeng, M. and Zhang, W.M. and Yuan, L. and Chen, D. and Wen, F. and Yu, N.H.",
  TITLE = "MaskCLIP: Masked Self-Distillation Advances Contrastive Language-Image Pretraining",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "10995-11005",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216834"
}

@inproceedings{bb221835,
  AUTHOR = "Xie, C.W. and Sun, S.Y. and Xiong, X. and Zheng, Y. and Zhao, D.L. and Zhou, J.R.",
  TITLE = "RA-CLIP: Retrieval Augmented Contrastive Language-Image Pre-Training",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "19265-19274",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216835"
}

@inproceedings{bb221836,
  AUTHOR = "Chen, P.J. and Li, Q. and Biaz, S. and Bui, T. and Nguyen, A.",
  TITLE = "gScoreCAM: What Objects Is CLIP Looking At?",
  BOOKTITLE = ACCV22,
  YEAR = "2022",
  PAGES = "IV:588-604",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216836"
}

@inproceedings{bb221837,
  AUTHOR = "Wang, R. and Duan, X.Y. and Kang, G.L. and Liu, J.Z. and Lin, S.H. and Xu, S. and Lv, J. and Zhang, B.C.",
  TITLE = "AttriCLIP: A Non-Incremental Learner for Incremental Knowledge Learning",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "3654-3663",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216837"
}

@inproceedings{bb221838,
  AUTHOR = "Rasheed, H. and Khattak, M.U. and Maaz, M. and Khan, S. and Khan, F.S.",
  TITLE = "Fine-tuned CLIP Models are Efficient Video Learners",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6545-6554",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216838"
}

@inproceedings{bb221839,
  AUTHOR = "Liu, R. and Huang, J.J. and Li, G. and Feng, J.S. and Wu, X.L. and Li, T.H.",
  TITLE = "Revisiting Temporal Modeling for CLIP-Based Image-to-Video Knowledge Transferring",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6555-6564",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216839"
}

@inproceedings{bb221840,
  AUTHOR = "Tschannen, M. and Mustafa, B. and Houlsby, N.",
  TITLE = "CLIPPO: Image-and-Language Understanding from Pixels Only",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "11006-11017",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216840"
}

@inproceedings{bb221841,
  AUTHOR = "Zhou, Z.Q. and Lei, Y.J. and Zhang, B. and Liu, L.Q. and Liu, Y.F.",
  TITLE = "ZegCLIP: Towards Adapting CLIP for Zero-shot Semantic Segmentation",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "11175-11185",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216841"
}

@inproceedings{bb221842,
  AUTHOR = "He, W.B. and Jamonnak, S. and Gou, L. and Ren, L.",
  TITLE = "CLIP-S4: Language-Guided Self-Supervised Semantic Segmentation",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "11207-11216",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216842"
}

@inproceedings{bb221843,
  AUTHOR = "Huang, Z.X. and Jampani, V. and Thai, A. and Li, Y.Z. and Stojanov, S. and Rehg, J.M.",
  TITLE = "ShapeClipper: Scalable 3D Shape Learning from Single-View Images via Geometric and CLIP-Based Consistency",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "12912-12922",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216843"
}

@inproceedings{bb221844,
  AUTHOR = "Tao, M. and Bao, B.K. and Tang, H. and Xu, C.S.",
  TITLE = "GALIP: Generative Adversarial CLIPs for Text-to-Image Synthesis",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "14214-14223",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216844"
}

@inproceedings{bb221845,
  AUTHOR = "Zeng, Y. and Jiang, C. and Mao, J. and Han, J.H. and Ye, C.Q. and Huang, Q.Q. and Yeung, D.Y. and Yang, Z. and Liang, X.D. and Xu, H.",
  TITLE = "CLIP2: Contrastive Language-Image-Point Pretraining from Real-World Point Cloud Data",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "15244-15253",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216845"
}

@inproceedings{bb221846,
  AUTHOR = "Lin, Y.Q. and Chen, M.H. and Wang, W.X. and Wu, B. and Li, K. and Lin, B.B. and Liu, H.F. and He, X.F.",
  TITLE = "CLIP is Also an Efficient Segmenter: A Text-Driven Approach for Weakly Supervised Semantic Segmentation",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "15305-15314",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216846"
}

@inproceedings{bb221847,
  AUTHOR = "Sanghi, A. and Fu, R. and Liu, V. and Willis, K.D.D. and Shayani, H. and Khasahmadi, A.H. and Sridhar, S. and Ritchie, D.",
  TITLE = "CLIP-Sculptor: Zero-Shot Generation of High-Fidelity and Diverse Shapes from Natural Language",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "18339-18348",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216847"
}

@inproceedings{bb221848,
  AUTHOR = "Pei, R.J. and Liu, J.Z. and Li, W. and Shao, B. and Xu, S. and Dai, P. and Lu, J.W. and Yan, Y.",
  TITLE = "CLIPPING: Distilling CLIP-Based Models with a Student Base for Video-Language Retrieval",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "18983-18992",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216848"
}

@inproceedings{bb221849,
  AUTHOR = "Jeong, J. and Zou, Y. and Kim, T. and Zhang, D.Q. and Ravichandran, A. and Dabeer, O.",
  TITLE = "WinCLIP: Zero-/Few-Shot Anomaly Classification and Segmentation",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "19606-19616",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216849"
}

@inproceedings{bb221850,
  AUTHOR = "Shamshad, F. and Naseer, M. and Nandakumar, K.",
  TITLE = "CLIP2Protect: Protecting Facial Privacy Using Text-Guided Makeup via Adversarial Latent Search",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "20595-20605",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216850"
}

@inproceedings{bb221851,
  AUTHOR = "Chen, Y.H. and Qi, X. and Wang, J.A. and Zhang, L.",
  TITLE = "DisCo-CLIP: A Distributed Contrastive Loss for Memory Efficient CLIP Training",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "22648-22657",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216851"
}

@inproceedings{bb221852,
  AUTHOR = "Wasim, S.T. and Naseer, M. and Khan, S. and Khan, F.S. and Shah, M.",
  TITLE = "Vita-CLIP: Video and text adaptive CLIP via Multimodal Prompting",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23034-23044",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216852"
}

@inproceedings{bb221853,
  AUTHOR = "Parelli, M. and Delitzas, A. and Hars, N. and Vlassis, G. and Anagnostidis, S. and Bachmann, G. and Hofmann, T.",
  TITLE = "CLIP-Guided Vision-Language Pre-training for Question Answering in 3D Scenes",
  BOOKTITLE = ODRUM23,
  YEAR = "2023",
  PAGES = "5607-5612",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216853"
}

@inproceedings{bb221854,
  AUTHOR = "Ning, S. and Qiu, L. and Liu, Y.F. and He, X.M.",
  TITLE = "HOICLIP: Efficient Knowledge Transfer for HOI Detection with Vision-Language Models",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23507-23517",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216854"
}

@inproceedings{bb221855,
  AUTHOR = "Yao, L.W. and Han, J.H. and Liang, X.D. and Xu, D. and Zhang, W. and Li, Z.G. and Xu, H.",
  TITLE = "DetCLIPv2: Scalable Open-Vocabulary Object Detection Pre-training via Word-Region Alignment",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23497-23506",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216855"
}

@inproceedings{bb221856,
  AUTHOR = "Singha, M. and Jha, A. and Solanki, B. and Bose, S. and Banerjee, B.",
  TITLE = "APPLeNet: Visual Attention Parameterized Prompt Learning for Few-Shot Remote Sensing Image Generalization using CLIP",
  BOOKTITLE = EarthVision23,
  YEAR = "2023",
  PAGES = "2024-2034",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216856"
}

@inproceedings{bb221857,
  AUTHOR = "Zhu, J. and Jin, J. and Yang, Z.H. and Wu, X. and Wang, X.",
  TITLE = "Learning CLIP Guided Visual-Text Fusion Transformer for Video-based Pedestrian Attribute Recognition",
  BOOKTITLE = NFVLR23,
  YEAR = "2023",
  PAGES = "2626-2629",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216857"
}

@inproceedings{bb221858,
  AUTHOR = "Gannamaneni, S.S. and Sadaghiani, A. and Rao, R.P. and Mock, M. and Akila, M.",
  TITLE = "Investigating CLIP Performance for Meta-data Generation in AD Datasets",
  BOOKTITLE = SAIAD23,
  YEAR = "2023",
  PAGES = "3840-3850",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216858"
}

@inproceedings{bb221859,
  AUTHOR = "Chen, R.N. and Liu, Y.Q. and Kong, L.D. and Zhu, X.G. and Ma, Y.X. and Li, Y. and Hou, Y.N. and Qiao, Y. and Wang, W.P.",
  TITLE = "CLIP2Scene: Towards Label-efficient 3D Scene Understanding by CLIP",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "7020-7030",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216859"
}

@inproceedings{bb221860,
  AUTHOR = "Ni, B.L. and Peng, H.W. and Chen, M.H. and Zhang, S.Y. and Meng, G.F. and Fu, J.L. and Xiang, S.M. and Ling, H.B.",
  TITLE = "Expanding Language-Image Pretrained Models for General Video Recognition",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "IV:1-18",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216860"
}

@inproceedings{bb221861,
  AUTHOR = "Zhang, R.R. and Zhang, W. and Fang, R.Y. and Gao, P. and Li, K.C. and Dai, J.F. and Qiao, Y. and Li, H.S.",
  TITLE = "Tip-Adapter: Training-Free Adaption of CLIP for Few-Shot Classification",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXXV:493-510",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216861"
}

@inproceedings{bb221862,
  AUTHOR = "Yang, J. and Duan, J.L. and Tran, S. and Xu, Y. and Chanda, S. and Chen, L.Q. and Zeng, B. and Chilimbi, T. and Huang, J.Z.",
  TITLE = "Vision-Language Pre-Training with Triple Contrastive Learning",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "15650-15659",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216862"
}

@inproceedings{bb221863,
  AUTHOR = "Guo, X.Y. and Duan, J.L. and Kuo, C.C.J. and Gichoya, J.W. and Banerjee, I.",
  TITLE = "Augmenting Vision Language Pretraining by Learning Codebook with Visual Semantics",
  BOOKTITLE = "ICPR22",
  YEAR = "2022",
  PAGES = "4779-4785",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216863"
}

@inproceedings{bb221864,
  AUTHOR = "Zhou, C. and Loy, C.C. and Dai, B.",
  TITLE = "Extract Free Dense Labels from CLIP",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXVIII:696-712",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216864"
}

@inproceedings{bb221865,
  AUTHOR = "Lin, Z. and Geng, S.J. and Zhang, R.R. and Gao, P. and de Melo, G. and Wang, X.G. and Dai, J.F. and Qiao, Y. and Li, H.S.",
  TITLE = "Frozen CLIP Models are Efficient Video Learners",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXXV:388-404",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216865"
}

@inproceedings{bb221866,
  AUTHOR = "Rao, Y.M. and Zhao, W.L. and Chen, G.Y. and Tang, Y.S. and Zhu, Z. and Huang, G. and Zhou, J. and Lu, J.W.",
  TITLE = "DenseCLIP: Language-Guided Dense Prediction with Context-Aware Prompting",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "18061-18070",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216866"
}

@inproceedings{bb221867,
  AUTHOR = "Kwon, G. and Ye, J.C.",
  TITLE = "CLIPstyler: Image Style Transfer with a Single Text Condition",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "18041-18050",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216867"
}

@inproceedings{bb221868,
  AUTHOR = "Khandelwal, A. and Weihs, L. and Mottaghi, R. and Kembhavi, A.",
  TITLE = "Simple but Effective: CLIP Embeddings for Embodied AI",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "14809-14818",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216868"
}

@inproceedings{bb221869,
  AUTHOR = "Ma, H.Y. and Zhao, H. and Lin, Z. and Kale, A. and Wang, Z.Y. and Yu, T. and Gu, J.X. and Choudhary, S. and Xie, X.H.",
  TITLE = "EI-CLIP: Entity-aware Interventional Contrastive Learning for E-commerce Cross-modal Retrieval",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "18030-18040",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216869"
}

@inproceedings{bb221870,
  AUTHOR = "Barraco, M. and Cornia, M. and Cascianelli, S. and Baraldi, L. and Cucchiara, R.",
  TITLE = "The Unreasonable Effectiveness of CLIP Features for Image Captioning: An Experimental Analysis",
  BOOKTITLE = MULA22,
  YEAR = "2022",
  PAGES = "4661-4669",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216870"
}

@inproceedings{bb221871,
  AUTHOR = "Tevet, G. and Gordon, B. and Hertz, A. and Bermano, A.H. and Cohen Or, D.",
  TITLE = "MotionCLIP: Exposing Human Motion Generation to CLIP Space",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXII:358-374",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216871"
}

@inproceedings{bb221872,
  AUTHOR = "Materzynska, J. and Torralba, A. and Bau, D.",
  TITLE = "Disentangling visual and written concepts in CLIP",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "16389-16398",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216872"
}

@inproceedings{bb221873,
  AUTHOR = "Li, M. and Xu, R. and Wang, S. and Zhou, L. and Lin, X.D. and Zhu, C.G. and Zeng, M. and Ji, H. and Chang, S.F.",
  TITLE = "CLIP-Event: Connecting Text and Images with Event Structures",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "16399-16408",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216873"
}

@inproceedings{bb221874,
  AUTHOR = "Zhong, Y. and Yang, J.W. and Zhang, P.C. and Li, C.Y. and Codella, N. and Li, L.H. and Zhou, L. and Dai, X. and Yuan, L. and Li, Y. and Gao, J.F.",
  TITLE = "RegionCLIP: Region-based Language-Image Pretraining",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "16772-16782",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216874"
}

@inproceedings{bb221875,
  AUTHOR = "Baldrati, A. and Bertini, M. and Uricchio, T. and del Bimbo, A.",
  TITLE = "Effective conditioned and composed image retrieval combining CLIP-based features",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "21434-21442",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216875"
}

@inproceedings{bb221876,
  AUTHOR = "Baldrati, A. and Bertini, M. and Uricchio, T. and del Bimbo, A.",
  TITLE = "Conditioned and composed image retrieval combining and partially fine-tuning CLIP-based features",
  BOOKTITLE = ODRUM22,
  YEAR = "2022",
  PAGES = "4955-4964",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216876"
}

@inproceedings{bb221877,
  AUTHOR = "Patashnik, O. and Wu, Z.Z. and Shechtman, E. and Cohen Or, D. and Lischinski, D.",
  TITLE = "StyleCLIP: Text-Driven Manipulation of StyleGAN Imagery",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "2065-2074",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT216877"
}

@article{bb221878,
  AUTHOR = "Su, H.H. and Chen, T.W. and Kao, C.C. and Hsu, W.H. and Chien, S.Y.",
  TITLE = "Preference-Aware View Recommendation System for Scenic Photos Based on Bag-of-Aesthetics-Preserving Features",
  JOURNAL = MultMed,
  VOLUME = "14",
  YEAR = "2012",
  NUMBER = "3",
  PAGES = "833-843",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216878"
}

@article{bb221879,
  AUTHOR = "Chen, L. and Xu, D. and Tsang, I.W. and Luo, J.",
  TITLE = "Tag-Based Image Retrieval Improved by Augmented Features and Group-Based Refinement",
  JOURNAL = MultMed,
  VOLUME = "14",
  YEAR = "2012",
  NUMBER = "4",
  PAGES = "1057-1067",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216879"
}

@article{bb221880,
  AUTHOR = "Chen, L. and Xu, D. and Tsang, I.W. and Li, X.",
  TITLE = "Spectral Embedded Hashing for Scalable Image Retrieval",
  JOURNAL = Cyber,
  VOLUME = "44",
  YEAR = "2014",
  NUMBER = "7",
  MONTH = "July",
  PAGES = "1180-1190",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216880"
}

@article{bb221881,
  AUTHOR = "Jiao, B. and Yang, L. and Xu, J. and Tian, Q. and Wu, F.",
  TITLE = "Visually Summarizing Web Pages Through Internal and External Images",
  JOURNAL = MultMed,
  VOLUME = "14",
  YEAR = "2012",
  NUMBER = "6",
  PAGES = "1673-1683",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216881"
}

@article{bb221882,
  AUTHOR = "Sawant, N. and Wang, J.Z. and Li, J.",
  TITLE = "Enhancing Training Collections for Image Annotation: An Instance-Weighted Mixture Modeling Approach",
  JOURNAL = IP,
  VOLUME = "22",
  YEAR = "2013",
  NUMBER = "9",
  PAGES = "3562-3577",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216882"
}

@article{bb221883,
  AUTHOR = "Tian, J. and Huang, Y. and Guo, Z. and Qi, X. and Chen, Z. and Huang, T.",
  TITLE = "A Multi-Modal Topic Model for Image Annotation Using Text Analysis",
  JOURNAL = SPLetters,
  VOLUME = "22",
  YEAR = "2015",
  NUMBER = "7",
  MONTH = "July",
  PAGES = "886-890",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216883"
}

@article{bb221884,
  AUTHOR = "Rodriguez Vaamonde, S. and Torresani, L. and Fitzgibbon, A.W.",
  TITLE = "What Can Pictures Tell Us About Web Pages? Improving Document Search Using Images",
  JOURNAL = PAMI,
  VOLUME = "37",
  YEAR = "2015",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "1274-1285",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216884"
}

@article{bb221885,
  AUTHOR = "Tirkaz, C. and Eisenstein, J. and Sezgin, T.M. and Yanikoglu, B.A.",
  TITLE = "Identifying visual attributes for object recognition from text and taxonomy",
  JOURNAL = CVIU,
  VOLUME = "137",
  YEAR = "2015",
  NUMBER = "1",
  PAGES = "12-23",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216885"
}

@article{bb221886,
  AUTHOR = "Lu, Y.J. and Yang, L. and Yang, K. and Rui, Y.",
  TITLE = "Mining Latent Attributes From Click-Through Logs for Image Recognition",
  JOURNAL = MultMed,
  VOLUME = "17",
  YEAR = "2015",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "1213-1224",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216886"
}

@article{bb221887,
  AUTHOR = "Fu, J. and Wang, J. and Rui, Y. and Wang, X. and Mei, T. and Lu, H.",
  TITLE = "Image Tag Refinement With View-Dependent Concept Representations",
  JOURNAL = CirSysVideo,
  VOLUME = "25",
  YEAR = "2015",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "1409-1422",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216887"
}

@article{bb221888,
  AUTHOR = "Belongie, S. and Perona, P.",
  TITLE = "Visipedia circa 2015",
  JOURNAL = PRL,
  VOLUME = "72",
  YEAR = "2016",
  NUMBER = "1",
  PAGES = "15-24",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216888"
}

@article{bb221889,
  AUTHOR = "Niu, L. and Li, W. and Xu, D.",
  TITLE = "Exploiting Privileged Information from Web Data for Action and Event Recognition",
  JOURNAL = IJCV,
  VOLUME = "118",
  YEAR = "2016",
  NUMBER = "2",
  MONTH = "June",
  PAGES = "130-150",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216889"
}

@inproceedings{bb221890,
  AUTHOR = "Li, W. and Niu, L. and Xu, D.",
  TITLE = "Exploiting Privileged Information from Web Data for Image Categorization",
  BOOKTITLE = ECCV14,
  YEAR = "2014",
  PAGES = "V:437-452",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216890"
}

@article{bb221891,
  AUTHOR = "Sejal, D. and Rashmi, V. and Venugopal, K.R. and Iyengar, S.S. and Patnaik, L.M.",
  TITLE = "Image recommendation based on keyword relevance using absorbing Markov chain and image features",
  JOURNAL = MultInfoRetr,
  VOLUME = "5",
  YEAR = "2016",
  NUMBER = "3",
  MONTH = "September",
  PAGES = "185-199",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216891"
}

@article{bb221892,
  AUTHOR = "Yan, Y. and Nie, F.P. and Li, W. and Gao, C.Q. and Yang, Y. and Xu, D.",
  TITLE = "Image Classification by Cross-Media Active Learning with Privileged Information",
  JOURNAL = MultMed,
  VOLUME = "18",
  YEAR = "2016",
  NUMBER = "12",
  MONTH = "December",
  PAGES = "2494-2502",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216892"
}

@article{bb221893,
  AUTHOR = "Qian, X. and Li, C. and Lan, K. and Hou, X. and Li, Z. and Han, J.",
  TITLE = "POI Summarization by Aesthetics Evaluation From Crowd Source Social Media",
  JOURNAL = IP,
  VOLUME = "27",
  YEAR = "2018",
  NUMBER = "3",
  MONTH = "March",
  PAGES = "1178-1189",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216893"
}

@article{bb221894,
  AUTHOR = "Li, Z.C. and Tang, J.H. and Mei, T.",
  TITLE = "Deep Collaborative Embedding for Social Image Understanding",
  JOURNAL = PAMI,
  VOLUME = "41",
  YEAR = "2019",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "2070-2083",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216894"
}

@article{bb221895,
  AUTHOR = "Zheng, C.M. and Wu, Z.W. and Wang, T. and Cai, Y. and Li, Q.",
  TITLE = "Object-Aware Multimodal Named Entity Recognition in Social Media Posts With Adversarial Learning",
  JOURNAL = MultMed,
  VOLUME = "23",
  YEAR = "2021",
  PAGES = "2520-2532",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216895"
}

@article{bb221896,
  AUTHOR = "Park, J.Y. and Ryu, D.J. and Nam, K.W. and Jang, I. and Jang, M. and Lee, Y.",
  TITLE = "DeepDBSCAN: Deep Density-Based Clustering for Geo-Tagged Photos",
  JOURNAL = IJGI,
  VOLUME = "10",
  YEAR = "2021",
  NUMBER = "8",
  PAGES = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216896"
}

@article{bb221897,
  AUTHOR = "Zheng, C. and Zhu, L. and Cheng, Z.Y. and Li, J.J. and Liu, A.A.",
  TITLE = "Adaptive Partial Multi-View Hashing for Efficient Social Image Retrieval",
  JOURNAL = MultMed,
  VOLUME = "23",
  YEAR = "2021",
  PAGES = "4079-4092",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216897"
}

@inproceedings{bb221898,
  AUTHOR = "Liu, Z.J. and Stent, S. and Li, J. and Gideon, J. and Han, S.",
  TITLE = "LocTex: Learning Data-Efficient Visual Representations from Localized Textual Supervision",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "2147-2156",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216898"
}

@inproceedings{bb221899,
  AUTHOR = "Desai, K. and Johnson, J.",
  TITLE = "VirTex: Learning Visual Representations from Textual Annotations",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "11157-11168",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT216899"
}