% Bibliography entries bb236200 through bb236299.
% Layout: one entry per paragraph, one field per line.
% JOURNAL and BOOKTITLE values are bare macro names (PAMI, CVPR21, MultMed, ...);
% no string definitions for them appear in this part of the file, so they are
% presumably declared elsewhere and must be loaded before these entries.
% BIBSOURCE records the web page and anchor each entry is attributed to.

% ---- Entries whose BIBSOURCE page is applicat803refex3.html ----

@article{bb236200,
  AUTHOR = "Feng, G. and Zhang, L. and Sun, J. and Hu, Z.W. and Lu, H.C.",
  TITLE = "Referring Segmentation via Encoder-Fused Cross-Modal Attention Network",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "7654-7667",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231175"
}

@inproceedings{bb236201,
  AUTHOR = "Feng, G. and Hu, Z.W. and Zhang, L. and Lu, H.C.",
  TITLE = "Encoder Fusion Network with Co-Attention Embedding for Referring Image Segmentation",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "15501-15510",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231176"
}

@article{bb236202,
  AUTHOR = "Liu, D.Z. and Zhou, P. and Xu, Z. and Wang, H.Z. and Li, R.X.",
  TITLE = "Few-Shot Temporal Sentence Grounding via Memory-Guided Semantic Learning",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "5",
  MONTH = "May",
  PAGES = "2491-2505",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231177"
}

@article{bb236203,
  AUTHOR = "Sun, M.J. and Xiao, J. and Lim, E.G. and Zhao, Y.",
  TITLE = "Cycle-Free Weakly Referring Expression Grounding With Self-Paced Learning",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "1611-1621",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231178"
}

@article{bb236204,
  AUTHOR = "Sun, M.Y. and Suo, W. and Wang, P. and Zhang, Y.N. and Wu, Q.",
  TITLE = "A Proposal-Free One-Stage Framework for Referring Expression Comprehension and Generation via Dense Cross-Attention",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "2446-2458",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231179"
}

@article{bb236205,
  AUTHOR = "Sun, Y.F. and Zhang, Y. and Jiang, H. and Hu, Y.L. and Yin, B.C.",
  TITLE = "Multi-level attention for referring expression comprehension",
  JOURNAL = PRL,
  VOLUME = "172",
  YEAR = "2023",
  PAGES = "252-258",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231180"
}

@article{bb236206,
  AUTHOR = "Wang, R. and Tang, Z. and Zhou, Q.L. and Liu, X.Q. and Hui, T.R. and Tan, Q. and Liu, S.",
  TITLE = "Unified Transformer with Isomorphic Branches for Natural Language Tracking",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "4529-4541",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231181"
}

@article{bb236207,
  AUTHOR = "Li, H. and Sun, M.J. and Xiao, J. and Lim, E.G. and Zhao, Y.",
  TITLE = "Fully and Weakly Supervised Referring Expression Segmentation With End-to-End Learning",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "5999-6012",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231182"
}

@article{bb236208,
  AUTHOR = "Liu, C. and Jiang, X.D. and Ding, H.H.",
  TITLE = "Instance-Specific Feature Propagation for Referring Segmentation",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "3657-3667",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231183"
}

@article{bb236209,
  AUTHOR = "Song, Y.Z. and Chen, Y.S. and Shuai, H.H.",
  TITLE = "Decoupling-Cooperative Framework for Referring Expression Comprehension",
  JOURNAL = SPLetters,
  VOLUME = "30",
  YEAR = "2023",
  PAGES = "1542-1546",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231184"
}

@article{bb236210,
  AUTHOR = "Hua, G.G. and Liao, M. and Tian, S. and Zhang, Y.H. and Zou, W.B.",
  TITLE = "Multiple Relational Learning Network for Joint Referring Expression Comprehension and Segmentation",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "8805-8816",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231185"
}

@article{bb236211,
  AUTHOR = "Wang, W.B. and Pagnucco, M. and Xu, C.P. and Song, Y.",
  TITLE = "InterREC: An Interpretable Method for Referring Expression Comprehension",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "9330-9342",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231186"
}

@article{bb236212,
  AUTHOR = "Ke, J.C. and Wang, J. and Chen, J.C. and Jhuo, I.H. and Lin, C.W. and Lin, Y.Y.",
  TITLE = "CLIPREC: Graph-Based Domain Adaptive Network for Zero-Shot Referring Expression Comprehension",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "2480-2492",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231187"
}

@article{bb236213,
  AUTHOR = "Ke, J.C. and Wang, J. and Wong, W.K. and Toomey, A. and Wen, J.",
  TITLE = "Graph-Based Group Division Network for Referring Expression Comprehension",
  JOURNAL = CirSysVideo,
  VOLUME = "35",
  YEAR = "2025",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "6170-6183",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231188"
}

@article{bb236214,
  AUTHOR = "Li, X.C. and Fan, B.Y. and Zhang, R.Z. and Zhao, K. and Guo, Z.H. and Zhao, Y.Q. and Li, R.",
  TITLE = "Inexactly Matched Referring Expression Comprehension With Rationale",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "3937-3950",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231189"
}

@article{bb236215,
  AUTHOR = "Luo, G. and Zhou, Y.Y. and Sun, J. and Sun, X.S. and Ji, R.R.",
  TITLE = "A Survivor in the Era of Large-Scale Pretraining: An Empirical Study of One-Stage Referring Expression Comprehension",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "3689-3700",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231190"
}

@article{bb236216,
  AUTHOR = "Miao, P.H. and Su, W. and Wang, G.A. and Li, X.W. and Xi, L.",
  TITLE = "Self-Paced Multi-Grained Cross-Modal Interaction Modeling for Referring Expression Comprehension",
  JOURNAL = IP,
  VOLUME = "33",
  YEAR = "2024",
  PAGES = "1497-1507",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231191"
}

@article{bb236217,
  AUTHOR = "Liu, Z.T. and Xu, T.Y. and Song, X.N. and Wu, X.J.",
  TITLE = "Unified Referring Expression Generation for Bounding Boxes and Segmentations",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "636-640",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231192"
}

@article{bb236218,
  AUTHOR = "Zhang, Y.J. and Li, Q.Z. and Pan, Y. and Zhao, X.G. and Tan, M.",
  TITLE = "Multi-Stage Image-Language Cross-Generative Fusion Network for Video-Based Referring Expression Comprehension",
  JOURNAL = IP,
  VOLUME = "33",
  YEAR = "2024",
  PAGES = "3256-3270",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231193"
}

@article{bb236219,
  AUTHOR = "Lu, M.C. and Li, R.F. and Feng, F.X. and Ma, Z.Y. and Wang, X.J.",
  TITLE = "LGR-NET: Language Guided Reasoning Network for Referring Expression Comprehension",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "7771-7784",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231194"
}

@article{bb236220,
  AUTHOR = "Yao, H.B. and Wang, L.P. and Cai, C.T. and Wang, W. and Zhang, Z. and Shang, X.B.",
  TITLE = "Language conditioned multi-scale visual attention networks for visual grounding",
  JOURNAL = IVC,
  VOLUME = "150",
  YEAR = "2024",
  PAGES = "105242",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231195"
}

@article{bb236221,
  AUTHOR = "Ji, Z. and Wu, J. and Wang, Y.D. and Yang, A.P. and Han, J.G.",
  TITLE = "Progressive Semantic Reconstruction Network for Weakly Supervised Referring Expression Grounding",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "12",
  MONTH = "December",
  PAGES = "13058-13070",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231196"
}

@article{bb236222,
  AUTHOR = "Qiu, H.Q. and Wang, L.X. and Zhao, T. and Meng, F.M. and Wu, Q.B. and Li, H.L.",
  TITLE = "MCCE-REC: MLLM-Driven Cross-Modal Contrastive Entropy Model for Zero-Shot Referring Expression Comprehension",
  JOURNAL = CirSysVideo,
  VOLUME = "35",
  YEAR = "2025",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "754-768",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231197"
}

@article{bb236223,
  AUTHOR = "Ke, J.C. and Zhang, Q. and Wang, J. and Ding, H.Q. and Zhang, P.F. and Wen, J.",
  TITLE = "Graph-based referring expression comprehension with expression-guided selective filtering and noun-oriented reasoning",
  JOURNAL = PR,
  VOLUME = "161",
  YEAR = "2025",
  PAGES = "111222",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231198"
}

@article{bb236224,
  AUTHOR = "Ke, J.C. and Wang, D. and Chen, J.C. and Jhuo, I.H. and Lin, C.W. and Lin, Y.Y.",
  TITLE = "Make Graph-Based Referring Expression Comprehension Great Again Through Expression-Guided Dynamic Gating and Regression",
  JOURNAL = MultMed,
  VOLUME = "27",
  YEAR = "2025",
  PAGES = "1950-1961",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231199"
}

@article{bb236225,
  AUTHOR = "Huang, S.J. and Li, F. and Zhang, H. and Liu, S.L. and Zhang, L. and Wang, L.W.",
  TITLE = "A Mutual Supervision Framework for Referring Expression Segmentation and Generation",
  JOURNAL = IJCV,
  VOLUME = "133",
  YEAR = "2025",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "3597-3612",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231200"
}

@article{bb236226,
  AUTHOR = "Ke, X. and Xu, P. and Guo, W.Z.",
  TITLE = "Language-Image Consistency Augmentation and Distillation Network for visual grounding",
  JOURNAL = PR,
  VOLUME = "166",
  YEAR = "2025",
  PAGES = "111663",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231201"
}

@article{bb236227,
  AUTHOR = "Yang, X.Z. and Liu, J.Z. and Wang, P. and Wang, G.Q. and Yang, Y. and Shen, H.T.",
  TITLE = "New Dataset and Methods for Fine-Grained Compositional Referring Expression Comprehension via Specialist-MLLM Collaboration",
  JOURNAL = PAMI,
  VOLUME = "47",
  YEAR = "2025",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "8598-8612",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231202"
}

@inproceedings{bb236228,
  AUTHOR = "Wang, Z.C. and Pan, Z.Y. and Peng, Z. and Cheng, J. and Xiao, L.W. and Jiang, W. and Cao, Z.G.",
  TITLE = "Exploring Contextual Attribute Density in Referring Expression Counting",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "19587-19596",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231203"
}

@inproceedings{bb236229,
  AUTHOR = "Chen, X. and Luo, Y.X. and Luo, G. and Ji, J.Y. and Ding, H.H. and Zhou, Y.",
  TITLE = "DViN: Dynamic Visual Routing Network for Weakly Supervised Referring Expression Comprehension",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "14347-14357",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231204"
}

@inproceedings{bb236230,
  AUTHOR = "Wang, S.J. and Kim, D. and Taalimi, A. and Sun, C. and Kuo, W.C.",
  TITLE = "Learning Visual Grounding from Generative Vision and Language Model",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "8057-8067",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231205"
}

@inproceedings{bb236231,
  AUTHOR = "Wu, T.Y. and Huang, S.Y. and Wang, Y.C.A.F.",
  TITLE = "Data-Efficient 3D Visual Grounding via Order-Aware Referring",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "3107-3117",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231206"
}

@inproceedings{bb236232,
  AUTHOR = "Chu, T.Y. and Lin, Y.X. and Huang, C.C. and Hua, K.L.",
  TITLE = "Enhancing Anchor-based Weakly Supervised Referring Expression Comprehension with Cross-modality Attention",
  BOOKTITLE = ACCV24,
  YEAR = "2024",
  PAGES = "III: 131-147",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231207"
}

@inproceedings{bb236233,
  AUTHOR = "Nag, S. and Goswami, K. and Karanam, S.",
  TITLE = "Safari: Adaptive Sequence Transformer for Weakly Supervised Referring Expression Segmentation",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "XLIV: 485-503",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231208"
}

@inproceedings{bb236234,
  AUTHOR = "Dai, S.Y. and Liu, J. and Cheung, N.M.",
  TITLE = "Referring Expression Counting",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "16985-16995",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231209"
}

@inproceedings{bb236235,
  AUTHOR = "Han, Z. and Zhu, F. and Lao, Q. and Jiang, H.",
  TITLE = "Zero-Shot Referring Expression Comprehension via Structural Similarity Between Images and Captions",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "14364-14375",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231210"
}

@inproceedings{bb236236,
  AUTHOR = "Su, W. and Miao, P.H. and Dou, H.Z. and Li, X.",
  TITLE = "ScanFormer: Referring Expression Comprehension by Iteratively Scanning",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "13449-13458",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231211"
}

@inproceedings{bb236237,
  AUTHOR = "Yu, Z.H. and Li, R.",
  TITLE = "Revisiting Counterfactual Problems in Referring Expression Comprehension",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "13438-13448",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231212"
}

% The superscript in the next title is wrapped in math mode and braced:
% a bare caret in text mode is a LaTeX error when the entry is typeset.
@inproceedings{bb236238,
  AUTHOR = "Li, X. and Qiu, K. and Wang, J.L. and Xu, X.H. and Singh, R. and Yamazaki, K. and Chen, H. and Huang, X.N. and Raj, B.",
  TITLE = "{$R^2$-Bench}: Benchmarking the Robustness of Referring Perception Models Under Perturbations",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "IX: 211-230",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231213"
}

@inproceedings{bb236239,
  AUTHOR = "Chng, Y.X. and Zheng, H. and Han, Y.Z. and Qiu, X. and Huang, G.",
  TITLE = "Mask Grounding for Referring Image Segmentation",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "26563-26573",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231214"
}

@inproceedings{bb236240,
  AUTHOR = "Shah, N.A. and VS, V. and Patel, V.M.",
  TITLE = "LQMFormer: Language-Aware Query Mask Transformer for Referring Image Segmentation",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "12903-12913",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231215"
}

@inproceedings{bb236241,
  AUTHOR = "Wang, W.X. and Yue, T.T. and Zhang, Y. and Guo, L.T. and He, X.J. and Wang, X.L. and Liu, J.",
  TITLE = "Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "12998-13008",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231216"
}

@inproceedings{bb236242,
  AUTHOR = "Wu, Y.X. and Zhang, Z. and Xie, C. and Zhu, F. and Zhao, R.",
  TITLE = "Advancing Referring Expression Segmentation Beyond Single Image",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "2628-2638",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231217"
}

@inproceedings{bb236243,
  AUTHOR = "Kurita, S. and Katsura, N. and Onami, E.",
  TITLE = "RefEgo: Referring Expression Comprehension Dataset from First-Person Perception of Ego4D",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "15168-15178",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231218"
}

@inproceedings{bb236244,
  AUTHOR = "Qiao, Y. and Qi, Y.K. and Yu, Z. and Liu, J. and Wu, Q.",
  TITLE = "March in Chat: Interactive Prompting for Remote Embodied Referring Expression",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "15712-15721",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231219"
}

@inproceedings{bb236245,
  AUTHOR = "Chen, Y. and Du, R. and Liang, K.M. and Ma, Z.Y.",
  TITLE = "Self-Enhanced Training Framework for Referring Expression Grounding",
  BOOKTITLE = ICIP23,
  YEAR = "2023",
  PAGES = "3060-3064",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231220"
}

@inproceedings{bb236246,
  AUTHOR = "Sun, J. and Luo, G. and Zhou, Y.Y. and Sun, X.S. and Jiang, G.N. and Wang, Z.Y. and Ji, R.R.",
  TITLE = "RefTeacher: A Strong Baseline for Semi-Supervised Referring Expression Comprehension",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "19144-19154",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231221"
}

@inproceedings{bb236247,
  AUTHOR = "Tang, J.J. and Zheng, G. and Shi, C. and Yang, S.",
  TITLE = "Contrastive Grouping with Transformer for Referring Image Segmentation",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23570-23580",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231222"
}

@inproceedings{bb236248,
  AUTHOR = "Liu, J. and Ding, H. and Cai, Z.W. and Zhang, Y.T. and Satzoda, R.K. and Mahadevan, V. and Manmatha, R.",
  TITLE = "PolyFormer: Referring Image Segmentation as Sequential Polygon Generation",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "18653-18663",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231223"
}

@inproceedings{bb236249,
  AUTHOR = "Xu, L. and Huang, M.H. and Shang, X. and Yuan, Z.H. and Sun, Y. and Liu, J.",
  TITLE = "Meta Compositional Referring Expression Segmentation",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "19478-19487",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231224"
}

@inproceedings{bb236250,
  AUTHOR = "Liu, C. and Ding, H.H. and Jiang, X.D.",
  TITLE = "GRES: Generalized Referring Expression Segmentation",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23592-23601",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231225"
}

@inproceedings{bb236251,
  AUTHOR = "Song, S. and Lin, X.D. and Liu, J.Y. and Guo, Z.M. and Chang, S.F.",
  TITLE = "Co-Grounding Networks with Semantic Attention for Referring Expression Comprehension in Videos",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "1346-1355",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231226"
}

@inproceedings{bb236252,
  AUTHOR = "Sun, M.J. and Xiao, J. and Lim, E.G.",
  TITLE = "Iterative Shrinking for Referring Expression Grounding Using Deep Reinforcement Learning",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "14055-14064",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231227"
}

@inproceedings{bb236253,
  AUTHOR = "Zhu, H.D. and Sadhu, A. and Zheng, Z.H. and Nevatia, R.",
  TITLE = "Utilizing Every Image Object for Semi-supervised Phrase Grounding",
  BOOKTITLE = WACV21,
  YEAR = "2021",
  PAGES = "2209-2218",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231228"
}

@inproceedings{bb236254,
  AUTHOR = "Wang, P. and Wu, Q. and Cao, J.W. and Shen, C.H. and Gao, L.L. and van den Hengel, A.J.",
  TITLE = "Neighbourhood Watch: Referring Expression Comprehension via Language-Guided Graph Attention Networks",
  BOOKTITLE = CVPR19,
  YEAR = "2019",
  PAGES = "1960-1968",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231229"
}

@inproceedings{bb236255,
  AUTHOR = "Tanaka, M. and Itamochi, T. and Narioka, K. and Sato, I. and Ushiku, Y. and Harada, T.",
  TITLE = "Generating Easy-to-Understand Referring Expressions for Target Identifications",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "5793-5802",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231230"
}

@inproceedings{bb236256,
  AUTHOR = "Yang, S.B. and Li, G.B. and Yu, Y.Z.",
  TITLE = "Dynamic Graph Attention for Referring Expression Comprehension",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "4643-4652",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231231"
}

@inproceedings{bb236257,
  AUTHOR = "Zhang, H.W. and Niu, Y.L. and Chang, S.F.",
  TITLE = "Grounding Referring Expressions in Images by Variational Context",
  BOOKTITLE = CVPR18,
  YEAR = "2018",
  PAGES = "4158-4166",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231232"
}

@inproceedings{bb236258,
  AUTHOR = "Yu, L.C. and Lin, Z. and Shen, X.H. and Yang, J.M. and Lu, X. and Bansal, M. and Berg, T.L.",
  TITLE = "MAttNet: Modular Attention Network for Referring Expression Comprehension",
  BOOKTITLE = CVPR18,
  YEAR = "2018",
  PAGES = "1307-1315",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231233"
}

@inproceedings{bb236259,
  AUTHOR = "Luo, R. and Shakhnarovich, G.",
  TITLE = "Comprehension-Guided Referring Expressions",
  BOOKTITLE = CVPR17,
  YEAR = "2017",
  PAGES = "3125-3134",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT231234"
}

% ---- Entries whose BIBSOURCE page is applicat803clip3.html ----

@article{bb236260,
  AUTHOR = "Tung, F. and Mori, G.",
  TITLE = "Deep Neural Network Compression by In-Parallel Pruning-Quantization",
  JOURNAL = PAMI,
  VOLUME = "42",
  YEAR = "2020",
  NUMBER = "3",
  MONTH = "March",
  PAGES = "568-579",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231235"
}

@inproceedings{bb236261,
  AUTHOR = "Tung, F. and Mori, G.",
  TITLE = "CLIP-Q: Deep Network Compression Learning by In-parallel Pruning-Quantization",
  BOOKTITLE = CVPR18,
  YEAR = "2018",
  PAGES = "7873-7882",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231236"
}

@article{bb236262,
  AUTHOR = "Kwon, G. and Ye, J.C.",
  TITLE = "One-Shot Adaptation of GAN in Just One CLIP",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "12179-12191",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231237"
}

@article{bb236263,
  AUTHOR = "Han, B.H. and Jiang, X.Y. and Fang, Z.J. and Fujita, H. and Gao, Y.B.",
  TITLE = "F-SCP: An automatic prompt generation method for specific classes based on visual language pre-training models",
  JOURNAL = PR,
  VOLUME = "147",
  YEAR = "2024",
  PAGES = "110096",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231238"
}

@article{bb236264,
  AUTHOR = "Liu, B. and Lu, D.H. and Wei, D. and Wu, X. and Wang, Y. and Zhang, Y. and Zheng, Y.F.",
  TITLE = "Improving Medical Vision-Language Contrastive Pretraining with Semantics-Aware Triage",
  JOURNAL = MedImg,
  VOLUME = "42",
  YEAR = "2023",
  NUMBER = "12",
  MONTH = "December",
  PAGES = "3579-3589",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231239"
}

@article{bb236265,
  AUTHOR = "Gao, P. and Geng, S.J. and Zhang, R.R. and Ma, T. and Fang, R.Y. and Zhang, Y.F. and Li, H.S. and Qiao, Y.",
  TITLE = "CLIP-Adapter: Better Vision-Language Models with Feature Adapters",
  JOURNAL = IJCV,
  VOLUME = "132",
  YEAR = "2024",
  NUMBER = "2",
  MONTH = "February",
  PAGES = "581-595",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231240"
}

@inproceedings{bb236266,
  AUTHOR = "Liu, Y.H. and He, J.W. and Gu, J.J. and Kong, X.T. and Qiao, Y. and Dong, C.",
  TITLE = "DegAE: A New Pretraining Paradigm for Low-Level Vision",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23292-23303",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231241"
}

@article{bb236267,
  AUTHOR = "Dong, S. and Wang, L. and Du, B. and Meng, X.L.",
  TITLE = "ChangeCLIP: Remote sensing change detection with multimodal vision-language representation learning",
  JOURNAL = PandRS,
  VOLUME = "208",
  YEAR = "2024",
  PAGES = "53-69",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231242"
}

@article{bb236268,
  AUTHOR = "Peng, F. and Yang, X.S. and Xiao, L.H. and Wang, Y.W. and Xu, C.S.",
  TITLE = "SgVA-CLIP: Semantic-Guided Visual Adapting of Vision-Language Models for Few-Shot Image Classification",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "3469-3480",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231243"
}

@article{bb236269,
  AUTHOR = "Guo, Y.F. and Chen, Y.P. and Ma, Z.",
  TITLE = "NeuroCLIP: Neuromorphic Data Understanding by CLIP and SNN",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "246-250",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231244"
}

@article{bb236270,
  AUTHOR = "Xing, Y.H. and Wu, Q. and Cheng, D. and Zhang, S.Z. and Liang, G.Q. and Wang, P. and Zhang, Y.N.",
  TITLE = "Dual Modality Prompt Tuning for Vision-Language Pre-Trained Model",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "2056-2068",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231245"
}

@article{bb236271,
  AUTHOR = "Xiao, L.H. and Yang, X.S. and Peng, F. and Yan, M. and Wang, Y.W. and Xu, C.S.",
  TITLE = "CLIP-VG: Self-Paced Curriculum Adapting of CLIP for Visual Grounding",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "4334-4347",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231246"
}

@article{bb236272,
  AUTHOR = "Zhang, K. and Yang, Y. and Yu, J. and Jiang, H.L. and Fan, J.P. and Huang, Q.M. and Han, W.D.",
  TITLE = "Multi-Task Paired Masking With Alignment Modeling for Medical Vision-Language Pre-Training",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "4706-4721",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231247"
}

@article{bb236273,
  AUTHOR = "Zara, G. and Turrisi da Costa, V.G. and Roy, S. and Rota, P. and Ricci, E.",
  TITLE = "Simplifying open-set video domain adaptation with contrastive learning",
  JOURNAL = CVIU,
  VOLUME = "241",
  YEAR = "2024",
  PAGES = "103953",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231248"
}

@inproceedings{bb236274,
  AUTHOR = "Zara, G. and Roy, S. and Rota, P. and Ricci, E.",
  TITLE = "AutoLabel: CLIP-based framework for Open-Set Video Domain Adaptation",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "11504-11513",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231249"
}

@article{bb236275,
  AUTHOR = "Wang, X.H. and Wang, W.G. and Shao, J.Y. and Yang, Y.",
  TITLE = "Learning to Follow and Generate Instructions for Language-Capable Navigation",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "5",
  MONTH = "May",
  PAGES = "3334-3350",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231250"
}

@article{bb236276,
  AUTHOR = "Zhang, W.B. and Zhang, Y.F. and Deng, Y.Y. and Zhang, W.L. and Lin, J.F. and Huang, B.Q. and Zhang, J. and Yu, W.H.",
  TITLE = "Ta-Adapter: Enhancing few-shot CLIP with task-aware encoders",
  JOURNAL = PR,
  VOLUME = "153",
  YEAR = "2024",
  PAGES = "110559",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231251"
}

@article{bb236277,
  AUTHOR = "Wu, Z.X. and Weng, Z. and Peng, W. and Yang, X.T. and Li, A. and Davis, L.S. and Jiang, Y.G.",
  TITLE = "Building an Open-Vocabulary Video CLIP Model With Better Architectures, Optimization and Data",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "7",
  MONTH = "July",
  PAGES = "4747-4762",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231252"
}

@article{bb236278,
  AUTHOR = "Yu, W.W. and Liu, Y.L. and Zhu, X.K. and Cao, H.Y. and Sun, X. and Bai, X.",
  TITLE = "Turning a CLIP Model Into a Scene Text Spotter",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "6040-6054",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231253"
}

@inproceedings{bb236279,
  AUTHOR = "Yu, W.W. and Liu, Y.L. and Hua, W. and Jiang, D.Q. and Ren, B. and Bai, X.",
  TITLE = "Turning a CLIP Model into a Scene Text Detector",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6978-6988",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231254"
}

@article{bb236280,
  AUTHOR = "Cheng, H. and Ye, H.H. and Zhou, X.F. and Liu, X.M. and Chen, F. and Wang, M.Q.",
  TITLE = "Vision-language pre-training via modal interaction",
  JOURNAL = PR,
  VOLUME = "156",
  YEAR = "2024",
  PAGES = "110809",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231255"
}

@article{bb236281,
  AUTHOR = "Kong, J. and Wang, J. and Yu, L.C. and Zhang, X.J.",
  TITLE = "Multimodality Self-distillation for Fast Inference of Vision and Language Pretrained Models",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "8928-8940",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231256"
}

@article{bb236282,
  AUTHOR = "Yang, Z.Q. and An, G. and Zheng, Z.X. and Cao, S. and Ruan, Q.Q.",
  TITLE = "GBC: Guided Alignment and Adaptive Boosting CLIP Bridging Vision and Language for Robust Action Recognition",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "8172-8187",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231257"
}

@article{bb236283,
  AUTHOR = "Lin, X. and Zhu, M.H. and Dang, R.H. and Zhou, G.L. and Shu, S.L. and Lin, F. and Liu, C.J. and Chen, Q.J.",
  TITLE = "CLIPose: Category-Level Object Pose Estimation With Pre-Trained Vision-Language Knowledge",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "9125-9138",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231258"
}

@article{bb236284,
  AUTHOR = "Huang, Z.Y. and Yang, M. and Xiao, X.Y. and Hu, P. and Peng, X.",
  TITLE = "Noise-Robust Vision-Language Pre-Training With Positive-Negative Learning",
  JOURNAL = PAMI,
  VOLUME = "47",
  YEAR = "2025",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "338-350",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231259"
}

@article{bb236285,
  AUTHOR = "Yao, D.S. and Zhu, M.Q. and Zhu, H. and Cai, W.Q. and Zhou, L.",
  TITLE = "Integrating synthetic datasets with CLIP semantic insights for single image localization advancements",
  JOURNAL = PandRS,
  VOLUME = "218",
  YEAR = "2024",
  PAGES = "198-213",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231260"
}

@article{bb236286,
  AUTHOR = "Jha, A. and Singha, M. and Bhattacharya, A. and Banerjee, B.",
  TITLE = "RS3Lip: Consistency for remote sensing image classification on part embeddings using self-supervised learning and CLIP",
  JOURNAL = CVIU,
  VOLUME = "251",
  YEAR = "2025",
  PAGES = "104254",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231261"
}

@article{bb236287,
  AUTHOR = "Wang, X. and Jin, J.D. and Li, C.L. and Tang, J. and Zhang, C. and Wang, W.",
  TITLE = "Pedestrian Attribute Recognition via CLIP-Based Prompt Vision-Language Fusion",
  JOURNAL = CirSysVideo,
  VOLUME = "35",
  YEAR = "2025",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "148-161",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231262"
}

@inproceedings{bb236288,
  AUTHOR = "Zhu, J. and Jin, J.D. and Yang, Z.H. and Wu, X.H. and Wang, X.",
  TITLE = "Learning CLIP Guided Visual-Text Fusion Transformer for Video-based Pedestrian Attribute Recognition",
  BOOKTITLE = NFVLR23,
  YEAR = "2023",
  PAGES = "2626-2629",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231263"
}

@article{bb236289,
  AUTHOR = "Jin, W.Q. and Qu, M.X. and Shi, C. and Zhao, Y. and Wei, Y.C.",
  TITLE = "RelFormer: Advancing contextual relations for transformer-based dense captioning",
  JOURNAL = CVIU,
  VOLUME = "252",
  YEAR = "2025",
  PAGES = "104300",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231264"
}

@article{bb236290,
  AUTHOR = "Wu, Y.J. and Zhou, Y. and Saiyin, J. and Wei, B.Z. and Lai, M. and Shou, J.Z. and Xu, Y.",
  TITLE = "AttriPrompter: Auto-Prompting With Attribute Semantics for Zero-Shot Nuclei Detection via Visual-Language Pre-Trained Models",
  JOURNAL = MedImg,
  VOLUME = "44",
  YEAR = "2025",
  NUMBER = "2",
  MONTH = "February",
  PAGES = "982-993",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231265"
}

@article{bb236291,
  AUTHOR = "Ma, Q.J. and Yang, S.Q. and Zhang, L.J. and Lan, Q. and Yang, D.D. and Chen, H. and Tan, Y.",
  TITLE = "APOVIS: Automated pixel-level open-vocabulary instance segmentation through integration of pre-trained vision-language models and foundational segmentation models",
  JOURNAL = IVC,
  VOLUME = "154",
  YEAR = "2025",
  PAGES = "105384",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231266"
}

@article{bb236292,
  AUTHOR = "Zhang, W.Y. and Shen, L. and Foo, C.S.",
  TITLE = "Source-Free Domain Adaptation Guided by Vision and Vision-Language Pre-training",
  JOURNAL = IJCV,
  VOLUME = "133",
  YEAR = "2025",
  NUMBER = "2",
  MONTH = "February",
  PAGES = "844-866",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231267"
}

@article{bb236293,
  AUTHOR = "Zhang, Y.W. and Wang, J. and Tang, H.Y. and Qin, R.H.",
  TITLE = "DALSCLIP: Domain aggregation via learning stronger domain-invariant features for CLIP",
  JOURNAL = IVC,
  VOLUME = "154",
  YEAR = "2025",
  PAGES = "105359",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231268"
}

@article{bb236294,
  AUTHOR = "Liu, Y. and Wang, X.L. and Zhu, M.Z. and Cao, Y. and Huang, T.J. and Shen, C.H.",
  TITLE = "Masked Channel Modeling for Bootstrapping Visual Pre-training",
  JOURNAL = IJCV,
  VOLUME = "133",
  YEAR = "2025",
  NUMBER = "2",
  MONTH = "February",
  PAGES = "760-780",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231269"
}

@article{bb236295,
  AUTHOR = "Li, Y. and Wang, H.L. and Duan, Y.Q. and Zhang, J.H. and Li, X.M.",
  TITLE = "A closer look at the explainability of Contrastive language-image pre-training",
  JOURNAL = PR,
  VOLUME = "162",
  YEAR = "2025",
  PAGES = "111409",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231270"
}

@article{bb236296,
  AUTHOR = "Zhang, Z.K. and Gao, B. and Ye, J.R. and Jin, H. and Jiang, L.H. and Yang, W.M.",
  TITLE = "CLIP prior-guided 3D open-vocabulary occupancy prediction",
  JOURNAL = PR,
  VOLUME = "162",
  YEAR = "2025",
  PAGES = "111347",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231271"
}

@article{bb236297,
  AUTHOR = "Cai, S.Q. and Liu, X. and Yuan, J.L. and Zhou, Q.H.",
  TITLE = "Prompt-Ladder: Memory-efficient prompt tuning for vision-language models on edge devices",
  JOURNAL = PR,
  VOLUME = "163",
  YEAR = "2025",
  PAGES = "111460",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231272"
}

@article{bb236298,
  AUTHOR = "Ren, H.R. and Tang, F. and Zheng, H.J. and Zhao, H. and Guo, D.D. and Chang, Y.",
  TITLE = "Modality-Consistent Prompt Tuning With Optimal Transport",
  JOURNAL = CirSysVideo,
  VOLUME = "35",
  YEAR = "2025",
  NUMBER = "3",
  MONTH = "March",
  PAGES = "2499-2512",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231273"
}

@article{bb236299,
  AUTHOR = "Cao, Y.K. and Xu, X.H. and Cheng, Y.Q. and Sun, C. and Du, Z.W. and Gao, L. and Shen, W.M.",
  TITLE = "Personalizing Vision-Language Models With Hybrid Prompts for Zero-Shot Anomaly Detection",
  JOURNAL = Cyber,
  VOLUME = "55",
  YEAR = "2025",
  NUMBER = "4",
  MONTH = "April",
  PAGES = "1917-1929",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803clip3.html#TT231274"
}