% Bibliography entries bb234900 through bb234999, laid out one field per line.
%
% Conventions visible in these entries:
%   * BOOKTITLE / JOURNAL values such as CVPR20, ECCV20, PAMI, MultMed, IP are
%     bare macro names. Their string definitions are not in this section;
%     presumably they are supplied elsewhere -- confirm before compiling alone.
%   * PAGES for multi-volume proceedings use the form VOLUME:first-last
%     (for example XVIII:353-370), written without a space after the colon.
%   * BIBSOURCE is a non-standard field holding a visionbib.com page anchor;
%     standard styles ignore unknown fields.
%
% Review notes (data corrections made in this section):
%   * bb234948, bb234949: a NUMBER field that merely repeated the year (2022)
%     was dropped; sibling entries for the same journals carry no NUMBER.
%   * bb234981: stray text "Psges" removed from PAGES.
%   * bb234993: superscript in the title is now in math mode so it typesets.
%   * bb234987, bb234988, bb234993: space after the volume colon in PAGES removed.
%   * NOTE(review): bb234932 uses the quoted literal "ICPR22" where other
%     entries use a bare macro; left as is because no macro definition is
%     visible here -- confirm which form is intended.

@inproceedings{bb234900,
  AUTHOR = "Zhang, Z. and Zhao, Z. and Zhao, Y. and Wang, Q. and Liu, H. and Gao, L.",
  TITLE = "Where Does It Exist: Spatio-Temporal Video Grounding for Multi-Form Sentences",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10665-10674",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229876"
}

@inproceedings{bb234901,
  AUTHOR = "Sadhu, A. and Chen, K. and Nevatia, R.",
  TITLE = "Video Object Grounding Using Semantic Roles in Language Description",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10414-10424",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229877"
}

@inproceedings{bb234902,
  AUTHOR = "Ma, C.Y. and Kalantidis, Y. and AlRegib, G. and Vajda, P. and Rohrbach, M. and Kira, Z.",
  TITLE = "Learning to Generate Grounded Visual Captions Without Localization Supervision",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "XVIII:353-370",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229878"
}

@inproceedings{bb234903,
  AUTHOR = "Zeng, R.H. and Xu, H.M. and Huang, W.B. and Chen, P.H. and Tan, M.K. and Gan, C.",
  TITLE = "Dense Regression Network for Video Grounding",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10284-10293",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229879"
}

@inproceedings{bb234904,
  AUTHOR = "Gupta, T. and Vahdat, A. and Chechik, G. and Yang, X.D. and Kautz, J. and Hoiem, D.",
  TITLE = "Contrastive Learning for Weakly Supervised Phrase Grounding",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "III:752-768",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229880"
}

@inproceedings{bb234905,
  AUTHOR = "Yang, S. and Li, G.B. and Yu, Y.Z.",
  TITLE = "Propagating Over Phrase Relations for One-stage Visual Grounding",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "XIX:589-605",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229881"
}

@inproceedings{bb234906,
  AUTHOR = "Xiao, J.B. and Shang, X. and Yang, X. and Tang, S. and Chua, T.S.",
  TITLE = "Visual Relation Grounding in Videos",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "VI:447-464",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229882"
}

@inproceedings{bb234907,
  AUTHOR = "Mun, J. and Cho, M. and Han, B.",
  TITLE = "Local-Global Video-Text Interactions for Temporal Grounding",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10807-10816",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229883"
}

@inproceedings{bb234908,
  AUTHOR = "Wu, C. and Lin, Z. and Cohen, S. and Bui, T. and Maji, S.",
  TITLE = "PhraseCut: Language-Based Image Segmentation in the Wild",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10213-10222",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229884"
}

@inproceedings{bb234909,
  AUTHOR = "Chen, L. and Zhai, M.Y. and He, J.W. and Mori, G.",
  TITLE = "Object Grounding via Iterative Context Reasoning",
  BOOKTITLE = MDALC19,
  YEAR = "2019",
  PAGES = "1407-1415",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229885"
}

@inproceedings{bb234910,
  AUTHOR = "Sinha, A. and Akilesh, B. and Sarkar, M. and Krishnamurthy, B.",
  TITLE = "Attention Based Natural Language Grounding by Navigating Virtual Environment",
  BOOKTITLE = WACV19,
  YEAR = "2019",
  PAGES = "236-244",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229886"
}

@inproceedings{bb234911,
  AUTHOR = "Shi, J. and Xu, J. and Gong, B.Q. and Xu, C.L.",
  TITLE = "Not All Frames Are Equal: Weakly-Supervised Video Grounding With Contextual Similarity and Visual Clustering Losses",
  BOOKTITLE = CVPR19,
  YEAR = "2019",
  PAGES = "10436-10444",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229887"
}

@inproceedings{bb234912,
  AUTHOR = "Datta, S. and Sikka, K. and Roy, A. and Ahuja, K. and Parikh, D. and Divakaran, A.",
  TITLE = "Align2Ground: Weakly Supervised Phrase Grounding Guided by Image-Caption Alignment",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "2601-2610",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229888"
}

@inproceedings{bb234913,
  AUTHOR = "Fang, Z.Y. and Kong, S. and Fowlkes, C.C. and Yang, Y.Z.",
  TITLE = "Modularized Textual Grounding for Counterfactual Resilience",
  BOOKTITLE = CVPR19,
  YEAR = "2019",
  PAGES = "6371-6381",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229889"
}

@inproceedings{bb234914,
  AUTHOR = "Zhuang, B. and Wu, Q. and Shen, C. and Reid, I.D. and van den Hengel, A.J.",
  TITLE = "Parallel Attention: A Unified Framework for Visual Object Discovery Through Dialogs and Queries",
  BOOKTITLE = CVPR18,
  YEAR = "2018",
  PAGES = "4252-4261",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229890"
}

@inproceedings{bb234915,
  AUTHOR = "Yang, Z.Y. and Chen, T.L. and Wang, L.W. and Luo, J.B.",
  TITLE = "Improving One-Stage Visual Grounding by Recursive Sub-query Construction",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "XIV:387-404",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229891"
}

@inproceedings{bb234916,
  AUTHOR = "Liu, D.Q. and Zhang, H.W. and Zha, Z.J. and Wu, F.",
  TITLE = "Learning to Assemble Neural Module Tree Networks for Visual Grounding",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "4672-4681",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229892"
}

@inproceedings{bb234917,
  AUTHOR = "Sadhu, A. and Chen, K. and Nevatia, R.",
  TITLE = "Zero-Shot Grounding of Objects From Natural Language Queries",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "4693-4702",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229893"
}

@inproceedings{bb234918,
  AUTHOR = "Yang, Z.Y. and Gong, B.Q. and Wang, L.W. and Huang, W.B. and Yu, D. and Luo, J.B.",
  TITLE = "A Fast and Accurate One-Stage Approach to Visual Grounding",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "4682-4692",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229894"
}

@inproceedings{bb234919,
  AUTHOR = "Rohrbach, A. and Rohrbach, M. and Tang, S. and Oh, S.J. and Schiele, B.",
  TITLE = "Generating Descriptions with Grounded and Co-referenced People",
  BOOKTITLE = CVPR17,
  YEAR = "2017",
  PAGES = "4196-4206",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229895"
}

@inproceedings{bb234920,
  AUTHOR = "Zhu, Y. and Kiros, R. and Zemel, R. and Salakhutdinov, R. and Urtasun, R. and Torralba, A.B. and Fidler, S.",
  TITLE = "Aligning Books and Movies: Towards Story-Like Visual Explanations by Watching Movies and Reading Books",
  BOOKTITLE = ICCV15,
  YEAR = "2015",
  PAGES = "19-27",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229896"
}

@article{bb234921,
  AUTHOR = "Liang, J.W. and Jiang, L. and Cao, L.L. and Kalantidis, Y. and Li, L.J. and Hauptmann, A.G.",
  TITLE = "Focal Visual-Text Attention for Memex Question Answering",
  JOURNAL = PAMI,
  VOLUME = "41",
  YEAR = "2019",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "1893-1908",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229898"
}

@inproceedings{bb234922,
  AUTHOR = "Liang, J.W. and Jiang, L. and Cao, L.L. and Li, L.J. and Hauptmann, A.G.",
  TITLE = "Focal Visual-Text Attention for Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR = "2018",
  PAGES = "6135-6143",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229899"
}

@article{bb234923,
  AUTHOR = "Riquelme, F. and de Goyeneche, A. and Zhang, Y.D. and Niebles, J.C. and Soto, A.",
  TITLE = "Explaining VQA predictions using visual grounding and a knowledge base",
  JOURNAL = IVC,
  VOLUME = "101",
  YEAR = "2020",
  PAGES = "103968",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229900"
}

@article{bb234924,
  AUTHOR = "Zhao, L.C. and Cai, D.G. and Zhang, J. and Sheng, L. and Xu, D. and Zheng, R. and Zhao, Y.J. and Wang, L.P. and Fan, X.",
  TITLE = "Toward Explainable 3D Grounded Visual Question Answering: A New Benchmark and Strong Baseline",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "2935-2949",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229901"
}

@article{bb234925,
  AUTHOR = "Zhu, L.J. and Peng, L. and Zhou, W.N. and Yang, J.L.",
  TITLE = "Dual-decoder transformer network for answer grounding in visual question answering",
  JOURNAL = PRL,
  VOLUME = "171",
  YEAR = "2023",
  PAGES = "53-60",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229902"
}

@inproceedings{bb234926,
  AUTHOR = "Huang, J.Y. and Jia, B.X. and Wang, Y. and Zhu, Z.Y. and Linghu, X.K. and Li, Q. and Zhu, S.C. and Huang, S.Y.",
  TITLE = "Unveiling the Mist over 3D Vision-Language Understanding: Object-centric Evaluation with Chain-of-Analysis",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "24570-24581",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229903"
}

@inproceedings{bb234927,
  AUTHOR = "Chen, K. and Wu, X.Q.",
  TITLE = "VTQA: Visual Text Question Answering via Entity Alignment and Cross-Media Reasoning",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "27208-27217",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229904"
}

@inproceedings{bb234928,
  AUTHOR = "Di, S.Z. and Xie, W.",
  TITLE = "Grounded Question-Answering in Long Egocentric Videos",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "12934-12943",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229905"
}

@inproceedings{bb234929,
  AUTHOR = "Chen, C.Y. and Anjum, S. and Gurari, D.",
  TITLE = "VQA Therapy: Exploring Answer Differences by Visually Grounding Answers",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "15269-15279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229906"
}

@inproceedings{bb234930,
  AUTHOR = "Le, T.M. and Le, V. and Gupta, S.I. and Venkatesh, S. and Tran, T.",
  TITLE = "Guiding Visual Question Answering with Attention Priors",
  BOOKTITLE = WACV23,
  YEAR = "2023",
  PAGES = "4370-4379",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229907"
}

@inproceedings{bb234931,
  AUTHOR = "Khan, A.U. and Kuehne, H. and Gan, C. and da Vitoria Lobo, N. and Shah, M.",
  TITLE = "Weakly Supervised Grounding for VQA in Vision-Language Transformers",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXXV:652-670",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229908"
}

@inproceedings{bb234932,
  AUTHOR = "Gupta, K. and Gautam, D. and Mamidi, R.",
  TITLE = "cViL: Cross-Lingual Training of Vision-Language Models using Knowledge Distillation",
  BOOKTITLE = "ICPR22",
  YEAR = "2022",
  PAGES = "1734-1741",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229909"
}

@inproceedings{bb234933,
  AUTHOR = "Li, Y.C. and Wang, X. and Xiao, J.B. and Ji, W. and Chua, T.S.",
  TITLE = "Invariant Grounding for Video Question Answering",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "2918-2927",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229910"
}

@inproceedings{bb234934,
  AUTHOR = "Lu, X.P. and Fan, Z. and Wang, Y. and Oh, J. and Rose, C.P.",
  TITLE = "Localize, Group, and Select: Boosting Text-VQA by Scene Text Modeling",
  BOOKTITLE = XSAnim21,
  YEAR = "2021",
  PAGES = "2631-2639",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229911"
}

@inproceedings{bb234935,
  AUTHOR = "Khan, A.U. and Kuehne, H. and Duarte, K. and Gan, C. and Lobo, N. and Shah, M.",
  TITLE = "Found a Reason for me? Weakly-supervised Grounded Visual Question Answering using Capsules",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "8461-8470",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229912"
}

@inproceedings{bb234936,
  AUTHOR = "Selvaraju, R.R. and Tendulkar, P. and Parikh, D. and Horvitz, E. and Tulio Ribeiro, M. and Nushi, B. and Kamar, E.",
  TITLE = "SQuINTing at VQA Models: Introspecting VQA Models With Sub-Questions",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10000-10008",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229913"
}

@inproceedings{bb234937,
  AUTHOR = "Gouthaman, K.V. and Mittal, A.",
  TITLE = "Reducing Language Biases in Visual Question Answering with Visually-grounded Question Encoder",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "XIII:18-34",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229914"
}

@inproceedings{bb234938,
  AUTHOR = "Tan, H.L. and Leong, M.C. and Xu, Q. and Li, L. and Fang, F. and Cheng, Y. and Gauthier, N. and Sun, Y. and Lim, J.H.",
  TITLE = "Task-Oriented Multi-Modal Question Answering For Collaborative Applications",
  BOOKTITLE = ICIP20,
  YEAR = "2020",
  PAGES = "1426-1430",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229915"
}

@inproceedings{bb234939,
  AUTHOR = "Selvaraju, R.R. and Lee, S. and Shen, Y. and Jin, H. and Ghosh, S. and Heck, L. and Batra, D. and Parikh, D.",
  TITLE = "Taking a HINT: Leveraging Explanations to Make Vision and Language Models More Grounded",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "2591-2600",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229916"
}

@inproceedings{bb234940,
  AUTHOR = "Zhang, Y. and Niebles, J.C. and Soto, A.",
  TITLE = "Interpretable Visual Question Answering by Visual Grounding From Attention Supervision Mining",
  BOOKTITLE = WACV19,
  YEAR = "2019",
  PAGES = "349-357",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229917"
}

@article{bb234941,
  AUTHOR = "Li, X. and Jiang, S.",
  TITLE = "Bundled Object Context for Referring Expressions",
  JOURNAL = MultMed,
  VOLUME = "20",
  YEAR = "2018",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "2749-2760",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229918"
}

@article{bb234942,
  AUTHOR = "Wang, J.M. and Cui, E. and Liu, K.L. and Sun, Y.K. and Liang, J.Y. and Yuan, C.M. and Duan, X.J. and Jin, G.H. and Chung, T.S.",
  TITLE = "Referring expression comprehension model with matching detection and linguistic feedback",
  JOURNAL = IET-CV,
  VOLUME = "14",
  YEAR = "2020",
  NUMBER = "8",
  MONTH = "December",
  PAGES = "625-633",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229919"
}

@article{bb234943,
  AUTHOR = "Qiao, Y.Y. and Deng, C.R. and Wu, Q.",
  TITLE = "Referring Expression Comprehension: A Survey of Methods and Datasets",
  JOURNAL = MultMed,
  VOLUME = "23",
  YEAR = "2021",
  PAGES = "4426-4440",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229920"
}

@article{bb234944,
  AUTHOR = "Niu, Y.L. and Zhang, H.W. and Lu, Z.W. and Chang, S.F.",
  TITLE = "Variational Context: Exploiting Visual and Textual Context for Grounding Referring Expressions",
  JOURNAL = PAMI,
  VOLUME = "43",
  YEAR = "2021",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "347-359",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229921"
}

@article{bb234945,
  AUTHOR = "Yang, S. and Li, G.B. and Yu, Y.Z.",
  TITLE = "Relationship-Embedded Representation Learning for Grounding Referring Expressions",
  JOURNAL = PAMI,
  VOLUME = "43",
  YEAR = "2021",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "2765-2779",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229922"
}

@inproceedings{bb234946,
  AUTHOR = "Yang, S. and Li, G.B. and Yu, Y.Z.",
  TITLE = "Cross-Modal Relationship Inference for Grounding Referring Expressions",
  BOOKTITLE = CVPR19,
  YEAR = "2019",
  PAGES = "4140-4149",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229923"
}

@article{bb234947,
  AUTHOR = "Sun, M.J. and Xiao, J. and Lim, E.G. and Liu, S. and Goulermas, J.Y.",
  TITLE = "Discriminative Triad Matching and Reconstruction for Weakly Referring Expression Grounding",
  JOURNAL = PAMI,
  VOLUME = "43",
  YEAR = "2021",
  NUMBER = "11",
  MONTH = "November",
  PAGES = "4189-4195",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229924"
}

% bb234948: NUMBER previously repeated the year (2022) and has been dropped.
@article{bb234948,
  AUTHOR = "Lin, L. and Yan, P.X. and Xu, X.Q. and Yang, S. and Zeng, K. and Li, G.B.",
  TITLE = "Structured Attention Network for Referring Image Segmentation",
  JOURNAL = MultMed,
  VOLUME = "24",
  YEAR = "2022",
  PAGES = "1922-1932",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229925"
}

% bb234949: NUMBER previously repeated the year (2022) and has been dropped.
@article{bb234949,
  AUTHOR = "Yang, X. and Wang, H. and Xie, D. and Deng, C. and Tao, D.C.",
  TITLE = "Object-Agnostic Transformers for Video Referring Segmentation",
  JOURNAL = IP,
  VOLUME = "31",
  YEAR = "2022",
  PAGES = "2839-2849",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229926"
}

@article{bb234950,
  AUTHOR = "Wang, X. and Xie, D. and Zheng, Y.S.",
  TITLE = "Referring expression grounding by multi-context reasoning",
  JOURNAL = PRL,
  VOLUME = "160",
  YEAR = "2022",
  PAGES = "66-72",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229927"
}

@article{bb234951,
  AUTHOR = "Shen, H.T. and Chen, C. and Wang, P. and Gao, L.L. and Wang, M. and Song, J.K.",
  TITLE = "Continual Referring Expression Comprehension via Dual Modular Memorization",
  JOURNAL = IP,
  VOLUME = "31",
  YEAR = "2022",
  PAGES = "6694-6706",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229928"
}

@article{bb234952,
  AUTHOR = "Chen, Y.W. and Tsai, Y.H. and Yang, M.H.",
  TITLE = "Understanding Synonymous Referring Expressions via Contrastive Features",
  JOURNAL = IJCV,
  VOLUME = "130",
  YEAR = "2022",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "2501-2516",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229929"
}

@article{bb234953,
  AUTHOR = "Suo, W. and Sun, M.Y. and Wang, P. and Zhang, Y.N. and Wu, Q.",
  TITLE = "Rethinking and Improving Feature Pyramids for One-Stage Referring Expression Comprehension",
  JOURNAL = IP,
  VOLUME = "32",
  YEAR = "2023",
  PAGES = "854-864",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229930"
}

@article{bb234954,
  AUTHOR = "Liu, X.J. and Li, L. and Wang, S.H. and Zha, Z.J. and Li, Z.C. and Tian, Q. and Huang, Q.M.",
  TITLE = "Entity-Enhanced Adaptive Reconstruction Network for Weakly Supervised Referring Expression Grounding",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "3",
  MONTH = "March",
  PAGES = "3003-3018",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229931"
}

@inproceedings{bb234955,
  AUTHOR = "Liu, X.J. and Li, L. and Wang, S.H. and Zha, Z.J. and Meng, D.C. and Huang, Q.M.",
  TITLE = "Adaptive Reconstruction Network for Weakly Supervised Referring Expression Grounding",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "2611-2620",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229932"
}

@article{bb234956,
  AUTHOR = "Feng, G. and Zhang, L. and Sun, J. and Hu, Z.W. and Lu, H.C.",
  TITLE = "Referring Segmentation via Encoder-Fused Cross-Modal Attention Network",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "7654-7667",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229933"
}

@inproceedings{bb234957,
  AUTHOR = "Feng, G. and Hu, Z.W. and Zhang, L. and Lu, H.C.",
  TITLE = "Encoder Fusion Network with Co-Attention Embedding for Referring Image Segmentation",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "15501-15510",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229934"
}

@article{bb234958,
  AUTHOR = "Liu, D.Z. and Zhou, P. and Xu, Z. and Wang, H.Z. and Li, R.X.",
  TITLE = "Few-Shot Temporal Sentence Grounding via Memory-Guided Semantic Learning",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "5",
  MONTH = "May",
  PAGES = "2491-2505",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229935"
}

@article{bb234959,
  AUTHOR = "Sun, M.J. and Xiao, J. and Lim, E.G. and Zhao, Y.",
  TITLE = "Cycle-Free Weakly Referring Expression Grounding With Self-Paced Learning",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "1611-1621",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229936"
}

@article{bb234960,
  AUTHOR = "Sun, M.Y. and Suo, W. and Wang, P. and Zhang, Y.N. and Wu, Q.",
  TITLE = "A Proposal-Free One-Stage Framework for Referring Expression Comprehension and Generation via Dense Cross-Attention",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "2446-2458",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229937"
}

@article{bb234961,
  AUTHOR = "Sun, Y.F. and Zhang, Y. and Jiang, H. and Hu, Y.L. and Yin, B.C.",
  TITLE = "Multi-level attention for referring expression comprehension",
  JOURNAL = PRL,
  VOLUME = "172",
  YEAR = "2023",
  PAGES = "252-258",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229938"
}

@article{bb234962,
  AUTHOR = "Wang, R. and Tang, Z. and Zhou, Q.L. and Liu, X.Q. and Hui, T.R. and Tan, Q. and Liu, S.",
  TITLE = "Unified Transformer with Isomorphic Branches for Natural Language Tracking",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "4529-4541",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229939"
}

@article{bb234963,
  AUTHOR = "Li, H. and Sun, M.J. and Xiao, J. and Lim, E.G. and Zhao, Y.",
  TITLE = "Fully and Weakly Supervised Referring Expression Segmentation With End-to-End Learning",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "5999-6012",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229940"
}

@article{bb234964,
  AUTHOR = "Liu, C. and Jiang, X.D. and Ding, H.H.",
  TITLE = "Instance-Specific Feature Propagation for Referring Segmentation",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "3657-3667",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229941"
}

@article{bb234965,
  AUTHOR = "Song, Y.Z. and Chen, Y.S. and Shuai, H.H.",
  TITLE = "Decoupling-Cooperative Framework for Referring Expression Comprehension",
  JOURNAL = SPLetters,
  VOLUME = "30",
  YEAR = "2023",
  PAGES = "1542-1546",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229942"
}

@article{bb234966,
  AUTHOR = "Hua, G.G. and Liao, M. and Tian, S. and Zhang, Y.H. and Zou, W.B.",
  TITLE = "Multiple Relational Learning Network for Joint Referring Expression Comprehension and Segmentation",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "8805-8816",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229943"
}

@article{bb234967,
  AUTHOR = "Wang, W.B. and Pagnucco, M. and Xu, C.P. and Song, Y.",
  TITLE = "InterREC: An Interpretable Method for Referring Expression Comprehension",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "9330-9342",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229944"
}

@article{bb234968,
  AUTHOR = "Ke, J.C. and Wang, J. and Chen, J.C. and Jhuo, I.H. and Lin, C.W. and Lin, Y.Y.",
  TITLE = "CLIPREC: Graph-Based Domain Adaptive Network for Zero-Shot Referring Expression Comprehension",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "2480-2492",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229945"
}

@article{bb234969,
  AUTHOR = "Ke, J.C. and Wang, J. and Wong, W.K. and Toomey, A. and Wen, J.",
  TITLE = "Graph-Based Group Division Network for Referring Expression Comprehension",
  JOURNAL = CirSysVideo,
  VOLUME = "35",
  YEAR = "2025",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "6170-6183",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229946"
}

@article{bb234970,
  AUTHOR = "Li, X.C. and Fan, B.Y. and Zhang, R.Z. and Zhao, K. and Guo, Z.H. and Zhao, Y.Q. and Li, R.",
  TITLE = "Inexactly Matched Referring Expression Comprehension With Rationale",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "3937-3950",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229947"
}

@article{bb234971,
  AUTHOR = "Luo, G. and Zhou, Y.Y. and Sun, J. and Sun, X.S. and Ji, R.R.",
  TITLE = "A Survivor in the Era of Large-Scale Pretraining: An Empirical Study of One-Stage Referring Expression Comprehension",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "3689-3700",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229948"
}

@article{bb234972,
  AUTHOR = "Miao, P.H. and Su, W. and Wang, G.A. and Li, X.W. and Xi, L.",
  TITLE = "Self-Paced Multi-Grained Cross-Modal Interaction Modeling for Referring Expression Comprehension",
  JOURNAL = IP,
  VOLUME = "33",
  YEAR = "2024",
  PAGES = "1497-1507",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229949"
}

@article{bb234973,
  AUTHOR = "Liu, Z.T. and Xu, T.Y. and Song, X.N. and Wu, X.J.",
  TITLE = "Unified Referring Expression Generation for Bounding Boxes and Segmentations",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "636-640",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229950"
}

@article{bb234974,
  AUTHOR = "Zhang, Y.J. and Li, Q.Z. and Pan, Y. and Zhao, X.G. and Tan, M.",
  TITLE = "Multi-Stage Image-Language Cross-Generative Fusion Network for Video-Based Referring Expression Comprehension",
  JOURNAL = IP,
  VOLUME = "33",
  YEAR = "2024",
  PAGES = "3256-3270",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229951"
}

@article{bb234975,
  AUTHOR = "Lu, M.C. and Li, R.F. and Feng, F.X. and Ma, Z.Y. and Wang, X.J.",
  TITLE = "LGR-NET: Language Guided Reasoning Network for Referring Expression Comprehension",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "7771-7784",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229952"
}

@article{bb234976,
  AUTHOR = "Yao, H.B. and Wang, L.P. and Cai, C.T. and Wang, W. and Zhang, Z. and Shang, X.B.",
  TITLE = "Language conditioned multi-scale visual attention networks for visual grounding",
  JOURNAL = IVC,
  VOLUME = "150",
  YEAR = "2024",
  PAGES = "105242",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229953"
}

@article{bb234977,
  AUTHOR = "Ji, Z. and Wu, J. and Wang, Y.D. and Yang, A.P. and Han, J.G.",
  TITLE = "Progressive Semantic Reconstruction Network for Weakly Supervised Referring Expression Grounding",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "12",
  MONTH = "December",
  PAGES = "13058-13070",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229954"
}

@article{bb234978,
  AUTHOR = "Qiu, H.Q. and Wang, L.X. and Zhao, T. and Meng, F.M. and Wu, Q.B. and Li, H.L.",
  TITLE = "MCCE-REC: MLLM-Driven Cross-Modal Contrastive Entropy Model for Zero-Shot Referring Expression Comprehension",
  JOURNAL = CirSysVideo,
  VOLUME = "35",
  YEAR = "2025",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "754-768",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229955"
}

@article{bb234979,
  AUTHOR = "Ke, J.C. and Zhang, Q. and Wang, J. and Ding, H.Q. and Zhang, P.F. and Wen, J.",
  TITLE = "Graph-based referring expression comprehension with expression-guided selective filtering and noun-oriented reasoning",
  JOURNAL = PR,
  VOLUME = "161",
  YEAR = "2025",
  PAGES = "111222",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229956"
}

@article{bb234980,
  AUTHOR = "Ke, J.C. and Wang, D. and Chen, J.C. and Jhuo, I.H. and Lin, C.W. and Lin, Y.Y.",
  TITLE = "Make Graph-Based Referring Expression Comprehension Great Again Through Expression-Guided Dynamic Gating and Regression",
  JOURNAL = MultMed,
  VOLUME = "27",
  YEAR = "2025",
  PAGES = "1950-1961",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229957"
}

% bb234981: PAGES previously carried the stray text "Psges" before the range.
@article{bb234981,
  AUTHOR = "Huang, S.J. and Li, F. and Zhang, H. and Liu, S.L. and Zhang, L. and Wang, L.W.",
  TITLE = "A Mutual Supervision Framework for Referring Expression Segmentation and Generation",
  JOURNAL = IJCV,
  VOLUME = "133",
  YEAR = "2025",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "3597-3612",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229958"
}

@article{bb234982,
  AUTHOR = "Ke, X. and Xu, P. and Guo, W.Z.",
  TITLE = "Language-Image Consistency Augmentation and Distillation Network for visual grounding",
  JOURNAL = PR,
  VOLUME = "166",
  YEAR = "2025",
  PAGES = "111663",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229959"
}

@inproceedings{bb234983,
  AUTHOR = "Wang, Z.C. and Pan, Z.Y. and Peng, Z. and Cheng, J. and Xiao, L.W. and Jiang, W. and Cao, Z.G.",
  TITLE = "Exploring Contextual Attribute Density in Referring Expression Counting",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "19587-19596",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229960"
}

@inproceedings{bb234984,
  AUTHOR = "Chen, X. and Luo, Y.X. and Luo, G. and Ji, J.Y. and Ding, H.H. and Zhou, Y.",
  TITLE = "DViN: Dynamic Visual Routing Network for Weakly Supervised Referring Expression Comprehension",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "14347-14357",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229961"
}

@inproceedings{bb234985,
  AUTHOR = "Wang, S.J. and Kim, D. and Taalimi, A. and Sun, C. and Kuo, W.C.",
  TITLE = "Learning Visual Grounding from Generative Vision and Language Model",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "8057-8067",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229962"
}

@inproceedings{bb234986,
  AUTHOR = "Wu, T.Y. and Huang, S.Y. and Wang, Y.C.A.F.",
  TITLE = "Data-Efficient 3D Visual Grounding via Order-Aware Referring",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "3107-3117",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229963"
}

@inproceedings{bb234987,
  AUTHOR = "Chu, T.Y. and Lin, Y.X. and Huang, C.C. and Hua, K.L.",
  TITLE = "Enhancing Anchor-based Weakly Supervised Referring Expression Comprehension with Cross-modality Attention",
  BOOKTITLE = ACCV24,
  YEAR = "2024",
  PAGES = "III:131-147",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229964"
}

@inproceedings{bb234988,
  AUTHOR = "Nag, S. and Goswami, K. and Karanam, S.",
  TITLE = "Safari: Adaptive Sequence Transformer for Weakly Supervised Referring Expression Segmentation",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "XLIV:485-503",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229965"
}

@inproceedings{bb234989,
  AUTHOR = "Dai, S.Y. and Liu, J. and Cheung, N.M.",
  TITLE = "Referring Expression Counting",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "16985-16995",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229966"
}

@inproceedings{bb234990,
  AUTHOR = "Han, Z. and Zhu, F. and Lao, Q. and Jiang, H.",
  TITLE = "Zero-Shot Referring Expression Comprehension via Structural Similarity Between Images and Captions",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "14364-14375",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229967"
}

@inproceedings{bb234991,
  AUTHOR = "Su, W. and Miao, P.H. and Dou, H.Z. and Li, X.",
  TITLE = "ScanFormer: Referring Expression Comprehension by Iteratively Scanning",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "13449-13458",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229968"
}

@inproceedings{bb234992,
  AUTHOR = "Yu, Z.H. and Li, R.",
  TITLE = "Revisiting Counterfactual Problems in Referring Expression Comprehension",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "13438-13448",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229969"
}

% bb234993: the superscript is written in math mode and braced so that the
% caret does not raise a TeX error and the name keeps its capitalisation.
@inproceedings{bb234993,
  AUTHOR = "Li, X. and Qiu, K. and Wang, J.L. and Xu, X.H. and Singh, R. and Yamazaki, K. and Chen, H. and Huang, X.N. and Raj, B.",
  TITLE = "{R$^2$-Bench}: Benchmarking the Robustness of Referring Perception Models Under Perturbations",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "IX:211-230",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229970"
}

@inproceedings{bb234994,
  AUTHOR = "Chng, Y.X. and Zheng, H. and Han, Y.Z. and Qiu, X. and Huang, G.",
  TITLE = "Mask Grounding for Referring Image Segmentation",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "26563-26573",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229971"
}

@inproceedings{bb234995,
  AUTHOR = "Shah, N.A. and VS, V. and Patel, V.M.",
  TITLE = "LQMFormer: Language-Aware Query Mask Transformer for Referring Image Segmentation",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "12903-12913",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229972"
}

@inproceedings{bb234996,
  AUTHOR = "Wang, W.X. and Yue, T.T. and Zhang, Y. and Guo, L.T. and He, X.J. and Wang, X.L. and Liu, J.",
  TITLE = "Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "12998-13008",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229973"
}

@inproceedings{bb234997,
  AUTHOR = "Wu, Y.X. and Zhang, Z. and Xie, C. and Zhu, F. and Zhao, R.",
  TITLE = "Advancing Referring Expression Segmentation Beyond Single Image",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "2628-2638",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229974"
}

@inproceedings{bb234998,
  AUTHOR = "Kurita, S. and Katsura, N. and Onami, E.",
  TITLE = "RefEgo: Referring Expression Comprehension Dataset from First-Person Perception of Ego4D",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "15168-15178",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229975"
}

@inproceedings{bb234999,
  AUTHOR = "Qiao, Y. and Qi, Y.K. and Yu, Z. and Liu, J. and Wu, Q.",
  TITLE = "March in Chat: Interactive Prompting for Remote Embodied Referring Expression",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "15712-15721",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229976"
}