@comment{
  Bibliography entries bb231900 through bb231999.
  Each entry carries a BIBSOURCE field holding the visionbib.com page and anchor
  it was taken from. Three source pages appear below, in this order:
  applicat803vgr2.html, applicat803vgrqa3.html, applicat803refex3.html.
  BOOKTITLE and JOURNAL values written without quotes (CVPR20, PAMI, MultMed, ...)
  are string macros. Their definitions are not part of this section and must be
  loaded before these entries are processed.
  PAGES values of the form "XVIII:353-370" carry a roman-numeral prefix before
  the page range; single numbers such as "103968" are left as given.
}

@comment{ Entries whose BIBSOURCE is applicat803vgr2.html }

@inproceedings{bb231900,
  AUTHOR = "Shrestha, A. and Pugdeethosapol, K. and Fang, H.W. and Qiu, Q.R.",
  TITLE = "MAGNet: Multi-Region Attention-Assisted Grounding of Natural Language Queries at Phrase Level",
  BOOKTITLE = ICPR21,
  YEAR = "2021",
  PAGES = "8275-8282",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226879"
}

@inproceedings{bb231901,
  AUTHOR = "Zhang, Z. and Zhao, Z. and Zhao, Y. and Wang, Q. and Liu, H. and Gao, L.",
  TITLE = "Where Does It Exist: Spatio-Temporal Video Grounding for Multi-Form Sentences",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10665-10674",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226880"
}

@inproceedings{bb231902,
  AUTHOR = "Sadhu, A. and Chen, K. and Nevatia, R.",
  TITLE = "Video Object Grounding Using Semantic Roles in Language Description",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10414-10424",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226881"
}

@inproceedings{bb231903,
  AUTHOR = "Ma, C.Y. and Kalantidis, Y. and AlRegib, G. and Vajda, P. and Rohrbach, M. and Kira, Z.",
  TITLE = "Learning to Generate Grounded Visual Captions Without Localization Supervision",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "XVIII:353-370",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226882"
}

@inproceedings{bb231904,
  AUTHOR = "Zeng, R.H. and Xu, H.M. and Huang, W.B. and Chen, P.H. and Tan, M.K. and Gan, C.",
  TITLE = "Dense Regression Network for Video Grounding",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10284-10293",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226883"
}

@inproceedings{bb231905,
  AUTHOR = "Gupta, T. and Vahdat, A. and Chechik, G. and Yang, X.D. and Kautz, J. and Hoiem, D.",
  TITLE = "Contrastive Learning for Weakly Supervised Phrase Grounding",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "III:752-768",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226884"
}

@inproceedings{bb231906,
  AUTHOR = "Yang, S. and Li, G.B. and Yu, Y.Z.",
  TITLE = "Propagating Over Phrase Relations for One-stage Visual Grounding",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "XIX:589-605",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226885"
}

@inproceedings{bb231907,
  AUTHOR = "Xiao, J.B. and Shang, X. and Yang, X. and Tang, S. and Chua, T.S.",
  TITLE = "Visual Relation Grounding in Videos",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "VI:447-464",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226886"
}

@inproceedings{bb231908,
  AUTHOR = "Mun, J. and Cho, M. and Han, B.",
  TITLE = "Local-Global Video-Text Interactions for Temporal Grounding",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10807-10816",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226887"
}

@inproceedings{bb231909,
  AUTHOR = "Wu, C. and Lin, Z. and Cohen, S. and Bui, T. and Maji, S.",
  TITLE = "PhraseCut: Language-Based Image Segmentation in the Wild",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10213-10222",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226888"
}

@inproceedings{bb231910,
  AUTHOR = "Chen, L. and Zhai, M.Y. and He, J.W. and Mori, G.",
  TITLE = "Object Grounding via Iterative Context Reasoning",
  BOOKTITLE = MDALC19,
  YEAR = "2019",
  PAGES = "1407-1415",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226889"
}

@inproceedings{bb231911,
  AUTHOR = "Sinha, A. and Akilesh, B. and Sarkar, M. and Krishnamurthy, B.",
  TITLE = "Attention Based Natural Language Grounding by Navigating Virtual Environment",
  BOOKTITLE = WACV19,
  YEAR = "2019",
  PAGES = "236-244",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226890"
}

@inproceedings{bb231912,
  AUTHOR = "Shi, J. and Xu, J. and Gong, B.Q. and Xu, C.L.",
  TITLE = "Not All Frames Are Equal: Weakly-Supervised Video Grounding With Contextual Similarity and Visual Clustering Losses",
  BOOKTITLE = CVPR19,
  YEAR = "2019",
  PAGES = "10436-10444",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226891"
}

@inproceedings{bb231913,
  AUTHOR = "Datta, S. and Sikka, K. and Roy, A. and Ahuja, K. and Parikh, D. and Divakaran, A.",
  TITLE = "Align2Ground: Weakly Supervised Phrase Grounding Guided by Image-Caption Alignment",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "2601-2610",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226892"
}

@inproceedings{bb231914,
  AUTHOR = "Fang, Z.Y. and Kong, S. and Fowlkes, C.C. and Yang, Y.Z.",
  TITLE = "Modularized Textual Grounding for Counterfactual Resilience",
  BOOKTITLE = CVPR19,
  YEAR = "2019",
  PAGES = "6371-6381",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226893"
}

@inproceedings{bb231915,
  AUTHOR = "Zhuang, B. and Wu, Q. and Shen, C. and Reid, I.D. and van den Hengel, A.J.",
  TITLE = "Parallel Attention: A Unified Framework for Visual Object Discovery Through Dialogs and Queries",
  BOOKTITLE = CVPR18,
  YEAR = "2018",
  PAGES = "4252-4261",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226894"
}

@inproceedings{bb231916,
  AUTHOR = "Yang, Z.Y. and Chen, T.L. and Wang, L.W. and Luo, J.B.",
  TITLE = "Improving One-Stage Visual Grounding by Recursive Sub-query Construction",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "XIV:387-404",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226895"
}

@inproceedings{bb231917,
  AUTHOR = "Liu, D.Q. and Zhang, H.W. and Zha, Z.J. and Wu, F.",
  TITLE = "Learning to Assemble Neural Module Tree Networks for Visual Grounding",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "4672-4681",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226896"
}

@inproceedings{bb231918,
  AUTHOR = "Sadhu, A. and Chen, K. and Nevatia, R.",
  TITLE = "Zero-Shot Grounding of Objects From Natural Language Queries",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "4693-4702",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226897"
}

@inproceedings{bb231919,
  AUTHOR = "Yang, Z.Y. and Gong, B.Q. and Wang, L.W. and Huang, W.B. and Yu, D. and Luo, J.B.",
  TITLE = "A Fast and Accurate One-Stage Approach to Visual Grounding",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "4682-4692",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226898"
}

@inproceedings{bb231920,
  AUTHOR = "Rohrbach, A. and Rohrbach, M. and Tang, S. and Oh, S.J. and Schiele, B.",
  TITLE = "Generating Descriptions with Grounded and Co-referenced People",
  BOOKTITLE = CVPR17,
  YEAR = "2017",
  PAGES = "4196-4206",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226899"
}

@inproceedings{bb231921,
  AUTHOR = "Zhu, Y. and Kiros, R. and Zemel, R. and Salakhutdinov, R. and Urtasun, R. and Torralba, A.B. and Fidler, S.",
  TITLE = "Aligning Books and Movies: Towards Story-Like Visual Explanations by Watching Movies and Reading Books",
  BOOKTITLE = ICCV15,
  YEAR = "2015",
  PAGES = "19-27",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226900"
}

@comment{ Entries whose BIBSOURCE is applicat803vgrqa3.html }

@article{bb231922,
  AUTHOR = "Liang, J.W. and Jiang, L. and Cao, L.L. and Kalantidis, Y. and Li, L.J. and Hauptmann, A.G.",
  TITLE = "Focal Visual-Text Attention for Memex Question Answering",
  JOURNAL = PAMI,
  VOLUME = "41",
  YEAR = "2019",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "1893-1908",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226902"
}

@inproceedings{bb231923,
  AUTHOR = "Liang, J.W. and Jiang, L. and Cao, L.L. and Li, L.J. and Hauptmann, A.G.",
  TITLE = "Focal Visual-Text Attention for Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR = "2018",
  PAGES = "6135-6143",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226903"
}

@article{bb231924,
  AUTHOR = "Riquelme, F. and de Goyeneche, A. and Zhang, Y.D. and Niebles, J.C. and Soto, A.",
  TITLE = "Explaining VQA predictions using visual grounding and a knowledge base",
  JOURNAL = IVC,
  VOLUME = "101",
  YEAR = "2020",
  PAGES = "103968",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226904"
}

@article{bb231925,
  AUTHOR = "Zhao, L.C. and Cai, D.G. and Zhang, J. and Sheng, L. and Xu, D. and Zheng, R. and Zhao, Y.J. and Wang, L.P. and Fan, X.",
  TITLE = "Toward Explainable 3D Grounded Visual Question Answering: A New Benchmark and Strong Baseline",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "2935-2949",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226905"
}

@article{bb231926,
  AUTHOR = "Zhu, L.J. and Peng, L. and Zhou, W.N. and Yang, J.L.",
  TITLE = "Dual-decoder transformer network for answer grounding in visual question answering",
  JOURNAL = PRL,
  VOLUME = "171",
  YEAR = "2023",
  PAGES = "53-60",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226906"
}

@inproceedings{bb231927,
  AUTHOR = "Chen, K. and Wu, X.Q.",
  TITLE = "VTQA: Visual Text Question Answering via Entity Alignment and Cross-Media Reasoning",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "27208-27217",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226907"
}

@inproceedings{bb231928,
  AUTHOR = "Di, S.Z. and Xie, W.",
  TITLE = "Grounded Question-Answering in Long Egocentric Videos",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "12934-12943",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226908"
}

@inproceedings{bb231929,
  AUTHOR = "Chen, C.Y. and Anjum, S. and Gurari, D.",
  TITLE = "VQA Therapy: Exploring Answer Differences by Visually Grounding Answers",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "15269-15279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226909"
}

@inproceedings{bb231930,
  AUTHOR = "Le, T.M. and Le, V. and Gupta, S.I. and Venkatesh, S. and Tran, T.",
  TITLE = "Guiding Visual Question Answering with Attention Priors",
  BOOKTITLE = WACV23,
  YEAR = "2023",
  PAGES = "4370-4379",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226910"
}

@inproceedings{bb231931,
  AUTHOR = "Khan, A.U. and Kuehne, H. and Gan, C. and da Vitoria Lobo, N. and Shah, M.",
  TITLE = "Weakly Supervised Grounding for VQA in Vision-Language Transformers",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXXV:652-670",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226911"
}

@comment{
  NOTE(review): in bb231932 the BOOKTITLE is a quoted literal, unlike the bare
  macros used by every other BOOKTITLE in this section, so it prints as the
  raw text ICPR22. Confirm whether an ICPR22 string macro exists before
  switching it to the bare form.
}

@inproceedings{bb231932,
  AUTHOR = "Gupta, K. and Gautam, D. and Mamidi, R.",
  TITLE = "cViL: Cross-Lingual Training of Vision-Language Models using Knowledge Distillation",
  BOOKTITLE = "ICPR22",
  YEAR = "2022",
  PAGES = "1734-1741",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226912"
}

@inproceedings{bb231933,
  AUTHOR = "Li, Y.C. and Wang, X. and Xiao, J.B. and Ji, W. and Chua, T.S.",
  TITLE = "Invariant Grounding for Video Question Answering",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "2918-2927",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226913"
}

@inproceedings{bb231934,
  AUTHOR = "Lu, X.P. and Fan, Z. and Wang, Y. and Oh, J. and Rose, C.P.",
  TITLE = "Localize, Group, and Select: Boosting Text-VQA by Scene Text Modeling",
  BOOKTITLE = XSAnim21,
  YEAR = "2021",
  PAGES = "2631-2639",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226914"
}

@inproceedings{bb231935,
  AUTHOR = "Khan, A.U. and Kuehne, H. and Duarte, K. and Gan, C. and Lobo, N. and Shah, M.",
  TITLE = "Found a Reason for me? Weakly-supervised Grounded Visual Question Answering using Capsules",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "8461-8470",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226915"
}

@inproceedings{bb231936,
  AUTHOR = "Selvaraju, R.R. and Tendulkar, P. and Parikh, D. and Horvitz, E. and Ribeiro, M.T. and Nushi, B. and Kamar, E.",
  TITLE = "SQuINTing at VQA Models: Introspecting VQA Models With Sub-Questions",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10000-10008",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226916"
}

@inproceedings{bb231937,
  AUTHOR = "Gouthaman, K.V. and Mittal, A.",
  TITLE = "Reducing Language Biases in Visual Question Answering with Visually-grounded Question Encoder",
  BOOKTITLE = ECCV20,
  YEAR = "2020",
  PAGES = "XIII:18-34",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226917"
}

@inproceedings{bb231938,
  AUTHOR = "Tan, H.L. and Leong, M.C. and Xu, Q. and Li, L. and Fang, F. and Cheng, Y. and Gauthier, N. and Sun, Y. and Lim, J.H.",
  TITLE = "Task-Oriented Multi-Modal Question Answering For Collaborative Applications",
  BOOKTITLE = ICIP20,
  YEAR = "2020",
  PAGES = "1426-1430",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226918"
}

@inproceedings{bb231939,
  AUTHOR = "Selvaraju, R.R. and Lee, S. and Shen, Y. and Jin, H. and Ghosh, S. and Heck, L. and Batra, D. and Parikh, D.",
  TITLE = "Taking a HINT: Leveraging Explanations to Make Vision and Language Models More Grounded",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "2591-2600",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226919"
}

@inproceedings{bb231940,
  AUTHOR = "Zhang, Y. and Niebles, J.C. and Soto, A.",
  TITLE = "Interpretable Visual Question Answering by Visual Grounding From Attention Supervision Mining",
  BOOKTITLE = WACV19,
  YEAR = "2019",
  PAGES = "349-357",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226920"
}

@comment{ Entries whose BIBSOURCE is applicat803refex3.html }

@article{bb231941,
  AUTHOR = "Li, X. and Jiang, S.",
  TITLE = "Bundled Object Context for Referring Expressions",
  JOURNAL = MultMed,
  VOLUME = "20",
  YEAR = "2018",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "2749-2760",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226921"
}

@article{bb231942,
  AUTHOR = "Wang, J.M. and Cui, E. and Liu, K.L. and Sun, Y.K. and Liang, J.Y. and Yuan, C.M. and Duan, X.J. and Jin, G.H. and Chung, T.S.",
  TITLE = "Referring expression comprehension model with matching detection and linguistic feedback",
  JOURNAL = IET-CV,
  VOLUME = "14",
  YEAR = "2020",
  NUMBER = "8",
  MONTH = "December",
  PAGES = "625-633",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226922"
}

@article{bb231943,
  AUTHOR = "Qiao, Y.Y. and Deng, C.R. and Wu, Q.",
  TITLE = "Referring Expression Comprehension: A Survey of Methods and Datasets",
  JOURNAL = MultMed,
  VOLUME = "23",
  YEAR = "2021",
  PAGES = "4426-4440",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226923"
}

@article{bb231944,
  AUTHOR = "Niu, Y.L. and Zhang, H.W. and Lu, Z.W. and Chang, S.F.",
  TITLE = "Variational Context: Exploiting Visual and Textual Context for Grounding Referring Expressions",
  JOURNAL = PAMI,
  VOLUME = "43",
  YEAR = "2021",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "347-359",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226924"
}

@article{bb231945,
  AUTHOR = "Yang, S. and Li, G.B. and Yu, Y.Z.",
  TITLE = "Relationship-Embedded Representation Learning for Grounding Referring Expressions",
  JOURNAL = PAMI,
  VOLUME = "43",
  YEAR = "2021",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "2765-2779",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226925"
}

@inproceedings{bb231946,
  AUTHOR = "Yang, S. and Li, G.B. and Yu, Y.Z.",
  TITLE = "Cross-Modal Relationship Inference for Grounding Referring Expressions",
  BOOKTITLE = CVPR19,
  YEAR = "2019",
  PAGES = "4140-4149",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226926"
}

@article{bb231947,
  AUTHOR = "Sun, M.J. and Xiao, J. and Lim, E.G. and Liu, S. and Goulermas, J.Y.",
  TITLE = "Discriminative Triad Matching and Reconstruction for Weakly Referring Expression Grounding",
  JOURNAL = PAMI,
  VOLUME = "43",
  YEAR = "2021",
  NUMBER = "11",
  MONTH = "November",
  PAGES = "4189-4195",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226927"
}

@article{bb231948,
  AUTHOR = "Lin, L. and Yan, P.X. and Xu, X.Q. and Yang, S. and Zeng, K. and Li, G.B.",
  TITLE = "Structured Attention Network for Referring Image Segmentation",
  JOURNAL = MultMed,
  VOLUME = "24",
  YEAR = "2022",
  PAGES = "1922-1932",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226928"
}

@article{bb231949,
  AUTHOR = "Yang, X. and Wang, H. and Xie, D. and Deng, C. and Tao, D.C.",
  TITLE = "Object-Agnostic Transformers for Video Referring Segmentation",
  JOURNAL = IP,
  VOLUME = "31",
  YEAR = "2022",
  PAGES = "2839-2849",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226929"
}

@article{bb231950,
  AUTHOR = "Wang, X. and Xie, D. and Zheng, Y.S.",
  TITLE = "Referring expression grounding by multi-context reasoning",
  JOURNAL = PRL,
  VOLUME = "160",
  YEAR = "2022",
  PAGES = "66-72",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226930"
}

@article{bb231951,
  AUTHOR = "Shen, H.T. and Chen, C. and Wang, P. and Gao, L.L. and Wang, M. and Song, J.K.",
  TITLE = "Continual Referring Expression Comprehension via Dual Modular Memorization",
  JOURNAL = IP,
  VOLUME = "31",
  YEAR = "2022",
  PAGES = "6694-6706",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226931"
}

@article{bb231952,
  AUTHOR = "Chen, Y.W. and Tsai, Y.H. and Yang, M.H.",
  TITLE = "Understanding Synonymous Referring Expressions via Contrastive Features",
  JOURNAL = IJCV,
  VOLUME = "130",
  YEAR = "2022",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "2501-2516",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226932"
}

@article{bb231953,
  AUTHOR = "Suo, W. and Sun, M.Y. and Wang, P. and Zhang, Y.N. and Wu, Q.",
  TITLE = "Rethinking and Improving Feature Pyramids for One-Stage Referring Expression Comprehension",
  JOURNAL = IP,
  VOLUME = "32",
  YEAR = "2023",
  PAGES = "854-864",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226933"
}

@article{bb231954,
  AUTHOR = "Liu, X.J. and Li, L. and Wang, S.H. and Zha, Z.J. and Li, Z.C. and Tian, Q. and Huang, Q.M.",
  TITLE = "Entity-Enhanced Adaptive Reconstruction Network for Weakly Supervised Referring Expression Grounding",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "3",
  MONTH = "March",
  PAGES = "3003-3018",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226934"
}

@inproceedings{bb231955,
  AUTHOR = "Liu, X.J. and Li, L. and Wang, S.H. and Zha, Z.J. and Meng, D.C. and Huang, Q.M.",
  TITLE = "Adaptive Reconstruction Network for Weakly Supervised Referring Expression Grounding",
  BOOKTITLE = ICCV19,
  YEAR = "2019",
  PAGES = "2611-2620",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226935"
}

@article{bb231956,
  AUTHOR = "Feng, G. and Zhang, L. and Sun, J. and Hu, Z.W. and Lu, H.C.",
  TITLE = "Referring Segmentation via Encoder-Fused Cross-Modal Attention Network",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "7654-7667",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226936"
}

@inproceedings{bb231957,
  AUTHOR = "Feng, G. and Hu, Z.W. and Zhang, L. and Lu, H.C.",
  TITLE = "Encoder Fusion Network with Co-Attention Embedding for Referring Image Segmentation",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "15501-15510",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226937"
}

@article{bb231958,
  AUTHOR = "Liu, D.Z. and Zhou, P. and Xu, Z. and Wang, H.Z. and Li, R.X.",
  TITLE = "Few-Shot Temporal Sentence Grounding via Memory-Guided Semantic Learning",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "5",
  MONTH = "May",
  PAGES = "2491-2505",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226938"
}

@article{bb231959,
  AUTHOR = "Sun, M.J. and Xiao, J. and Lim, E.G. and Zhao, Y.",
  TITLE = "Cycle-Free Weakly Referring Expression Grounding With Self-Paced Learning",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "1611-1621",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226939"
}

@article{bb231960,
  AUTHOR = "Sun, M.Y. and Suo, W. and Wang, P. and Zhang, Y.N. and Wu, Q.",
  TITLE = "A Proposal-Free One-Stage Framework for Referring Expression Comprehension and Generation via Dense Cross-Attention",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "2446-2458",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226940"
}

@article{bb231961,
  AUTHOR = "Sun, Y.F. and Zhang, Y. and Jiang, H. and Hu, Y.L. and Yin, B.C.",
  TITLE = "Multi-level attention for referring expression comprehension",
  JOURNAL = PRL,
  VOLUME = "172",
  YEAR = "2023",
  PAGES = "252-258",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226941"
}

@article{bb231962,
  AUTHOR = "Wang, R. and Tang, Z. and Zhou, Q.L. and Liu, X.Q. and Hui, T.R. and Tan, Q. and Liu, S.",
  TITLE = "Unified Transformer with Isomorphic Branches for Natural Language Tracking",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "4529-4541",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226942"
}

@article{bb231963,
  AUTHOR = "Li, H. and Sun, M.J. and Xiao, J. and Lim, E.G. and Zhao, Y.",
  TITLE = "Fully and Weakly Supervised Referring Expression Segmentation With End-to-End Learning",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "5999-6012",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226943"
}

@article{bb231964,
  AUTHOR = "Liu, C. and Jiang, X.D. and Ding, H.H.",
  TITLE = "Instance-Specific Feature Propagation for Referring Segmentation",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "3657-3667",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226944"
}

@article{bb231965,
  AUTHOR = "Song, Y.Z. and Chen, Y.S. and Shuai, H.H.",
  TITLE = "Decoupling-Cooperative Framework for Referring Expression Comprehension",
  JOURNAL = SPLetters,
  VOLUME = "30",
  YEAR = "2023",
  PAGES = "1542-1546",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226945"
}

@article{bb231966,
  AUTHOR = "Hua, G.G. and Liao, M. and Tian, S. and Zhang, Y.H. and Zou, W.B.",
  TITLE = "Multiple Relational Learning Network for Joint Referring Expression Comprehension and Segmentation",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "8805-8816",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226946"
}

@article{bb231967,
  AUTHOR = "Wang, W.B. and Pagnucco, M. and Xu, C.P. and Song, Y.",
  TITLE = "InterREC: An Interpretable Method for Referring Expression Comprehension",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "9330-9342",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226947"
}

@article{bb231968,
  AUTHOR = "Ke, J.C. and Wang, J. and Chen, J.C. and Jhuo, I.H. and Lin, C.W. and Lin, Y.Y.",
  TITLE = "CLIPREC: Graph-Based Domain Adaptive Network for Zero-Shot Referring Expression Comprehension",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "2480-2492",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226948"
}

@article{bb231969,
  AUTHOR = "Ke, J.C. and Wang, J. and Wong, W.K. and Toomey, A. and Wen, J.",
  TITLE = "Graph-Based Group Division Network for Referring Expression Comprehension",
  JOURNAL = CirSysVideo,
  VOLUME = "35",
  YEAR = "2025",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "6170-6183",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226949"
}

@article{bb231970,
  AUTHOR = "Li, X.C. and Fan, B.Y. and Zhang, R. and Zhao, K. and Guo, Z.H. and Zhao, Y.Q. and Li, R.",
  TITLE = "Inexactly Matched Referring Expression Comprehension With Rationale",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "3937-3950",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226950"
}

@article{bb231971,
  AUTHOR = "Luo, G. and Zhou, Y. and Sun, J. and Sun, X.S. and Ji, R.R.",
  TITLE = "A Survivor in the Era of Large-Scale Pretraining: An Empirical Study of One-Stage Referring Expression Comprehension",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "3689-3700",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226951"
}

@article{bb231972,
  AUTHOR = "Miao, P.H. and Su, W. and Wang, G.A. and Li, X.W. and Xi, L.",
  TITLE = "Self-Paced Multi-Grained Cross-Modal Interaction Modeling for Referring Expression Comprehension",
  JOURNAL = IP,
  VOLUME = "33",
  YEAR = "2024",
  PAGES = "1497-1507",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226952"
}

@article{bb231973,
  AUTHOR = "Liu, Z.T. and Xu, T.Y. and Song, X.N. and Wu, X.J.",
  TITLE = "Unified Referring Expression Generation for Bounding Boxes and Segmentations",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "636-640",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226953"
}

@article{bb231974,
  AUTHOR = "Zhang, Y.J. and Li, Q.Z. and Pan, Y. and Zhao, X.G. and Tan, M.",
  TITLE = "Multi-Stage Image-Language Cross-Generative Fusion Network for Video-Based Referring Expression Comprehension",
  JOURNAL = IP,
  VOLUME = "33",
  YEAR = "2024",
  PAGES = "3256-3270",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226954"
}

@article{bb231975,
  AUTHOR = "Lu, M.C. and Li, R.F. and Feng, F.X. and Ma, Z.Y. and Wang, X.J.",
  TITLE = "LGR-NET: Language Guided Reasoning Network for Referring Expression Comprehension",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "7771-7784",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226955"
}

@article{bb231976,
  AUTHOR = "Yao, H.B. and Wang, L.P. and Cai, C.T. and Wang, W. and Zhang, Z. and Shang, X.B.",
  TITLE = "Language conditioned multi-scale visual attention networks for visual grounding",
  JOURNAL = IVC,
  VOLUME = "150",
  YEAR = "2024",
  PAGES = "105242",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226956"
}

@article{bb231977,
  AUTHOR = "Ji, Z. and Wu, J. and Wang, Y. and Yang, A. and Han, J.G.",
  TITLE = "Progressive Semantic Reconstruction Network for Weakly Supervised Referring Expression Grounding",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "12",
  MONTH = "December",
  PAGES = "13058-13070",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226957"
}

@article{bb231978,
  AUTHOR = "Qiu, H.Q. and Wang, L.X. and Zhao, T. and Meng, F.M. and Wu, Q.B. and Li, H.L.",
  TITLE = "MCCE-REC: MLLM-Driven Cross-Modal Contrastive Entropy Model for Zero-Shot Referring Expression Comprehension",
  JOURNAL = CirSysVideo,
  VOLUME = "35",
  YEAR = "2025",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "754-768",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226958"
}

@article{bb231979,
  AUTHOR = "Ke, J.C. and Zhang, Q. and Wang, J. and Ding, H.Q. and Zhang, P.F. and Wen, J.",
  TITLE = "Graph-based referring expression comprehension with expression-guided selective filtering and noun-oriented reasoning",
  JOURNAL = PR,
  VOLUME = "161",
  YEAR = "2025",
  PAGES = "111222",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226959"
}

@article{bb231980,
  AUTHOR = "Ke, J.C. and Wang, D. and Chen, J.C. and Jhuo, I.H. and Lin, C.W. and Lin, Y.Y.",
  TITLE = "Make Graph-Based Referring Expression Comprehension Great Again Through Expression-Guided Dynamic Gating and Regression",
  JOURNAL = MultMed,
  VOLUME = "27",
  YEAR = "2025",
  PAGES = "1950-1961",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226960"
}

@article{bb231981,
  AUTHOR = "Huang, S.J. and Li, F. and Zhang, H. and Liu, S.L. and Zhang, L. and Wang, L.W.",
  TITLE = "A Mutual Supervision Framework for Referring Expression Segmentation and Generation",
  JOURNAL = IJCV,
  VOLUME = "133",
  YEAR = "2025",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "3597-3612",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226961"
}

@article{bb231982,
  AUTHOR = "Ke, X. and Xu, P. and Guo, W.Z.",
  TITLE = "Language-Image Consistency Augmentation and Distillation Network for visual grounding",
  JOURNAL = PR,
  VOLUME = "166",
  YEAR = "2025",
  PAGES = "111663",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226962"
}

@inproceedings{bb231983,
  AUTHOR = "Wang, S.J. and Kim, D. and Taalimi, A. and Sun, C. and Kuo, W.C.",
  TITLE = "Learning Visual Grounding from Generative Vision and Language Model",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "8057-8067",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226963"
}

@inproceedings{bb231984,
  AUTHOR = "Wu, T.Y. and Huang, S.Y. and Wang, Y.C.A.F.",
  TITLE = "Data-Efficient 3D Visual Grounding via Order-Aware Referring",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "3107-3117",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226964"
}

@inproceedings{bb231985,
  AUTHOR = "Chu, T.Y. and Lin, Y.X. and Huang, C.C. and Hua, K.L.",
  TITLE = "Enhancing Anchor-based Weakly Supervised Referring Expression Comprehension with Cross-modality Attention",
  BOOKTITLE = ACCV24,
  YEAR = "2024",
  PAGES = "III: 131-147",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226965"
}

@inproceedings{bb231986,
  AUTHOR = "Nag, S. and Goswami, K. and Karanam, S.",
  TITLE = "Safari: Adaptive Sequence Transformer for Weakly Supervised Referring Expression Segmentation",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "XLIV: 485-503",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226966"
}

@inproceedings{bb231987,
  AUTHOR = "Dai, S.Y. and Liu, J. and Cheung, N.M.",
  TITLE = "Referring Expression Counting",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "16985-16995",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226967"
}

@inproceedings{bb231988,
  AUTHOR = "Han, Z. and Zhu, F. and Lao, Q. and Jiang, H.",
  TITLE = "Zero-Shot Referring Expression Comprehension via Structural Similarity Between Images and Captions",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "14364-14375",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226968"
}

@inproceedings{bb231989,
  AUTHOR = "Su, W. and Miao, P. and Dou, H.Z. and Li, X.",
  TITLE = "ScanFormer: Referring Expression Comprehension by Iteratively Scanning",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "13449-13458",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226969"
}

@inproceedings{bb231990,
  AUTHOR = "Yu, Z.H. and Li, R.",
  TITLE = "Revisiting Counterfactual Problems in Referring Expression Comprehension",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "13438-13448",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226970"
}

@inproceedings{bb231991,
  AUTHOR = "Li, X. and Qiu, K. and Wang, J.L. and Xu, X.H. and Singh, R. and Yamazaki, K. and Chen, H. and Huang, X.N. and Raj, B.",
  TITLE = "{$R^2$}-Bench: Benchmarking the Robustness of Referring Perception Models Under Perturbations",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "IX: 211-230",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226971"
}

@inproceedings{bb231992,
  AUTHOR = "Chng, Y.X. and Zheng, H. and Han, Y.Z. and Qiu, X. and Huang, G.",
  TITLE = "Mask Grounding for Referring Image Segmentation",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "26563-26573",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226972"
}

@inproceedings{bb231993,
  AUTHOR = "Shah, N.A. and VS, V. and Patel, V.M.",
  TITLE = "LQMFormer: Language-Aware Query Mask Transformer for Referring Image Segmentation",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "12903-12913",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226973"
}

@inproceedings{bb231994,
  AUTHOR = "Wang, W.X. and Yue, T.T. and Zhang, Y. and Guo, L.T. and He, X.J. and Wang, X.L. and Liu, J.",
  TITLE = "Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation",
  BOOKTITLE = CVPR24,
  YEAR = "2024",
  PAGES = "12998-13008",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226974"
}

@inproceedings{bb231995,
  AUTHOR = "Wu, Y.X. and Zhang, Z. and Xie, C. and Zhu, F. and Zhao, R.",
  TITLE = "Advancing Referring Expression Segmentation Beyond Single Image",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "2628-2638",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226975"
}

@inproceedings{bb231996,
  AUTHOR = "Kurita, S. and Katsura, N. and Onami, E.",
  TITLE = "RefEgo: Referring Expression Comprehension Dataset from First-Person Perception of Ego4D",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "15168-15178",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226976"
}

@inproceedings{bb231997,
  AUTHOR = "Qiao, Y. and Qi, Y. and Yu, Z. and Liu, J. and Wu, Q.",
  TITLE = "March in Chat: Interactive Prompting for Remote Embodied Referring Expression",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "15712-15721",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226977"
}

@inproceedings{bb231998,
  AUTHOR = "Chen, Y. and Du, R. and Liang, K. and Ma, Z.Y.",
  TITLE = "Self-Enhanced Training Framework for Referring Expression Grounding",
  BOOKTITLE = ICIP23,
  YEAR = "2023",
  PAGES = "3060-3064",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226978"
}

@inproceedings{bb231999,
  AUTHOR = "Sun, J. and Luo, G. and Zhou, Y. and Sun, X.S. and Jiang, G.N. and Wang, Z.Y. and Ji, R.R.",
  TITLE = "RefTeacher: A Strong Baseline for Semi-Supervised Referring Expression Comprehension",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "19144-19154",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226979"
}