@inproceedings{bb234900,
  author    = {Zhang, Z. and Zhao, Z. and Zhao, Y. and Wang, Q. and Liu, H. and Gao, L.},
  title     = {Where Does It Exist: Spatio-Temporal Video Grounding for Multi-Form Sentences},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10665-10674},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229876},
}

@inproceedings{bb234901,
  author    = {Sadhu, A. and Chen, K. and Nevatia, R.},
  title     = {Video Object Grounding Using Semantic Roles in Language Description},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10414-10424},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229877},
}

@inproceedings{bb234902,
  author    = {Ma, C.Y. and Kalantidis, Y. and AlRegib, G. and Vajda, P. and Rohrbach, M. and Kira, Z.},
  title     = {Learning to Generate Grounded Visual Captions Without Localization Supervision},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XVIII:353-370},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229878},
}

@inproceedings{bb234903,
  author    = {Zeng, R.H. and Xu, H.M. and Huang, W.B. and Chen, P.H. and Tan, M.K. and Gan, C.},
  title     = {Dense Regression Network for Video Grounding},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10284-10293},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229879},
}

@inproceedings{bb234904,
  author    = {Gupta, T. and Vahdat, A. and Chechik, G. and Yang, X.D. and Kautz, J. and Hoiem, D.},
  title     = {Contrastive Learning for Weakly Supervised Phrase Grounding},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {III:752-768},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229880},
}

@inproceedings{bb234905,
  author    = {Yang, S. and Li, G.B. and Yu, Y.Z.},
  title     = {Propagating Over Phrase Relations for One-stage Visual Grounding},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XIX:589-605},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229881},
}

@inproceedings{bb234906,
  author    = {Xiao, J.B. and Shang, X. and Yang, X. and Tang, S. and Chua, T.S.},
  title     = {Visual Relation Grounding in Videos},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {VI:447-464},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229882},
}

@inproceedings{bb234907,
  author    = {Mun, J. and Cho, M. and Han, B.},
  title     = {Local-Global Video-Text Interactions for Temporal Grounding},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10807-10816},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229883},
}

@inproceedings{bb234908,
        AUTHOR = "Wu, C. and Lin, Z. and Cohen, S. and Bui, T. and Maji, S.",
        TITLE = "{PhraseCut}: Language-Based Image Segmentation in the Wild",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10213-10222",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229884"}

@inproceedings{bb234909,
  author    = {Chen, L. and Zhai, M.Y. and He, J.W. and Mori, G.},
  title     = {Object Grounding via Iterative Context Reasoning},
  booktitle = MDALC19,
  year      = {2019},
  pages     = {1407-1415},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229885},
}

@inproceedings{bb234910,
  author    = {Sinha, A. and Akilesh, B. and Sarkar, M. and Krishnamurthy, B.},
  title     = {Attention Based Natural Language Grounding by Navigating Virtual Environment},
  booktitle = WACV19,
  year      = {2019},
  pages     = {236-244},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229886},
}

@inproceedings{bb234911,
  author    = {Shi, J. and Xu, J. and Gong, B.Q. and Xu, C.L.},
  title     = {Not All Frames Are Equal: Weakly-Supervised Video Grounding With Contextual Similarity and Visual Clustering Losses},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {10436-10444},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229887},
}

@inproceedings{bb234912,
        AUTHOR = "Datta, S. and Sikka, K. and Roy, A. and Ahuja, K. and Parikh, D. and Divakaran, A.",
        TITLE = "{Align2Ground}: Weakly Supervised Phrase Grounding Guided by
Image-Caption Alignment",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "2601-2610",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229888"}

@inproceedings{bb234913,
  author    = {Fang, Z.Y. and Kong, S. and Fowlkes, C.C. and Yang, Y.Z.},
  title     = {Modularized Textual Grounding for Counterfactual Resilience},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6371-6381},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229889},
}

@inproceedings{bb234914,
  author    = {Zhuang, B. and Wu, Q. and Shen, C. and Reid, I.D. and van den Hengel, A.J.},
  title     = {Parallel Attention: A Unified Framework for Visual Object Discovery Through Dialogs and Queries},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {4252-4261},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229890},
}

@inproceedings{bb234915,
  author    = {Yang, Z.Y. and Chen, T.L. and Wang, L.W. and Luo, J.B.},
  title     = {Improving One-Stage Visual Grounding by Recursive Sub-query Construction},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XIV:387-404},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229891},
}

@inproceedings{bb234916,
  author    = {Liu, D.Q. and Zhang, H.W. and Zha, Z.J. and Wu, F.},
  title     = {Learning to Assemble Neural Module Tree Networks for Visual Grounding},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4672-4681},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229892},
}

@inproceedings{bb234917,
  author    = {Sadhu, A. and Chen, K. and Nevatia, R.},
  title     = {Zero-Shot Grounding of Objects From Natural Language Queries},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4693-4702},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229893},
}

@inproceedings{bb234918,
  author    = {Yang, Z.Y. and Gong, B.Q. and Wang, L.W. and Huang, W.B. and Yu, D. and Luo, J.B.},
  title     = {A Fast and Accurate One-Stage Approach to Visual Grounding},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4682-4692},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229894},
}

@inproceedings{bb234919,
  author    = {Rohrbach, A. and Rohrbach, M. and Tang, S. and Oh, S.J. and Schiele, B.},
  title     = {Generating Descriptions with Grounded and Co-referenced People},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {4196-4206},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229895},
}

@inproceedings{bb234920,
  author    = {Zhu, Y. and Kiros, R. and Zemel, R. and Salakhutdinov, R. and Urtasun, R. and Torralba, A.B. and Fidler, S.},
  title     = {Aligning Books and Movies: Towards Story-Like Visual Explanations by Watching Movies and Reading Books},
  booktitle = ICCV15,
  year      = {2015},
  pages     = {19-27},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT229896},
}

@article{bb234921,
        AUTHOR = "Liang, J.W. and Jiang, L. and Cao, L.L. and Kalantidis, Y. and Li, L.J. and Hauptmann, A.G.",
        TITLE = "Focal Visual-Text Attention for {Memex} Question Answering",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "8",
        MONTH = "August",
        PAGES = "1893-1908",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229898"}

@inproceedings{bb234922,
  author    = {Liang, J.W. and Jiang, L. and Cao, L.L. and Li, L.J. and Hauptmann, A.G.},
  title     = {Focal Visual-Text Attention for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6135-6143},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229899},
}

@article{bb234923,
        AUTHOR = "Riquelme, F. and de Goyeneche, A. and Zhang, Y.D. and Niebles, J.C. and Soto, A.",
        TITLE = "Explaining {VQA} predictions using visual grounding and a knowledge
base",
        JOURNAL = IVC,
        VOLUME = "101",
        YEAR = "2020",
        PAGES = "103968",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229900"}

@article{bb234924,
        AUTHOR = "Zhao, L.C. and Cai, D.G. and Zhang, J. and Sheng, L. and Xu, D. and Zheng, R. and Zhao, Y.J. and Wang, L.P. and Fan, X.",
        TITLE = "Toward Explainable {3D} Grounded Visual Question Answering: A New
Benchmark and Strong Baseline",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "6",
        MONTH = "June",
        PAGES = "2935-2949",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229901"}

@article{bb234925,
  author    = {Zhu, L.J. and Peng, L. and Zhou, W.N. and Yang, J.L.},
  title     = {Dual-decoder transformer network for answer grounding in visual question answering},
  journal   = PRL,
  volume    = {171},
  year      = {2023},
  pages     = {53-60},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229902},
}

@inproceedings{bb234926,
        AUTHOR = "Huang, J.Y. and Jia, B.X. and Wang, Y. and Zhu, Z.Y. and Linghu, X.K. and Li, Q. and Zhu, S.C. and Huang, S.Y.",
        TITLE = "Unveiling the Mist over {3D} Vision-Language Understanding:
Object-centric Evaluation with Chain-of-Analysis",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "24570-24581",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229903"}

@inproceedings{bb234927,
        AUTHOR = "Chen, K. and Wu, X.Q.",
        TITLE = "{VTQA}: Visual Text Question Answering via Entity Alignment and
Cross-Media Reasoning",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "27208-27217",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229904"}

@inproceedings{bb234928,
  author    = {Di, S.Z. and Xie, W.},
  title     = {Grounded Question-Answering in Long Egocentric Videos},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {12934-12943},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229905},
}

@inproceedings{bb234929,
        AUTHOR = "Chen, C.Y. and Anjum, S. and Gurari, D.",
        TITLE = "{VQA} Therapy: Exploring Answer Differences by Visually Grounding
Answers",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15269-15279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229906"}

@inproceedings{bb234930,
  author    = {Le, T.M. and Le, V. and Gupta, S.I. and Venkatesh, S. and Tran, T.},
  title     = {Guiding Visual Question Answering with Attention Priors},
  booktitle = WACV23,
  year      = {2023},
  pages     = {4370-4379},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229907},
}

@inproceedings{bb234931,
        AUTHOR = "Khan, A.U. and Kuehne, H. and Gan, C. and da Vitoria Lobo, N. and Shah, M.",
        TITLE = "Weakly Supervised Grounding for {VQA} in Vision-Language Transformers",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:652-670",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229908"}

% NOTE(review): the venue was the quoted literal "ICPR22", which prints verbatim;
% no ICPR22 string macro is visible in this part of the file, so the name is spelled out.
% If such a macro exists elsewhere, BOOKTITLE = ICPR22 (unquoted) would match sibling entries.
@inproceedings{bb234932,
        AUTHOR = "Gupta, K. and Gautam, D. and Mamidi, R.",
        TITLE = "{cViL}: Cross-Lingual Training of Vision-Language Models using
Knowledge Distillation",
        BOOKTITLE = "26th International Conference on Pattern Recognition ({ICPR})",
        YEAR = "2022",
        PAGES = "1734-1741",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229909"}

@inproceedings{bb234933,
  author    = {Li, Y.C. and Wang, X. and Xiao, J.B. and Ji, W. and Chua, T.S.},
  title     = {Invariant Grounding for Video Question Answering},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {2918-2927},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229910},
}

@inproceedings{bb234934,
        AUTHOR = "Lu, X.P. and Fan, Z. and Wang, Y. and Oh, J. and Rose, C.P.",
        TITLE = "Localize, Group, and Select: Boosting {Text-VQA} by Scene Text Modeling",
        BOOKTITLE = XSAnim21,
        YEAR = "2021",
        PAGES = "2631-2639",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229911"}

@inproceedings{bb234935,
  author    = {Khan, A.U. and Kuehne, H. and Duarte, K. and Gan, C. and Lobo, N. and Shah, M.},
  title     = {Found a Reason for me? Weakly-supervised Grounded Visual Question Answering using Capsules},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {8461-8470},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229912},
}

@inproceedings{bb234936,
        AUTHOR = "Selvaraju, R.R. and Tendulkar, P. and Parikh, D. and Horvitz, E. and Ribeiro, M.T. and Nushi, B. and Kamar, E.",
        TITLE = "{SQuINTing} at {VQA} Models: Introspecting {VQA} Models With Sub-Questions",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10000-10008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229913"}

@inproceedings{bb234937,
  author    = {Gouthaman, K.V. and Mittal, A.},
  title     = {Reducing Language Biases in Visual Question Answering with Visually-grounded Question Encoder},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XIII:18-34},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229914},
}

@inproceedings{bb234938,
  author    = {Tan, H.L. and Leong, M.C. and Xu, Q. and Li, L. and Fang, F. and Cheng, Y. and Gauthier, N. and Sun, Y. and Lim, J.H.},
  title     = {Task-Oriented Multi-Modal Question Answering For Collaborative Applications},
  booktitle = ICIP20,
  year      = {2020},
  pages     = {1426-1430},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229915},
}

@inproceedings{bb234939,
        AUTHOR = "Selvaraju, R.R. and Lee, S. and Shen, Y. and Jin, H. and Ghosh, S. and Heck, L. and Batra, D. and Parikh, D.",
        TITLE = "Taking a {HINT}: Leveraging Explanations to Make Vision and Language
Models More Grounded",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "2591-2600",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229916"}

@inproceedings{bb234940,
  author    = {Zhang, Y. and Niebles, J.C. and Soto, A.},
  title     = {Interpretable Visual Question Answering by Visual Grounding From Attention Supervision Mining},
  booktitle = WACV19,
  year      = {2019},
  pages     = {349-357},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT229917},
}

@article{bb234941,
  author    = {Li, X. and Jiang, S.},
  title     = {Bundled Object Context for Referring Expressions},
  journal   = MultMed,
  volume    = {20},
  number    = {10},
  month     = {October},
  year      = {2018},
  pages     = {2749-2760},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229918},
}

@article{bb234942,
  author    = {Wang, J.M. and Cui, E. and Liu, K.L. and Sun, Y.K. and Liang, J.Y. and Yuan, C.M. and Duan, X.J. and Jin, G.H. and Chung, T.S.},
  title     = {Referring expression comprehension model with matching detection and linguistic feedback},
  journal   = IET-CV,
  volume    = {14},
  number    = {8},
  month     = {December},
  year      = {2020},
  pages     = {625-633},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229919},
}

@article{bb234943,
  author    = {Qiao, Y.Y. and Deng, C.R. and Wu, Q.},
  title     = {Referring Expression Comprehension: A Survey of Methods and Datasets},
  journal   = MultMed,
  volume    = {23},
  year      = {2021},
  pages     = {4426-4440},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229920},
}

@article{bb234944,
  author    = {Niu, Y.L. and Zhang, H.W. and Lu, Z.W. and Chang, S.F.},
  title     = {Variational Context: Exploiting Visual and Textual Context for Grounding Referring Expressions},
  journal   = PAMI,
  volume    = {43},
  number    = {1},
  month     = {January},
  year      = {2021},
  pages     = {347-359},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229921},
}

@article{bb234945,
  author    = {Yang, S. and Li, G.B. and Yu, Y.Z.},
  title     = {Relationship-Embedded Representation Learning for Grounding Referring Expressions},
  journal   = PAMI,
  volume    = {43},
  number    = {8},
  month     = {August},
  year      = {2021},
  pages     = {2765-2779},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229922},
}

@inproceedings{bb234946,
  author    = {Yang, S. and Li, G.B. and Yu, Y.Z.},
  title     = {Cross-Modal Relationship Inference for Grounding Referring Expressions},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {4140-4149},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229923},
}

@article{bb234947,
  author    = {Sun, M.J. and Xiao, J. and Lim, E.G. and Liu, S. and Goulermas, J.Y.},
  title     = {Discriminative Triad Matching and Reconstruction for Weakly Referring Expression Grounding},
  journal   = PAMI,
  volume    = {43},
  number    = {11},
  month     = {November},
  year      = {2021},
  pages     = {4189-4195},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229924},
}

@article{bb234948,
        AUTHOR = "Lin, L. and Yan, P.X. and Xu, X.Q. and Yang, S. and Zeng, K. and Li, G.B.",
        TITLE = "Structured Attention Network for Referring Image Segmentation",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "1922-1932",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229925"}

@article{bb234949,
        AUTHOR = "Yang, X. and Wang, H. and Xie, D. and Deng, C. and Tao, D.C.",
        TITLE = "Object-Agnostic Transformers for Video Referring Segmentation",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "2839-2849",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229926"}

@article{bb234950,
  author    = {Wang, X. and Xie, D. and Zheng, Y.S.},
  title     = {Referring expression grounding by multi-context reasoning},
  journal   = PRL,
  volume    = {160},
  year      = {2022},
  pages     = {66-72},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229927},
}

@article{bb234951,
  author    = {Shen, H.T. and Chen, C. and Wang, P. and Gao, L.L. and Wang, M. and Song, J.K.},
  title     = {Continual Referring Expression Comprehension via Dual Modular Memorization},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {6694-6706},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229928},
}

@article{bb234952,
  author    = {Chen, Y.W. and Tsai, Y.H. and Yang, M.H.},
  title     = {Understanding Synonymous Referring Expressions via Contrastive Features},
  journal   = IJCV,
  volume    = {130},
  number    = {10},
  month     = {October},
  year      = {2022},
  pages     = {2501-2516},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229929},
}

@article{bb234953,
  author    = {Suo, W. and Sun, M.Y. and Wang, P. and Zhang, Y.N. and Wu, Q.},
  title     = {Rethinking and Improving Feature Pyramids for One-Stage Referring Expression Comprehension},
  journal   = IP,
  volume    = {32},
  year      = {2023},
  pages     = {854-864},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229930},
}

@article{bb234954,
  author    = {Liu, X.J. and Li, L. and Wang, S.H. and Zha, Z.J. and Li, Z.C. and Tian, Q. and Huang, Q.M.},
  title     = {Entity-Enhanced Adaptive Reconstruction Network for Weakly Supervised Referring Expression Grounding},
  journal   = PAMI,
  volume    = {45},
  number    = {3},
  month     = {March},
  year      = {2023},
  pages     = {3003-3018},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229931},
}

@inproceedings{bb234955,
  author    = {Liu, X.J. and Li, L. and Wang, S.H. and Zha, Z.J. and Meng, D.C. and Huang, Q.M.},
  title     = {Adaptive Reconstruction Network for Weakly Supervised Referring Expression Grounding},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {2611-2620},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229932},
}

@article{bb234956,
  author    = {Feng, G. and Zhang, L. and Sun, J. and Hu, Z.W. and Lu, H.C.},
  title     = {Referring Segmentation via Encoder-Fused Cross-Modal Attention Network},
  journal   = PAMI,
  volume    = {45},
  number    = {6},
  month     = {June},
  year      = {2023},
  pages     = {7654-7667},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229933},
}

@inproceedings{bb234957,
  author    = {Feng, G. and Hu, Z.W. and Zhang, L. and Lu, H.C.},
  title     = {Encoder Fusion Network with Co-Attention Embedding for Referring Image Segmentation},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {15501-15510},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229934},
}

@article{bb234958,
  author    = {Liu, D.Z. and Zhou, P. and Xu, Z. and Wang, H.Z. and Li, R.X.},
  title     = {Few-Shot Temporal Sentence Grounding via Memory-Guided Semantic Learning},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {5},
  month     = {May},
  year      = {2023},
  pages     = {2491-2505},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229935},
}

@article{bb234959,
  author    = {Sun, M.J. and Xiao, J. and Lim, E.G. and Zhao, Y.},
  title     = {Cycle-Free Weakly Referring Expression Grounding With Self-Paced Learning},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {1611-1621},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229936},
}

@article{bb234960,
  author    = {Sun, M.Y. and Suo, W. and Wang, P. and Zhang, Y.N. and Wu, Q.},
  title     = {A Proposal-Free One-Stage Framework for Referring Expression Comprehension and Generation via Dense Cross-Attention},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {2446-2458},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229937},
}

@article{bb234961,
  author    = {Sun, Y.F. and Zhang, Y. and Jiang, H. and Hu, Y.L. and Yin, B.C.},
  title     = {Multi-level attention for referring expression comprehension},
  journal   = PRL,
  volume    = {172},
  year      = {2023},
  pages     = {252-258},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229938},
}

@article{bb234962,
  author    = {Wang, R. and Tang, Z. and Zhou, Q.L. and Liu, X.Q. and Hui, T.R. and Tan, Q. and Liu, S.},
  title     = {Unified Transformer with Isomorphic Branches for Natural Language Tracking},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {9},
  month     = {September},
  year      = {2023},
  pages     = {4529-4541},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229939},
}

@article{bb234963,
  author    = {Li, H. and Sun, M.J. and Xiao, J. and Lim, E.G. and Zhao, Y.},
  title     = {Fully and Weakly Supervised Referring Expression Segmentation With End-to-End Learning},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {10},
  month     = {October},
  year      = {2023},
  pages     = {5999-6012},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229940},
}

@article{bb234964,
  author    = {Liu, C. and Jiang, X.D. and Ding, H.H.},
  title     = {Instance-Specific Feature Propagation for Referring Segmentation},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {3657-3667},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229941},
}

@article{bb234965,
  author    = {Song, Y.Z. and Chen, Y.S. and Shuai, H.H.},
  title     = {Decoupling-Cooperative Framework for Referring Expression Comprehension},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {1542-1546},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229942},
}

@article{bb234966,
  author    = {Hua, G.G. and Liao, M. and Tian, S. and Zhang, Y.H. and Zou, W.B.},
  title     = {Multiple Relational Learning Network for Joint Referring Expression Comprehension and Segmentation},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {8805-8816},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229943},
}

@article{bb234967,
        AUTHOR = "Wang, W.B. and Pagnucco, M. and Xu, C.P. and Song, Y.",
        TITLE = "{InterREC}: An Interpretable Method for Referring Expression
Comprehension",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "9330-9342",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229944"}

@article{bb234968,
        AUTHOR = "Ke, J.C. and Wang, J. and Chen, J.C. and Jhuo, I.H. and Lin, C.W. and Lin, Y.Y.",
        TITLE = "{CLIPREC}: Graph-Based Domain Adaptive Network for Zero-Shot Referring
Expression Comprehension",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "2480-2492",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229945"}

@article{bb234969,
  author    = {Ke, J.C. and Wang, J. and Wong, W.K. and Toomey, A. and Wen, J.},
  title     = {Graph-Based Group Division Network for Referring Expression Comprehension},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {6},
  month     = {June},
  year      = {2025},
  pages     = {6170-6183},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229946},
}

@article{bb234970,
  author    = {Li, X.C. and Fan, B.Y. and Zhang, R.Z. and Zhao, K. and Guo, Z.H. and Zhao, Y.Q. and Li, R.},
  title     = {Inexactly Matched Referring Expression Comprehension With Rationale},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3937-3950},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229947},
}

@article{bb234971,
  author    = {Luo, G. and Zhou, Y.Y. and Sun, J. and Sun, X.S. and Ji, R.R.},
  title     = {A Survivor in the Era of Large-Scale Pretraining: An Empirical Study of One-Stage Referring Expression Comprehension},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3689-3700},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229948},
}

@article{bb234972,
  author    = {Miao, P.H. and Su, W. and Wang, G.A. and Li, X.W. and Xi, L.},
  title     = {Self-Paced Multi-Grained Cross-Modal Interaction Modeling for Referring Expression Comprehension},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {1497-1507},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229949},
}

@article{bb234973,
  author    = {Liu, Z.T. and Xu, T.Y. and Song, X.N. and Wu, X.J.},
  title     = {Unified Referring Expression Generation for Bounding Boxes and Segmentations},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {636-640},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229950},
}

@article{bb234974,
  author    = {Zhang, Y.J. and Li, Q.Z. and Pan, Y. and Zhao, X.G. and Tan, M.},
  title     = {Multi-Stage Image-Language Cross-Generative Fusion Network for Video-Based Referring Expression Comprehension},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {3256-3270},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229951},
}

@article{bb234975,
        AUTHOR = "Lu, M.C. and Li, R.F. and Feng, F.X. and Ma, Z.Y. and Wang, X.J.",
        TITLE = "{LGR-NET}: Language Guided Reasoning Network for Referring Expression
Comprehension",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "8",
        MONTH = "August",
        PAGES = "7771-7784",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229952"}

@article{bb234976,
  author    = {Yao, H.B. and Wang, L.P. and Cai, C.T. and Wang, W. and Zhang, Z. and Shang, X.B.},
  title     = {Language conditioned multi-scale visual attention networks for visual grounding},
  journal   = IVC,
  volume    = {150},
  year      = {2024},
  pages     = {105242},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229953},
}

@article{bb234977,
  author    = {Ji, Z. and Wu, J. and Wang, Y.D. and Yang, A.P. and Han, J.G.},
  title     = {Progressive Semantic Reconstruction Network for Weakly Supervised Referring Expression Grounding},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {12},
  month     = {December},
  year      = {2024},
  pages     = {13058-13070},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229954},
}

@article{bb234978,
        AUTHOR = "Qiu, H.Q. and Wang, L.X. and Zhao, T. and Meng, F.M. and Wu, Q.B. and Li, H.L.",
        TITLE = "{MCCE-REC}: {MLLM}-Driven Cross-Modal Contrastive Entropy Model for
Zero-Shot Referring Expression Comprehension",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "1",
        MONTH = "January",
        PAGES = "754-768",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT229955"}

@article{bb234979,
  author    = {Ke, J.C. and Zhang, Q. and Wang, J. and Ding, H.Q. and Zhang, P.F. and Wen, J.},
  title     = {Graph-based referring expression comprehension with expression-guided selective filtering and noun-oriented reasoning},
  journal   = PR,
  volume    = {161},
  year      = {2025},
  pages     = {111222},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229956},
}

@article{bb234980,
  author    = {Ke, J.C. and Wang, D. and Chen, J.C. and Jhuo, I.H. and Lin, C.W. and Lin, Y.Y.},
  title     = {Make Graph-Based Referring Expression Comprehension Great Again Through Expression-Guided Dynamic Gating and Regression},
  journal   = MultMed,
  volume    = {27},
  pages     = {1950-1961},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229957},
}

@article{bb234981,
  author    = {Huang, S.J. and Li, F. and Zhang, H. and Liu, S.L. and Zhang, L. and Wang, L.W.},
  title     = {A Mutual Supervision Framework for Referring Expression Segmentation and Generation},
  journal   = IJCV,
  volume    = {133},
  number    = {6},
  pages     = {3597-3612},
  month     = {June},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229958},
}

@article{bb234982,
  author    = {Ke, X. and Xu, P. and Guo, W.Z.},
  title     = {Language-Image Consistency Augmentation and Distillation Network for visual grounding},
  journal   = PR,
  volume    = {166},
  pages     = {111663},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229959},
}

@inproceedings{bb234983,
  author    = {Wang, Z.C. and Pan, Z.Y. and Peng, Z. and Cheng, J. and Xiao, L.W. and Jiang, W. and Cao, Z.G.},
  title     = {Exploring Contextual Attribute Density in Referring Expression Counting},
  booktitle = CVPR25,
  pages     = {19587-19596},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229960},
}

@inproceedings{bb234984,
  author    = {Chen, X. and Luo, Y.X. and Luo, G. and Ji, J.Y. and Ding, H.H. and Zhou, Y.},
  title     = {{DViN}: Dynamic Visual Routing Network for Weakly Supervised Referring Expression Comprehension},
  booktitle = CVPR25,
  pages     = {14347-14357},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229961},
}

@inproceedings{bb234985,
  author    = {Wang, S.J. and Kim, D. and Taalimi, A. and Sun, C. and Kuo, W.C.},
  title     = {Learning Visual Grounding from Generative Vision and Language Model},
  booktitle = WACV25,
  pages     = {8057-8067},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229962},
}

@inproceedings{bb234986,
  author    = {Wu, T.Y. and Huang, S.Y. and Wang, Y.C.A.F.},
  title     = {Data-Efficient {3D} Visual Grounding via Order-Aware Referring},
  booktitle = WACV25,
  pages     = {3107-3117},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229963},
}

@inproceedings{bb234987,
  author    = {Chu, T.Y. and Lin, Y.X. and Huang, C.C. and Hua, K.L.},
  title     = {Enhancing Anchor-based Weakly Supervised Referring Expression Comprehension with Cross-modality Attention},
  booktitle = ACCV24,
  pages     = {III: 131-147},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229964},
}

@inproceedings{bb234988,
  author    = {Nag, S. and Goswami, K. and Karanam, S.},
  title     = {Safari: Adaptive Sequence Transformer for Weakly Supervised Referring Expression Segmentation},
  booktitle = ECCV24,
  pages     = {XLIV: 485-503},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229965},
}

@inproceedings{bb234989,
  author    = {Dai, S.Y. and Liu, J. and Cheung, N.M.},
  title     = {Referring Expression Counting},
  booktitle = CVPR24,
  pages     = {16985-16995},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229966},
}

@inproceedings{bb234990,
  author    = {Han, Z. and Zhu, F. and Lao, Q. and Jiang, H.},
  title     = {Zero-Shot Referring Expression Comprehension via Structural Similarity Between Images and Captions},
  booktitle = CVPR24,
  pages     = {14364-14375},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229967},
}

@inproceedings{bb234991,
  author    = {Su, W. and Miao, P.H. and Dou, H.Z. and Li, X.},
  title     = {{ScanFormer}: Referring Expression Comprehension by Iteratively Scanning},
  booktitle = CVPR24,
  pages     = {13449-13458},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229968},
}

@inproceedings{bb234992,
  author    = {Yu, Z.H. and Li, R.},
  title     = {Revisiting Counterfactual Problems in Referring Expression Comprehension},
  booktitle = CVPR24,
  pages     = {13438-13448},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229969},
}

@inproceedings{bb234993,
  author    = {Li, X. and Qiu, K. and Wang, J.L. and Xu, X.H. and Singh, R. and Yamazaki, K. and Chen, H. and Huang, X.N. and Raj, B.},
  title     = {{R$^2$-Bench}: Benchmarking the Robustness of Referring Perception Models Under Perturbations},
  booktitle = ECCV24,
  pages     = {IX: 211-230},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229970},
}

@inproceedings{bb234994,
  author    = {Chng, Y.X. and Zheng, H. and Han, Y.Z. and Qiu, X. and Huang, G.},
  title     = {Mask Grounding for Referring Image Segmentation},
  booktitle = CVPR24,
  pages     = {26563-26573},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229971},
}

@inproceedings{bb234995,
  author    = {Shah, N.A. and VS, V. and Patel, V.M.},
  title     = {{LQMFormer}: Language-Aware Query Mask Transformer for Referring Image Segmentation},
  booktitle = CVPR24,
  pages     = {12903-12913},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229972},
}

@inproceedings{bb234996,
  author    = {Wang, W.X. and Yue, T.T. and Zhang, Y. and Guo, L.T. and He, X.J. and Wang, X.L. and Liu, J.},
  title     = {Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation},
  booktitle = CVPR24,
  pages     = {12998-13008},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229973},
}

@inproceedings{bb234997,
  author    = {Wu, Y.X. and Zhang, Z. and Xie, C. and Zhu, F. and Zhao, R.},
  title     = {Advancing Referring Expression Segmentation Beyond Single Image},
  booktitle = ICCV23,
  pages     = {2628-2638},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229974},
}

@inproceedings{bb234998,
  author    = {Kurita, S. and Katsura, N. and Onami, E.},
  title     = {{RefEgo}: Referring Expression Comprehension Dataset from First-Person Perception of {Ego4D}},
  booktitle = ICCV23,
  pages     = {15168-15178},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229975},
}

@inproceedings{bb234999,
  author    = {Qiao, Y. and Qi, Y.K. and Yu, Z. and Liu, J. and Wu, Q.},
  title     = {March in Chat: Interactive Prompting for Remote Embodied Referring Expression},
  booktitle = ICCV23,
  pages     = {15712-15721},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT229976},
}

Last update: Sep 10, 2025 at 12:00:25