% Conference paper. The acronym MAGNet is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@inproceedings{bb231900,
        AUTHOR = "Shrestha, A. and Pugdeethosapol, K. and Fang, H.W. and Qiu, Q.R.",
        TITLE = "{MAGNet}: Multi-Region Attention-Assisted Grounding of Natural Language
Queries at Phrase Level",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "8275-8282",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226879"}

% Conference paper; booktitle is the CVPR20 string macro.
@inproceedings{bb231901,
  author    = {Zhang, Z. and Zhao, Z. and Zhao, Y. and Wang, Q. and Liu, H. and Gao, L.},
  title     = {Where Does It Exist: Spatio-Temporal Video Grounding for Multi-Form Sentences},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10665-10674},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226880},
}

% Conference paper; booktitle is the CVPR20 string macro.
@inproceedings{bb231902,
  author    = {Sadhu, A. and Chen, K. and Nevatia, R.},
  title     = {Video Object Grounding Using Semantic Roles in Language Description},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10414-10424},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226881},
}

% Conference paper; booktitle is the ECCV20 string macro.
@inproceedings{bb231903,
  author    = {Ma, C.Y. and Kalantidis, Y. and AlRegib, G. and Vajda, P. and Rohrbach, M. and Kira, Z.},
  title     = {Learning to Generate Grounded Visual Captions Without Localization Supervision},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XVIII:353-370},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226882},
}

% Conference paper; booktitle is the CVPR20 string macro.
@inproceedings{bb231904,
  author    = {Zeng, R.H. and Xu, H.M. and Huang, W.B. and Chen, P.H. and Tan, M.K. and Gan, C.},
  title     = {Dense Regression Network for Video Grounding},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10284-10293},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226883},
}

% Conference paper; booktitle is the ECCV20 string macro.
@inproceedings{bb231905,
  author    = {Gupta, T. and Vahdat, A. and Chechik, G. and Yang, X.D. and Kautz, J. and Hoiem, D.},
  title     = {Contrastive Learning for Weakly Supervised Phrase Grounding},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {III:752-768},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226884},
}

% Conference paper; booktitle is the ECCV20 string macro.
@inproceedings{bb231906,
  author    = {Yang, S. and Li, G.B. and Yu, Y.Z.},
  title     = {Propagating Over Phrase Relations for One-stage Visual Grounding},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XIX:589-605},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226885},
}

% Conference paper; booktitle is the ECCV20 string macro.
@inproceedings{bb231907,
  author    = {Xiao, J.B. and Shang, X. and Yang, X. and Tang, S. and Chua, T.S.},
  title     = {Visual Relation Grounding in Videos},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {VI:447-464},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226886},
}

% Conference paper; booktitle is the CVPR20 string macro.
@inproceedings{bb231908,
  author    = {Mun, J. and Cho, M. and Han, B.},
  title     = {Local-Global Video-Text Interactions for Temporal Grounding},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10807-10816},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226887},
}

% Conference paper. The camelCase name PhraseCut is brace-protected so that
% sentence-casing bibliography styles keep its inner capital.
@inproceedings{bb231909,
        AUTHOR = "Wu, C. and Lin, Z. and Cohen, S. and Bui, T. and Maji, S.",
        TITLE = "{PhraseCut}: Language-Based Image Segmentation in the Wild",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10213-10222",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226888"}

% Conference paper; booktitle is the MDALC19 string macro.
@inproceedings{bb231910,
  author    = {Chen, L. and Zhai, M.Y. and He, J.W. and Mori, G.},
  title     = {Object Grounding via Iterative Context Reasoning},
  booktitle = MDALC19,
  year      = {2019},
  pages     = {1407-1415},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226889},
}

% Conference paper; booktitle is the WACV19 string macro.
@inproceedings{bb231911,
  author    = {Sinha, A. and Akilesh, B. and Sarkar, M. and Krishnamurthy, B.},
  title     = {Attention Based Natural Language Grounding by Navigating Virtual Environment},
  booktitle = WACV19,
  year      = {2019},
  pages     = {236-244},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226890},
}

% Conference paper; booktitle is the CVPR19 string macro.
@inproceedings{bb231912,
  author    = {Shi, J. and Xu, J. and Gong, B.Q. and Xu, C.L.},
  title     = {Not All Frames Are Equal: Weakly-Supervised Video Grounding With Contextual Similarity and Visual Clustering Losses},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {10436-10444},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226891},
}

% Conference paper. The name Align2Ground is brace-protected so that
% sentence-casing bibliography styles keep its inner capital.
@inproceedings{bb231913,
        AUTHOR = "Datta, S. and Sikka, K. and Roy, A. and Ahuja, K. and Parikh, D. and Divakaran, A.",
        TITLE = "{Align2Ground}: Weakly Supervised Phrase Grounding Guided by
Image-Caption Alignment",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "2601-2610",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226892"}

% Conference paper; booktitle is the CVPR19 string macro.
@inproceedings{bb231914,
  author    = {Fang, Z.Y. and Kong, S. and Fowlkes, C.C. and Yang, Y.Z.},
  title     = {Modularized Textual Grounding for Counterfactual Resilience},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6371-6381},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226893},
}

% Conference paper; booktitle is the CVPR18 string macro.
@inproceedings{bb231915,
  author    = {Zhuang, B. and Wu, Q. and Shen, C. and Reid, I.D. and van den Hengel, A.J.},
  title     = {Parallel Attention: A Unified Framework for Visual Object Discovery Through Dialogs and Queries},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {4252-4261},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226894},
}

% Conference paper; booktitle is the ECCV20 string macro.
@inproceedings{bb231916,
  author    = {Yang, Z.Y. and Chen, T.L. and Wang, L.W. and Luo, J.B.},
  title     = {Improving One-Stage Visual Grounding by Recursive Sub-query Construction},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XIV:387-404},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226895},
}

% Conference paper; booktitle is the ICCV19 string macro.
@inproceedings{bb231917,
  author    = {Liu, D.Q. and Zhang, H.W. and Zha, Z.J. and Wu, F.},
  title     = {Learning to Assemble Neural Module Tree Networks for Visual Grounding},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4672-4681},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226896},
}

% Conference paper; booktitle is the ICCV19 string macro.
@inproceedings{bb231918,
  author    = {Sadhu, A. and Chen, K. and Nevatia, R.},
  title     = {Zero-Shot Grounding of Objects From Natural Language Queries},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4693-4702},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226897},
}

% Conference paper; booktitle is the ICCV19 string macro.
@inproceedings{bb231919,
  author    = {Yang, Z.Y. and Gong, B.Q. and Wang, L.W. and Huang, W.B. and Yu, D. and Luo, J.B.},
  title     = {A Fast and Accurate One-Stage Approach to Visual Grounding},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4682-4692},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226898},
}

% Conference paper; booktitle is the CVPR17 string macro.
@inproceedings{bb231920,
  author    = {Rohrbach, A. and Rohrbach, M. and Tang, S. and Oh, S.J. and Schiele, B.},
  title     = {Generating Descriptions with Grounded and Co-referenced People},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {4196-4206},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226899},
}

% Conference paper; booktitle is the ICCV15 string macro.
@inproceedings{bb231921,
  author    = {Zhu, Y. and Kiros, R. and Zemel, R. and Salakhutdinov, R. and Urtasun, R. and Torralba, A.B. and Fidler, S.},
  title     = {Aligning Books and Movies: Towards Story-Like Visual Explanations by Watching Movies and Reading Books},
  booktitle = ICCV15,
  year      = {2015},
  pages     = {19-27},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT226900},
}

% Journal article. The proper noun Memex is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@article{bb231922,
        AUTHOR = "Liang, J.W. and Jiang, L. and Cao, L.L. and Kalantidis, Y. and Li, L.J. and Hauptmann, A.G.",
        TITLE = "Focal Visual-Text Attention for {Memex} Question Answering",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "8",
        MONTH = "August",
        PAGES = "1893-1908",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226902"}

% Conference paper; booktitle is the CVPR18 string macro.
@inproceedings{bb231923,
  author    = {Liang, J.W. and Jiang, L. and Cao, L.L. and Li, L.J. and Hauptmann, A.G.},
  title     = {Focal Visual-Text Attention for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6135-6143},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226903},
}

% Journal article. The acronym VQA is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@article{bb231924,
        AUTHOR = "Riquelme, F. and de Goyeneche, A. and Zhang, Y.D. and Niebles, J.C. and Soto, A.",
        TITLE = "Explaining {VQA} predictions using visual grounding and a knowledge
base",
        JOURNAL = IVC,
        VOLUME = "101",
        YEAR = "2020",
        PAGES = "103968",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226904"}

% Journal article. The token 3D is brace-protected so that
% sentence-casing bibliography styles do not turn it into 3d.
@article{bb231925,
        AUTHOR = "Zhao, L.C. and Cai, D.G. and Zhang, J. and Sheng, L. and Xu, D. and Zheng, R. and Zhao, Y.J. and Wang, L.P. and Fan, X.",
        TITLE = "Toward Explainable {3D} Grounded Visual Question Answering: A New
Benchmark and Strong Baseline",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "6",
        MONTH = "June",
        PAGES = "2935-2949",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226905"}

% Journal article; journal name is the PRL string macro.
@article{bb231926,
  author    = {Zhu, L.J. and Peng, L. and Zhou, W.N. and Yang, J.L.},
  title     = {Dual-decoder transformer network for answer grounding in visual question answering},
  journal   = PRL,
  volume    = {171},
  year      = {2023},
  pages     = {53-60},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226906},
}

% Conference paper. The acronym VTQA is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@inproceedings{bb231927,
        AUTHOR = "Chen, K. and Wu, X.Q.",
        TITLE = "{VTQA}: Visual Text Question Answering via Entity Alignment and
Cross-Media Reasoning",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "27208-27217",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226907"}

% Conference paper; booktitle is the CVPR24 string macro.
@inproceedings{bb231928,
  author    = {Di, S.Z. and Xie, W.},
  title     = {Grounded Question-Answering in Long Egocentric Videos},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {12934-12943},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226908},
}

% Conference paper. The acronym VQA is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@inproceedings{bb231929,
        AUTHOR = "Chen, C.Y. and Anjum, S. and Gurari, D.",
        TITLE = "{VQA} Therapy: Exploring Answer Differences by Visually Grounding
Answers",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15269-15279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226909"}

% Conference paper; booktitle is the WACV23 string macro.
@inproceedings{bb231930,
  author    = {Le, T.M. and Le, V. and Gupta, S.I. and Venkatesh, S. and Tran, T.},
  title     = {Guiding Visual Question Answering with Attention Priors},
  booktitle = WACV23,
  year      = {2023},
  pages     = {4370-4379},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226910},
}

% Conference paper. The acronym VQA is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@inproceedings{bb231931,
        AUTHOR = "Khan, A.U. and Kuehne, H. and Gan, C. and da Vitoria Lobo, N. and Shah, M.",
        TITLE = "Weakly Supervised Grounding for {VQA} in Vision-Language Transformers",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:652-670",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226911"}

% Conference paper. The name cViL is brace-protected so that
% sentence-casing bibliography styles keep its mixed capitalisation.
% NOTE(review): BOOKTITLE is the quoted literal ICPR22, whereas the sibling
% entries reference venue macros unquoted -- confirm whether an ICPR22 string
% macro is defined before switching this to a macro reference.
@inproceedings{bb231932,
        AUTHOR = "Gupta, K. and Gautam, D. and Mamidi, R.",
        TITLE = "{cViL}: Cross-Lingual Training of Vision-Language Models using
Knowledge Distillation",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "1734-1741",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226912"}

% Conference paper; booktitle is the CVPR22 string macro.
@inproceedings{bb231933,
  author    = {Li, Y.C. and Wang, X. and Xiao, J.B. and Ji, W. and Chua, T.S.},
  title     = {Invariant Grounding for Video Question Answering},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {2918-2927},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226913},
}

% Conference paper. The term Text-VQA is brace-protected so that
% sentence-casing bibliography styles do not lowercase the acronym.
@inproceedings{bb231934,
        AUTHOR = "Lu, X.P. and Fan, Z. and Wang, Y. and Oh, J. and Rose, C.P.",
        TITLE = "Localize, Group, and Select: Boosting {Text-VQA} by Scene Text Modeling",
        BOOKTITLE = XSAnim21,
        YEAR = "2021",
        PAGES = "2631-2639",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226914"}

% Conference paper; booktitle is the CVPR21 string macro.
@inproceedings{bb231935,
  author    = {Khan, A.U. and Kuehne, H. and Duarte, K. and Gan, C. and Lobo, N. and Shah, M.},
  title     = {Found a Reason for me? Weakly-supervised Grounded Visual Question Answering using Capsules},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {8461-8470},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226915},
}

% Conference paper. Two fixes:
%  - SQuINTing and VQA are brace-protected so that sentence-casing
%    bibliography styles keep their capitalisation.
%  - The fifth author was entered with the given name Tulio in the surname
%    slot; the surname is Ribeiro, so the name is now Ribeiro, M.T.
@inproceedings{bb231936,
        AUTHOR = "Selvaraju, R.R. and Tendulkar, P. and Parikh, D. and Horvitz, E. and Ribeiro, M.T. and Nushi, B. and Kamar, E.",
        TITLE = "{SQuINTing} at {VQA} Models: Introspecting {VQA} Models With Sub-Questions",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10000-10008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226916"}

% Conference paper; booktitle is the ECCV20 string macro.
@inproceedings{bb231937,
  author    = {Gouthaman, K.V. and Mittal, A.},
  title     = {Reducing Language Biases in Visual Question Answering with Visually-grounded Question Encoder},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XIII:18-34},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226917},
}

% Conference paper; booktitle is the ICIP20 string macro.
@inproceedings{bb231938,
  author    = {Tan, H.L. and Leong, M.C. and Xu, Q. and Li, L. and Fang, F. and Cheng, Y. and Gauthier, N. and Sun, Y. and Lim, J.H.},
  title     = {Task-Oriented Multi-Modal Question Answering For Collaborative Applications},
  booktitle = ICIP20,
  year      = {2020},
  pages     = {1426-1430},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226918},
}

% Conference paper. The all-caps name HINT is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@inproceedings{bb231939,
        AUTHOR = "Selvaraju, R.R. and Lee, S. and Shen, Y. and Jin, H. and Ghosh, S. and Heck, L. and Batra, D. and Parikh, D.",
        TITLE = "Taking a {HINT}: Leveraging Explanations to Make Vision and Language
Models More Grounded",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "2591-2600",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226919"}

% Conference paper; booktitle is the WACV19 string macro.
@inproceedings{bb231940,
  author    = {Zhang, Y. and Niebles, J.C. and Soto, A.},
  title     = {Interpretable Visual Question Answering by Visual Grounding From Attention Supervision Mining},
  booktitle = WACV19,
  year      = {2019},
  pages     = {349-357},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgrqa3.html#TT226920},
}

% Journal article; journal name is the MultMed string macro.
@article{bb231941,
  author    = {Li, X. and Jiang, S.},
  title     = {Bundled Object Context for Referring Expressions},
  journal   = MultMed,
  volume    = {20},
  year      = {2018},
  number    = {10},
  month     = {October},
  pages     = {2749-2760},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226921},
}

% Journal article; journal name is the IET-CV string macro.
@article{bb231942,
  author    = {Wang, J.M. and Cui, E. and Liu, K.L. and Sun, Y.K. and Liang, J.Y. and Yuan, C.M. and Duan, X.J. and Jin, G.H. and Chung, T.S.},
  title     = {Referring expression comprehension model with matching detection and linguistic feedback},
  journal   = IET-CV,
  volume    = {14},
  year      = {2020},
  number    = {8},
  month     = {December},
  pages     = {625-633},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226922},
}

% Journal article; journal name is the MultMed string macro.
@article{bb231943,
  author    = {Qiao, Y.Y. and Deng, C.R. and Wu, Q.},
  title     = {Referring Expression Comprehension: A Survey of Methods and Datasets},
  journal   = MultMed,
  volume    = {23},
  year      = {2021},
  pages     = {4426-4440},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226923},
}

% Journal article; journal name is the PAMI string macro.
@article{bb231944,
  author    = {Niu, Y.L. and Zhang, H.W. and Lu, Z.W. and Chang, S.F.},
  title     = {Variational Context: Exploiting Visual and Textual Context for Grounding Referring Expressions},
  journal   = PAMI,
  volume    = {43},
  year      = {2021},
  number    = {1},
  month     = {January},
  pages     = {347-359},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226924},
}

% Journal article; journal name is the PAMI string macro.
@article{bb231945,
  author    = {Yang, S. and Li, G.B. and Yu, Y.Z.},
  title     = {Relationship-Embedded Representation Learning for Grounding Referring Expressions},
  journal   = PAMI,
  volume    = {43},
  year      = {2021},
  number    = {8},
  month     = {August},
  pages     = {2765-2779},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226925},
}

% Conference paper; booktitle is the CVPR19 string macro.
@inproceedings{bb231946,
  author    = {Yang, S. and Li, G.B. and Yu, Y.Z.},
  title     = {Cross-Modal Relationship Inference for Grounding Referring Expressions},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {4140-4149},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226926},
}

% Journal article; journal name is the PAMI string macro.
@article{bb231947,
  author    = {Sun, M.J. and Xiao, J. and Lim, E.G. and Liu, S. and Goulermas, J.Y.},
  title     = {Discriminative Triad Matching and Reconstruction for Weakly Referring Expression Grounding},
  journal   = PAMI,
  volume    = {43},
  year      = {2021},
  number    = {11},
  month     = {November},
  pages     = {4189-4195},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226927},
}

% Journal article. The NUMBER field was dropped: it held the value 2022, which
% duplicates YEAR rather than naming an issue; the other MultMed entries in
% this chunk that lack a MONTH carry no NUMBER either.
@article{bb231948,
        AUTHOR = "Lin, L. and Yan, P.X. and Xu, X.Q. and Yang, S. and Zeng, K. and Li, G.B.",
        TITLE = "Structured Attention Network for Referring Image Segmentation",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "1922-1932",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226928"}

% Journal article. The NUMBER field was dropped: it held the value 2022, which
% duplicates YEAR rather than naming an issue; the other IP entries in this
% chunk carry no NUMBER either.
@article{bb231949,
        AUTHOR = "Yang, X. and Wang, H. and Xie, D. and Deng, C. and Tao, D.C.",
        TITLE = "Object-Agnostic Transformers for Video Referring Segmentation",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "2839-2849",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226929"}

% Journal article; journal name is the PRL string macro.
@article{bb231950,
  author    = {Wang, X. and Xie, D. and Zheng, Y.S.},
  title     = {Referring expression grounding by multi-context reasoning},
  journal   = PRL,
  volume    = {160},
  year      = {2022},
  pages     = {66-72},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226930},
}

% Journal article; journal name is the IP string macro.
@article{bb231951,
  author    = {Shen, H.T. and Chen, C. and Wang, P. and Gao, L.L. and Wang, M. and Song, J.K.},
  title     = {Continual Referring Expression Comprehension via Dual Modular Memorization},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {6694-6706},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226931},
}

% Journal article; journal name is the IJCV string macro.
@article{bb231952,
  author    = {Chen, Y.W. and Tsai, Y.H. and Yang, M.H.},
  title     = {Understanding Synonymous Referring Expressions via Contrastive Features},
  journal   = IJCV,
  volume    = {130},
  year      = {2022},
  number    = {10},
  month     = {October},
  pages     = {2501-2516},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226932},
}

% Journal article; journal name is the IP string macro.
@article{bb231953,
  author    = {Suo, W. and Sun, M.Y. and Wang, P. and Zhang, Y.N. and Wu, Q.},
  title     = {Rethinking and Improving Feature Pyramids for One-Stage Referring Expression Comprehension},
  journal   = IP,
  volume    = {32},
  year      = {2023},
  pages     = {854-864},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226933},
}

% Journal article; journal name is the PAMI string macro.
@article{bb231954,
  author    = {Liu, X.J. and Li, L. and Wang, S.H. and Zha, Z.J. and Li, Z.C. and Tian, Q. and Huang, Q.M.},
  title     = {Entity-Enhanced Adaptive Reconstruction Network for Weakly Supervised Referring Expression Grounding},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {3},
  month     = {March},
  pages     = {3003-3018},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226934},
}

% Conference paper; booktitle is the ICCV19 string macro.
@inproceedings{bb231955,
  author    = {Liu, X.J. and Li, L. and Wang, S.H. and Zha, Z.J. and Meng, D.C. and Huang, Q.M.},
  title     = {Adaptive Reconstruction Network for Weakly Supervised Referring Expression Grounding},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {2611-2620},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226935},
}

% Journal article; journal name is the PAMI string macro.
@article{bb231956,
  author    = {Feng, G. and Zhang, L. and Sun, J. and Hu, Z.W. and Lu, H.C.},
  title     = {Referring Segmentation via Encoder-Fused Cross-Modal Attention Network},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {6},
  month     = {June},
  pages     = {7654-7667},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226936},
}

% Conference paper; booktitle is the CVPR21 string macro.
@inproceedings{bb231957,
  author    = {Feng, G. and Hu, Z.W. and Zhang, L. and Lu, H.C.},
  title     = {Encoder Fusion Network with Co-Attention Embedding for Referring Image Segmentation},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {15501-15510},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226937},
}

% Journal article; journal name is the CirSysVideo string macro.
@article{bb231958,
  author    = {Liu, D.Z. and Zhou, P. and Xu, Z. and Wang, H.Z. and Li, R.X.},
  title     = {Few-Shot Temporal Sentence Grounding via Memory-Guided Semantic Learning},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {5},
  month     = {May},
  pages     = {2491-2505},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226938},
}

% Journal article; journal name is the MultMed string macro.
@article{bb231959,
  author    = {Sun, M.J. and Xiao, J. and Lim, E.G. and Zhao, Y.},
  title     = {Cycle-Free Weakly Referring Expression Grounding With Self-Paced Learning},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {1611-1621},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226939},
}

% Journal article; journal name is the MultMed string macro.
@article{bb231960,
  author    = {Sun, M.Y. and Suo, W. and Wang, P. and Zhang, Y.N. and Wu, Q.},
  title     = {A Proposal-Free One-Stage Framework for Referring Expression Comprehension and Generation via Dense Cross-Attention},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {2446-2458},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226940},
}

% Journal article; journal name is the PRL string macro.
@article{bb231961,
  author    = {Sun, Y.F. and Zhang, Y. and Jiang, H. and Hu, Y.L. and Yin, B.C.},
  title     = {Multi-level attention for referring expression comprehension},
  journal   = PRL,
  volume    = {172},
  year      = {2023},
  pages     = {252-258},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226941},
}

% Journal article; journal name is the CirSysVideo string macro.
@article{bb231962,
  author    = {Wang, R. and Tang, Z. and Zhou, Q.L. and Liu, X.Q. and Hui, T.R. and Tan, Q. and Liu, S.},
  title     = {Unified Transformer with Isomorphic Branches for Natural Language Tracking},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {9},
  month     = {September},
  pages     = {4529-4541},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226942},
}

% Journal article; journal name is the CirSysVideo string macro.
@article{bb231963,
  author    = {Li, H. and Sun, M.J. and Xiao, J. and Lim, E.G. and Zhao, Y.},
  title     = {Fully and Weakly Supervised Referring Expression Segmentation With End-to-End Learning},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {10},
  month     = {October},
  pages     = {5999-6012},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226943},
}

% Journal article; journal name is the MultMed string macro.
@article{bb231964,
  author    = {Liu, C. and Jiang, X.D. and Ding, H.H.},
  title     = {Instance-Specific Feature Propagation for Referring Segmentation},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {3657-3667},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226944},
}

% Journal article; journal name is the SPLetters string macro.
@article{bb231965,
  author    = {Song, Y.Z. and Chen, Y.S. and Shuai, H.H.},
  title     = {Decoupling-Cooperative Framework for Referring Expression Comprehension},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {1542-1546},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226945},
}

% Journal article; journal name is the MultMed string macro.
@article{bb231966,
  author    = {Hua, G.G. and Liao, M. and Tian, S. and Zhang, Y.H. and Zou, W.B.},
  title     = {Multiple Relational Learning Network for Joint Referring Expression Comprehension and Segmentation},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {8805-8816},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226946},
}

% Journal article. The name InterREC is brace-protected so that
% sentence-casing bibliography styles keep its capitalisation.
@article{bb231967,
        AUTHOR = "Wang, W.B. and Pagnucco, M. and Xu, C.P. and Song, Y.",
        TITLE = "{InterREC}: An Interpretable Method for Referring Expression
Comprehension",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "9330-9342",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226947"}

% Journal article. The all-caps name CLIPREC is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@article{bb231968,
        AUTHOR = "Ke, J.C. and Wang, J. and Chen, J.C. and Jhuo, I.H. and Lin, C.W. and Lin, Y.Y.",
        TITLE = "{CLIPREC}: Graph-Based Domain Adaptive Network for Zero-Shot Referring
Expression Comprehension",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "2480-2492",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226948"}

% Journal article; journal name is the CirSysVideo string macro.
@article{bb231969,
  author    = {Ke, J.C. and Wang, J. and Wong, W.K. and Toomey, A. and Wen, J.},
  title     = {Graph-Based Group Division Network for Referring Expression Comprehension},
  journal   = CirSysVideo,
  volume    = {35},
  year      = {2025},
  number    = {6},
  month     = {June},
  pages     = {6170-6183},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226949},
}

% Journal article; journal name is the MultMed string macro.
@article{bb231970,
  author    = {Li, X.C. and Fan, B.Y. and Zhang, R. and Zhao, K. and Guo, Z.H. and Zhao, Y.Q. and Li, R.},
  title     = {Inexactly Matched Referring Expression Comprehension With Rationale},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3937-3950},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226950},
}

% Journal article; journal name is the MultMed string macro.
@article{bb231971,
  author    = {Luo, G. and Zhou, Y. and Sun, J. and Sun, X.S. and Ji, R.R.},
  title     = {A Survivor in the Era of Large-Scale Pretraining: An Empirical Study of One-Stage Referring Expression Comprehension},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3689-3700},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226951},
}

% Journal article; journal name is the IP string macro.
@article{bb231972,
  author    = {Miao, P.H. and Su, W. and Wang, G.A. and Li, X.W. and Xi, L.},
  title     = {Self-Paced Multi-Grained Cross-Modal Interaction Modeling for Referring Expression Comprehension},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {1497-1507},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226952},
}

% Journal article; journal name is the SPLetters string macro.
@article{bb231973,
  author    = {Liu, Z.T. and Xu, T.Y. and Song, X.N. and Wu, X.J.},
  title     = {Unified Referring Expression Generation for Bounding Boxes and Segmentations},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {636-640},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226953},
}

% Journal article; journal name is the IP string macro.
@article{bb231974,
  author    = {Zhang, Y.J. and Li, Q.Z. and Pan, Y. and Zhao, X.G. and Tan, M.},
  title     = {Multi-Stage Image-Language Cross-Generative Fusion Network for Video-Based Referring Expression Comprehension},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {3256-3270},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226954},
}

% Journal article. The all-caps name LGR-NET is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@article{bb231975,
        AUTHOR = "Lu, M.C. and Li, R.F. and Feng, F.X. and Ma, Z.Y. and Wang, X.J.",
        TITLE = "{LGR-NET}: Language Guided Reasoning Network for Referring Expression
Comprehension",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "8",
        MONTH = "August",
        PAGES = "7771-7784",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226955"}

% Journal article; journal name is the IVC string macro.
@article{bb231976,
  author    = {Yao, H.B. and Wang, L.P. and Cai, C.T. and Wang, W. and Zhang, Z. and Shang, X.B.},
  title     = {Language conditioned multi-scale visual attention networks for visual grounding},
  journal   = IVC,
  volume    = {150},
  year      = {2024},
  pages     = {105242},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226956},
}

% Journal article; journal name is the CirSysVideo string macro.
@article{bb231977,
  author    = {Ji, Z. and Wu, J. and Wang, Y. and Yang, A. and Han, J.G.},
  title     = {Progressive Semantic Reconstruction Network for Weakly Supervised Referring Expression Grounding},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {12},
  month     = {December},
  pages     = {13058-13070},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226957},
}

% Journal article. The acronyms MCCE-REC and MLLM are brace-protected so that
% sentence-casing bibliography styles do not lowercase them.
@article{bb231978,
        AUTHOR = "Qiu, H.Q. and Wang, L.X. and Zhao, T. and Meng, F.M. and Wu, Q.B. and Li, H.L.",
        TITLE = "{MCCE-REC}: {MLLM}-Driven Cross-Modal Contrastive Entropy Model for
Zero-Shot Referring Expression Comprehension",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "1",
        MONTH = "January",
        PAGES = "754-768",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226958"}

% Journal article; journal name is the PR string macro.
@article{bb231979,
  author    = {Ke, J.C. and Zhang, Q. and Wang, J. and Ding, H.Q. and Zhang, P.F. and Wen, J.},
  title     = {Graph-based referring expression comprehension with expression-guided selective filtering and noun-oriented reasoning},
  journal   = PR,
  volume    = {161},
  year      = {2025},
  pages     = {111222},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226959},
}

@article{bb231980,
  author    = {Ke, J.C. and Wang, D. and Chen, J.C. and Jhuo, I.H. and Lin, C.W. and Lin, Y.Y.},
  title     = {Make Graph-Based Referring Expression Comprehension Great Again
Through Expression-Guided Dynamic Gating and Regression},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {1950-1961},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226960}
}

@article{bb231981,
        AUTHOR = "Huang, S.J. and Li, F. and Zhang, H. and Liu, S.L. and Zhang, L. and Wang, L.W.",
        TITLE = "A Mutual Supervision Framework for Referring Expression Segmentation
and Generation",
        JOURNAL = IJCV,
        VOLUME = "133",
        YEAR = "2025",
        NUMBER = "6",
        MONTH = "June",
        PAGES = "3597-3612",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226961"}

@article{bb231982,
  author    = {Ke, X. and Xu, P. and Guo, W.Z.},
  title     = {Language-Image Consistency Augmentation and Distillation Network for
visual grounding},
  journal   = PR,
  volume    = {166},
  year      = {2025},
  pages     = {111663},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226962}
}

@inproceedings{bb231983,
  author    = {Wang, S.J. and Kim, D. and Taalimi, A. and Sun, C. and Kuo, W.C.},
  title     = {Learning Visual Grounding from Generative Vision and Language Model},
  booktitle = WACV25,
  year      = {2025},
  pages     = {8057-8067},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226963}
}

@inproceedings{bb231984,
  author    = {Wu, T.Y. and Huang, S.Y. and Wang, Y.C.A.F.},
  title     = {Data-Efficient 3D Visual Grounding via Order-Aware Referring},
  booktitle = WACV25,
  year      = {2025},
  pages     = {3107-3117},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226964}
}

@inproceedings{bb231985,
  author    = {Chu, T.Y. and Lin, Y.X. and Huang, C.C. and Hua, K.L.},
  title     = {Enhancing Anchor-based Weakly Supervised Referring Expression
Comprehension with Cross-modality Attention},
  booktitle = ACCV24,
  year      = {2024},
  pages     = {III: 131-147},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226965}
}

@inproceedings{bb231986,
  author    = {Nag, S. and Goswami, K. and Karanam, S.},
  title     = {Safari: Adaptive Sequence Transformer for Weakly Supervised Referring
Expression Segmentation},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLIV: 485-503},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226966}
}

@inproceedings{bb231987,
  author    = {Dai, S.Y. and Liu, J. and Cheung, N.M.},
  title     = {Referring Expression Counting},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {16985-16995},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226967}
}

@inproceedings{bb231988,
  author    = {Han, Z. and Zhu, F. and Lao, Q. and Jiang, H.},
  title     = {Zero-Shot Referring Expression Comprehension via Structural
Similarity Between Images and Captions},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {14364-14375},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226968}
}

@inproceedings{bb231989,
  author    = {Su, W. and Miao, P. and Dou, H.Z. and Li, X.},
  title     = {ScanFormer: Referring Expression Comprehension by Iteratively
Scanning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13449-13458},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226969}
}

@inproceedings{bb231990,
  author    = {Yu, Z.H. and Li, R.},
  title     = {Revisiting Counterfactual Problems in Referring Expression
Comprehension},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13438-13448},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226970}
}

@inproceedings{bb231991,
        AUTHOR = "Li, X. and Qiu, K. and Wang, J.L. and Xu, X.H. and Singh, R. and Yamazaki, K. and Chen, H. and Huang, X.N. and Raj, B.",
        TITLE = "{R$^2$-Bench}: Benchmarking the Robustness of Referring Perception Models
Under Perturbations",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "IX: 211-230",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT226971"}

@inproceedings{bb231992,
  author    = {Chng, Y.X. and Zheng, H. and Han, Y.Z. and Qiu, X. and Huang, G.},
  title     = {Mask Grounding for Referring Image Segmentation},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26563-26573},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226972}
}

@inproceedings{bb231993,
  author    = {Shah, N.A. and VS, V. and Patel, V.M.},
  title     = {LQMFormer: Language-Aware Query Mask Transformer for Referring Image
Segmentation},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {12903-12913},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226973}
}

@inproceedings{bb231994,
  author    = {Wang, W.X. and Yue, T.T. and Zhang, Y. and Guo, L.T. and He, X.J. and Wang, X.L. and Liu, J.},
  title     = {Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring
Expression Segmentation},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {12998-13008},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226974}
}

@inproceedings{bb231995,
  author    = {Wu, Y.X. and Zhang, Z. and Xie, C. and Zhu, F. and Zhao, R.},
  title     = {Advancing Referring Expression Segmentation Beyond Single Image},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2628-2638},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226975}
}

@inproceedings{bb231996,
  author    = {Kurita, S. and Katsura, N. and Onami, E.},
  title     = {RefEgo: Referring Expression Comprehension Dataset from First-Person
Perception of Ego4D},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15168-15178},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226976}
}

@inproceedings{bb231997,
  author    = {Qiao, Y. and Qi, Y. and Yu, Z. and Liu, J. and Wu, Q.},
  title     = {March in Chat: Interactive Prompting for Remote Embodied Referring
Expression},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15712-15721},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226977}
}

@inproceedings{bb231998,
  author    = {Chen, Y. and Du, R. and Liang, K. and Ma, Z.Y.},
  title     = {Self-Enhanced Training Framework for Referring Expression Grounding},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {3060-3064},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226978}
}

@inproceedings{bb231999,
  author    = {Sun, J. and Luo, G. and Zhou, Y. and Sun, X.S. and Jiang, G.N. and Wang, Z.Y. and Ji, R.R.},
  title     = {RefTeacher: A Strong Baseline for Semi-Supervised Referring
Expression Comprehension},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {19144-19154},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT226979}
}

Last update: Jul 7, 2025 at 14:35:55