@inproceedings{bb213500,
        AUTHOR = "Xu, Z.P. and Lin, T.W. and Tang, H. and Li, F. and He, D.L. and Sebe, N. and Timofte, R. and Van Gool, L.J. and Ding, E.",
        TITLE = "Predict, Prevent, and Evaluate: Disentangled Text-Driven Image
Manipulation Empowered by Pre-Trained Vision-Language Model",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "18208--18217",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208578"}

@inproceedings{bb213501,
        AUTHOR = "Du, Y. and Wei, F.Y. and Zhang, Z. and Shi, M.J. and Gao, Y. and Li, G.Q.",
        TITLE = "Learning to Prompt for Open-Vocabulary Object Detection with
Vision-Language Model",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "14064--14073",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208579"}

@inproceedings{bb213502,
        AUTHOR = "Chang, Y.S. and Cao, G.H. and Narang, M. and Gao, J.F. and Suzuki, H. and Bisk, Y.",
        TITLE = "{WebQA}: Multihop and Multimodal {QA}",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "16474--16483",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208580"}

@inproceedings{bb213503,
        AUTHOR = "Zellers, R. and Lu, J. and Lu, X. and Yu, Y. and Zhao, Y.P. and Salehi, M. and Kusupati, A. and Hessel, J. and Farhadi, A. and Choi, Y.",
        TITLE = "{MERLOT RESERVE}:
Neural Script Knowledge through Vision and Language and Sound",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "16354--16366",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208581"}

@inproceedings{bb213504,
        AUTHOR = "Gupta, T. and Kamath, A. and Kembhavi, A. and Hoiem, D.",
        TITLE = "Towards General Purpose Vision Systems:
An End-to-End Task-Agnostic Vision-Language Architecture",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "16378--16388",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208582"}

@inproceedings{bb213505,
        AUTHOR = "Suris, D. and Epstein, D. and Vondrick, C.",
        TITLE = "Globetrotter: Connecting Languages by Connecting Images",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "16453--16463",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208583"}

@inproceedings{bb213506,
        AUTHOR = "Zhu, H.D. and Sadhu, A. and Zheng, Z.H. and Nevatia, R.",
        TITLE = "Utilizing Every Image Object for Semi-supervised Phrase Grounding",
        BOOKTITLE = WACV21,
        YEAR = "2021",
        PAGES = "2209--2218",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208584"}

@inproceedings{bb213507,
        AUTHOR = "Sung, Y.L. and Cho, J. and Bansal, M.",
        TITLE = "{VL-ADAPTER}: Parameter-Efficient Transfer Learning for
Vision-and-Language Tasks",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "5217--5227",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208585"}

@inproceedings{bb213508,
        AUTHOR = "Wu, D.M. and Dong, X.P. and Shao, L. and Shen, J.B.",
        TITLE = "Multi-Level Representation Learning with Semantic Alignment for
Referring Video Object Segmentation",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "4986--4995",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208586"}

@inproceedings{bb213509,
        AUTHOR = "Gao, K. and Chen, L. and Niu, Y. and Shao, J. and Xiao, J.",
        TITLE = "Classification-Then-Grounding: Reformulating Video Scene Graphs as
Temporal Bipartite Graphs",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19475--19484",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208587"}

@inproceedings{bb213510,
        AUTHOR = "Kesen, I. and Can, O.A. and Erdem, E. and Erdem, A. and Yuret, D.",
        TITLE = "Modulating Bottom-Up and Top-Down Visual Processing via
Language-Conditional Filters",
        BOOKTITLE = MULA22,
        YEAR = "2022",
        PAGES = "4609--4619",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208588"}

@inproceedings{bb213511,
        AUTHOR = "Nebbia, G. and Kovashka, A.",
        TITLE = "Doubling down: sparse grounding with an additional, almost-matching
caption for detection-oriented multimodal pretraining",
        BOOKTITLE = MULA22,
        YEAR = "2022",
        PAGES = "4641--4650",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208589"}

@inproceedings{bb213512,
        AUTHOR = "Ye, J. and Tian, J.F. and Yan, M. and Yang, X.S. and Wang, X. and Zhang, J. and He, L. and Lin, X.",
        TITLE = "Shifting More Attention to Visual Backbone: Query-modulated
Refinement Networks for End-to-End Visual Grounding",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15481--15491",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208590"}

@inproceedings{bb213513,
        AUTHOR = "Jiang, H.J. and Lin, Y.Z. and Han, D.C. and Song, S. and Huang, G.",
        TITLE = "{Pseudo-Q}: Generating Pseudo Language Queries for Visual Grounding",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15492--15502",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208591"}

@inproceedings{bb213514,
        AUTHOR = "Huang, S. and Chen, Y.L. and Jia, J.Y. and Wang, L.W.",
        TITLE = "Multi-View Transformer for {3D} Visual Grounding",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15503--15512",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208592"}

@inproceedings{bb213515,
        AUTHOR = "Chen, S. and Li, B.",
        TITLE = "Multi-Modal Dynamic Graph Transformer for Visual Grounding",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15513--15522",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208593"}

@inproceedings{bb213516,
        AUTHOR = "Mavroudi, E. and Vidal, R.",
        TITLE = "Weakly-Supervised Generation and Grounding of Visual Descriptions
with Conditional Generative Models",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15523--15533",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208594"}

@inproceedings{bb213517,
        AUTHOR = "Chen, S. and Zhao, Q.",
        TITLE = "{REX}: Reasoning-aware and Grounded Explanation",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15565--15574",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208595"}

@inproceedings{bb213518,
        AUTHOR = "Lou, C. and Han, W.J. and Lin, Y. and Zheng, Z.L.",
        TITLE = "Unsupervised Vision-Language Parsing: Seamlessly Bridging Visual
Scene Graphs with Language Structures via Dependency Relationships",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15586--15595",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208596"}

@inproceedings{bb213519,
        AUTHOR = "Luo, J.Y. and Fu, J. and Kong, X. and Gao, C. and Ren, H.B. and Shen, H. and Xia, H.X. and Liu, S.",
        TITLE = "{3D-SPS}: Single-Stage {3D} Visual Grounding via Referred Point
Progressive Selection",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "16433--16442",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208597"}

@inproceedings{bb213520,
        AUTHOR = "Cai, D. and Zhao, L.C. and Zhang, J. and Sheng, L. and Xu, D.",
        TITLE = "{3DJCG}: A Unified Framework for Joint Dense Captioning and Visual
Grounding on {3D} Point Clouds",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "16443--16452",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208598"}

@inproceedings{bb213521,
        AUTHOR = "Luo, H.C. and Zhai, W. and Zhang, J. and Cao, Y. and Tao, D.C.",
        TITLE = "Learning Affordance Grounding from Exocentric Images",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "2242--2251",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208599"}

@inproceedings{bb213522,
        AUTHOR = "Jiang, X. and Xu, X. and Zhang, J. and Shen, F.M. and Cao, Z. and Shen, H.T.",
        TITLE = "Semi-supervised Video Paragraph Grounding with Contrastive Encoder",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "2456--2465",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208600"}

@inproceedings{bb213523,
        AUTHOR = "Yu, W. and Chen, W.X. and Yin, S. and Easterbrook, S. and Garg, A.",
        TITLE = "Modular Action Concept Grounding in Semantic Video Prediction",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "3595--3604",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208601"}

@inproceedings{bb213524,
        AUTHOR = "Soldan, M. and Pardo, A. and Alcazar, J.L. and Heilbron, F.C. and Zhao, C. and Giancola, S. and Ghanem, B.",
        TITLE = "{MAD}: A Scalable Dataset for Language Grounding in Videos from Movie
Audio Descriptions",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "5016--5025",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208602"}

@inproceedings{bb213525,
        AUTHOR = "Yang, L. and Xu, Y. and Yuan, C.F. and Liu, W. and Li, B. and Hu, W.M.",
        TITLE = "Improving Visual Grounding with Visual-Linguistic Verification and
Iterative Reasoning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "9489--9498",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208603"}

@inproceedings{bb213526,
        AUTHOR = "Li, L.H. and Zhang, P.C. and Zhang, H.T. and Yang, J.W. and Li, C.Y. and Zhong, Y. and Wang, L.J. and Yuan, L. and Zhang, L. and Hwang, J.N. and Chang, K.W. and Gao, J.F.",
        TITLE = "Grounded Language-Image Pre-training",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10955--10965",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208604"}

@inproceedings{bb213527,
        AUTHOR = "Li, Y.C. and Wang, X. and Xiao, J.B. and Ji, W. and Chua, T.S.",
        TITLE = "Invariant Grounding for Video Question Answering",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "2918--2927",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208605"}

@inproceedings{bb213528,
        AUTHOR = "Yang, Z.Y. and Zhang, S.Y. and Wang, L.W. and Luo, J.B.",
        TITLE = "{SAT}: {2D} Semantics Assisted Training for {3D} Visual Grounding",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1836--1846",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208606"}

% NOTE(review): second author was listed as "Golisano, Y.K.", which looks like
% the affiliation (Golisano College) fused into the name; corrected to
% "Kong, Y." -- confirm against the proceedings.
@inproceedings{bb213529,
        AUTHOR = "Chen, J.W. and Kong, Y.",
        TITLE = "Explainable Video Entailment with Grounded Visual Evidence",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2001--2010",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208607"}

@inproceedings{bb213530,
        AUTHOR = "Zhao, L.C. and Cai, D. and Sheng, L. and Xu, D.",
        TITLE = "{3DVG-Transformer}: Relation Modeling for Visual Grounding on Point
Clouds",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2908--2917",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208608"}

@inproceedings{bb213531,
        AUTHOR = "Feng, M. and Li, Z. and Li, Q. and Zhang, L. and Zhang, X. and Zhu, G.M. and Zhang, H. and Wang, Y. and Mian, A.",
        TITLE = "Free-form Description Guided {3D} Visual Graph Network for Object
Grounding in Point Cloud",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "3702--3711",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208609"}

@inproceedings{bb213532,
        AUTHOR = "Ding, X.P. and Wang, N.N. and Zhang, S.W. and Cheng, D. and Li, X.M. and Huang, Z.Y. and Tang, M.Q. and Gao, X.B.",
        TITLE = "Support-Set Based Cross-Supervision for Video Grounding",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "11553--11562",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208610"}

@inproceedings{bb213533,
        AUTHOR = "Khandelwal, S. and Suhail, M. and Sigal, L.",
        TITLE = "Segmentation-grounded Scene Graph Generation",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "15859--15869",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208611"}

% NOTE(review): the page range below is reversed (first page 15993 is greater
% than last page 15943), so at least one number is a typo. The digits are kept
% as found; verify against the proceedings and correct.
@inproceedings{bb213534,
        AUTHOR = "Patel, S. and Wani, S. and Jain, U. and Schwing, A. and Lazebnik, S. and Savva, M. and Chang, A.X.",
        TITLE = "Interpretation of Emergent Communication in Heterogeneous
Collaborative Embodied Agents",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "15993--15943",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208612"}

@inproceedings{bb213535,
        AUTHOR = "Shi, J. and Zhong, Y. and Xu, N. and Li, Y. and Xu, C.L.",
        TITLE = "A Simple Baseline for Weakly-Supervised Scene Graph Generation",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "16373--16382",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208613"}

@inproceedings{bb213536,
        AUTHOR = "Su, R. and Yu, Q. and Xu, D.",
        TITLE = "{STVGBert}: A Visual-linguistic Transformer based Framework for
Spatio-temporal Video Grounding",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1513--1522",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208614"}

@inproceedings{bb213537,
        AUTHOR = "Cui, C.Y.Q. and Khandelwal, A. and Artzi, Y. and Snavely, N. and Averbuch Elor, H.",
        TITLE = "Who's {Waldo}? Linking People Across Text and Images",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1354--1364",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208615"}

@inproceedings{bb213538,
        AUTHOR = "Gonzalez, C. and Ayobi, N. and Hernandez, I. and Hernandez, J. and Pont Tuset, J. and Arbelaez, P.",
        TITLE = "Panoptic Narrative Grounding",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1344--1353",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208616"}

@inproceedings{bb213539,
        AUTHOR = "Hong, Y. and Li, Q. and Zhu, S.C. and Huang, S.Y.",
        TITLE = "{VLGrammar}: Grounded Grammar Induction of Vision and Language",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1645--1654",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208617"}

@inproceedings{bb213540,
        AUTHOR = "Yuan, Z.H. and Yan, X. and Liao, Y.H. and Zhang, R. and Wang, S. and Li, Z. and Cui, S.G.",
        TITLE = "{InstanceRefer}: Cooperative Holistic Understanding for Visual
Grounding on Point Clouds through Instance Multi-level Contextual
Referring",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1771--1780",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208618"}

@inproceedings{bb213541,
        AUTHOR = "Soldan, M. and Xu, M.M. and Qu, S. and Tegner, J. and Ghanem, B.",
        TITLE = "{VLG-Net}: Video-Language Graph Matching Network for Video Grounding",
        BOOKTITLE = CVEU21,
        YEAR = "2021",
        PAGES = "3217--3227",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208619"}

@inproceedings{bb213542,
        AUTHOR = "Lu, X.P. and Fan, Z. and Wang, Y. and Oh, J. and Rose, C.P.",
        TITLE = "Localize, Group, and Select: Boosting {Text-VQA} by Scene Text Modeling",
        BOOKTITLE = XSAnim21,
        YEAR = "2021",
        PAGES = "2631--2639",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208620"}

@inproceedings{bb213543,
        AUTHOR = "Tian, Y.P. and Hu, D. and Xu, C.L.",
        TITLE = "Cyclic Co-Learning of Sounding Object Visual Grounding and Sound
Separation",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "2744--2753",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208621"}

@inproceedings{bb213544,
        AUTHOR = "Nan, G.S. and Qiao, R. and Xiao, Y. and Liu, J. and Leng, S.C. and Zhang, H. and Lu, W.",
        TITLE = "Interventional Video Grounding with Dual Contrastive Learning",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "2764--2774",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208622"}

@inproceedings{bb213545,
        AUTHOR = "Zhao, Y. and Zhao, Z. and Zhang, Z. and Lin, Z.J.",
        TITLE = "Cascaded Prediction Network via Segment Tree for Temporal Video
Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "4195--4204",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208623"}

@inproceedings{bb213546,
        AUTHOR = "Liu, Y.F. and Wan, B. and Ma, L. and He, X.M.",
        TITLE = "Relation-aware Instance Refinement for Weakly Supervised Visual
Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "5608--5617",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208624"}

@inproceedings{bb213547,
        AUTHOR = "Liu, H.L. and Lin, A. and Han, X.G. and Yang, L. and Yu, Y.Z. and Cui, S.G.",
        TITLE = "{Refer-it-in-RGBD}: A Bottom-up Approach for {3D} Visual Grounding in
{RGBD} Images",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "6028--6037",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208625"}

@inproceedings{bb213548,
        AUTHOR = "Lin, X.R. and Li, G.B. and Yu, Y.Z.",
        TITLE = "Scene-Intuitive Agent for Remote Embodied Visual Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "7032--7041",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208626"}

@inproceedings{bb213549,
        AUTHOR = "Liu, D.Z. and Qu, X.Y. and Dong, J.F. and Zhou, P. and Cheng, Y. and Wei, W. and Xu, Z. and Xie, Y.",
        TITLE = "Context-aware Biaffine Localizing Network for Temporal Sentence
Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "11230--11239",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208627"}

@inproceedings{bb213550,
        AUTHOR = "Meng, Z. and Yu, L.C. and Zhang, N. and Berg, T. and Damavandi, B. and Singh, V. and Bearman, A.",
        TITLE = "Connecting What to Say With Where to Look by Modeling Human Attention
Traces",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "12674--12683",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208628"}

@inproceedings{bb213551,
        AUTHOR = "Wang, L.W. and Huang, J. and Li, Y. and Xu, K. and Yang, Z.Y. and Yu, D.",
        TITLE = "Improving Weakly Supervised Visual Grounding by Contrastive Knowledge
Distillation",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "14085--14095",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208629"}

@inproceedings{bb213552,
        AUTHOR = "Huang, B.B. and Lian, D.Z. and Luo, W.X. and Gao, S.H.",
        TITLE = "Look Before You Leap:
Learning Landmark Features for One-Stage Visual Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "16883--16892",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208630"}

@inproceedings{bb213553,
        AUTHOR = "Zhou, H. and Zhang, C.Y. and Luo, Y. and Chen, Y.J. and Hu, C.P.",
        TITLE = "Embracing Uncertainty: Decoupling and De-bias for Robust Temporal
Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "8441--8450",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208631"}

@inproceedings{bb213554,
        AUTHOR = "Khan, A.U. and Kuehne, H. and Duarte, K. and Gan, C. and Lobo, N. and Shah, M.",
        TITLE = "Found a Reason for me? Weakly-supervised Grounded Visual Question
Answering using Capsules",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "8461--8470",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208632"}

@inproceedings{bb213555,
        AUTHOR = "Zhang, S.Y. and Jiang, T. and Wang, T. and Kuang, K. and Zhao, Z. and Zhu, J. and Yu, J. and Yang, H.X. and Wu, F.",
        TITLE = "{DeVLBert}: Out-of-distribution Visio-Linguistic Pretraining with
Causality",
        BOOKTITLE = CiV21,
        YEAR = "2021",
        PAGES = "1744--1747",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208633"}

@inproceedings{bb213556,
        AUTHOR = "Nguyen, A.T. and Richards, L.E. and Kebe, G.Y. and Raff, E. and Darvish, K. and Ferraro, F. and Matuszek, C.",
        TITLE = "Practical Cross-modal Manifold Alignment for Robotic Grounded
Language Learning",
        BOOKTITLE = MULA21,
        YEAR = "2021",
        PAGES = "1613--1622",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208634"}

@inproceedings{bb213557,
        AUTHOR = "Shrestha, A. and Pugdeethosapol, K. and Fang, H. and Qiu, Q.",
        TITLE = "{MAGNet}: Multi-Region Attention-Assisted Grounding of Natural Language
Queries at Phrase Level",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "8275--8282",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208635"}

@inproceedings{bb213558,
        AUTHOR = "Zhang, Z. and Zhao, Z. and Zhao, Y. and Wang, Q. and Liu, H. and Gao, L.",
        TITLE = "Where Does It Exist: Spatio-Temporal Video Grounding for Multi-Form
Sentences",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10665--10674",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208636"}

@inproceedings{bb213559,
        AUTHOR = "Sadhu, A. and Chen, K. and Nevatia, R.",
        TITLE = "Video Object Grounding Using Semantic Roles in Language Description",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10414--10424",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208637"}

% PAGES keeps the source's "proceedings-volume:first--last" form (volume XVIII).
@inproceedings{bb213560,
        AUTHOR = "Ma, C.Y. and Kalantidis, Y. and AlRegib, G. and Vajda, P. and Rohrbach, M. and Kira, Z.",
        TITLE = "Learning to Generate Grounded Visual Captions Without Localization
Supervision",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XVIII:353--370",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208638"}

% PAGES keeps the source's "proceedings-volume:first--last" form (volume XIII).
@inproceedings{bb213561,
        AUTHOR = "Gouthaman, K.V. and Mittal, A.",
        TITLE = "Reducing Language Biases in Visual Question Answering with
Visually-grounded Question Encoder",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XIII:18--34",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208639"}

@inproceedings{bb213562,
        AUTHOR = "Zeng, R.H. and Xu, H.M. and Huang, W.B. and Chen, P.H. and Tan, M.K. and Gan, C.",
        TITLE = "Dense Regression Network for Video Grounding",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10284--10293",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208640"}

% PAGES keeps the source's "proceedings-volume:first--last" form (volume III).
@inproceedings{bb213563,
        AUTHOR = "Gupta, T. and Vahdat, A. and Chechik, G. and Yang, X.D. and Kautz, J. and Hoiem, D.",
        TITLE = "Contrastive Learning for Weakly Supervised Phrase Grounding",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "III:752--768",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208641"}

@inproceedings{bb213564,
        AUTHOR = "Tan, H.L. and Leong, M.C. and Xu, Q. and Li, L. and Fang, F. and Cheng, Y. and Gauthier, N. and Sun, Y. and Lim, J.H.",
        TITLE = "Task-Oriented Multi-Modal Question Answering For Collaborative
Applications",
        BOOKTITLE = ICIP20,
        YEAR = "2020",
        PAGES = "1426--1430",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208642"}

% PAGES keeps the source's "proceedings-volume:first--last" form (volume XIX).
@inproceedings{bb213565,
        AUTHOR = "Yang, S. and Li, G.B. and Yu, Y.Z.",
        TITLE = "Propagating Over Phrase Relations for One-stage Visual Grounding",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XIX:589--605",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208643"}

% PAGES keeps the source's "proceedings-volume:first--last" form (volume VI).
@inproceedings{bb213566,
        AUTHOR = "Xiao, J.B. and Shang, X. and Yang, X. and Tang, S. and Chua, T.S.",
        TITLE = "Visual Relation Grounding in Videos",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "VI:447--464",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208644"}

@inproceedings{bb213567,
        AUTHOR = "Mun, J. and Cho, M. and Han, B.",
        TITLE = "Local-Global Video-Text Interactions for Temporal Grounding",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10807--10816",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208645"}

@inproceedings{bb213568,
        AUTHOR = "Wu, C. and Lin, Z. and Cohen, S. and Bui, T. and Maji, S.",
        TITLE = "{PhraseCut}: Language-Based Image Segmentation in the Wild",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10213--10222",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208646"}

% NOTE(review): fifth author was listed as "Tulio Ribeiro, M.", which treats a
% given name as part of the surname; changed to "Ribeiro, M.T." -- confirm.
@inproceedings{bb213569,
        AUTHOR = "Selvaraju, R.R. and Tendulkar, P. and Parikh, D. and Horvitz, E. and Ribeiro, M.T. and Nushi, B. and Kamar, E.",
        TITLE = "{SQuINTing} at {VQA} Models: Introspecting {VQA} Models With Sub-Questions",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10000--10008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208647"}

@inproceedings{bb213570,
        AUTHOR = "Chen, L. and Zhai, M.Y. and He, J.W. and Mori, G.",
        TITLE = "Object Grounding via Iterative Context Reasoning",
        BOOKTITLE = MDALC19,
        YEAR = "2019",
        PAGES = "1407--1415",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208648"}

@inproceedings{bb213571,
        AUTHOR = "Sinha, A. and Akilesh, B. and Sarkar, M. and Krishnamurthy, B.",
        TITLE = "Attention Based Natural Language Grounding by Navigating Virtual
Environment",
        BOOKTITLE = WACV19,
        YEAR = "2019",
        PAGES = "236--244",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208649"}

@inproceedings{bb213572,
        AUTHOR = "Selvaraju, R.R. and Lee, S. and Shen, Y. and Jin, H. and Ghosh, S. and Heck, L. and Batra, D. and Parikh, D.",
        TITLE = "Taking a {HINT}: Leveraging Explanations to Make Vision and Language
Models More Grounded",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "2591--2600",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208650"}

@inproceedings{bb213573,
        AUTHOR = "Zhang, Y. and Niebles, J.C. and Soto, A.",
        TITLE = "Interpretable Visual Question Answering by Visual Grounding From
Attention Supervision Mining",
        BOOKTITLE = WACV19,
        YEAR = "2019",
        PAGES = "349--357",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208651"}

@inproceedings{bb213574,
        AUTHOR = "Shi, J. and Xu, J. and Gong, B.Q. and Xu, C.L.",
        TITLE = "Not All Frames Are Equal: Weakly-Supervised Video Grounding With
Contextual Similarity and Visual Clustering Losses",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "10436--10444",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208652"}

@inproceedings{bb213575,
        AUTHOR = "Datta, S. and Sikka, K. and Roy, A. and Ahuja, K. and Parikh, D. and Divakaran, A.",
        TITLE = "{Align2Ground}: Weakly Supervised Phrase Grounding Guided by
Image-Caption Alignment",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "2601--2610",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208653"}

@inproceedings{bb213576,
        AUTHOR = "Fang, Z.Y. and Kong, S. and Fowlkes, C.C. and Yang, Y.Z.",
        TITLE = "Modularized Textual Grounding for Counterfactual Resilience",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "6371--6381",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208654"}

@inproceedings{bb213577,
        AUTHOR = "Zhuang, B. and Wu, Q. and Shen, C. and Reid, I.D. and van den Hengel, A.J.",
        TITLE = "Parallel Attention: A Unified Framework for Visual Object Discovery
Through Dialogs and Queries",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "4252--4261",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208655"}

% PAGES keeps the source's "proceedings-volume:first--last" form (volume XIV).
@inproceedings{bb213578,
        AUTHOR = "Yang, Z.Y. and Chen, T.L. and Wang, L.W. and Luo, J.B.",
        TITLE = "Improving One-Stage Visual Grounding by Recursive Sub-query
Construction",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XIV:387--404",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208656"}

@inproceedings{bb213579,
        AUTHOR = "Liu, D.Q. and Zhang, H.W. and Zha, Z.J. and Wu, F.",
        TITLE = "Learning to Assemble Neural Module Tree Networks for Visual Grounding",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4672--4681",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208657"}

@inproceedings{bb213580,
        AUTHOR = "Sadhu, A. and Chen, K. and Nevatia, R.",
        TITLE = "Zero-Shot Grounding of Objects From Natural Language Queries",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4693--4702",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208658"}

@inproceedings{bb213581,
        AUTHOR = "Yang, Z.Y. and Gong, B.Q. and Wang, L.W. and Huang, W.B. and Yu, D. and Luo, J.B.",
        TITLE = "A Fast and Accurate One-Stage Approach to Visual Grounding",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4682--4692",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208659"}

@inproceedings{bb213582,
        AUTHOR = "Rohrbach, A. and Rohrbach, M. and Tang, S. and Oh, S.J. and Schiele, B.",
        TITLE = "Generating Descriptions with Grounded and Co-referenced People",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "4196--4206",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208660"}

@inproceedings{bb213583,
        AUTHOR = "Zhu, Y. and Kiros, R. and Zemel, R. and Salakhutdinov, R. and Urtasun, R. and Torralba, A.B. and Fidler, S.",
        TITLE = "Aligning Books and Movies: Towards Story-Like Visual Explanations by
Watching Movies and Reading Books",
        BOOKTITLE = ICCV15,
        YEAR = "2015",
        PAGES = "19--27",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208661"}

@article{bb213584,
  author    = {Li, X. and Jiang, S.},
  title     = {Bundled Object Context for Referring Expressions},
  journal   = MultMed,
  volume    = {20},
  number    = {10},
  pages     = {2749-2760},
  month     = {October},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208662},
}

@article{bb213585,
  author    = {Wang, J.M. and Cui, E. and Liu, K.L. and Sun, Y.K. and Liang, J.Y. and Yuan, C.M. and Duan, X.J. and Jin, G.H. and Chung, T.S.},
  title     = {Referring expression comprehension model with matching detection and
linguistic feedback},
  journal   = IET-CV,
  volume    = {14},
  number    = {8},
  pages     = {625-633},
  month     = {December},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208663},
}

@article{bb213586,
  author    = {Qiao, Y.Y. and Deng, C.R. and Wu, Q.},
  title     = {Referring Expression Comprehension: A Survey of Methods and Datasets},
  journal   = MultMed,
  volume    = {23},
  pages     = {4426-4440},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208664},
}

@article{bb213587,
  author    = {Niu, Y.L. and Zhang, H.W. and Lu, Z.W. and Chang, S.F.},
  title     = {Variational Context: Exploiting Visual and Textual Context for
Grounding Referring Expressions},
  journal   = PAMI,
  volume    = {43},
  number    = {1},
  pages     = {347-359},
  month     = {January},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208665},
}

@article{bb213588,
  author    = {Yang, S. and Li, G.B. and Yu, Y.Z.},
  title     = {Relationship-Embedded Representation Learning for Grounding Referring
Expressions},
  journal   = PAMI,
  volume    = {43},
  number    = {8},
  pages     = {2765-2779},
  month     = {August},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208666},
}

@inproceedings{bb213589,
  author    = {Yang, S. and Li, G.B. and Yu, Y.Z.},
  title     = {Cross-Modal Relationship Inference for Grounding Referring Expressions},
  booktitle = CVPR19,
  pages     = {4140-4149},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208667},
}

@article{bb213590,
  author    = {Sun, M.J. and Xiao, J. and Lim, E.G. and Liu, S. and Goulermas, J.Y.},
  title     = {Discriminative Triad Matching and Reconstruction for Weakly Referring
Expression Grounding},
  journal   = PAMI,
  volume    = {43},
  number    = {11},
  pages     = {4189-4195},
  month     = {November},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208668},
}

% NOTE(review): the NUMBER field was dropped from the entry below. The source
% listed "2022" (the publication year) as the issue number, which looks like
% an export artifact; the other MultMed yearly-volume entry in this file
% (vol. 23, 2021) carries no NUMBER either. Confirm against the publisher
% record if an issue number is ever needed.
@article{bb213591,
        AUTHOR = "Lin, L. and Yan, P.X. and Xu, X.Q. and Yang, S. and Zeng, K. and Li, G.B.",
        TITLE = "Structured Attention Network for Referring Image Segmentation",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "1922-1932",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208669"}

% NOTE(review): the NUMBER field was dropped from the entry below. The source
% listed "2022" (the publication year) as the issue number, which looks like
% an export artifact; the other IP vol. 31 (2022) entry in this file carries
% no NUMBER either. Confirm against the publisher record if an issue number
% is ever needed.
@article{bb213592,
        AUTHOR = "Yang, X. and Wang, H. and Xie, D. and Deng, C. and Tao, D.C.",
        TITLE = "Object-Agnostic Transformers for Video Referring Segmentation",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "2839-2849",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803refex3.html#TT208670"}

@article{bb213593,
  author    = {Wang, X. and Xie, D. and Zheng, Y.S.},
  title     = {Referring expression grounding by multi-context reasoning},
  journal   = PRL,
  volume    = {160},
  pages     = {66-72},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208671},
}

@article{bb213594,
  author    = {Shen, H.T. and Chen, C. and Wang, P. and Gao, L.L. and Wang, M. and Song, J.K.},
  title     = {Continual Referring Expression Comprehension via Dual Modular
Memorization},
  journal   = IP,
  volume    = {31},
  pages     = {6694-6706},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208672},
}

@article{bb213595,
  author    = {Chen, Y.W. and Tsai, Y.H. and Yang, M.H.},
  title     = {Understanding Synonymous Referring Expressions via Contrastive Features},
  journal   = IJCV,
  volume    = {130},
  number    = {10},
  pages     = {2501-2516},
  month     = {October},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208673},
}

@article{bb213596,
  author    = {Suo, W. and Sun, M.Y. and Wang, P. and Zhang, Y.N. and Wu, Q.},
  title     = {Rethinking and Improving Feature Pyramids for One-Stage Referring
Expression Comprehension},
  journal   = IP,
  volume    = {32},
  pages     = {854-864},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208674},
}

@article{bb213597,
  author    = {Liu, X.J. and Li, L. and Wang, S.H. and Zha, Z.J. and Li, Z.C. and Tian, Q. and Huang, Q.M.},
  title     = {Entity-Enhanced Adaptive Reconstruction Network for Weakly Supervised
Referring Expression Grounding},
  journal   = PAMI,
  volume    = {45},
  number    = {3},
  pages     = {3003-3018},
  month     = {March},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208675},
}

@inproceedings{bb213598,
  author    = {Liu, X.J. and Li, L. and Wang, S.H. and Zha, Z.J. and Meng, D.C. and Huang, Q.M.},
  title     = {Adaptive Reconstruction Network for Weakly Supervised Referring
Expression Grounding},
  booktitle = ICCV19,
  pages     = {2611-2620},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208676},
}

@article{bb213599,
  author    = {Feng, G. and Zhang, L. and Sun, J. and Hu, Z.W. and Lu, H.C.},
  title     = {Referring Segmentation via Encoder-Fused Cross-Modal Attention
Network},
  journal   = PAMI,
  volume    = {45},
  number    = {6},
  pages     = {7654-7667},
  month     = {June},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803refex3.html#TT208677},
}

Last update: Apr 18, 2024 at 11:38:49