@inproceedings{bb179000,
        AUTHOR = "Bolanos, M. and Peris, A. and Casacuberta, F. and Radeva, P.",
        TITLE = "{VIBIKNet}: Visual Bidirectional Kernelized Network for Visual Question
Answering",
        BOOKTITLE = IbPRIA17,
        YEAR = "2017",
        PAGES = "372--380",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174336"}

@inproceedings{bb179001,
        AUTHOR = "Gao, P. and Li, H. S. and Li, S. and Lu, P. and Li, Y. K. and Hoi, S. C. H. and Wang, X. G.",
        TITLE = "Question-Guided Hybrid Convolution for Visual Question Answering",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "I: 485--501",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174337"}

@inproceedings{bb179002,
        AUTHOR = "Li, Y. K. and Duan, N. and Zhou, B. L. and Chu, X. and Ouyang, W. L. and Wang, X. G. and Zhou, M.",
        TITLE = "Visual Question Generation as Dual Task of Visual Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6116--6124",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174338"}

@inproceedings{bb179003,
        AUTHOR = "Gao, P. and Jiang, Z. K. and You, H. X. and Lu, P. and Hoi, S. C. H. and Wang, X. G. and Li, H. S.",
        TITLE = "Dynamic Fusion With Intra- and Inter-Modality Attention Flow for Visual
Question Answering",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "6632--6641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174339"}

@inproceedings{bb179004,
        AUTHOR = "Lin, Y. T. and Pang, Z. Y. and Li, Y. and Wang, D. H.",
        TITLE = "Simple and effective visual question answering in a single modality",
        BOOKTITLE = ICIP16,
        YEAR = "2016",
        PAGES = "2276--2280",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174340"}

@inproceedings{bb179005,
        AUTHOR = "Kafle, K. and Kanan, C.",
        TITLE = "An Analysis of Visual Question Answering Algorithms",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1983--1991",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174341"}

@inproceedings{bb179006,
        AUTHOR = "Kafle, K. and Kanan, C.",
        TITLE = "Answer-Type Prediction for Visual Question Answering",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "4976--4984",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174342"}

@inproceedings{bb179007,
        AUTHOR = "Wang, P. and Wu, Q. and Shen, C. and van den Hengel, A. J.",
        TITLE = "The {VQA-Machine}: Learning How to Use Existing Vision Algorithms to
Answer New Questions",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "3909--3918",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174343"}

@inproceedings{bb179008,
        AUTHOR = "Yu, D. and Fu, J. and Mei, T. and Rui, Y.",
        TITLE = "Multi-level Attention Networks for Visual Question Answering",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "4187--4195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174344"}

@inproceedings{bb179009,
        AUTHOR = "Kembhavi, A. and Seo, M. and Schwenk, D. and Choi, J. and Farhadi, A. and Hajishirzi, H.",
        TITLE = "Are You Smarter Than a Sixth Grader? Textbook Question Answering for
Multimodal Machine Comprehension",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "5376--5384",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174345"}

@inproceedings{bb179010,
        AUTHOR = "Ganju, S. and Russakovsky, O. and Gupta, A.",
        TITLE = "What's in a Question:
Using Visual Questions as a Form of Supervision",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6422--6431",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174346"}

@inproceedings{bb179011,
        AUTHOR = "Ramakrishnan, S. K. and Pal, A. and Sharma, G. and Mittal, A.",
        TITLE = "An Empirical Evaluation of Visual Question Answering for Novel
Objects",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "7312--7321",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174347"}

@inproceedings{bb179012,
        AUTHOR = "Maharaj, T. and Ballas, N. and Rohrbach, A. and Courville, A. and Pal, C.",
        TITLE = "A Dataset and Exploration of Models for Understanding Video Data
through Fill-in-the-Blank Question-Answering",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "7359--7368",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174348"}

@inproceedings{bb179013,
        AUTHOR = "Xu, H. J. and Saenko, K.",
        TITLE = "Ask, Attend and Answer:
Exploring Question-Guided Spatial Attention for Visual Question Answering",
        BOOKTITLE = ECCV16,
        YEAR = "2016",
        PAGES = "VII: 451--466",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174349"}

@inproceedings{bb179014,
        AUTHOR = "Jabri, A. and Joulin, A. and van der Maaten, L.",
        TITLE = "Revisiting Visual Question Answering Baselines",
        BOOKTITLE = ECCV16,
        YEAR = "2016",
        PAGES = "VIII: 727--739",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174350"}

@inproceedings{bb179015,
        AUTHOR = "Yang, Z. C. and He, X. D. and Gao, J. F. and Deng, L. and Smola, A.",
        TITLE = "Stacked Attention Networks for Image Question Answering",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "21--29",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174351"}

@inproceedings{bb179016,
        AUTHOR = "Sadeghi, F. and Divvala, S. K. and Farhadi, A.",
        TITLE = "{VisKE}: Visual knowledge extraction and question answering by visual
verification of relation phrases",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "1456--1464",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174352"}

@inproceedings{bb179017,
        AUTHOR = "Liu, Y. and Liu, J. and Wang, D. and Cheng, J.",
        TITLE = "A robust multivariate reranking algorithm for Question Answering
enrichment",
        BOOKTITLE = ICIP12,
        YEAR = "2012",
        PAGES = "1917--1920",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174353"}

@inproceedings{bb179018,
        AUTHOR = "Varekamp, C. and van de Walle, P. and de Putter, M.",
        TITLE = "Question interface for {3D} picture creation on an autostereoscopic
digital picture frame",
        BOOKTITLE = "3DTV09",
        YEAR = "2009",
        PAGES = "1--4",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT174354"}

@article{bb179019,
        AUTHOR = "Liang, J. W. and Jiang, L. and Cao, L. L. and Kalantidis, Y. and Li, L. J. and Hauptmann, A. G.",
        TITLE = "Focal Visual-Text Attention for {Memex} Question Answering",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "1893--1908",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174356"}

@inproceedings{bb179020,
        AUTHOR = "Liang, J. W. and Jiang, L. and Cao, L. L. and Li, L. J. and Hauptmann, A. G.",
        TITLE = "Focal Visual-Text Attention for Visual Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6135--6143",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174357"}

@article{bb179021,
        AUTHOR = "Riquelme, F. and de Goyeneche, A. and Zhang, Y. D. and Niebles, J. C. and Soto, A.",
        TITLE = "Explaining {VQA} predictions using visual grounding and a knowledge
base",
        JOURNAL = IVC,
        VOLUME = "101",
        YEAR = "2020",
        PAGES = "103968",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174358"}

@article{bb179022,
        AUTHOR = "Niu, Y. L. and Zhang, H. W. and Lu, Z. W. and Chang, S. F.",
        TITLE = "Variational Context: Exploiting Visual and Textual Context for
Grounding Referring Expressions",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "347--359",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174359"}

@article{bb179023,
        AUTHOR = "Yang, S. and Li, G. and Yu, Y. Z.",
        TITLE = "Relationship-Embedded Representation Learning for Grounding Referring
Expressions",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "2765--2779",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174360"}

@inproceedings{bb179024,
        AUTHOR = "Yang, S. and Li, G. and Yu, Y. Z.",
        TITLE = "Cross-Modal Relationship Inference for Grounding Referring Expressions",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "4140--4149",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174361"}

@article{bb179025,
        AUTHOR = "Yang, Z. Y. and Kumar, T. and Chen, T. L. and Su, J. S. and Luo, J. B.",
        TITLE = "Grounding-Tracking-Integration",
        JOURNAL = CirSysVideo,
        VOLUME = "31",
        YEAR = "2021",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "3433--3443",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174362"}

@article{bb179026,
        AUTHOR = "Zhang, W. X. and Ma, C. and Wu, Q. and Yang, X. K.",
        TITLE = "Language-Guided Navigation via Cross-Modal Grounding and Alternate
Adversarial Learning",
        JOURNAL = CirSysVideo,
        VOLUME = "31",
        YEAR = "2021",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "3469--3481",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174363"}

@article{bb179027,
        AUTHOR = "Zhai, S. L. and Guo, G. B. and Yuan, F. J. and Liu, Y. and Wang, X. W.",
        TITLE = "{VSE-fs}: Fast Full-Sample Visual Semantic Embedding",
        JOURNAL = IEEE_Int_Sys,
        VOLUME = "36",
        YEAR = "2021",
        NUMBER = "4",
        MONTH = jul,
        PAGES = "3--12",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174364"}

@article{bb179028,
        AUTHOR = "Sun, M. J. and Xiao, J. and Lim, E. G. and Liu, S. and Goulermas, J. Y.",
        TITLE = "Discriminative Triad Matching and Reconstruction for Weakly Referring
Expression Grounding",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "4189--4195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174365"}

@article{bb179029,
        AUTHOR = "Bargal, S. A. and Zunino, A. and Petsiuk, V. and Zhang, J. M. and Saenko, K. and Murino, V. and Sclaroff, S.",
        TITLE = "Guided Zoom: Zooming into Network Evidence to Refine Fine-Grained
Model Decisions",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "4196--4202",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174366"}

@article{bb179030,
        AUTHOR = "Yang, W. F. and Zhang, T. Z. and Zhang, Y. D. and Wu, F.",
        TITLE = "Local Correspondence Network for Weakly Supervised Temporal Sentence
Grounding",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "3252--3262",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174367"}

@inproceedings{bb179031,
        AUTHOR = "Luo, W. and Zhang, T. Z. and Yang, W. and Liu, J. G. and Mei, T. and Wu, F. and Zhang, Y. D.",
        TITLE = "Action Unit Memory Network for Weakly Supervised Temporal Action
Localization",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "9964--9974",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174368"}

@inproceedings{bb179032,
        AUTHOR = "Song, S. and Lin, X. D. and Liu, J. Y. and Guo, Z. M. and Chang, S. F.",
        TITLE = "Co-Grounding Networks with Semantic Attention for Referring
Expression Comprehension in Videos",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "1346--1355",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174369"}

@inproceedings{bb179033,
        AUTHOR = "Tian, Y. P. and Hu, D. and Xu, C. L.",
        TITLE = "Cyclic Co-Learning of Sounding Object Visual Grounding and Sound
Separation",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "2744--2753",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174370"}

@inproceedings{bb179034,
        AUTHOR = "Nan, G. and Qiao, R. and Xiao, Y. and Liu, J. and Leng, S. and Zhang, H. and Lu, W.",
        TITLE = "Interventional Video Grounding with Dual Contrastive Learning",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "2764--2774",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174371"}

@inproceedings{bb179035,
        AUTHOR = "Zhao, Y. and Zhao, Z. and Zhang, Z. and Lin, Z. J.",
        TITLE = "Cascaded Prediction Network via Segment Tree for Temporal Video
Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "4195--4204",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174372"}

@inproceedings{bb179036,
        AUTHOR = "Liu, Y. and Wan, B. and Ma, L. and He, X. M.",
        TITLE = "Relation-aware Instance Refinement for Weakly Supervised Visual
Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "5608--5617",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174373"}

@inproceedings{bb179037,
        AUTHOR = "Liu, H. and Lin, A. and Han, X. G. and Yang, L. and Yu, Y. Z. and Cui, S. G.",
        TITLE = "{Refer-it-in-RGBD}: A Bottom-up Approach for {3D} Visual Grounding in
{RGBD} Images",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "6028--6037",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174374"}

@inproceedings{bb179038,
        AUTHOR = "Lin, X. R. and Li, G. and Yu, Y. Z.",
        TITLE = "Scene-Intuitive Agent for Remote Embodied Visual Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "7032--7041",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174375"}

@inproceedings{bb179039,
        AUTHOR = "Liu, D. and Qu, X. Y. and Dong, J. F. and Zhou, P. and Cheng, Y. and Wei, W. and Xu, Z. and Xie, Y.",
        TITLE = "Context-aware Biaffine Localizing Network for Temporal Sentence
Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "11230--11239",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174376"}

@inproceedings{bb179040,
        AUTHOR = "Meng, Z. and Yu, L. C. and Zhang, N. and Berg, T. and Damavandi, B. and Singh, V. and Bearman, A.",
        TITLE = "Connecting What to Say With Where to Look by Modeling Human Attention
Traces",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "12674--12683",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174377"}

@inproceedings{bb179041,
        AUTHOR = "Sun, M. J. and Xiao, J. and Lim, E. G.",
        TITLE = "Iterative Shrinking for Referring Expression Grounding Using Deep
Reinforcement Learning",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "14055--14064",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174378"}

@inproceedings{bb179042,
        AUTHOR = "Wang, L. and Huang, J. and Li, Y. and Xu, K. and Yang, Z. Y. and Yu, D.",
        TITLE = "Improving Weakly Supervised Visual Grounding by Contrastive Knowledge
Distillation",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "14085--14095",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174379"}

@inproceedings{bb179043,
        AUTHOR = "Feng, G. and Hu, Z. W. and Zhang, L. and Lu, H. C.",
        TITLE = "Encoder Fusion Network with Co-Attention Embedding for Referring
Image Segmentation",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "15501--15510",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174380"}

@inproceedings{bb179044,
        AUTHOR = "Huang, B. and Lian, D. Z. and Luo, W. X. and Gao, S. H.",
        TITLE = "Look Before You Leap:
Learning Landmark Features for One-Stage Visual Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "16883--16892",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174381"}

@inproceedings{bb179045,
        AUTHOR = "Zhou, H. and Zhang, C. Y. and Luo, Y. and Chen, Y. J. and Hu, C. P.",
        TITLE = "Embracing Uncertainty: Decoupling and De-bias for Robust Temporal
Grounding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "8441--8450",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174382"}

@inproceedings{bb179046,
        AUTHOR = "Whitehead, S. and Wu, H. and Ji, H. and Feris, R. and Saenko, K.",
        TITLE = "Separating Skills and Concepts for Novel Visual Question Answering",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "5628--5637",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174383"}

@inproceedings{bb179047,
        AUTHOR = "Khan, A. U. and Kuehne, H. and Duarte, K. and Gan, C. and Lobo, N. and Shah, M.",
        TITLE = "Found a Reason for me? Weakly-supervised Grounded Visual Question
Answering using Capsules",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "8461--8470",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174384"}

@inproceedings{bb179048,
        AUTHOR = "Zhang, S. Y. and Jiang, T. and Wang, T. and Kuang, K. and Zhao, Z. and Zhu, J. and Yu, J. and Yang, H. X. and Wu, F.",
        TITLE = "{DeVLBert}: Out-of-distribution Visio-Linguistic Pretraining with
Causality",
        BOOKTITLE = CiV21,
        YEAR = "2021",
        PAGES = "1744--1747",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174385"}

@inproceedings{bb179049,
        AUTHOR = "Nguyen, A. T. and Richards, L. E. and Kebe, G. Y. and Raff, E. and Darvish, K. and Ferraro, F. and Matuszek, C.",
        TITLE = "Practical Cross-modal Manifold Alignment for Robotic Grounded
Language Learning",
        BOOKTITLE = MULA21,
        YEAR = "2021",
        PAGES = "1613--1622",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174386"}

@inproceedings{bb179050,
        AUTHOR = "Shrestha, A. and Pugdeethosapol, K. and Fang, H. and Qiu, Q.",
        TITLE = "{MAGNet}: Multi-Region Attention-Assisted Grounding of Natural Language
Queries at Phrase Level",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "8275--8282",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174387"}

@inproceedings{bb179051,
        AUTHOR = "Zhang, Z. and Zhao, Z. and Zhao, Y. and Wang, Q. and Liu, H. and Gao, L.",
        TITLE = "Where Does It Exist: Spatio-Temporal Video Grounding for Multi-Form
Sentences",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10665--10674",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174388"}

@inproceedings{bb179052,
        AUTHOR = "Burns, A. and Tan, R. and Saenko, K. and Sclaroff, S. and Plummer, B.",
        TITLE = "Language Features Matter: Effective Language Representations for
Vision-Language Tasks",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "7473--7482",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174389"}

@inproceedings{bb179053,
        AUTHOR = "Sadhu, A. and Chen, K. and Nevatia, R.",
        TITLE = "Video Object Grounding Using Semantic Roles in Language Description",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10414--10424",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174390"}

@inproceedings{bb179054,
        AUTHOR = "Ma, C. Y. and Kalantidis, Y. and AlRegib, G. and Vajda, P. and Rohrbach, M. and Kira, Z.",
        TITLE = "Learning to Generate Grounded Visual Captions Without Localization
Supervision",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XVIII: 353--370",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174391"}

@inproceedings{bb179055,
        AUTHOR = "Gouthaman, K. V. and Mittal, A.",
        TITLE = "Reducing Language Biases in Visual Question Answering with
Visually-grounded Question Encoder",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XIII: 18--34",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174392"}

@inproceedings{bb179056,
        AUTHOR = "Zeng, R. H. and Xu, H. M. and Huang, W. B. and Chen, P. H. and Tan, M. K. and Gan, C.",
        TITLE = "Dense Regression Network for Video Grounding",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10284--10293",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174393"}

@inproceedings{bb179057,
        AUTHOR = "Gupta, T. and Vahdat, A. and Chechik, G. and Yang, X. D. and Kautz, J. and Hoiem, D.",
        TITLE = "Contrastive Learning for Weakly Supervised Phrase Grounding",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "III: 752--768",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174394"}

@inproceedings{bb179058,
        AUTHOR = "Tan, H. L. and Leong, M. C. and Xu, Q. and Li, L. and Fang, F. and Cheng, Y. and Gauthier, N. and Sun, Y. and Lim, J. H.",
        TITLE = "Task-Oriented Multi-Modal Question Answering For Collaborative
Applications",
        BOOKTITLE = ICIP20,
        YEAR = "2020",
        PAGES = "1426--1430",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174395"}

@inproceedings{bb179059,
        AUTHOR = "Yang, S. and Li, G. B. and Yu, Y. Z.",
        TITLE = "Propagating Over Phrase Relations for One-stage Visual Grounding",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XIX: 589--605",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174396"}

@inproceedings{bb179060,
        AUTHOR = "Xiao, J. B. and Shang, X. and Yang, X. and Tang, S. and Chua, T. S.",
        TITLE = "Visual Relation Grounding in Videos",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "VI: 447--464",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174397"}

@inproceedings{bb179061,
        AUTHOR = "Mun, J. and Cho, M. and Han, B.",
        TITLE = "Local-Global Video-Text Interactions for Temporal Grounding",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10807--10816",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174398"}

@inproceedings{bb179062,
        AUTHOR = "Wu, C. and Lin, Z. and Cohen, S. and Bui, T. and Maji, S.",
        TITLE = "{PhraseCut}: Language-Based Image Segmentation in the Wild",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10213--10222",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174399"}

@inproceedings{bb179063,
        AUTHOR = "Selvaraju, R. R. and Tendulkar, P. and Parikh, D. and Horvitz, E. and Ribeiro, M. T. and Nushi, B. and Kamar, E.",
        TITLE = "{SQuINTing} at {VQA} Models: Introspecting {VQA} Models With Sub-Questions",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10000--10008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174400"}

@inproceedings{bb179064,
        AUTHOR = "Chen, L. and Zhai, M. Y. and He, J. W. and Mori, G.",
        TITLE = "Object Grounding via Iterative Context Reasoning",
        BOOKTITLE = MDALC19,
        YEAR = "2019",
        PAGES = "1407--1415",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174401"}

@inproceedings{bb179065,
        AUTHOR = "Selvaraju, R. R. and Lee, S. and Shen, Y. and Jin, H. and Ghosh, S. and Heck, L. and Batra, D. and Parikh, D.",
        TITLE = "Taking a {HINT}: Leveraging Explanations to Make Vision and Language
Models More Grounded",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "2591--2600",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174402"}

@inproceedings{bb179066,
        AUTHOR = "Zhang, Y. and Niebles, J. C. and Soto, A.",
        TITLE = "Interpretable Visual Question Answering by Visual Grounding From
Attention Supervision Mining",
        BOOKTITLE = WACV19,
        YEAR = "2019",
        PAGES = "349--357",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174403"}

@inproceedings{bb179067,
        AUTHOR = "Shi, J. and Xu, J. and Gong, B. and Xu, C. L.",
        TITLE = "Not All Frames Are Equal: Weakly-Supervised Video Grounding With
Contextual Similarity and Visual Clustering Losses",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "10436--10444",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174404"}

@inproceedings{bb179068,
        AUTHOR = "Datta, S. and Sikka, K. and Roy, A. and Ahuja, K. and Parikh, D. and Divakaran, A.",
        TITLE = "{Align2Ground}: Weakly Supervised Phrase Grounding Guided by
Image-Caption Alignment",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "2601--2610",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174405"}

@inproceedings{bb179069,
        AUTHOR = "Fang, Z. Y. and Kong, S. and Fowlkes, C. C. and Yang, Y. Z.",
        TITLE = "Modularized Textual Grounding for Counterfactual Resilience",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "6371--6381",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174406"}

@inproceedings{bb179070,
        AUTHOR = "Liu, X. J. and Li, L. and Wang, S. H. and Zha, Z. J. and Meng, D. C. and Huang, Q. M.",
        TITLE = "Adaptive Reconstruction Network for Weakly Supervised Referring
Expression Grounding",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "2611--2620",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174407"}

@inproceedings{bb179071,
        AUTHOR = "Zhuang, B. and Wu, Q. and Shen, C. and Reid, I. D. and van den Hengel, A. J.",
        TITLE = "Parallel Attention: A Unified Framework for Visual Object Discovery
Through Dialogs and Queries",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "4252--4261",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174408"}

@inproceedings{bb179072,
        AUTHOR = "Yang, Z. Y. and Chen, T. L. and Wang, L. and Luo, J. B.",
        TITLE = "Improving One-Stage Visual Grounding by Recursive Sub-query
Construction",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XIV: 387--404",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174409"}

@inproceedings{bb179073,
        AUTHOR = "Zhang, H. W. and Niu, Y. L. and Chang, S. F.",
        TITLE = "Grounding Referring Expressions in Images by Variational Context",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "4158--4166",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174410"}

@inproceedings{bb179074,
        AUTHOR = "Yu, L. C. and Lin, Z. and Shen, X. H. and Yang, J. M. and Lu, X. and Bansal, M. and Berg, T. L.",
        TITLE = "{MAttNet}: Modular Attention Network for Referring Expression
Comprehension",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "1307--1315",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174411"}

@inproceedings{bb179075,
        AUTHOR = "Liu, D. Q. and Zhang, H. W. and Zha, Z. J. and Wu, F.",
        TITLE = "Learning to Assemble Neural Module Tree Networks for Visual Grounding",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4672--4681",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174412"}

@inproceedings{bb179076,
        AUTHOR = "Sadhu, A. and Chen, K. and Nevatia, R.",
        TITLE = "Zero-Shot Grounding of Objects From Natural Language Queries",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4693--4702",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174413"}

@inproceedings{bb179077,
        AUTHOR = "Yang, Z. Y. and Gong, B. Q. and Wang, L. W. and Huang, W. B. and Yu, D. and Luo, J. B.",
        TITLE = "A Fast and Accurate One-Stage Approach to Visual Grounding",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4682--4692",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174414"}

@inproceedings{bb179078,
        AUTHOR = "Rohrbach, A. and Rohrbach, M. and Tang, S. and Oh, S. J. and Schiele, B.",
        TITLE = "Generating Descriptions with Grounded and Co-referenced People",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "4196--4206",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174415"}

@inproceedings{bb179079,
        AUTHOR = "Zhu, Y. and Kiros, R. and Zemel, R. and Salakhutdinov, R. and Urtasun, R. and Torralba, A. B. and Fidler, S.",
        TITLE = "Aligning Books and Movies: Towards Story-Like Visual Explanations by
Watching Movies and Reading Books",
        BOOKTITLE = ICCV15,
        YEAR = "2015",
        PAGES = "19--27",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT174416"}

@article{bb179080,
        AUTHOR = "Su, H. H. and Chen, T. W. and Kao, C. C. and Hsu, W. H. and Chien, S. Y.",
        TITLE = "Preference-Aware View Recommendation System for Scenic Photos Based on
Bag-of-Aesthetics-Preserving Features",
        JOURNAL = MultMed,
        VOLUME = "14",
        YEAR = "2012",
        NUMBER = "3",
        PAGES = "833--843",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT174417"}

@article{bb179081,
        AUTHOR = "Chen, L. and Xu, D. and Tsang, I. W. and Luo, J.",
        TITLE = "Tag-Based Image Retrieval Improved by Augmented Features and
Group-Based Refinement",
        JOURNAL = MultMed,
        VOLUME = "14",
        YEAR = "2012",
        NUMBER = "4",
        PAGES = "1057--1067",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT174418"}

@article{bb179082,
        AUTHOR = "Chen, L. and Xu, D. and Tsang, I. W. and Li, X.",
        TITLE = "Spectral Embedded Hashing for Scalable Image Retrieval",
        JOURNAL = Cyber,
        VOLUME = "44",
        YEAR = "2014",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "1180--1190",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803we1.html#TT174419"}

% Journal article. journal is the unquoted string macro MultMed; bibsource is a
% non-standard field that standard styles ignore.
@article{bb179083,
  author    = {Jiao, B. and Yang, L. and Xu, J. and Tian, Q. and Wu, F.},
  title     = {Visually Summarizing Web Pages Through Internal and External Images},
  journal   = MultMed,
  year      = {2012},
  volume    = {14},
  number    = {6},
  pages     = {1673-1683},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174420},
}

% Journal article. journal is the unquoted string macro IP, expected to be
% defined elsewhere in the file. Title and subtitle are kept in the single
% title field, separated by the colon.
@article{bb179084,
  author    = {Sawant, N. and Wang, J.Z. and Li, J.},
  title     = {Enhancing Training Collections for Image Annotation: An Instance-Weighted Mixture Modeling Approach},
  journal   = IP,
  year      = {2013},
  volume    = {22},
  number    = {9},
  pages     = {3562-3577},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174421},
}

% Journal article. journal is the unquoted string macro PR, expected to be
% defined elsewhere in the file. The title is stored in sentence case as given.
@article{bb179085,
  author    = {Zhou, N. and Fan, J.P.},
  title     = {Automatic image-text alignment for large-scale web image indexing and retrieval},
  journal   = PR,
  year      = {2015},
  volume    = {48},
  number    = {1},
  pages     = {205-219},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174422},
}

% Journal article. journal is the unquoted string macro SPLetters, expected to
% be defined elsewhere in the file; month is a literal word, not a macro.
@article{bb179086,
  author    = {Tian, J. and Huang, Y. and Guo, Z. and Qi, X. and Chen, Z. and Huang, T.},
  title     = {A Multi-Modal Topic Model for Image Annotation Using Text Analysis},
  journal   = SPLetters,
  year      = {2015},
  volume    = {22},
  number    = {7},
  month     = {July},
  pages     = {886-890},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174423},
}

% Journal article. journal is the unquoted string macro PAMI. The first author
% uses the "Last, First" form, so the two-word surname "Rodriguez Vaamonde"
% stays together when BibTeX splits the name.
@article{bb179087,
  author    = {Rodriguez Vaamonde, S. and Torresani, L. and Fitzgibbon, A.W.},
  title     = {What Can Pictures Tell Us About Web Pages? Improving Document Search Using Images},
  journal   = PAMI,
  year      = {2015},
  volume    = {37},
  number    = {6},
  month     = {June},
  pages     = {1274-1285},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174424},
}

% Journal article. journal is the unquoted string macro CVIU, expected to be
% defined elsewhere in the file. The title is stored in sentence case as given.
@article{bb179088,
  author    = {Tirkaz, C. and Eisenstein, J. and Sezgin, T.M. and Yanikoglu, B.A.},
  title     = {Identifying visual attributes for object recognition from text and taxonomy},
  journal   = CVIU,
  year      = {2015},
  volume    = {137},
  number    = {1},
  pages     = {12-23},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174425},
}

% Journal article. journal is the unquoted string macro MultMed; month is a
% literal word, not one of the three-letter month macros.
@article{bb179089,
  author    = {Lu, Y.J. and Yang, L. and Yang, K. and Rui, Y.},
  title     = {Mining Latent Attributes From Click-Through Logs for Image Recognition},
  journal   = MultMed,
  year      = {2015},
  volume    = {17},
  number    = {8},
  month     = {August},
  pages     = {1213-1224},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174426},
}

% Journal article. journal is the unquoted string macro CirSysVideo, expected to
% be defined elsewhere in the file; bibsource is a non-standard field.
@article{bb179090,
  author    = {Fu, J. and Wang, J. and Rui, Y. and Wang, X. and Mei, T. and Lu, H.},
  title     = {Image Tag Refinement With View-Dependent Concept Representations},
  journal   = CirSysVideo,
  year      = {2015},
  volume    = {25},
  number    = {8},
  month     = {August},
  pages     = {1409-1422},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174427},
}

% Journal article. journal is the unquoted string macro PRL, expected to be
% defined elsewhere in the file. No month field is recorded for this entry.
@article{bb179091,
  author    = {Belongie, S. and Perona, P.},
  title     = {Visipedia circa 2015},
  journal   = PRL,
  year      = {2016},
  volume    = {72},
  number    = {1},
  pages     = {15-24},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174428},
}

% Journal article. journal is the unquoted string macro IJCV.
% NOTE(review): authors and title closely match the ECCV14 entry bb179093;
% presumably this is the journal version of that paper - confirm.
@article{bb179092,
  author    = {Niu, L. and Li, W. and Xu, D.},
  title     = {Exploiting Privileged Information from Web Data for Action and Event Recognition},
  journal   = IJCV,
  year      = {2016},
  volume    = {118},
  number    = {2},
  month     = {June},
  pages     = {130-150},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174429},
}

% Conference paper. booktitle is the unquoted string macro ECCV14.
% The pages value carries a "V:" prefix ahead of the page range - presumably the
% proceedings volume; confirm before reformatting it.
@inproceedings{bb179093,
  author    = {Li, W. and Niu, L. and Xu, D.},
  title     = {Exploiting Privileged Information from Web Data for Image Categorization},
  booktitle = ECCV14,
  year      = {2014},
  pages     = {V: 437-452},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174430},
}

% Journal article. journal is the unquoted string macro MultInfoRetr, expected
% to be defined elsewhere in the file.
% The proper noun Markov is wrapped in braces so that bibliography styles which
% re-case titles cannot lower it to "markov".
@article{bb179094,
  author    = {Sejal, D. and Rashmi, V. and Venugopal, K.R. and Iyengar, S.S. and Patnaik, L.M.},
  title     = {Image recommendation based on keyword relevance using absorbing {Markov} chain and image features},
  journal   = MultInfoRetr,
  year      = {2016},
  volume    = {5},
  number    = {3},
  month     = {September},
  pages     = {185-199},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174431},
}

% Journal article. journal is the unquoted string macro MultMed; bibsource is a
% non-standard field that standard styles ignore.
@article{bb179095,
  author    = {Yan, Y. and Nie, F.P. and Li, W. and Gao, C.Q. and Yang, Y. and Xu, D.},
  title     = {Image Classification by Cross-Media Active Learning With Privileged Information},
  journal   = MultMed,
  year      = {2016},
  volume    = {18},
  number    = {12},
  month     = {December},
  pages     = {2494-2502},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174432},
}

% Journal article. journal is the unquoted string macro IP, expected to be
% defined elsewhere in the file.
% The acronym POI is wrapped in braces so that sentence-casing bibliography
% styles keep it fully capitalised instead of rendering "Poi".
@article{bb179096,
  author    = {Qian, X. and Li, C. and Lan, K. and Hou, X. and Li, Z. and Han, J.},
  title     = {{POI} Summarization by Aesthetics Evaluation From Crowd Source Social Media},
  journal   = IP,
  year      = {2018},
  volume    = {27},
  number    = {3},
  month     = {March},
  pages     = {1178-1189},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174433},
}

% Journal article. journal is the unquoted string macro PAMI, expected to be
% defined elsewhere in the file.
% month is spelled out in full, matching the literal month names used by the
% neighbouring entries.
@article{bb179097,
  author    = {Li, Z.C. and Tang, J.H. and Mei, T.},
  title     = {Deep Collaborative Embedding for Social Image Understanding},
  journal   = PAMI,
  year      = {2019},
  volume    = {41},
  number    = {9},
  month     = {September},
  pages     = {2070-2083},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174434},
}

% Journal article. journal is the unquoted string macro PAMI; month is stored as
% the literal word "May".
@article{bb179098,
  author    = {Zhang, Z.Z. and Chen, P.J. and Shi, X.S. and Yang, L.},
  title     = {Text-Guided Neural Network Training for Image Recognition in Natural Scenes and Medicine},
  journal   = PAMI,
  year      = {2021},
  volume    = {43},
  number    = {5},
  month     = {May},
  pages     = {1733-1745},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174435},
}

% Journal article. journal is the unquoted string macro MultMed.
% NOTE(review): this entry has no number or month field, unlike the other
% MultMed entries nearby - confirm whether this volume has issue numbers.
@article{bb179099,
  author    = {Zheng, C.M. and Wu, Z.W. and Wang, T. and Cai, Y. and Li, Q.},
  title     = {Object-Aware Multimodal Named Entity Recognition in Social Media Posts With Adversarial Learning},
  journal   = MultMed,
  year      = {2021},
  volume    = {23},
  pages     = {2520-2532},
  bibsource = {http://www.visionbib.com/bibliography/applicat803we1.html#TT174436},
}

Last update: Nov 30, 2021 at 22:19:38