@article{bb212900,
  author    = {Li, W.H. and Yang, S. and Li, Q. and Li, X. and Liu, A.A.},
  title     = {Commonsense-Guided Semantic and Relational Consistencies for Image-Text Retrieval},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {1867-1880},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207980},
}

@article{bb212901,
  author    = {Wu, D.Q. and Li, H.H. and Gu, C. and Liu, H. and Xu, C. and Hou, Y. and Guo, L.},
  title     = {Feature First: Advancing Image-Text Retrieval Through Improved Visual Features},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3827-3841},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207981},
}

@article{bb212902,
  author    = {Yang, R. and Wang, S. and Gu, Y. and Wang, J.H. and Sun, Y.Z. and Zhang, H. and Liao, Y. and Jiao, L.C.},
  title     = {Continual Learning for Cross-Modal Image-Text Retrieval Based on Domain-Selective Attention},
  journal   = PR,
  volume    = {149},
  year      = {2024},
  pages     = {110273},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207982},
}

% The ampersand in the title is escaped: a bare "&" is a LaTeX alignment
% character and aborts typesetting of the bibliography.
@article{bb212903,
  author    = {Pan, R.J. and Yang, H. and Li, C. and Yang, J.H.},
  title     = {Joint Intra \& Inter-Grained Reasoning: A New Look Into Semantic Consistency of Image-Text Retrieval},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {4912-4925},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207983},
}

@inproceedings{bb212904,
  author    = {Fu, Z.R. and Mao, Z.D. and Song, Y. and Zhang, Y.D.},
  title     = {Learning Semantic Relationship among Instances for Image-Text Matching},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {15159-15168},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207984},
}

@inproceedings{bb212905,
  author    = {Zhang, W. and Xu, X.W. and Tao, Y. and Wang, X.D. and Wang, C. and Wei, Z.M.},
  title     = {Bi-Directional Image-Text Retrieval With Position Attention and Similarity Filtering},
  booktitle = ICIVC22,
  year      = {2022},
  pages     = {635-640},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207985},
}

@inproceedings{bb212906,
  author    = {Li, Z. and Nian, X.H. and Pan, C. and Yang, D. and Xiong, H.Y. and Wang, H.B.},
  title     = {Relation Graph Reasoning for Image-Text Matching},
  booktitle = ICIVC22,
  year      = {2022},
  pages     = {319-324},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207986},
}

@inproceedings{bb212907,
  author    = {Zhang, K. and Mao, Z.D. and Wang, Q. and Zhang, Y.D.},
  title     = {Negative-Aware Attention Framework for Image-Text Matching},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {15640-15649},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207987},
}

% "GraDual" is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{bb212908,
  author    = {Long, S. and Han, S.C. and Wan, X.J. and Poon, J.},
  title     = {{GraDual}: Graph-based Dual-modal Representation for Image-Text Matching},
  booktitle = WACV22,
  year      = {2022},
  pages     = {2463-2472},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207988},
}

@inproceedings{bb212909,
  author    = {Biten, A.F. and Mafla, A. and Gomez, L. and Karatzas, D.},
  title     = {Is An Image Worth Five Sentences? A New Look into Semantics for Image-Text Matching},
  booktitle = WACV22,
  year      = {2022},
  pages     = {2483-2492},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207989},
}

@inproceedings{bb212910,
  author    = {Mithun, N.C. and Pasricha, R. and Papalexakis, E. and Roy Chowdhury, A.K.},
  title     = {Webly Supervised Image-Text Embedding with Noisy Tag Refinement},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {7454-7461},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207990},
}

@inproceedings{bb212911,
  author    = {Chen, J.A. and Zhang, L. and Wang, Q. and Bai, C. and Kpalma, K.},
  title     = {Intra-Modal Constraint Loss for Image-Text Retrieval},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {4023-4027},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207991},
}

@inproceedings{bb212912,
  author    = {Liu, Y. and Wang, H.Q. and Meng, F.Y. and Liu, M.Y. and Liu, H.},
  title     = {Attend, Correct and Focus: A Bidirectional Correct Attention Network for Image-Text Matching},
  booktitle = ICIP21,
  year      = {2021},
  pages     = {2673-2677},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207992},
}

% "JECL" is brace-protected so sentence-casing styles do not render it "Jecl".
@inproceedings{bb212913,
  author    = {Yang, S.T. and Huang, K.H. and Howe, B.},
  title     = {{JECL}: Joint Embedding and Cluster Learning for Image-Text Pairs},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {8344-8351},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207993},
}

@inproceedings{bb212914,
  author    = {Mikriukov, G. and Ravanbakhsh, M. and Demir, B.},
  title     = {An Unsupervised Cross-Modal Hashing Method Robust to Noisy Training Image-Text Correspondences in Remote Sensing},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {2556-2560},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207994},
}

@inproceedings{bb212915,
  author    = {Anwaar, M.U. and Labintcev, E. and Kleinsteuber, M.},
  title     = {Compositional Learning of Image-Text Query for Image Retrieval},
  booktitle = WACV21,
  year      = {2021},
  pages     = {1139-1148},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207995},
}

@inproceedings{bb212916,
  author    = {Messina, N. and Falchi, F. and Esuli, A. and Amato, G.},
  title     = {Transformer Reasoning Network for Image-Text Matching and Retrieval},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {5222-5229},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207996},
}

@inproceedings{bb212917,
  author    = {Zhang, Q. and Lei, Z. and Zhang, Z.X. and Li, S.Z.},
  title     = {Context-Aware Attention Network for Image-Text Retrieval},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {3533-3542},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207997},
}

@inproceedings{bb212918,
  author    = {Chen, Y.C. and Li, L.J. and Yu, L.C. and El Kholy, A. and Ahmed, F. and Gan, Z. and Cheng, Y. and Liu, J.J.},
  title     = {Uniter: Universal Image-Text Representation Learning},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XXX: 104-120},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207998},
}

% Pages carry the proceedings part number as "PART: first-last"; a space was
% added after the colon to match the form the other ECCV entries use.
@inproceedings{bb212919,
  author    = {Wang, H.R. and Zhang, Y. and Ji, Z. and Pang, Y.W. and Ma, L.},
  title     = {Consensus-aware Visual-semantic Embedding for Image-Text Matching},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XXIV: 18-34},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207999},
}

% Pages carry the proceedings part number as "PART: first-last"; a space was
% added after the colon to match the form the other ECCV entries use.
@inproceedings{bb212920,
  author    = {Chen, T.L. and Deng, J.J. and Luo, J.B.},
  title     = {Adaptive Offline Quintuplet Loss for Image-text Matching},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XIII: 549-565},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT208000},
}

@inproceedings{bb212921,
  author    = {Lee, K.H. and Chen, X. and Hua, G. and Hu, H.D. and He, X.D.},
  title     = {Stacked Cross Attention for Image-Text Matching},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {II: 212-228},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT208001},
}

@inproceedings{bb212922,
  author    = {Zhang, Y. and Lu, H.C.},
  title     = {Deep Cross-Modal Projection Learning for Image-Text Matching},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {I: 707-723},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT208002},
}

@inproceedings{bb212923,
  author    = {Plummer, B.A. and Kordas, P. and Kiapour, M.H. and Zheng, S. and Piramuthu, R. and Lazebnik, S.},
  title     = {Conditional Image-Text Embedding Networks},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {XII: 258-274},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT208003},
}

@article{bb212924,
  author    = {Liang, J.W. and Jiang, L. and Cao, L.L. and Kalantidis, Y. and Li, L.J. and Hauptmann, A.G.},
  title     = {Focal Visual-Text Attention for Memex Question Answering},
  journal   = PAMI,
  volume    = {41},
  year      = {2019},
  number    = {8},
  month     = {August},
  pages     = {1893-1908},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208005},
}

@inproceedings{bb212925,
  author    = {Liang, J.W. and Jiang, L. and Cao, L.L. and Li, L.J. and Hauptmann, A.G.},
  title     = {Focal Visual-Text Attention for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6135-6143},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208006},
}

% "VQA" is brace-protected so sentence-casing styles do not render it "vqa".
@article{bb212926,
  author    = {Riquelme, F. and de Goyeneche, A. and Zhang, Y.D. and Niebles, J.C. and Soto, A.},
  title     = {Explaining {VQA} predictions using visual grounding and a knowledge base},
  journal   = IVC,
  volume    = {101},
  year      = {2020},
  pages     = {103968},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208007},
}

@article{bb212927,
  author    = {Yang, Z.Y. and Kumar, T. and Chen, T.L. and Su, J.S. and Luo, J.B.},
  title     = {Grounding-Tracking-Integration},
  journal   = CirSysVideo,
  volume    = {31},
  year      = {2021},
  number    = {9},
  month     = {September},
  pages     = {3433-3443},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208008},
}

@article{bb212928,
  author    = {Zhang, W.X. and Ma, C. and Wu, Q. and Yang, X.K.},
  title     = {Language-Guided Navigation via Cross-Modal Grounding and Alternate Adversarial Learning},
  journal   = CirSysVideo,
  volume    = {31},
  year      = {2021},
  number    = {9},
  month     = {September},
  pages     = {3469-3481},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208009},
}

% "VSE-fs" is brace-protected so sentence-casing styles keep its mixed case.
@article{bb212929,
  author    = {Zhai, S.L. and Guo, G.B. and Yuan, F.J. and Liu, Y. and Wang, X.W.},
  title     = {{VSE-fs}: Fast Full-Sample Visual Semantic Embedding},
  journal   = IEEE_Int_Sys,
  volume    = {36},
  year      = {2021},
  number    = {4},
  month     = {July},
  pages     = {3-12},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208010},
}

@article{bb212930,
  author    = {Bargal, S.A. and Zunino, A. and Petsiuk, V. and Zhang, J.M. and Saenko, K. and Murino, V. and Sclaroff, S.},
  title     = {Guided Zoom: Zooming into Network Evidence to Refine Fine-Grained Model Decisions},
  journal   = PAMI,
  volume    = {43},
  year      = {2021},
  number    = {11},
  month     = {November},
  pages     = {4196-4202},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208011},
}

@article{bb212931,
  author    = {Yang, W.F. and Zhang, T.Z. and Zhang, Y.D. and Wu, F.},
  title     = {Local Correspondence Network for Weakly Supervised Temporal Sentence Grounding},
  journal   = IP,
  volume    = {30},
  year      = {2021},
  pages     = {3252-3262},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208012},
}

@inproceedings{bb212932,
  author    = {Luo, W. and Zhang, T.Z. and Yang, W.F. and Liu, J.G. and Mei, T. and Wu, F. and Zhang, Y.D.},
  title     = {Action Unit Memory Network for Weakly Supervised Temporal Action Localization},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {9964-9974},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208013},
}

@article{bb212933,
  author    = {Hong, R.C. and Liu, D. and Mo, X.Y. and He, X.N. and Zhang, H.W.},
  title     = {Learning to Compose and Reason with Language Tree Structures for Visual Grounding},
  journal   = PAMI,
  volume    = {44},
  year      = {2022},
  number    = {2},
  month     = {February},
  pages     = {684-696},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208014},
}

@inproceedings{bb212934,
  author    = {Tang, K.H. and Zhang, H.W. and Wu, B.Y. and Luo, W.H. and Liu, W.},
  title     = {Learning to Compose Dynamic Tree Structures for Visual Contexts},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6612-6621},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208015},
}

@article{bb212935,
  author    = {Bin, Y. and Ding, Y.J. and Peng, B. and Peng, L. and Yang, Y. and Chua, T.S.},
  title     = {Entity Slot Filling for Visual Captioning},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {1},
  month     = {January},
  pages     = {52-62},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208016},
}

@article{bb212936,
  author    = {Chu, C. and Oliveira, V. and Virgo, F.G. and Otani, M. and Garcia, N. and Nakashima, Y.},
  title     = {The semantic typology of visually grounded paraphrases},
  journal   = CVIU,
  volume    = {215},
  year      = {2022},
  pages     = {103333},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208017},
}

@article{bb212937,
  author    = {Deng, C.R. and Wu, Q. and Wu, Q.Y. and Hu, F.Y. and Lyu, F. and Tan, M.K.},
  title     = {Visual Grounding Via Accumulated Attention},
  journal   = PAMI,
  volume    = {44},
  year      = {2022},
  number    = {3},
  month     = {March},
  pages     = {1670-1684},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208018},
}

% NOTE(review): the author list was stored in exactly reversed order relative
% to the journal version of this paper; it is restored here to the order
% believed to match the CVPR 2018 proceedings - confirm against the paper.
% The bibsource anchor is intentionally left as exported.
@inproceedings{bb212938,
  author    = {Deng, C.R. and Wu, Q. and Wu, Q.Y. and Hu, F.Y. and Lyu, F. and Tan, M.K.},
  title     = {Visual Grounding Via Accumulated Attention},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7746-7755},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208018},
}

@article{bb212939,
  author    = {Plummer, B.A. and Shih, K.J. and Li, Y.C. and Xu, K. and Lazebnik, S. and Sclaroff, S. and Saenko, K.},
  title     = {Revisiting Image-Language Networks for Open-Ended Phrase Detection},
  journal   = PAMI,
  volume    = {44},
  year      = {2022},
  number    = {4},
  month     = {April},
  pages     = {2155-2167},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208019},
}

@inproceedings{bb212940,
  author    = {Burns, A. and Tan, R. and Saenko, K. and Sclaroff, S. and Plummer, B.A.},
  title     = {Language Features Matter: Effective Language Representations for Vision-Language Tasks},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {7473-7482},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208020},
}

@inproceedings{bb212941,
  author    = {Arbelle, A. and Doveh, S. and Alfassy, A. and Shtok, J. and Lev, G. and Schwartz, E. and Kuehne, H. and Levi, H.B. and Sattigeri, P. and Panda, R. and Chen, C.F. and Bronstein, A.M. and Saenko, K. and Ullman, S. and Giryes, R. and Feris, R.S. and Karlinsky, L.},
  title     = {Detector-Free Weakly Supervised Grounding by Separation},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1781-1792},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208021},
}

@inproceedings{bb212942,
  author    = {Whitehead, S. and Wu, H. and Ji, H. and Feris, R.S. and Saenko, K.},
  title     = {Separating Skills and Concepts for Novel Visual Question Answering},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {5628-5637},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208022},
}

% "VD-PCR" is brace-protected so sentence-casing styles keep the acronym.
@article{bb212943,
  author    = {Yu, X.T. and Zhang, H.M. and Hong, R.X. and Song, Y.Q. and Zhang, C.S.},
  title     = {{VD-PCR}: Improving visual dialog with pronoun coreference resolution},
  journal   = PR,
  volume    = {125},
  year      = {2022},
  pages     = {108540},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208023},
}

@article{bb212944,
  author    = {Yuan, Y.T. and Ma, L. and Wang, J.W. and Liu, W. and Zhu, W.W.},
  title     = {Semantic Conditioned Dynamic Modulation for Temporal Sentence Grounding in Videos},
  journal   = PAMI,
  volume    = {44},
  year      = {2022},
  number    = {5},
  month     = {May},
  pages     = {2725-2741},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208024},
}

@article{bb212945,
  author    = {He, S. and Yang, X.F. and Lin, G.S.},
  title     = {Learning language to symbol and language to vision mapping for visual grounding},
  journal   = IVC,
  volume    = {122},
  year      = {2022},
  pages     = {104451},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208025},
}

@article{bb212946,
  author    = {Jiang, W.H. and Zhu, M. and Fang, Y.M. and Shi, G.M. and Zhao, X.W. and Liu, Y.},
  title     = {Visual Cluster Grounding for Image Captioning},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {3920-3934},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208026},
}

@article{bb212947,
  author    = {Liao, Y. and Zhang, A. and Chen, Z.Y. and Hui, T.R. and Liu, S.},
  title     = {Progressive Language-Customized Visual Feature Learning for One-Stage Visual Grounding},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {4266-4277},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208027},
}

@article{bb212948,
  author    = {Ding, X.P. and Wang, N.N. and Zhang, S.W. and Huang, Z.Y. and Li, X.M. and Tang, M.Q. and Liu, T.L. and Gao, X.B.},
  title     = {Exploring Language Hierarchy for Video Grounding},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {4693-4706},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208028},
}

@article{bb212949,
  author    = {Wang, Y. and Deng, J.J. and Zhou, W.G. and Li, H.Q.},
  title     = {Weakly Supervised Temporal Adjacent Network for Language Grounding},
  journal   = MultMed,
  volume    = {24},
  year      = {2022},
  pages     = {3276-3286},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208029},
}

% "HiSA" is brace-protected so sentence-casing styles keep its mixed case.
@article{bb212950,
  author    = {Xu, Z. and Chen, D. and Wei, K. and Deng, C. and Xue, H.},
  title     = {{HiSA}: Hierarchically Semantic Associating for Video Temporal Grounding},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {5178-5188},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208030},
}

@article{bb212951,
  author    = {Gao, J.L. and Sun, X. and Ghanem, B. and Zhou, X. and Ge, S.M.},
  title     = {Efficient Video Grounding With Which-Where Reading Comprehension},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {10},
  month     = {October},
  pages     = {6900-6913},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208031},
}

@article{bb212952,
  author    = {Zhou, H. and Zhang, C.Y. and Luo, Y. and Hu, C.P. and Zhang, W.J.},
  title     = {Thinking Inside Uncertainty: Interest Moment Perception for Diverse Temporal Grounding},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {10},
  month     = {October},
  pages     = {7190-7203},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208032},
}

@article{bb212953,
  author    = {Tang, Z.H. and Liao, Y. and Liu, S. and Li, G.B. and Jin, X.J. and Jiang, H.X. and Yu, Q. and Xu, D.},
  title     = {Human-Centric Spatio-Temporal Video Grounding With Visual Transformers},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {12},
  month     = {December},
  pages     = {8238-8249},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208033},
}

@article{bb212954,
  author    = {Tang, H.Y. and Zhu, J. and Wang, L. and Zheng, Q.H. and Zhang, T.W.},
  title     = {Multi-Level Query Interaction for Temporal Language Grounding},
  journal   = ITS,
  volume    = {23},
  year      = {2022},
  number    = {12},
  month     = {December},
  pages     = {25479-25488},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208034},
}

@article{bb212955,
  author    = {Wang, W. and Gao, J.Y. and Xu, C.S.},
  title     = {Weakly-Supervised Video Object Grounding via Causal Intervention},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {3},
  month     = {March},
  pages     = {3933-3948},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208035},
}

@article{bb212956,
  author    = {Wang, W. and Gao, J.Y. and Xu, C.S.},
  title     = {Weakly-Supervised Video Object Grounding via Learning Uni-Modal Associations},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {6329-6340},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208036},
}

% "LogicENN" is brace-protected so sentence-casing styles keep its capitals.
@article{bb212957,
  author    = {Nayyeri, M. and Xu, C.J. and Alam, M.M. and Lehmann, J. and Yazdi, H.S.},
  title     = {{LogicENN}: A Neural Based Knowledge Graphs Embedding Model With Logical Rules},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {6},
  month     = {June},
  pages     = {7050-7062},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208037},
}

% "3D" is brace-protected so sentence-casing styles do not render it "3d".
@article{bb212958,
  author    = {Zhao, L.C. and Cai, D.G. and Zhang, J. and Sheng, L. and Xu, D. and Zheng, R. and Zhao, Y.J. and Wang, L.P. and Fan, X.},
  title     = {Toward Explainable {3D} Grounded Visual Question Answering: A New Benchmark and Strong Baseline},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {6},
  month     = {June},
  pages     = {2935-2949},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208038},
}

@article{bb212959,
  author    = {Zhu, L.J. and Peng, L. and Zhou, W.N. and Yang, J.},
  title     = {Dual-decoder transformer network for answer grounding in visual question answering},
  journal   = PRL,
  volume    = {171},
  year      = {2023},
  pages     = {53-60},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208039},
}

% "SaGCN" is brace-protected so sentence-casing styles keep its mixed case.
@article{bb212960,
  author    = {Chen, T. and Wang, W. and Han, K. and Xu, H.J.},
  title     = {{SaGCN}: Semantic-Aware Graph Calibration Network for Temporal Sentence Grounding},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {6},
  month     = {June},
  pages     = {3003-3016},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208040},
}

@article{bb212961,
  author    = {Zhang, H. and Sun, A. and Jing, W. and Zhou, J.T.Y.},
  title     = {Temporal Sentence Grounding in Videos: A Survey and Future Directions},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {8},
  month     = {August},
  pages     = {10443-10465},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208041},
}

% "TransVG++" is brace-protected so sentence-casing styles keep its capitals.
@article{bb212962,
  author    = {Deng, J.J. and Yang, Z.Y. and Liu, D. and Chen, T.L. and Zhou, W.G. and Zhang, Y. and Li, H.Q. and Ouyang, W.L.},
  title     = {{TransVG++}: End-to-End Visual Grounding With Language Conditioned Vision Transformer},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {11},
  month     = {November},
  pages     = {13636-13652},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208042},
}

% "TransVG" is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{bb212963,
  author    = {Deng, J.J. and Yang, Z.Y. and Chen, T.L. and Zhou, W.G. and Li, H.Q.},
  title     = {{TransVG}: End-to-End Visual Grounding with Transformers},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1749-1759},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208043},
}

@article{bb212964,
  author    = {Li, J.C. and Tang, S.L. and Zhu, L.C. and Zhang, W.Q. and Yang, Y. and Chua, T.S. and Wu, F. and Zhuang, Y.T.},
  title     = {Variational Cross-Graph Reasoning and Adaptive Structured Semantics Learning for Compositional Temporal Grounding},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {10},
  month     = {October},
  pages     = {12601-12617},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208044},
}

@inproceedings{bb212965,
  author    = {Li, J.C. and Xie, J.L. and Qian, L. and Zhu, L.C. and Tang, S.L. and Wu, F. and Yang, Y. and Zhuang, Y.T. and Wang, X.E.},
  title     = {Compositional Temporal Grounding with Structured Variational Cross-Graph Correspondence Learning},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {3022-3031},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208045},
}

% "PiGLET" is brace-protected so sentence-casing styles keep its mixed case;
% the stray line break after the colon in the title was removed.
@article{bb212966,
  author    = {Gonzalez, C. and Ayobi, N. and Hernandez, I. and Pont Tuset, J. and Arbelaez, P.},
  title     = {{PiGLET}: Pixel-Level Grounding of Language Expressions With Transformers},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {10},
  month     = {October},
  pages     = {12206-12221},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208046},
}

@article{bb212967,
  author    = {Zhang, R.S. and Wang, C. and Liu, C.L.},
  title     = {Cycle-Consistent Weakly Supervised Visual Grounding With Individual and Contextual Representations},
  journal   = IP,
  volume    = {32},
  year      = {2023},
  pages     = {5167-5180},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208047},
}

@article{bb212968,
  author    = {Wang, Y. and Su, Y.T. and Li, W.H. and Xiao, J. and Li, X.Y. and Liu, A.A.},
  title     = {Dual-Path Rare Content Enhancement Network for Image and Text Matching},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {10},
  month     = {October},
  pages     = {6144-6158},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208048},
}

@article{bb212969,
  author    = {Xu, Z. and Wei, K. and Yang, X. and Deng, C.},
  title     = {Point-Supervised Video Temporal Grounding},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {6121-6131},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208049},
}

@article{bb212970,
  author    = {Luo, F. and Chen, S.X. and Chen, J.J. and Wu, Z. and Jiang, Y.G.},
  title     = {Self-Supervised Learning for Semi-Supervised Temporal Language Grounding},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {7747-7757},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208050},
}

@article{bb212971,
  author    = {Liu, D.Z. and Fang, X. and Hu, W. and Zhou, P.},
  title     = {Exploring Optical-Flow-Guided Motion and Detection-Based Appearance for Temporal Sentence Grounding},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {8539-8553},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208051},
}

@article{bb212972,
  author    = {Yang, X.F. and Liu, F. and Lin, G.S.},
  title     = {Effective End-to-End Vision Language Pretraining With Semantic Visual Loss},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {8408-8417},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208052},
}

@article{bb212973,
  author    = {Ma, G.Q. and Bai, Y. and Zhang, W. and Yao, T. and Shihada, B. and Mei, T.},
  title     = {Boosting Generic Visual-Linguistic Representation With Dynamic Contexts},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {8445-8457},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208053},
}

% "MetaVG" is brace-protected so sentence-casing styles keep its capitals.
@article{bb212974,
  author    = {Su, C. and Li, Z. and Lei, T.Y. and Peng, D.Z. and Wang, X.},
  title     = {{MetaVG}: A Meta-Learning Framework for Visual Grounding},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {236-240},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208054},
}

@article{bb212975,
  author    = {Zeng, Y.W. and Han, N. and Pan, K.Y. and Jin, Q.},
  title     = {Temporally Language Grounding With Multi-Modal Multi-Prompt Tuning},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3366-3377},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208055},
}

@article{bb212976,
  author    = {Fang, X. and Liu, D. and Zhou, P. and Xu, Z. and Li, R.X.},
  title     = {Hierarchical Local-Global Transformer for Temporal Sentence Grounding},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3263-3277},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208056},
}

@article{bb212977,
  author    = {Wang, Z. and Yang, C. and Jiang, B. and Yuan, J.S.},
  title     = {A Dual Reinforcement Learning Framework for Weakly Supervised Phrase Grounding},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {394-405},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208057},
}

@article{bb212978,
  author    = {Lu, Y. and Quan, R.J. and Zhu, L.C. and Yang, Y.},
  title     = {Zero-Shot Video Grounding With Pseudo Query Lookup and Verification},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {1643-1654},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208058},
}

@article{bb212979,
  author    = {Wang, W.K. and Su, Y.T. and Liu, J. and Jing, P.G.},
  title     = {Adaptive proposal network based on generative adversarial learning for weakly supervised temporal sentence grounding},
  journal   = PRL,
  volume    = {179},
  year      = {2024},
  pages     = {9-16},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208059},
}

@article{bb212980,
  author    = {Liu, D. and Zhu, J.H. and Fang, X. and Xiong, Z. and Wang, H. and Li, R. and Zhou, P.},
  title     = {Conditional Video Diffusion Network for Fine-Grained Temporal Sentence Grounding},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {5461-5476},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208060},
}

% Braces around ARNOLD and 3D keep their capitalisation under bibliography
% styles that convert titles to sentence case.
@inproceedings{bb212981,
  author    = {Gong, R. and Huang, J. and Zhao, Y.Z. and Geng, H.R. and Gao, X.F. and Wu, Q.Y. and Ai, W. and Zhou, Z.H. and Terzopoulos, D. and Zhu, S.C. and Jia, B.X. and Huang, S.Y.},
  title     = {{ARNOLD}: A Benchmark for Language-Grounded Task Learning With Continuous States in Realistic {3D} Scenes},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {20426-20438},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208061},
}

@inproceedings{bb212982,
  author    = {Wu, Y. and Wei, Y. and Wang, H.Z. and Liu, Y.F. and Yang, S. and He, X.M.},
  title     = {Grounded Image Text Matching with Mismatched Relation Reasoning},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2964-2975},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208062},
}

% Braces around LLM-Planner keep the system name's capitalisation under
% bibliography styles that convert titles to sentence case.
@inproceedings{bb212983,
  author    = {Song, C.H. and Sadler, B.M. and Wu, J. and Chao, W.L. and Washington, C. and Su, Y.},
  title     = {{LLM-Planner}: Few-Shot Grounded Planning for Embodied Agents with Large Language Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2986-2997},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208063},
}

% Braces around DetermiNet keep its mixed-case spelling under bibliography
% styles that convert titles to sentence case.
@inproceedings{bb212984,
  author    = {Lee, C. and Kumar, M.G. and Tan, C.},
  title     = {{DetermiNet}: A Large-Scale Diagnostic Dataset for Complex Visually-Grounded Referencing using Determiners},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {19962-19971},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208064},
}

% Braces around UniVTG keep its mixed-case spelling under bibliography
% styles that convert titles to sentence case.
@inproceedings{bb212985,
  author    = {Lin, K.Q. and Zhang, P. and Chen, J. and Pramanick, S. and Gao, D.F. and Wang, A.J.P. and Yan, R. and Shou, M.Z.},
  title     = {{UniVTG}: Towards Unified Video-Language Temporal Grounding},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2782-2792},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208065},
}

@inproceedings{bb212986,
  author    = {Liu, Y. and Zhang, J.H. and Chen, Q.C. and Peng, Y.X.},
  title     = {Confidence-aware Pseudo-label Learning for Weakly Supervised Visual Grounding},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2816-2826},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208066},
}

@inproceedings{bb212987,
  author    = {Khoshsirat, S. and Kambhamettu, C.},
  title     = {Sentence Attention Blocks for Answer Grounding},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {6057-6067},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208067},
}

% Braces around G2L keep the acronym's capitalisation under bibliography
% styles that convert titles to sentence case.
@inproceedings{bb212988,
  author    = {Li, H.X. and Cao, M. and Cheng, X. and Li, Y. and Zhu, Z.H. and Zou, Y.X.},
  title     = {{G2L}: Semantically Aligned and Uniform Video Grounding via Geodesic and Game Theory},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {11998-12008},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208068},
}

% Braces around D3G and Gaussian keep their capitalisation under bibliography
% styles that convert titles to sentence case.
@inproceedings{bb212989,
  author    = {Li, H. and Shu, X.J. and He, S. and Qiao, R.Z. and Wen, W. and Guo, T. and Gan, B. and Sun, X.},
  title     = {{D3G}: Exploring {Gaussian} Prior for Temporal Sentence Grounding with Glance Annotation},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {13688-13700},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208069},
}

@inproceedings{bb212990,
  author    = {Pan, Y.L. and He, X.T. and Gong, B. and Lv, Y.L. and Shen, Y.J. and Peng, Y.X. and Zhao, D.L.},
  title     = {Scanning Only Once: An End-to-end Framework for Fast Temporal Grounding in Long Videos},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {13721-13731},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208070},
}

@inproceedings{bb212991,
  author    = {Jang, J. and Park, J. and Kim, J. and Kwon, H. and Sohn, K.H.},
  title     = {Knowing Where to Focus: Event-aware Transformer for Video Grounding},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {13800-13810},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208071},
}

% Braces around Multi3DRefer and 3D keep their capitalisation under
% bibliography styles that convert titles to sentence case.
% NOTE(review): the page range 15179-15179 starts and ends on the same page,
% which is unusual for a proceedings paper; the end page may be wrong.
% Confirm against the ICCV23 proceedings before relying on it.
@inproceedings{bb212992,
  author    = {Zhang, Y.M. and Gong, Z. and Chang, A.X.},
  title     = {{Multi3DRefer}: Grounding Text Description to Multiple {3D} Objects},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15179-15179},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208072},
}

% Braces around VQA keep the acronym's capitalisation under bibliography
% styles that convert titles to sentence case.
@inproceedings{bb212993,
  author    = {Chen, C. and Anjum, S. and Gurari, D.},
  title     = {{VQA} Therapy: Exploring Answer Differences by Visually Grounding Answers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15269-15279},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208073},
}

@inproceedings{bb212994,
  author    = {Li, H. and Wei, P. and Ma, Z. and Zheng, N.N.},
  title     = {Inverse Compositional Learning for Weakly-supervised Relation Grounding},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15431-15441},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208074},
}

% The sharp s in the fourth author's surname is written as the LaTeX escape
% {\ss} so that 8-bit classic BibTeX sorts and labels the name correctly.
% Braces around UniT3D and 3D keep their capitalisation under bibliography
% styles that convert titles to sentence case.
@inproceedings{bb212995,
  author    = {Chen, D.Z.Y. and Hu, R. and Chen, X.L. and Nie{\ss}ner, M. and Chang, A.X.},
  title     = {{UniT3D}: A Unified Transformer for {3D} Dense Captioning and Visual Grounding},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {18063-18073},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208075},
}

@inproceedings{bb212996,
  author    = {de la Jara, I.M. and Rodriguez Opazo, C. and Marrese Taylor, E. and Bravo Marquez, F.},
  title     = {An empirical study of the effect of video encoders on Temporal Video Grounding},
  booktitle = CLVL23,
  year      = {2023},
  pages     = {2842-2847},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208076},
}

% Braces around 3D keep the capital D under bibliography styles that
% convert titles to sentence case.
@inproceedings{bb212997,
  author    = {Wang, Z. and Huang, H.F. and Zhao, Y. and Li, L.J. and Cheng, X. and Zhu, Y.C. and Yin, A. and Zhao, Z.},
  title     = {Distilling Coarse-to-Fine Semantic Matching Knowledge for Weakly Supervised {3D} Visual Grounding},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2662-2671},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208077},
}

% Braces around ViewRefer and 3D keep their capitalisation under
% bibliography styles that convert titles to sentence case.
@inproceedings{bb212998,
  author    = {Guo, Z. and Tang, Y. and Zhang, R. and Wang, D. and Wang, Z.G. and Zhao, B. and Li, X.L.},
  title     = {{ViewRefer}: Grasp the Multi-view Knowledge for {3D} Visual Grounding},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15326-15337},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208078},
}

@inproceedings{bb212999,
  author    = {Li, M. and Wang, C.L. and Feng, W. and Lyu, S.C. and Cheng, G.L. and Li, X.T. and Liu, B. and Zhao, Q.},
  title     = {Iterative Robust Visual Grounding with Masked Reference based Centerpoint Supervision},
  booktitle = VLAR23,
  year      = {2023},
  pages     = {4653-4658},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT208079},
}

Last update: Apr 10, 2024 at 09:54:40