@article{bb246000,
  author    = {Zheng, Z. Q. and Ren, H. and Wu, Y. and Zhang, W. C. and Lu, H. and Yang, Y. and Shen, H. T.},
  title     = {Fully Unsupervised Domain-Agnostic Image Retrieval},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {6},
  pages     = {5077--5090},
  year      = {2024},
  month     = jun,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240886},
}

@article{bb246001,
  author    = {Zhang, J. Z. and Wang, L. and Zheng, F. Z. and Wang, X. and Zhang, H.},
  title     = {An Enhanced Feature Extraction Framework for Cross-Modal Image-Text Retrieval},
  journal   = RS,
  volume    = {16},
  number    = {12},
  pages     = {2201},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240887},
}

@article{bb246002,
  author    = {Cheng, Q. R. and Tan, Z. S. and Wen, K. Y. and Chen, C. and Gu, X. D.},
  title     = {Semantic Pre-Alignment and Ranking Learning With Unified Framework for Cross-Modal Retrieval},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {7},
  pages     = {6503--6516},
  year      = {2024},
  month     = jul,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240888},
}

@article{bb246003,
  author    = {Xue, P. and Niu, S.},
  title     = {A novel active contour model based on features for image segmentation},
  journal   = PR,
  volume    = {155},
  pages     = {110673},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240889},
}

@article{bb246004,
  author    = {Yan, J. and Deng, C. and Huang, H. and Liu, W.},
  title     = {Causality-Invariant Interactive Mining for Cross-Modal Similarity Learning},
  journal   = PAMI,
  volume    = {46},
  number    = {9},
  pages     = {6216--6230},
  year      = {2024},
  month     = sep,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240890},
}

@article{bb246005,
  author    = {Wu, W. J. and Zhao, Y. Z. and Li, Z. and Li, J. H. and Zhou, H. and Shou, M. Z. and Bai, X.},
  title     = {A large cross-modal video retrieval dataset with reading comprehension},
  journal   = PR,
  volume    = {157},
  pages     = {110818},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240891},
}

@article{bb246006,
  author    = {Yuan, Z. and Wu, D. and Zhou, L.},
  title     = {Achieving the Optimum Rate for Cross-Modal Source Coding},
  journal   = MultMed,
  volume    = {26},
  pages     = {9722--9735},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240892},
}

@article{bb246007,
  author    = {Chen, R. and Tan, J. P. and Yang, Z. J. and Yang, X. J. and Dai, Q. Y. and Cheng, Y. Q. and Lin, L.},
  title     = {{DPHANet}: Discriminative Parallel and Hierarchical Attention Network for Natural Language Video Localization},
  journal   = MultMed,
  volume    = {26},
  pages     = {9575--9590},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240893},
}

@article{bb246008,
  author    = {Zheng, A. and Yuan, F. and Zhang, H. and Wang, J. X. and Tang, C. and Li, C. L.},
  title     = {Public-Private Attributes-Based Variational Adversarial Network for Audio-Visual Cross-Modal Matching},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {9},
  pages     = {8698--8709},
  year      = {2024},
  month     = sep,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240894},
}

@article{bb246009,
  author    = {Li, D. and Du, S. L.},
  title     = {{ContextMatcher}: Detector-Free Feature Matching With Cross-Modality Context},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {9},
  pages     = {7922--7934},
  year      = {2024},
  month     = sep,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240895},
}

@article{bb246010,
  author    = {Zhang, F. and Zhou, H. and Hua, X. S. and Chen, C. and Luo, X.},
  title     = {{HOPE}: A Hierarchical Perspective for Semi-Supervised {2D-3D} Cross-Modal Retrieval},
  journal   = PAMI,
  volume    = {46},
  number    = {12},
  pages     = {8976--8993},
  year      = {2024},
  month     = dec,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240896},
}

@article{bb246011,
  author    = {Zhu, Y. and Wu, Y. and Sebe, N. and Yan, Y.},
  title     = {Vision + {X}: A Survey on Multimodal Learning in the Light of Data},
  journal   = PAMI,
  volume    = {46},
  number    = {12},
  pages     = {9102--9122},
  year      = {2024},
  month     = dec,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240897},
}

@article{bb246012,
  author    = {Li, Z. and Guo, C. and Wang, X. and Zhang, H. and Hu, L.},
  title     = {Multi-View Visual Semantic Embedding for Cross-Modal Image-Text Retrieval},
  journal   = PR,
  volume    = {159},
  pages     = {111088},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240898},
}

@article{bb246013,
  author    = {Jin, M. and Hu, W. B. and Zhu, L. and Wang, X. and Hong, R. C.},
  title     = {Based on Spatial and Temporal Implicit Semantic Relational Inference for Cross-Modal Retrieval},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {11},
  pages     = {11286--11298},
  year      = {2024},
  month     = nov,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240899},
}

@article{bb246014,
  author    = {Croitoru, I. and Bogolin, S. V. and Leordeanu, M. and Jin, H. L. and Zisserman, A. and Liu, Y. and Albanie, S.},
  title     = {{TeachText}: {CrossModal} text-video retrieval through generalized distillation},
  journal   = AI,
  volume    = {338},
  pages     = {104235},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240900},
}

@inproceedings{bb246015,
  author    = {Croitoru, I. and Bogolin, S. V. and Leordeanu, M. and Jin, H. L. and Zisserman, A. and Albanie, S. and Liu, Y.},
  title     = {{TeachText}: {CrossModal} Generalized Distillation for Text-Video Retrieval},
  booktitle = ICCV21,
  pages     = {11563--11573},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240901},
}

@article{bb246016,
  author    = {Wang, T. and Li, F. L. and Zhu, L. and Li, J. J. and Zhang, Z. and Shen, H. T.},
  title     = {Cross-Modal Retrieval: A Systematic Review of Methods and Future Directions},
  journal   = PIEEE,
  volume    = {112},
  number    = {11},
  pages     = {1716--1754},
  year      = {2024},
  month     = nov,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240902},
}

@article{bb246017,
  author    = {Luo, J. Y. and Zhao, Y. S. and Luo, X. and Xiao, Z. P. and Ju, W. and Shen, L. and Tao, D. C. and Zhang, M.},
  title     = {Cross-Domain Diffusion With Progressive Alignment for Efficient Adaptive Retrieval},
  journal   = IP,
  volume    = {34},
  pages     = {1820--1834},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240903},
}

@article{bb246018,
  author    = {Zhang, H. W. and Yang, Y. and Qi, F. and Qian, S. S. and Xu, C. S.},
  title     = {Active Supervised Cross-Modal Retrieval},
  journal   = PAMI,
  volume    = {47},
  number    = {6},
  pages     = {5112--5126},
  year      = {2025},
  month     = jun,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240904},
}

@article{bb246019,
  author    = {Dang, Z. H. and Luo, M. and Wang, J. H. and Jia, C. Y. and Han, H. C. and Wan, H. and Dai, G. and Chang, X. J. and Wang, J. D.},
  title     = {Disentangled Noisy Correspondence Learning},
  journal   = IP,
  volume    = {34},
  pages     = {2602--2615},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240905},
}

@article{bb246020,
  author    = {Si, L. and Guo, C. and Li, Z. and Yang, Y.},
  title     = {A unified framework of data augmentation using large language models for text-based cross-modal retrieval},
  journal   = PR,
  volume    = {167},
  pages     = {111755},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240906},
}

@article{bb246021,
  author    = {Jin, M. and Hu, W. B. and Hong, R. C. and Zhu, L.},
  title     = {Revealing Security Flaws in Cross-Modal Retrieval Models Through Video Poisoning},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {6},
  pages     = {6184--6194},
  year      = {2025},
  month     = jun,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240907},
}

@article{bb246022,
  author    = {Li, Y. and Deng, S. and Guan, C. M. and Gao, J.},
  title     = {Complementary two-branch {Transformer} for multi-label image retrieval},
  journal   = PR,
  volume    = {168},
  pages     = {111806},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240908},
}

@article{bb246023,
  author    = {Zheng, C. Y. and Li, X. and Liang, X. Y. and Huang, L. and Du, S. and Nie, J. and Dong, J. Y.},
  title     = {Cross-Modal Progressive Perspective Matching Network for Remote Sensing Image-Text Retrieval},
  journal   = MultMed,
  volume    = {27},
  pages     = {3966--3978},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240909},
}

@article{bb246024,
  author    = {Pu, R. and Qin, Y. and Peng, D. Z. and Song, X. M. and Zheng, H. M.},
  title     = {Deep Reversible Consistency Learning for Cross-Modal Retrieval},
  journal   = MultMed,
  volume    = {27},
  pages     = {4095--4106},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240910},
}

@article{bb246025,
  author    = {Xu, Y. and Feng, Y. F. and Zhong, X. and Gao, Y. and Wu, Z. Z.},
  title     = {Hypergraph-Based Remaining Prototype Alignment for Open-Set Cross-Domain Image Retrieval},
  journal   = MultMed,
  volume    = {27},
  pages     = {4627--4642},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240911},
}

@article{bb246026,
  author    = {Jiang, C. and Wang, Y. P. and Xiong, B. P.},
  title     = {Dual similarity enhanced hybrid orthogonal fusion for multimodal named entity recognition},
  journal   = PR,
  volume    = {169},
  pages     = {111940},
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240912},
}

@article{bb246027,
  author    = {Wang, Z. and Zhu, X. Z. and Yang, X. and Luo, G. and Li, H. and Tian, C. Y. and Dou, W. H. and Ge, J. Q. and Lu, L. W. and Qiao, Y. and Dai, J. F.},
  title     = {Parameter-Inverted Image Pyramid Networks for Visual Perception and Multimodal Understanding},
  journal   = PAMI,
  volume    = {47},
  number    = {11},
  pages     = {10142--10159},
  year      = {2025},
  month     = nov,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240913},
}

@article{bb246028,
  author    = {Jin, M. and Zhu, L. and Hong, R. C.},
  title     = {{BiSeR-LMA}: A Bidirectional Semantic Reasoning and Large Model Enhancement Approach for Text-Video Cross-Modal Retrieval},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {11},
  pages     = {11655--11666},
  year      = {2025},
  month     = nov,
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240914},
}

@inproceedings{bb246029,
  author    = {Gizdov, A. and Ullman, S. and Harari, D.},
  title     = {Seeing more with less: human-like representations in vision models},
  booktitle = CVPR25,
  pages     = {4408--4417},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240915},
}

@inproceedings{bb246030,
  author    = {Liu, Y. K. and Zhang, Y. J. and Cai, J. Y. and Jiang, X. L. and Hu, Y. and Yao, J. C. and Wang, Y. F. and Xie, W.},
  title     = {{LamRA}: Large Multimodal Model as Your Advanced Retrieval Assistant},
  booktitle = CVPR25,
  pages     = {4015--4025},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240916},
}

@inproceedings{bb246031,
  author    = {Chen, W. and Li, L. and Yang, Y. Q. and Wen, B. and Yang, F. and Gao, T. T. and Wu, Y. and Chen, L.},
  title     = {{CoMM}: A Coherent Interleaved Image-Text Dataset for Multimodal Understanding and Generation},
  booktitle = CVPR25,
  pages     = {8073--8082},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240917},
}

@inproceedings{bb246032,
  author    = {Kim, S. and Zhu, X. L. and Lin, X. F. and Bastan, M. and Gray, D. and Kwak, S.},
  title     = {{GENIUS}: A Generative Framework for Universal Multimodal Search},
  booktitle = CVPR25,
  pages     = {19659--19669},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240918},
}

@inproceedings{bb246033,
  author    = {Duan, S. Y. and Sun, Y. and Peng, D. Z. and Liu, Z. and Song, X. M. and Hu, P.},
  title     = {Fuzzy Multimodal Learning for Trusted Cross-modal Retrieval},
  booktitle = CVPR25,
  pages     = {20747--20756},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240919},
}

@inproceedings{bb246034,
  author    = {Hur, C. and Hong, J. H. and Lee, D. H. and Kang, D. and Myeong, S. and Park, S. H. and Park, H.},
  title     = {Narrating the Video: Boosting Text-Video Retrieval via Comprehensive Utilization of Frame-Level Captions},
  booktitle = CVPR25,
  pages     = {24077--24086},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240920},
}

@inproceedings{bb246035,
  author    = {Zha, Q. X. and Liu, X. and Peng, S. J. and Cheung, Y. M. and Xu, X. and Wang, N. N.},
  title     = {{ReCon}: Enhancing True Correspondence Discrimination through Relation Consistency for Robust Noisy Correspondence Learning},
  booktitle = CVPR25,
  pages     = {29680--29689},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240921},
}

@inproceedings{bb246036,
  author    = {Cui, Y. H. and Zu, X. X. and Zhang, W. H. and Zhao, Z. Z. and Gao, J. Y.},
  title     = {Incorporating Dense Knowledge Alignment into Unified Multimodal Representation Models},
  booktitle = CVPR25,
  pages     = {29733--29743},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240922},
}

@inproceedings{bb246037,
  author    = {Lin, Z. R. and Wang, Z. and Qian, T. W. and Mu, P. and Chan, S. and Bai, C.},
  title     = {{NeighborRetr}: Balancing Hub Centrality in Cross-Modal Retrieval},
  booktitle = CVPR25,
  pages     = {9263--9273},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240923},
}

@inproceedings{bb246038,
  author    = {Zhang, X. and Zhang, Y. Z. and Xie, W. and Li, M. X. and Dai, Z. Q. and Long, D. K. and Xie, P. J. and Zhang, M. and Li, W. J. and Zhang, M.},
  title     = {Bridging Modalities: Improving Universal Multimodal Retrieval by Multimodal Large Language Models},
  booktitle = CVPR25,
  pages     = {9274--9285},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240924},
}

@inproceedings{bb246039,
  author    = {Zhao, S. and Xia, Q. M. and Guo, X. and Zou, P. and Zheng, M. and Wu, H. and Wen, C. and Wang, C.},
  title     = {{SP3D}: Boosting Sparsely-Supervised {3D} Object Detection via Accurate Cross-Modal Semantic Prompts},
  booktitle = CVPR25,
  pages     = {29374--29384},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240925},
}

@inproceedings{bb246040,
  author    = {Wei, C. and Chen, Y. and Chen, H. and Hu, H. X. and Zhang, G. and Fu, J. and Ritter, A. and Chen, W.},
  title     = {{UNIIR}: Training and Benchmarking Universal Multimodal Information Retrievers},
  booktitle = ECCV24,
  pages     = {LXXXVII: 387--404},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240926},
}

@inproceedings{bb246041,
  author    = {Chen, S. J. and Chen, X. and Zhang, C. and Li, M. S. and Yu, G. and Fei, H. and Zhu, H. Y. and Fan, J. Y. and Chen, T.},
  title     = {{LL3DA}: Visual Interactive Instruction Tuning for {Omni-3D} Understanding, Reasoning, and Planning},
  booktitle = CVPR24,
  pages     = {26418--26428},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240927},
}

@inproceedings{bb246042,
  author    = {Xu, H. R. and Peng, P. X. and Tan, G. and Li, Y. and Xu, X. H. and Tian, Y. H.},
  title     = {{DMR}: Decomposed Multi-Modality Representations for Frames and Events Fusion in Visual Reinforcement Learning},
  booktitle = CVPR24,
  pages     = {26498--26508},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240928},
}

@inproceedings{bb246043,
  author    = {You, C. Y. and Mint, Y. F. and Dai, W. C. and Sekhon, J. S. and Staib, L. and Duncan, J. S.},
  title     = {Calibrating Multi-modal Representations: A Pursuit of Group Robustness without Annotations},
  booktitle = CVPR24,
  pages     = {26140--26150},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240929},
}

@inproceedings{bb246044,
  author    = {Zhang, Z. H. and Cao, S. C. and Wang, Y. X.},
  title     = {{TAMM}: {TriAdapter} Multi-Modal Learning for {3D} Shape Understanding},
  booktitle = CVPR24,
  pages     = {21413--21423},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240930},
}

@inproceedings{bb246045,
  author    = {Zhao, Z. and Chen, M. X. and Dai, T. J. and Yao, J. C. and Han, B. and Zhang, Y. and Wang, Y. F.},
  title     = {Mitigating Noisy Correspondence by Geometrical Structure Consistency Learning},
  booktitle = CVPR24,
  pages     = {27371--27380},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240931},
}

@inproceedings{bb246046,
  author    = {Tuzcuoglu, O. and Koksal, A. and Sofu, B. and Kalkan, S. and Alatan, A. A.},
  title     = {{XoFTR}: Cross-modal Feature Matching Transformer},
  booktitle = IMW24,
  pages     = {4275--4286},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240932},
}

@inproceedings{bb246047,
  author    = {Wu, J. L. and Hu, X. and Wang, Y. Q. and Pang, B. and Soricut, R.},
  title     = {{Omni-SMoLA}: Boosting Generalist Multimodal Models with Soft Mixture of Low-Rank Experts},
  booktitle = CVPR24,
  pages     = {14205--14215},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240933},
}

@inproceedings{bb246048,
  author    = {Sun, Q. and Cui, Y. F. and Zhang, X. S. and Zhang, F. and Yu, Q. and Wang, Y. Z. and Rao, Y. M. and Liu, J. J. and Huang, T. J. and Wang, X. L.},
  title     = {Generative Multimodal Models are In-Context Learners},
  booktitle = CVPR24,
  pages     = {14398--14409},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240934},
}

@inproceedings{bb246049,
  author    = {Zhao, S. T. and Li, Z. W. and Lu, Y. D. and Yuille, A. L. and Wang, Y.},
  title     = {{Causal-CoG}: A Causal-Effect Look at Context Generation for Boosting Multi-Modal Language Models},
  booktitle = CVPR24,
  pages     = {13342--13351},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240935},
}

@inproceedings{bb246050,
  author    = {Li, Z. and Yang, B. and Liu, Q. and Ma, Z. Y. and Zhang, S. and Yang, J. X. and Sun, Y. and Liu, Y. L. and Bai, X.},
  title     = {Monkey: Image Resolution and Text Label are Important Things for Large Multi-Modal Models},
  booktitle = CVPR24,
  pages     = {26753--26763},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240936},
}

@inproceedings{bb246051,
  author    = {Han, H. C. and Zheng, Q. H. and Dai, G. and Luo, M. and Wang, J. D.},
  title     = {Learning to Rematch Mismatched Pairs for Robust Cross-Modal Retrieval},
  booktitle = CVPR24,
  pages     = {26669--26678},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240937},
}

@inproceedings{bb246052,
  author    = {Yuan, J. L. and Yu, Y. and Mittal, G. and Hall, M. and Sajeev, S. and Chen, M.},
  title     = {Rethinking Multimodal Content Moderation from an Asymmetric Angle with Mixed-modality},
  booktitle = WACV24,
  pages     = {8517--8527},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240938},
}

@inproceedings{bb246053,
  author    = {Shoshan, A. and Linial, O. and Bhonker, N. and Hirsch, E. and Zamir, L. and Kviatkovsky, I. and Medioni, G.},
  title     = {Asymmetric Image Retrieval with Cross Model Compatible Ensembles},
  booktitle = WACV24,
  pages     = {1--11},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240939},
}

@inproceedings{bb246054,
  author    = {Honig, R. and Ackermann, J. and Chi, M. Y.},
  title     = {Bi-Encoder Cascades for Efficient Image Search},
  booktitle = REDLCV23,
  pages     = {1350--1355},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240940},
}

@inproceedings{bb246055,
  author    = {Cao, Y. C. and Tang, Q. F. and Yang, F. and Su, X. and You, S. and Lu, X. B. and Xu, C.},
  title     = {Re-mine, Learn and Reason: Exploring the Cross-modal Semantic Correlations for Language-guided {HOI} detection},
  booktitle = ICCV23,
  pages     = {23435--23446},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240941},
}

@inproceedings{bb246056,
  author    = {Trinci, T. and Bianconcini, T. and Sarti, L. and Taccari, L. and Sambo, F.},
  title     = {Cross-model temporal cooperation via saliency maps for efficient frame classification},
  booktitle = REDLCV23,
  pages     = {1156--1160},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240942},
}

@inproceedings{bb246057,
  author    = {Long, T. and van Noord, N.},
  title     = {Cross-modal Scalable Hyperbolic Hierarchical Clustering},
  booktitle = ICCV23,
  pages     = {16609--16618},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240943},
}

@inproceedings{bb246058,
  author    = {Li, H. and Li, X. Y. and Hu, P. and Lei, Y. and Li, C. X. and Zhou, Y.},
  title     = {Boosting Multi-modal Model Performance with Adaptive Gradient Modulation},
  booktitle = ICCV23,
  pages     = {22157--22167},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240944},
}

@inproceedings{bb246059,
  author    = {Zhao, L. J. and Wang, Y. and Kato, J.},
  title     = {Using Classifier Discrepancy for Cross-Domain Image Retrieval},
  booktitle = ICIP23,
  pages     = {3314--3318},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240945},
}

@inproceedings{bb246060,
  author    = {Era, Y. and Togo, R. and Maeda, K. and Ogawa, T. and Haseyama, M.},
  title     = {Video-Music Retrieval with Fine-Grained Cross-Modal Alignment},
  booktitle = ICIP23,
  pages     = {2005--2009},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240946},
}

@inproceedings{bb246061,
  author    = {Yu, Y. and Chung, J. and Yun, H. and Hessel, J. and Park, J. S. and Lu, X. M. and Zellers, R. and Ammanabrolu, P. and Le Bras, R. and Kim, G. and Choi, Y.},
  title     = {Fusing Pre-Trained Language Models with Multimodal Prompts through Reinforcement Learning},
  booktitle = CVPR23,
  pages     = {10845--10856},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240947},
}

@inproceedings{bb246062,
  author    = {Huang, S. and Gong, B. and Pan, Y. L. and Jiang, J. W. and Lv, Y. L. and Li, Y. Y. and Wang, D. L.},
  title     = {{VoP}: Text-Video Co-Operative Prompt Tuning for Cross-Modal Retrieval},
  booktitle = CVPR23,
  pages     = {6565--6574},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240948},
}

@inproceedings{bb246063,
  author    = {Chen, M. X. and Xing, L. Y. and Wang, Y. and Zhang, X.},
  title     = {Enhanced Multimodal Representation Learning with Cross-Modal {KD}},
  booktitle = CVPR23,
  pages     = {11766--11775},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240949},
}

@inproceedings{bb246064,
  author    = {Yang, S. and Xu, Z. and Wang, K. and You, Y. and Yao, H. X. and Liu, T. L. and Xu, M.},
  title     = {{BiCro}: Noisy Correspondence Rectification for Multi-modality Data via Bi-directional Cross-modal Similarity Consistency},
  booktitle = CVPR23,
  pages     = {19883--19892},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240950},
}

@inproceedings{bb246065,
  author    = {Kim, D. and Kim, N. and Kwak, S.},
  title     = {Improving Cross-Modal Retrieval with Set of Diverse Embeddings},
  booktitle = CVPR23,
  pages     = {23422--23431},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240951},
}

@inproceedings{bb246066,
  author    = {Kim, J. M. and Koepke, A. S. and Schmid, C. and Akata, Z.},
  title     = {Exposing and Mitigating Spurious Correlations for Cross-Modal Retrieval},
  booktitle = MULA23,
  pages     = {2585--2595},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240952},
}

@inproceedings{bb246067,
  author    = {Tran, V. and Balasubramanian, N. and Hoai, M.},
  title     = {From Within to Between: Knowledge Distillation for Cross Modality Retrieval},
  booktitle = ACCV22,
  pages     = {IV:605--622},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240953},
}

@inproceedings{bb246068,
  author    = {Fragomeni, A. and Wray, M. and Damen, D.},
  title     = {Contra: (con)text (tra)nsformer for Cross-modal Video Retrieval},
  booktitle = ACCV22,
  pages     = {IV:451--468},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240954},
}

@inproceedings{bb246069,
  author    = {Zheng, Y. C. and Zhang, X. W.},
  title     = {Heterogeneous Interactive Learning Network for Unsupervised Cross-modal Retrieval},
  booktitle = ACCV22,
  pages     = {IV:692--707},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240955},
}

@inproceedings{bb246070,
  author    = {Arnold, R. and Sauter, L. and Schuldt, H.},
  title     = {Free-Form Multi-Modal Multimedia Retrieval ({4MR})},
  booktitle = MMMod23,
  pages     = {I: 678--683},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240956},
}

@inproceedings{bb246071,
  author    = {Xuan, H. and Chen, X. S.},
  title     = {Dissecting Deep Metric Learning Losses for Image-Text Retrieval},
  booktitle = WACV23,
  pages     = {2163--2172},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240957},
}

@inproceedings{bb246072,
  author    = {Ge, X. and Chen, F. and Xu, S. and Tao, F. and Jose, J. M.},
  title     = {Cross-modal Semantic Enhanced Interaction for Image-Sentence Retrieval},
  booktitle = WACV23,
  pages     = {1022--1031},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240958},
}

@inproceedings{bb246073,
  author    = {Jawade, B. and Mohan, D. D. and Ali, N. M. and Setlur, S. and Govindaraju, V.},
  title     = {{NAPReg}: Nouns As Proxies Regularization for Semantically Aware Cross-Modal Embeddings},
  booktitle = WACV23,
  pages     = {1135--1144},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240959},
}

@inproceedings{bb246074,
  author    = {Nakatsuka, T. and Hamasaki, M. and Goto, M.},
  title     = {Content-Based Music-Image Retrieval Using Self- and Cross-Modal Feature Embedding Memory},
  booktitle = WACV23,
  pages     = {2173--2183},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240960},
}

@inproceedings{bb246075,
  author    = {Chen, Y. X. and Yuan, J. B. and Zhao, L. and Chen, T. L. and Luo, R. and Davis, L. and Metaxas, D. N.},
  title     = {More Than Just Attention: Improving Cross-Modal Attentions with Contrastive Constraints for Image-Text Matching},
  booktitle = WACV23,
  pages     = {4421--4429},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240961},
}

@inproceedings{bb246076,
  author    = {Agarwal, A. and Karanam, S. and Srinivasan, B. V. and Banerjee, B.},
  title     = {Contrastive Learning of Semantic Concepts for Open-set Cross-domain Retrieval},
  booktitle = WACV23,
  pages     = {4104--4113},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240962},
}

@inproceedings{bb246077,
  author    = {Yang, Y. and Shen, H. and Yang, M.},
  title     = {Relation-Guided Network for Image-Text Retrieval},
  booktitle = ICIP22,
  pages     = {1856--1860},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240963},
}

@inproceedings{bb246078,
  author    = {Sumbul, G. and Muller, M. and Demir, B.},
  title     = {A Novel Self-Supervised Cross-Modal Image Retrieval Method in Remote Sensing},
  booktitle = ICIP22,
  pages     = {2426--2430},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240964},
}

@inproceedings{bb246079,
  author    = {Wang, H. and Zhang, J. P. and Chen, Y. H. and Ma, C. B. and Avery, J. and Hull, L. and Carneiro, G.},
  title     = {Uncertainty-Aware Multi-modal Learning via Cross-Modal Random Network Prediction},
  booktitle = ECCV22,
  pages     = {XXXVII:200--217},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240965},
}

@inproceedings{bb246080,
  author    = {de Almeida, L.B. and Valem, L.P. and Pedronette, D.C.G.},
  title     = {Graph Convolutional Networks and Manifold Ranking for Multimodal Video Retrieval},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {2811-2815},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240966},
}

@inproceedings{bb246081,
  author    = {Liang, T. and Lin, G.S. and Wan, M.Y. and Li, T.R. and Ma, G.J. and Lv, F.M.},
  title     = {Expanding Large Pre-trained Unimodal Models with Multimodal Information Injection for Image-Text Multimodal Classification},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {15471-15480},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240967},
}

@inproceedings{bb246082,
  author    = {Yang, J.H. and Chen, X.Y. and Jiang, M. and Chen, S. and Wang, L. and Zhao, Q.},
  title     = {VisualHow: Multimodal Problem Solving},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {15606-15616},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240968},
}

@inproceedings{bb246083,
  author    = {Girdhar, R. and Singh, M. and Ravi, N. and van der Maaten, L. and Joulin, A. and Misra, I.},
  title     = {Omnivore: A Single Model for Many Visual Modalities},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {16081-16091},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240969},
}

@inproceedings{bb246084,
  author    = {Ma, M.M. and Ren, J. and Zhao, L. and Testuggine, D. and Peng, X.},
  title     = {Are Multimodal Transformers Robust to Missing Modality?},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {18156-18165},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240970},
}

@inproceedings{bb246085,
  author    = {Han, Z.B. and Yang, F. and Huang, J.Z. and Zhang, C.Q. and Yao, J.H.},
  title     = {Multimodal Dynamics: Dynamical Fusion for Trustworthy Multimodal Classification},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {20675-20685},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240971},
}

@inproceedings{bb246086,
  author    = {Gupta, V. and Mittal, T. and Mathur, P. and Mishra, V. and Maheshwari, M. and Bera, A. and Mukherjee, D. and Manocha, D.},
  title     = {3MASSIV: Multilingual, Multimodal and Multi-Aspect dataset of Social Media Short Videos},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {21032-21043},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240972},
}

@inproceedings{bb246087,
  author    = {Bogolin, S.V. and Croitoru, I. and Jin, H.L. and Liu, Y. and Albanie, S.},
  title     = {Cross Modal Retrieval with Querybank Normalisation},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {5184-5195},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240973},
}

@inproceedings{bb246088,
  author    = {Yang, E. and Yao, D.R. and Liu, T.L. and Deng, C.},
  title     = {Mutual Quantization for Cross-Modal Search with Noisy Labels},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {7541-7550},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240974},
}

@inproceedings{bb246089,
  author    = {Neculai, A. and Chen, Y.B. and Akata, Z.},
  title     = {Probabilistic Compositional Embeddings for Multimodal Image Retrieval},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4546-4556},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240975},
}

@inproceedings{bb246090,
  author    = {Couairon, G. and Douze, M. and Cord, M. and Schwenk, H.},
  title     = {Embedding Arithmetic of Multimodal Queries for Image Retrieval},
  booktitle = ODRUM22,
  year      = {2022},
  pages     = {4946-4954},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240976},
}

@inproceedings{bb246091,
  author    = {Li, Y.H. and Yu, J. and Cai, Z.P. and Pan, Y.},
  title     = {Cross-modal Target Retrieval for Tracking by Natural Language},
  booktitle = ODRUM22,
  year      = {2022},
  pages     = {4927-4936},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240977},
}

@inproceedings{bb246092,
  author    = {Thomas, C. and Kovashka, A.},
  title     = {Emphasizing Complementary Samples for Non-literal Cross-modal Retrieval},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4631-4640},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240978},
}

@inproceedings{bb246093,
  author    = {Xu, B. and Xiong, Y.H. and Zhang, R. and Feng, Y. and Wu, H.F.},
  title     = {Natural Language-Based Vehicle Retrieval with Explicit Cross-Modal Representation Learning},
  booktitle = AICity22,
  year      = {2022},
  pages     = {3141-3148},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240979},
}

@inproceedings{bb246094,
  author    = {Shvetsova, N. and Chen, B. and Rouditchenko, A. and Thomas, S. and Kingsbury, B. and Feris, R.S. and Harwath, D. and Glass, J. and Kuehne, H.},
  title     = {Everything at Once - Multi-modal Fusion Transformer for Video Retrieval},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {19988-19997},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240980},
}

@inproceedings{bb246095,
  author    = {Andonian, A. and Chen, S.X. and Hamid, R.},
  title     = {Robust Cross-Modal Representation Learning with Progressive Self-Distillation},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {16409-16420},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240981},
}

@inproceedings{bb246096,
  author    = {Lu, H.Y. and Fei, N. and Huo, Y.Q. and Gao, Y.Z. and Lu, Z.W. and Wen, J.R.},
  title     = {COTS: Collaborative Two-Stream Vision-Language Pre-Training Model for Cross-Modal Retrieval},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {15671-15680},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240982},
}

@inproceedings{bb246097,
  author    = {Abdelnabi, S. and Hasan, R. and Fritz, M.},
  title     = {Open-Domain, Content-based, Multi-modal Fact-checking of Out-of-Context Images via Online Resources},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {14920-14929},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240983},
}

@inproceedings{bb246098,
  author    = {Wang, Y. and Zhang, T. and Zhang, X. and Cui, Z. and Huang, Y. and Shen, P.C. and Li, S.X. and Yang, J.},
  title     = {Wasserstein Coupled Graph Learning for Cross-Modal Retrieval},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1793-1802},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT240984},
}

% bb246099: the title's ampersand is written as \& (the export had left the HTML-entity fragment "amp;").
@inproceedings{bb246099,
        AUTHOR = "Cai, G.Y. and Zhang, J. and Jiang, X.Y. and Gong, Y.F. and He, L.H. and Yu, F. and Peng, P. and Guo, X.W. and Huang, F.Y. and Sun, X.",
        TITLE = "Ask\&Confirm: Active Detail Enriching for Cross-Modal Retrieval
with Partial Query",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1815-1824",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat830cm1.html#TT240985"}

Last update: Nov 26, 2025 at 20:24:09