@inproceedings{bb242000,
  author    = {Cheng, H.K. and Ishii, M. and Hayakawa, A. and Shibuya, T. and Schwing, A. and Mitsufuji, Y.},
  title     = {{MMAudio}: Taming Multimodal Joint Training for High-Quality Video-to-Audio Synthesis},
  booktitle = CVPR25,
  pages     = {28901-28911},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236904}
}

@inproceedings{bb242001,
  author    = {Lee, M.J. and Gong, D. and Cho, M.},
  title     = {Video Summarization with Large Language Models},
  booktitle = CVPR25,
  pages     = {18981-18991},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236905}
}

@inproceedings{bb242002,
  author    = {Pasca, R.G. and Gavryushin, A. and Hamza, M. and Kuo, Y.L. and Mo, K. and Van Gool, L.J. and Hilliges, O. and Wang, X.},
  title     = {Summarize the Past to Predict the Future: Natural Language Descriptions of Context Boost Multimodal Object Interaction Anticipation},
  booktitle = CVPR24,
  pages     = {18286-18296},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236906}
}

@inproceedings{bb242003,
  author    = {Qiu, J.L. and Zhu, J.C. and Han, W. and Kumar, A. and Mittal, K. and Jin, C. and Yang, Z.Y. and Li, L.J. and Wang, J.F. and Zhao, D. and Li, B. and Wang, L.J.},
  title     = {{MMSum}: A Dataset for Multimodal Summarization and Thumbnail Generation of Videos},
  booktitle = CVPR24,
  pages     = {21909-21921},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236907}
}

@inproceedings{bb242004,
  author    = {He, B. and Wang, J. and Qiu, J.L. and Bui, T. and Shrivastava, A. and Wang, Z.W.},
  title     = {Align and Attend: Multimodal Summarization with Dual Contrastive Losses},
  booktitle = CVPR23,
  pages     = {14867-14878},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236908}
}

@inproceedings{bb242005,
  author    = {Li, H.P. and Ke, Q.H. and Gong, M.M. and Drummond, T.},
  title     = {Progressive Video Summarization via Multimodal Self-supervised Learning},
  booktitle = WACV23,
  pages     = {5573-5582},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236909}
}

@inproceedings{bb242006,
  author    = {Elfeki, M. and Wang, L.Q. and Borji, A.},
  title     = {Multi-stream dynamic video Summarization},
  booktitle = WACV22,
  pages     = {185-195},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236910}
}

@inproceedings{bb242007,
  author    = {Panda, R. and Das, A. and Roy Chowdhury, A.K.},
  title     = {Embedded sparse coding for summarizing multi-view videos},
  booktitle = ICIP16,
  pages     = {191-195},
  year      = {2016},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236911}
}

@inproceedings{bb242008,
  author    = {Das, P. and Xu, C.L. and Doell, R.F. and Corso, J.J.},
  title     = {A Thousand Frames in Just a Few Words: Lingual Description of Videos through Latent Topics and Sparse Object Stitching},
  booktitle = CVPR13,
  pages     = {2634-2641},
  year      = {2013},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236912}
}

@inproceedings{bb242009,
  author    = {Dale, K. and Shechtman, E. and Avidan, S. and Pfister, H.},
  title     = {Multi-video browsing and summarization},
  booktitle = LSVSM12,
  pages     = {1-8},
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236913}
}

@inproceedings{bb242010,
  author    = {de Leo, C. and Manjunath, B.S.},
  title     = {Multicamera Video Summarization from Optimal Reconstruction},
  booktitle = VS10,
  pages     = {94-103},
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236914}
}

@inproceedings{bb242011,
  author    = {Bagga, A. and Hu, J.Y. and Zhong, J.L. and Ramesh, G.},
  title     = {Multi-source combined-media video tracking for summarization},
  booktitle = ICPR02,
  pages     = {II: 818-821},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/applicat825multidsum6.html#TT236915}
}

@article{bb242012,
  author    = {Chen, F. and de Vleeschouwer, C. and Cavallaro, A.},
  title     = {Resource Allocation for Personalized Video Summarization},
  journal   = MultMed,
  volume    = {16},
  number    = {2},
  pages     = {455-469},
  month     = {February},
  year      = {2014},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236916}
}

@article{bb242013,
  author    = {Zhang, L. and Jing, P. and Su, Y. and Zhang, C. and Shao, L.},
  title     = {{SnapVideo}: Personalized Video Generation for a Sightseeing Trip},
  journal   = Cyber,
  volume    = {47},
  number    = {11},
  pages     = {3866-3878},
  month     = {November},
  year      = {2017},
  annote    = {Last author corrected from Shaoz to Shao. TODO(review): confirm against the publisher record.},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236917}
}

@article{bb242014,
  author    = {Yin, Y. and Thapliya, R. and Zimmermann, R.},
  title     = {Encoded Semantic Tree for Automatic User Profiling Applied to Personalized Video Summarization},
  journal   = CirSysVideo,
  volume    = {28},
  number    = {1},
  pages     = {181-192},
  month     = {January},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236918}
}

@article{bb242015,
  author    = {Qayyum, H. and Majid, M. and ul Haq, E. and Anwar, S.M.},
  title     = {Generation of personalized video summaries by detecting viewer's emotion using electroencephalography},
  journal   = JVCIR,
  volume    = {65},
  pages     = {102672},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236919}
}

@article{bb242016,
  author    = {Narwal, P. and Duhan, N. and Kumar Bhatia, K.},
  title     = {A comprehensive survey and mathematical insights towards video summarization},
  journal   = JVCIR,
  volume    = {89},
  pages     = {103670},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236920}
}

@article{bb242017,
  author    = {Akhare, R. and Shinde, S.K.},
  title     = {Personalised video summarisation using video-text multi-modal fusion},
  journal   = IJCVR,
  volume    = {15},
  number    = {3},
  pages     = {379-394},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236921}
}

@article{bb242018,
  author    = {Chen, J.W. and Wang, J. and Wang, X.C. and Wang, X. and Feng, Z. and Liu, R. and Song, M.L.},
  title     = {{CoEvo-Net}: Coevolution Network for Video Highlight Detection},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {6},
  pages     = {3788-3797},
  month     = {June},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236922}
}

@article{bb242019,
  author    = {Ma, C.X. and Lyu, L. and Lu, G.L. and Lyu, C.},
  title     = {Adaptive Multiview Graph Difference Analysis for Video Summarization},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {12},
  pages     = {8795-8808},
  month     = {December},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236923}
}

@article{bb242020,
  author    = {Zhu, Y. and Zhao, W.T. and Hua, R. and Wu, X.X.},
  title     = {Topic-aware video summarization using multimodal transformer},
  journal   = PR,
  volume    = {140},
  pages     = {109578},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236924}
}

@inproceedings{bb242021,
  author    = {Udhayanan, P. and Bv, S. and Laturia, P. and Chauhan, D. and Khandelwal, D. and Petrangeli, S. and Srinivasan, B.V.},
  title     = {{Recipe2Video}: Synthesizing Personalized Videos from Recipe Texts},
  booktitle = WACV23,
  pages     = {2267-2276},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236925}
}

@inproceedings{bb242022,
  author    = {Saquil, Y. and Chen, D. and He, Y. and Li, C. and Yang, Y.L.},
  title     = {Multiple Pairwise Ranking Networks for Personalized Video Summarization},
  booktitle = ICCV21,
  pages     = {1698-1707},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236926}
}

@inproceedings{bb242023,
  author    = {Ramos, W.L.S. and Silva, M.M. and Araujo, E.R. and Neves, A.C. and Nascimento, E.R.},
  title     = {Personalizing Fast-Forward Videos Based on Visual and Textual Features from Social Network},
  booktitle = WACV20,
  pages     = {3260-3269},
  year      = {2020},
  annote    = {Fifth author was a duplicate of the fourth (Neves, A.C.); replaced per the WACV 2020 paper's author list. TODO(review): confirm against the proceedings.},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236927}
}

@inproceedings{bb242024,
  author    = {Teraguchi, M. and Masumitsu, K. and Echigo, T. and Sekiguchi, S. and Etoh, M.},
  title     = {Rapid generation of event-based indexes for personalized video digests},
  booktitle = ICPR02,
  pages     = {II: 1041-1044},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236928}
}

@inproceedings{bb242025,
  author    = {Oh, T.H. and Joo, K. and Joshi, N. and Wang, B.Y. and Kweon, I.S. and Kang, S.B.},
  title     = {Personalized Cinemagraphs Using Semantic Understanding and Collaborative Learning},
  booktitle = ICCV17,
  pages     = {5170-5179},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236929}
}

@inproceedings{bb242026,
  author    = {Shafeian, H. and Bhanu, B.},
  title     = {Integrated personalized video summarization and retrieval},
  booktitle = ICPR12,
  pages     = {996-999},
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236930}
}

@inproceedings{bb242027,
  author    = {Han, B.H. and Hamm, J. and Sim, J.},
  title     = {Personalized video summarization with human in the loop},
  booktitle = WACV11,
  pages     = {51-57},
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236931}
}

@inproceedings{bb242028,
  author    = {Miyamori, H.},
  title     = {Automatic Generation of Personalized Digest Based on Context Flow and Distinctive Events},
  booktitle = CIVR04,
  pages     = {179-188},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236932}
}

@inproceedings{bb242029,
  author    = {Miyamori, H.},
  title     = {Automatic Generation of Personalized Video Summary Based on Context Flow and Distinctive Events},
  booktitle = VLBV03,
  pages     = {111-121},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236933}
}

@inproceedings{bb242030,
  author    = {Jaimes, A. and Echigo, T. and Teraguchi, M. and Satoh, F.},
  title     = {Learning personalized video highlights from detailed {MPEG-7} metadata},
  booktitle = ICIP02,
  pages     = {I: 133-136},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT236934}
}

@article{bb242031,
  author    = {Qiu, Z.F. and Yao, T. and Mei, T.},
  title     = {Learning Deep Spatio-Temporal Dependence for Semantic Video Segmentation},
  journal   = MultMed,
  volume    = {20},
  number    = {4},
  pages     = {939-949},
  month     = {April},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236935}
}

@inproceedings{bb242032,
  author    = {Qiu, Z.F. and Yao, T. and Mei, T.},
  title     = {Learning Spatio-Temporal Representation with {Pseudo-3D} Residual Networks},
  booktitle = ICCV17,
  pages     = {5534-5542},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236936}
}

@inproceedings{bb242033,
  author    = {Qiu, Z.F. and Yao, T. and Ngo, C.W. and Tian, X.M. and Mei, T.},
  title     = {Learning Spatio-Temporal Representation With Local and Global Diffusion},
  booktitle = CVPR19,
  pages     = {12048-12057},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236937}
}

@inproceedings{bb242034,
  author    = {Yao, T. and Pan, Y. and Li, Y. and Qiu, Z. and Mei, T.},
  title     = {Boosting Image Captioning with Attributes},
  booktitle = ICCV17,
  pages     = {4904-4912},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236938}
}

@inproceedings{bb242035,
  author    = {Pan, Y. and Yao, T. and Li, Y. and Mei, T.},
  title     = {Video Captioning with Transferred Semantic Attributes},
  booktitle = CVPR17,
  pages     = {984-992},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236939}
}

@article{bb242036,
  author    = {Zhao, B. and Li, X. and Lu, X.},
  title     = {{CAM-RNN}: Co-Attention Model Based {RNN} for Video Captioning},
  journal   = IP,
  volume    = {28},
  number    = {11},
  pages     = {5552-5565},
  month     = {November},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236940}
}

@article{bb242037,
  author    = {Yan, C. and Tu, Y. and Wang, X. and Zhang, Y. and Hao, X. and Zhang, Y. and Dai, Q.},
  title     = {{STAT}: Spatial-Temporal Attention Mechanism for Video Captioning},
  journal   = MultMed,
  volume    = {22},
  number    = {1},
  pages     = {229-241},
  month     = {January},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236941}
}

@article{bb242038,
  author    = {Dai, Q. and Zhang, Y. and Hao, X. and Zhang, Y. and Wang, X. and Tu, Y. and Yan, C.},
  title     = {{STAT}: Spatial-Temporal Attention Mechanism for Video Captioning},
  journal   = MultMed,
  volume    = {22},
  number    = {3},
  pages     = {830},
  month     = {March},
  year      = {2020},
  annote    = {Single-page item that shares its title and BIBSOURCE anchor with bb242037; probably the published correction to that article. TODO(review): confirm title and author order against the journal.},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236941}
}

@article{bb242039,
  author    = {Aafaq, N. and Mian, A. and Liu, W. and Gilani, S.Z. and Shah, M.},
  title     = {Video Description: A Survey of Methods, Datasets, and Evaluation Metrics},
  journal   = Surveys,
  volume    = {52},
  number    = {6},
  pages     = {115:1-115:37},
  month     = {October},
  year      = {2019},
  annote    = {PAGES was the placeholder xx-yy; replaced with the article-number form (Article 115, 37 pages). TODO(review): confirm against the publisher record.},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236942}
}

@article{bb242040,
  author    = {Zhang, Z. and Xu, D. and Ouyang, W. and Tan, C.},
  title     = {Show, Tell and Summarize: Dense Video Captioning Using Visual Cue Aided Sentence Summarization},
  journal   = CirSysVideo,
  volume    = {30},
  number    = {9},
  pages     = {3130-3139},
  month     = {September},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236943}
}

@article{bb242041,
  author    = {Zhang, W. and Wang, B.R. and Ma, L. and Liu, W.},
  title     = {Reconstruct and Represent Video Contents for Captioning via Reinforcement Learning},
  journal   = PAMI,
  volume    = {42},
  number    = {12},
  pages     = {3088-3101},
  month     = {December},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236944}
}

@article{bb242042,
  author    = {Lee, S. and Kim, I.},
  title     = {{DVC-Net}: A deep neural network model for dense video captioning},
  journal   = IET-CV,
  volume    = {15},
  number    = {1},
  pages     = {12-23},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236945}
}

@article{bb242043,
  author    = {Qi, S.S. and Yang, L.X.},
  title     = {Video captioning via a symmetric bidirectional decoder},
  journal   = IET-CV,
  volume    = {15},
  number    = {4},
  pages     = {283-296},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236946}
}

@article{bb242044,
  author    = {Li, L.H. and Zhang, Y.D. and Tang, S. and Xie, L.X. and Li, X.Y. and Tian, Q.},
  title     = {Adaptive Spatial Location With Balanced Loss for Video Captioning},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {1},
  pages     = {17-30},
  month     = {January},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236947}
}

@article{bb242045,
  author    = {Zheng, Y. and Zhang, Y. and Feng, R. and Zhang, T. and Fan, W.G.},
  title     = {Stacked Multimodal Attention Network for Context-Aware Video Captioning},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {1},
  pages     = {31-42},
  month     = {January},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236948}
}

@article{bb242046,
  author    = {Li, L. and Gao, X.Y. and Deng, J.C. and Tu, Y.B. and Zha, Z.J. and Huang, Q.M.},
  title     = {Long Short-Term Relation Transformer With Global Gating for Video Captioning},
  journal   = IP,
  volume    = {31},
  pages     = {2726-2738},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236949}
}

@article{bb242047,
  author    = {Munusamy, H. and Sekhar, C.C.},
  title     = {Video captioning using Semantically Contextual Generative Adversarial Network},
  journal   = CVIU,
  volume    = {221},
  pages     = {103453},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236950}
}

@article{bb242048,
  author    = {Wang, H. and Lin, G.S. and Hoi, S.C.H. and Miao, C.Y.},
  title     = {Cross-Modal Graph With Meta Concepts for Video Captioning},
  journal   = IP,
  volume    = {31},
  pages     = {5150-5162},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236951}
}

@article{bb242049,
  author    = {Xiao, H. and Shi, J.L.},
  title     = {Diverse video captioning through latent variable expansion},
  journal   = PRL,
  volume    = {160},
  pages     = {19-25},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236952}
}

@article{bb242050,
  author    = {Prudviraj, J. and Reddy, M.I. and Vishnu, C. and Mohan, C.K.},
  title     = {{AAP-MIT}: Attentive Atrous Pyramid Network and Memory Incorporated Transformer for Multisentence Video Description},
  journal   = IP,
  volume    = {31},
  pages     = {5559-5569},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236953}
}

@article{bb242051,
  author    = {Xu, W. and Miao, Z.J. and Yu, J. and Tian, Y. and Wan, L. and Ji, Q.},
  title     = {Bridging Video and Text: A Two-Step Polishing Transformer for Video Captioning},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {9},
  pages     = {6293-6307},
  month     = {September},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236954}
}

@article{bb242052,
  author    = {Wu, B.F. and Niu, G.C. and Yu, J. and Xiao, X.Y. and Zhang, J. and Wu, H.},
  title     = {Towards Knowledge-Aware Video Captioning via Transitive Visual Relationship Detection},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {10},
  pages     = {6753-6765},
  month     = {October},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236955}
}

@article{bb242053,
  author    = {Yan, L.Q. and Ma, S.Q. and Wang, Q.F. and Chen, Y.J. and Zhang, X.Y. and Savakis, A. and Liu, D.F.},
  title     = {Video Captioning Using Global-Local Representation},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {10},
  pages     = {6642-6656},
  month     = {October},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236956}
}

@article{bb242054,
  author    = {Subramaniam, A. and Vaidya, J. and Ameen, M.A.M. and Nambiar, A. and Mittal, A.},
  title     = {Co-segmentation inspired attention module for video-based computer vision tasks},
  journal   = CVIU,
  volume    = {223},
  pages     = {103532},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236957}
}

@article{bb242055,
  author    = {Liu, F.L. and Wu, X. and You, C.Y. and Ge, S. and Zou, Y.X. and Sun, X.},
  title     = {Aligning Source Visual and Target Language Domains for Unpaired Video Captioning},
  journal   = PAMI,
  volume    = {44},
  number    = {12},
  pages     = {9255-9268},
  month     = {December},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236958}
}

@article{bb242056,
  author    = {Yuan, Y.T. and Ma, L. and Zhu, W.W.},
  title     = {Syntax Customized Video Captioning by Imitating Exemplar Sentences},
  journal   = PAMI,
  volume    = {44},
  number    = {12},
  pages     = {10209-10221},
  month     = {December},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236959}
}

@article{bb242057,
  author    = {Chen, H.R. and Li, J.M. and Frintrop, S. and Hu, X.L.},
  title     = {The {MSR-Video to Text} dataset with clean annotations},
  journal   = CVIU,
  volume    = {225},
  pages     = {103581},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236960}
}

@article{bb242058,
  author    = {Moctezuma, D. and Ramirez delReal, T. and Ruiz, G. and Gonzalez Chavez, O.},
  title     = {Video captioning: A comparative review of where we are and which could be the route},
  journal   = CVIU,
  volume    = {231},
  pages     = {103671},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236961}
}

@article{bb242059,
  author    = {Aafaq, N. and Mian, A. and Akhtar, N. and Liu, W. and Shah, M.},
  title     = {Dense Video Captioning With Early Linguistic Information Fusion},
  journal   = MultMed,
  volume    = {25},
  pages     = {2309-2322},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236962}
}

@inproceedings{bb242060,
  author    = {Wang, J.W. and Jiang, W.H. and Ma, L. and Liu, W. and Xu, Y.},
  title     = {Bidirectional Attentive Fusion with Context Gating for Dense Video Captioning},
  booktitle = CVPR18,
  pages     = {7190-7198},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236963}
}

@article{bb242061,
  author    = {He, M.G. and Du, W.J. and Wen, Z.Q. and Du, Q. and Xie, Y.T. and Wu, Q.},
  title     = {Multi-Granularity Aggregation Transformer for Joint Video-Audio-Text Representation Learning},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {6},
  pages     = {2990-3002},
  month     = {June},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236964}
}

@article{bb242062,
  author    = {Qian, Y. and Mao, Y.C. and Chen, Z.H. and Li, C. and Bloh, O.T. and Huang, Q.},
  title     = {Dense video captioning based on local attention},
  journal   = IET-IPR,
  volume    = {17},
  number    = {9},
  pages     = {2673-2685},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236965}
}

@article{bb242063,
  author    = {Tang, M.K. and Wang, Z.Y. and Zeng, Z.Y. and Li, X. and Zhou, L.P.},
  title     = {Stay in Grid: Improving Video Captioning via Fully Grid-Level Representation},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {7},
  pages     = {3319-3332},
  month     = {July},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236966}
}

@article{bb242064,
  author    = {Velda, V. and Immanuel, S.A. and Hendria, W.F. and Jeong, C.},
  title     = {Improving distinctiveness in video captioning with text-video similarity},
  journal   = IVC,
  volume    = {136},
  pages     = {104728},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236967}
}

@article{bb242065,
  author    = {Zhu, J.K. and Zeng, P.P. and Gao, L.L. and Li, G.F. and Liao, D.L. and Song, J.K.},
  title     = {Complementarity-Aware Space Learning for Video-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {8},
  pages     = {4362-4374},
  month     = {August},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236968}
}

@article{bb242066,
  author    = {Wang, H. and Zhang, L. and Fan, H. and Luo, T.J.},
  title     = {Collaborative three-stream transformers for video captioning},
  journal   = CVIU,
  volume    = {235},
  pages     = {103799},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236969}
}

@inproceedings{bb242067,
  author    = {Gu, X. and Chen, G. and Wang, Y.F. and Zhang, L. and Luo, T.J. and Wen, L.Y.},
  title     = {Text with Knowledge Graph Augmented Transformer for Video Captioning},
  booktitle = CVPR23,
  pages     = {18941-18951},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236970}
}

@article{bb242068,
  author    = {Xu, T. and Cui, Y.Y. and He, X.Y. and Liu, C.H.},
  title     = {A latent topic-aware network for dense video captioning},
  journal   = IET-CV,
  volume    = {17},
  number    = {7},
  pages     = {795-803},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236971}
}

@inproceedings{bb242069,
  author    = {Lu, M. and Li, X.Y. and Liu, C.H.},
  title     = {Context Visual Information-based Deliberation Network for Video Captioning},
  booktitle = ICPR21,
  pages     = {9812-9818},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236972}
}

@article{bb242070,
  author    = {Wu, B. and Liu, B. and Huang, P. and Bao, J. and Xi, P. and Yu, J.},
  title     = {Concept Parser With Multimodal Graph Learning for Video Captioning},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {9},
  pages     = {4484-4495},
  month     = {September},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236973}
}

@article{bb242071,
  author    = {Liu, S. and Li, A. and Wang, J.H. and Wang, Y.H.},
  title     = {Bidirectional Maximum Entropy Training With Word Co-Occurrence for Video Captioning},
  journal   = MultMed,
  volume    = {25},
  pages     = {4494-4507},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236974}
}

@article{bb242072,
  author    = {Yang, B. and Cao, M. and Zou, Y.X.},
  title     = {Concept-Aware Video Captioning: Describing Videos With Effective Prior Information},
  journal   = IP,
  volume    = {32},
  pages     = {5366-5378},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236975}
}

@article{bb242073,
  author    = {Luo, X.M. and Luo, X.T. and Wang, D. and Liu, J.H. and Wan, B. and Zhao, L.},
  title     = {Global semantic enhancement network for video captioning},
  journal   = PR,
  volume    = {145},
  pages     = {109906},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236976}
}

@article{bb242074,
  author    = {Liu, Z. and Wang, T. and Zhang, J. and Zheng, F. and Jiang, W.H. and Lu, K.},
  title     = {Show, Tell and Rephrase: Diverse Video Captioning via Two-Stage Progressive Training},
  journal   = MultMed,
  volume    = {25},
  pages     = {7894-7905},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236977}
}

@article{bb242075,
  author    = {Rao, Q. and Yu, X. and Li, G. and Zhu, L.C.},
  title     = {{CMGNet}: Collaborative multi-modal graph network for video captioning},
  journal   = CVIU,
  volume    = {238},
  pages     = {103864},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236978}
}

@article{bb242076,
  author    = {Li, G.R. and Ye, H.H. and Qi, Y.K. and Wang, S.H. and Qing, L.Y. and Huang, Q.M. and Yang, M.H.},
  title     = {Learning Hierarchical Modular Networks for Video Captioning},
  journal   = PAMI,
  volume    = {46},
  number    = {2},
  pages     = {1049-1064},
  month     = {February},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236979}
}

@inproceedings{bb242077,
  author    = {Ye, H.H. and Li, G.R. and Qi, Y.K. and Wang, S.H. and Huang, Q.M. and Yang, M.H.},
  title     = {Hierarchical Modular Network for Video Captioning},
  booktitle = CVPR22,
  pages     = {17918-17927},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236980}
}

@article{bb242078,
  author    = {Xie, Y.L. and Niu, J.J. and Zhang, Y. and Ren, F.},
  title     = {Global-Shared Text Representation Based Multi-Stage Fusion Transformer Network for Multi-Modal Dense Video Captioning},
  journal   = MultMed,
  volume    = {26},
  pages     = {3164-3179},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236981}
}

@article{bb242079,
  author    = {Jing, S. and Zhang, H. and Zeng, P.P. and Gao, L.L. and Song, J.K. and Shen, H.T.},
  title     = {Memory-Based Augmentation Network for Video Captioning},
  journal   = MultMed,
  volume    = {26},
  pages     = {2367-2379},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236982}
}

@article{bb242080,
  author    = {Liang, Y.Z. and Zhu, L.C. and Wang, X.H. and Yang, Y.},
  title     = {{IcoCap}: Improving Video Captioning by Compounding Images},
  journal   = MultMed,
  volume    = {26},
  pages     = {4389-4400},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236983}
}

% Journal article; JOURNAL is the unquoted CVIU string macro, defined outside this entry.
% PAGES carries a single identifier (presumably the article number) rather than a range.
@article{bb242081,
        AUTHOR    = {Wang, Z.H. and Li, L. and Xie, Z. and Liu, C.B.},
        TITLE     = {Video Frame-wise Explanation Driven Contrastive Learning for Procedural Text Generation},
        JOURNAL   = CVIU,
        YEAR      = {2024},
        VOLUME    = {241},
        PAGES     = {103954},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236984}
}

% Journal article; JOURNAL is the unquoted CirSysVideo string macro, defined outside this entry.
% {DARTScore} and {DuAl} are braced so styles that sentence-case titles keep the
% deliberate capitalization. MONTH uses the predefined macro; PAGES uses "--".
@article{bb242082,
        AUTHOR = "Chen, Y.X. and Zhang, Z.Q. and Qi, Z.A. and Yuan, C.F. and Wang, J. and Shan, Y. and Li, B. and Hu, W.M. and Qie, X. and Wu, J.P.",
        TITLE = "{DARTScore}: {DuAl}-Reconstruction Transformer for Video Captioning
Evaluation",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2041--2055",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT236985"}

% Journal article; JOURNAL is the unquoted ITS string macro, defined outside this entry.
% MONTH uses the predefined three-letter macro and PAGES the "--" range form,
% so the bibliography style decides how both are rendered.
@article{bb242083,
        AUTHOR = "Liu, C.S. and Zhang, X. and Chang, F. and Li, S. and Hao, P.H. and Lu, Y. and Wang, Y.H.",
        TITLE = "Traffic Scenario Understanding and Video Captioning via Guidance
Attention Captioning Network",
        JOURNAL = ITS,
        VOLUME = "25",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "3615--3627",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT236986"}

% Journal article; JOURNAL is the unquoted PRL string macro, defined outside this entry.
% NOTE(review): TITLE stops at the word "explicit" and looks truncated; confirm the
% full title against the publisher record before citing.
% PAGES uses the "--" range form so styles typeset an en-dash.
@article{bb242084,
        AUTHOR = "Zhang, Y.J. and Xu, T.Y. and Song, X.N. and Zhu, X.F. and Feng, Z.H. and Wu, X.J.",
        TITLE = "Towards accurate unsupervised video captioning with implicit visual
feature injection and explicit",
        JOURNAL = PRL,
        VOLUME = "183",
        YEAR = "2024",
        PAGES = "133--139",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT236987"}

% Journal article; JOURNAL is the unquoted IET-IPR string macro, defined outside this entry.
% {CARU} is braced so styles that sentence-case titles do not lowercase the acronym.
% PAGES uses the "--" range form so styles typeset an en-dash.
@article{bb242085,
        AUTHOR = "Im, S.K. and Chan, K.H.",
        TITLE = "Local feature-based video captioning with multiple classifier and
{CARU}-attention",
        JOURNAL = IET-IPR,
        VOLUME = "18",
        YEAR = "2024",
        NUMBER = "9",
        PAGES = "2304--2317",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT236988"}

% Journal article; JOURNAL is the unquoted IVC string macro, defined outside this entry.
% PAGES carries a single identifier (presumably the article number) rather than a range.
@article{bb242086,
        AUTHOR    = {Putra, B.H.H. and Jeong, C.},
        TITLE     = {Video captioning based on dual learning via multiple reconstruction blocks},
        JOURNAL   = IVC,
        YEAR      = {2024},
        VOLUME    = {148},
        PAGES     = {105119},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236989}
}

% Journal article; JOURNAL is the unquoted CVIU string macro, defined outside this entry.
% PAGES carries a single identifier (presumably the article number) rather than a range.
@article{bb242087,
        AUTHOR    = {Chou, S.H. and Little, J.J. and Sigal, L.},
        TITLE     = {Implicit and explicit commonsense for multi-sentence video captioning},
        JOURNAL   = CVIU,
        YEAR      = {2024},
        VOLUME    = {247},
        PAGES     = {104064},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236990}
}

% Journal article; JOURNAL is the unquoted PR string macro, defined outside this entry.
% PAGES carries a single identifier (presumably the article number) rather than a range.
@article{bb242088,
        AUTHOR    = {Tian, M. and Li, G.R. and Qi, Y.K. and Wang, S.H. and Sheng, Q.Z. and Huang, Q.M.},
        TITLE     = {Rethink video retrieval representation for video captioning},
        JOURNAL   = PR,
        YEAR      = {2024},
        VOLUME    = {156},
        PAGES     = {110744},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236991}
}

% Journal article; JOURNAL is the unquoted CirSysVideo string macro, defined outside this entry.
% {EvCap} is braced so styles that sentence-case titles keep its mixed capitalization.
% MONTH uses the predefined macro; PAGES uses the "--" range form.
@article{bb242089,
        AUTHOR = "Liu, S. and Li, A. and Zhao, Y.W. and Wang, J.H. and Wang, Y.H.",
        TITLE = "{EvCap}: Element-Aware Video Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "9718--9731",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT236992"}

% Journal article; JOURNAL is the unquoted PRL string macro, defined outside this entry.
% {EDS} is braced so styles that sentence-case titles do not lowercase the acronym.
% PAGES uses the "--" range form so styles typeset an en-dash.
@article{bb242090,
        AUTHOR = "Lou, Y. and Zhang, W.J. and Song, X.N. and Hua, Y. and Wu, X.J.",
        TITLE = "{EDS}: Exploring deeper into semantics for video captioning",
        JOURNAL = PRL,
        VOLUME = "186",
        YEAR = "2024",
        PAGES = "133--140",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT236993"}

% Journal article; JOURNAL is the unquoted PR string macro, defined outside this entry.
% PAGES carries a single identifier (presumably the article number) rather than a range.
@article{bb242091,
        AUTHOR    = {Yuan, F.N. and Gu, S. and Zhang, X.F. and Fang, Z.J.},
        TITLE     = {Fully exploring object relation interaction and hidden state attention for video captioning},
        JOURNAL   = PR,
        YEAR      = {2025},
        VOLUME    = {159},
        PAGES     = {111138},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236994}
}

% Journal article; JOURNAL is the unquoted CVIU string macro, defined outside this entry.
% PAGES carries a single identifier (presumably the article number) rather than a range.
@article{bb242092,
        AUTHOR    = {Che, N. and Liu, J. and Yu, F. and Cheng, L. and Wang, Y.X. and Li, Y.H. and Liu, C.R.},
        TITLE     = {Multimodality-guided Visual-Caption Semantic Enhancement},
        JOURNAL   = CVIU,
        YEAR      = {2024},
        VOLUME    = {249},
        PAGES     = {104139},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236995}
}

% Journal article; JOURNAL is the unquoted CVIU string macro, defined outside this entry.
% PAGES carries a single identifier (presumably the article number) rather than a range.
@article{bb242093,
        AUTHOR    = {Liu, Y.Y. and Zhu, H. and Wu, Z. and Du, S. and Wu, S. and Shi, J.},
        TITLE     = {Adaptive semantic guidance network for video captioning},
        JOURNAL   = CVIU,
        YEAR      = {2025},
        VOLUME    = {251},
        PAGES     = {104255},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236996}
}

% Journal article; JOURNAL is the unquoted PAMI string macro, defined outside this entry.
% {Banzhaf} is braced because it is a proper noun that sentence-casing styles would lowercase.
% MONTH uses the predefined macro; PAGES uses the "--" range form.
@article{bb242094,
        AUTHOR = "Jin, P. and Li, H. and Yuan, L. and Yan, S.C. and Chen, J.",
        TITLE = "Hierarchical {Banzhaf} Interaction for General Video-Language
Representation Learning",
        JOURNAL = PAMI,
        VOLUME = "47",
        YEAR = "2025",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "2125--2139",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT236997"}

% Journal article; JOURNAL is the unquoted Surveys string macro, defined outside this entry.
% PAGES is intentionally omitted: the source record carried only the placeholder
% "xx-yy", which would print literally. Add the real page range or article number
% once it is confirmed. MONTH uses the predefined three-letter macro.
@article{bb242095,
        AUTHOR = "Qasim, I. and Horsch, A. and Prasad, D.",
        TITLE = "Dense Video Captioning: A Survey of Techniques, Datasets and
Evaluation Protocols",
        JOURNAL = Surveys,
        VOLUME = "57",
        YEAR = "2025",
        NUMBER = "6",
        MONTH = feb,
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT236998"}

% Journal article; JOURNAL is the unquoted JVCIR string macro, defined outside this entry.
% PAGES carries a single identifier (presumably the article number) rather than a range.
@article{bb242096,
        AUTHOR    = {Estevam, V. and Laroca, R. and Pedrini, H. and Menotti, D.},
        TITLE     = {Dense video captioning using unsupervised semantic information},
        JOURNAL   = JVCIR,
        YEAR      = {2025},
        VOLUME    = {107},
        PAGES     = {104385},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT236999}
}

% Journal article; JOURNAL is the unquoted CVIU string macro, defined outside this entry.
% {POS} is braced so styles that sentence-case titles do not lowercase the acronym.
% PAGES carries a single identifier (presumably the article number) rather than a range.
@article{bb242097,
        AUTHOR = "Verma, D. and Dutta, T.",
        TITLE = "Syntactically and semantically enhanced captioning network via hybrid
attention and {POS} tagging prompt",
        JOURNAL = CVIU,
        VOLUME = "255",
        YEAR = "2025",
        PAGES = "104340",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT237000"}

% Journal article; JOURNAL is the unquoted CirSysVideo string macro, defined outside this entry.
% MONTH uses the predefined three-letter macro and PAGES the "--" range form,
% so the bibliography style decides how both are rendered.
@article{bb242098,
        AUTHOR = "Han, T.T. and Xu, Y.C. and Yu, J. and Yu, Z. and Zhao, S.C.",
        TITLE = "Action-Driven Semantic Representation and Aggregation for Video
Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "3383--3395",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT237001"}

% Journal article; JOURNAL is the unquoted CirSysVideo string macro, defined outside this entry.
% MONTH uses the predefined three-letter macro and PAGES the "--" range form,
% so the bibliography style decides how both are rendered.
@article{bb242099,
        AUTHOR = "Jiang, W.H. and Liu, L. and Fang, Y.M. and Cheng, Y. and Peng, Y.X. and Liu, Y.",
        TITLE = "Learning Comprehensive Visual Grounding for Video Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "3355--3367",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT237002"}

Last update: Oct 6, 2025 at 14:07:43