@article{bb243600,
AUTHOR = "Wang, H. and Zhang, L. and Fan, H. and Luo, T. J.",
TITLE = "Collaborative three-stream transformers for video captioning",
JOURNAL = CVIU,
VOLUME = "235",
YEAR = "2023",
PAGES = "103799",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238494"}
@inproceedings{bb243601,
AUTHOR = "Gu, X. and Chen, G. and Wang, Y. F. and Zhang, L. and Luo, T. J. and Wen, L. Y.",
TITLE = "Text with Knowledge Graph Augmented Transformer for Video Captioning",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "18941--18951",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238495"}
@article{bb243602,
AUTHOR = "Xu, T. and Cui, Y. Y. and He, X. Y. and Liu, C. H.",
TITLE = "A latent topic-aware network for dense video captioning",
JOURNAL = IET-CV,
VOLUME = "17",
YEAR = "2023",
NUMBER = "7",
PAGES = "795--803",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238496"}
@inproceedings{bb243603,
AUTHOR = "Lu, M. and Li, X. Y. and Liu, C. H.",
TITLE = "Context Visual Information-based Deliberation Network for Video
Captioning",
BOOKTITLE = ICPR21,
YEAR = "2021",
PAGES = "9812--9818",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238497"}
@article{bb243604,
AUTHOR = "Wu, B. and Liu, B. and Huang, P. and Bao, J. and Xi, P. and Yu, J.",
TITLE = "Concept Parser With Multimodal Graph Learning for Video Captioning",
JOURNAL = CirSysVideo,
VOLUME = "33",
YEAR = "2023",
NUMBER = "9",
MONTH = sep,
PAGES = "4484--4495",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238498"}
@article{bb243605,
AUTHOR = "Liu, S. and Li, A. and Wang, J. H. and Wang, Y. H.",
TITLE = "Bidirectional Maximum Entropy Training With Word Co-Occurrence for
Video Captioning",
JOURNAL = MultMed,
VOLUME = "25",
YEAR = "2023",
PAGES = "4494--4507",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238499"}
@article{bb243606,
AUTHOR = "Yang, B. and Cao, M. and Zou, Y. X.",
TITLE = "Concept-Aware Video Captioning:
Describing Videos With Effective Prior Information",
JOURNAL = IP,
VOLUME = "32",
YEAR = "2023",
PAGES = "5366--5378",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238500"}
@article{bb243607,
AUTHOR = "Luo, X. M. and Luo, X. T. and Wang, D. and Liu, J. H. and Wan, B. and Zhao, L.",
TITLE = "Global semantic enhancement network for video captioning",
JOURNAL = PR,
VOLUME = "145",
YEAR = "2024",
PAGES = "109906",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238501"}
@article{bb243608,
AUTHOR = "Liu, Z. and Wang, T. and Zhang, J. and Zheng, F. and Jiang, W. H. and Lu, K.",
TITLE = "Show, Tell and Rephrase: Diverse Video Captioning via Two-Stage
Progressive Training",
JOURNAL = MultMed,
VOLUME = "25",
YEAR = "2023",
PAGES = "7894--7905",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238502"}
@article{bb243609,
AUTHOR = "Rao, Q. and Yu, X. and Li, G. and Zhu, L. C.",
TITLE = "{CMGNet}: Collaborative multi-modal graph network for video captioning",
JOURNAL = CVIU,
VOLUME = "238",
YEAR = "2024",
PAGES = "103864",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238503"}
@article{bb243610,
AUTHOR = "Li, G. R. and Ye, H. H. and Qi, Y. K. and Wang, S. H. and Qing, L. Y. and Huang, Q. M. and Yang, M. H.",
TITLE = "Learning Hierarchical Modular Networks for Video Captioning",
JOURNAL = PAMI,
VOLUME = "46",
YEAR = "2024",
NUMBER = "2",
MONTH = feb,
PAGES = "1049--1064",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238504"}
@inproceedings{bb243611,
AUTHOR = "Ye, H. H. and Li, G. R. and Qi, Y. K. and Wang, S. H. and Huang, Q. M. and Yang, M. H.",
TITLE = "Hierarchical Modular Network for Video Captioning",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "17918--17927",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238505"}
@article{bb243612,
AUTHOR = "Xie, Y. L. and Niu, J. J. and Zhang, Y. and Ren, F.",
TITLE = "Global-Shared Text Representation Based Multi-Stage Fusion
Transformer Network for Multi-Modal Dense Video Captioning",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "3164--3179",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238506"}
@article{bb243613,
AUTHOR = "Jing, S. and Zhang, H. and Zeng, P. P. and Gao, L. L. and Song, J. K. and Shen, H. T.",
TITLE = "Memory-Based Augmentation Network for Video Captioning",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "2367--2379",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238507"}
@article{bb243614,
AUTHOR = "Liang, Y. Z. and Zhu, L. C. and Wang, X. H. and Yang, Y.",
TITLE = "{IcoCap}: Improving Video Captioning by Compounding Images",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "4389--4400",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238508"}
@article{bb243615,
AUTHOR = "Wang, Z. H. and Li, L. and Xie, Z. W. and Liu, C. B.",
TITLE = "Video Frame-wise Explanation Driven Contrastive Learning for
Procedural Text Generation",
JOURNAL = CVIU,
VOLUME = "241",
YEAR = "2024",
PAGES = "103954",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238509"}
@article{bb243616,
AUTHOR = "Chen, Y. X. and Zhang, Z. Q. and Qi, Z. A. and Yuan, C. F. and Wang, J. and Shan, Y. and Li, B. and Hu, W. M. and Qie, X. and Wu, J. P.",
TITLE = "{DARTScore}: {DuAl-Reconstruction} Transformer for Video Captioning
Evaluation",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "4",
MONTH = apr,
PAGES = "2041--2055",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238510"}
@article{bb243617,
AUTHOR = "Liu, C. S. and Zhang, X. and Chang, F. and Li, S. and Hao, P. H. and Lu, Y. and Wang, Y. H.",
TITLE = "Traffic Scenario Understanding and Video Captioning via Guidance
Attention Captioning Network",
JOURNAL = ITS,
VOLUME = "25",
YEAR = "2024",
NUMBER = "5",
MONTH = may,
PAGES = "3615--3627",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238511"}
@article{bb243618,
AUTHOR = "Zhang, Y. J. and Xu, T. Y. and Song, X. N. and Zhu, X. F. and Feng, Z. H. and Wu, X. J.",
TITLE = "Towards accurate unsupervised video captioning with implicit visual
feature injection and explicit",
JOURNAL = PRL,
VOLUME = "183",
YEAR = "2024",
PAGES = "133--139",
REVIEWNOTE = "NOTE(review): TITLE appears truncated after the word explicit; confirm the full title against the publisher record",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238512"}
@article{bb243619,
AUTHOR = "Im, S. K. and Chan, K. H.",
TITLE = "Local feature-based video captioning with multiple classifier and
{CARU}-attention",
JOURNAL = IET-IPR,
VOLUME = "18",
YEAR = "2024",
NUMBER = "9",
PAGES = "2304--2317",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238513"}
@article{bb243620,
AUTHOR = "Putra, B. H. H. and Jeong, C.",
TITLE = "Video captioning based on dual learning via multiple reconstruction
blocks",
JOURNAL = IVC,
VOLUME = "148",
YEAR = "2024",
PAGES = "105119",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238514"}
@article{bb243621,
AUTHOR = "Chou, S. H. and Little, J. J. and Sigal, L.",
TITLE = "Implicit and explicit commonsense for multi-sentence video captioning",
JOURNAL = CVIU,
VOLUME = "247",
YEAR = "2024",
PAGES = "104064",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238515"}
@article{bb243622,
AUTHOR = "Tian, M. and Li, G. R. and Qi, Y. K. and Wang, S. H. and Sheng, Q. Z. and Huang, Q. M.",
TITLE = "Rethink video retrieval representation for video captioning",
JOURNAL = PR,
VOLUME = "156",
YEAR = "2024",
PAGES = "110744",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238516"}
@article{bb243623,
AUTHOR = "Liu, S. and Li, A. and Zhao, Y. W. and Wang, J. H. and Wang, Y. H.",
TITLE = "{EvCap}: Element-Aware Video Captioning",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "10",
MONTH = oct,
PAGES = "9718--9731",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238517"}
@article{bb243624,
AUTHOR = "Lou, Y. and Zhang, W. J. and Song, X. N. and Hua, Y. and Wu, X. J.",
TITLE = "{EDS}: Exploring deeper into semantics for video captioning",
JOURNAL = PRL,
VOLUME = "186",
YEAR = "2024",
PAGES = "133--140",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238518"}
@article{bb243625,
AUTHOR = "Yuan, F. N. and Gu, S. and Zhang, X. F. and Fang, Z. J.",
TITLE = "Fully exploring object relation interaction and hidden state
attention for video captioning",
JOURNAL = PR,
VOLUME = "159",
YEAR = "2025",
PAGES = "111138",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238519"}
@article{bb243626,
AUTHOR = "Che, N. and Liu, J. and Yu, F. and Cheng, L. and Wang, Y. X. and Li, Y. H. and Liu, C. R.",
TITLE = "Multimodality-guided Visual-Caption Semantic Enhancement",
JOURNAL = CVIU,
VOLUME = "249",
YEAR = "2024",
PAGES = "104139",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238520"}
@article{bb243627,
AUTHOR = "Liu, Y. Y. and Zhu, H. and Wu, Z. and Du, S. and Wu, S. and Shi, J.",
TITLE = "Adaptive semantic guidance network for video captioning",
JOURNAL = CVIU,
VOLUME = "251",
YEAR = "2025",
PAGES = "104255",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238521"}
@article{bb243628,
AUTHOR = "Jin, P. and Li, H. and Yuan, L. and Yan, S. C. and Chen, J.",
TITLE = "Hierarchical {Banzhaf} Interaction for General Video-Language
Representation Learning",
JOURNAL = PAMI,
VOLUME = "47",
YEAR = "2025",
NUMBER = "3",
MONTH = mar,
PAGES = "2125--2139",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238522"}
@article{bb243629,
AUTHOR = "Qasim, I. and Horsch, A. and Prasad, D.",
TITLE = "Dense Video Captioning: A Survey of Techniques, Datasets and
Evaluation Protocols",
JOURNAL = Surveys,
VOLUME = "57",
YEAR = "2025",
NUMBER = "6",
MONTH = feb,
PAGES = "xx-yy",
REVIEWNOTE = "NOTE(review): PAGES is a placeholder; replace with the real article number or page range",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238523"}
@article{bb243630,
  author    = {Estevam, V. and Laroca, R. and Pedrini, H. and Menotti, D.},
  title     = {Dense video captioning using unsupervised semantic information},
  journal   = JVCIR,
  volume    = {107},
  year      = {2025},
  pages     = {104385},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT238524},
}
@article{bb243631,
AUTHOR = "Verma, D. and Dutta, T.",
TITLE = "Syntactically and semantically enhanced captioning network via hybrid
attention and {POS} tagging prompt",
JOURNAL = CVIU,
VOLUME = "255",
YEAR = "2025",
PAGES = "104340",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238525"}
@article{bb243632,
AUTHOR = "Han, T. T. and Xu, Y. C. and Yu, J. and Yu, Z. and Zhao, S. C.",
TITLE = "Action-Driven Semantic Representation and Aggregation for Video
Captioning",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "4",
MONTH = apr,
PAGES = "3383--3395",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238526"}
@article{bb243633,
AUTHOR = "Jiang, W. H. and Liu, L. and Fang, Y. M. and Cheng, Y. and Peng, Y. X. and Liu, Y.",
TITLE = "Learning Comprehensive Visual Grounding for Video Captioning",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "4",
MONTH = apr,
PAGES = "3355--3367",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238527"}
@article{bb243634,
AUTHOR = "Ren, X. and Han, Y. F. and Wei, B. and Tang, X. S. and Hao, K. R.",
TITLE = "From visual features to key concepts: A Dynamic and Static
Concept-driven approach for video captioning",
JOURNAL = PRL,
VOLUME = "193",
YEAR = "2025",
PAGES = "64--70",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238528"}
@article{bb243635,
AUTHOR = "Wang, Y. and Liu, Y. Y. and Zhou, S. P. and Huang, Y. X. and Tang, C. and Zhou, W. and Chen, Z.",
TITLE = "Emotion-Oriented Cross-Modal Prompting and Alignment for
Human-Centric Emotional Video Captioning",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "3766--3780",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238529"}
@article{bb243636,
AUTHOR = "Luo, H. L. and Cai, X. and Shark, L. K.",
TITLE = "Frame-by-Frame Multi-Object Tracking-Guided Video Captioning",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "7",
MONTH = jul,
PAGES = "6357--6370",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238530"}
@article{bb243637,
AUTHOR = "Choi, W. and Chen, J. and Yoon, J. W.",
TITLE = "{ADVC}: Adversarial dense video captioning with unsupervised
pretraining",
JOURNAL = IVC,
VOLUME = "161",
YEAR = "2025",
PAGES = "105595",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238531"}
@article{bb243638,
AUTHOR = "Ma, Y. C. and Qing, L. Y. and Li, G. R. and Qi, Y. K. and Beheshti, A. and Sheng, Q. Z. and Huang, Q. M.",
TITLE = "{RETTA}: Retrieval-enhanced test-time adaptation for zero-shot video
captioning",
JOURNAL = PR,
VOLUME = "171",
YEAR = "2026",
PAGES = "112170",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238532"}
@article{bb243639,
AUTHOR = "Verma, D. and Dutta, T.",
TITLE = "Seeing the Rare: Meta-Aware Pointer Networks for Long-Tailed Video
Captioning",
JOURNAL = SPLetters,
VOLUME = "32",
YEAR = "2025",
PAGES = "4269--4273",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238533"}
@inproceedings{bb243640,
AUTHOR = "Wu, K. and Li, P. and Fu, J. W. and Li, Y. Z. and Wu, Y. and Liu, Y. H. and Wang, J. J. and Zhou, S. P.",
TITLE = "Event-Equalized Dense Video Captioning",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "8417--8427",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238534"}
@inproceedings{bb243641,
AUTHOR = "Xue, Z. H. and An, J. B. and Yang, X. T. and Grauman, K.",
TITLE = "Progress-Aware Video Frame Captioning",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "13639--13650",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238535"}
@inproceedings{bb243642,
AUTHOR = "Lee, S. H. and Wang, J. and Fan, D. and Zhang, Z. K. and Liu, L. and Hao, X. and Bhat, V. and Li, X. Y.",
TITLE = "Now you see Me: Context-Aware Automatic Audio Description",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "5530--5539",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238536"}
@inproceedings{bb243643,
AUTHOR = "Kainulainen, J. and Guo, Z. X. and Laaksonen, J.",
TITLE = "Diffusion-based Multimodal Video Captioning",
BOOKTITLE = ACCV24,
YEAR = "2024",
PAGES = "III: 148--165",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238537"}
@inproceedings{bb243644,
AUTHOR = "Perrett, T. and Han, T. and Damen, D. and Zisserman, A.",
TITLE = "It's Just Another Day:
Unique Video Captioning by Discriminative Prompting",
BOOKTITLE = ACCV24,
YEAR = "2024",
PAGES = "III: 275--293",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238538"}
@inproceedings{bb243645,
AUTHOR = "Kim, B. and Hwang, D. and Cho, S. J. and Jang, Y. S. and Lee, H. L. and Lee, M.",
TITLE = "Show, Think, and Tell: Thought-Augmented Fine-Tuning of Large
Language Models for Video Captioning",
BOOKTITLE = WhatNext24,
YEAR = "2024",
PAGES = "1808--1817",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238539"}
@inproceedings{bb243646,
AUTHOR = "Shen, Y. H. and Yang, L. J. and Wen, L. Y. and Yu, H. C. and Elhamifar, E. and Wang, H.",
TITLE = "Exploring the Role of Audio in Video Captioning",
BOOKTITLE = MULA24,
YEAR = "2024",
PAGES = "2090--2100",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238540"}
@inproceedings{bb243647,
AUTHOR = "Shoman, M. and Wang, D. D. and Aboah, A. and Abdel Aty, M.",
TITLE = "Enhancing Traffic Safety with Parallel Dense Video Captioning for
End-to-End Event Analysis",
BOOKTITLE = AICity24,
YEAR = "2024",
PAGES = "7125--7133",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238541"}
@inproceedings{bb243648,
AUTHOR = "Wu, H. and Liu, H. and Qiao, Y. and Sun, X.",
TITLE = "{DIBS}: Enhancing Dense Video Captioning with Unlabeled Videos via
Pseudo Boundary Enrichment and Online Refinement",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "18699--18708",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238542"}
@inproceedings{bb243649,
AUTHOR = "Zhou, X. Y. and Arnab, A. and Buch, S. and Yan, S. and Myers, A. and Xiong, X. and Nagrani, A. and Schmid, C.",
TITLE = "Streaming Dense Video Captioning",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "18243--18252",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238543"}
@inproceedings{bb243650,
AUTHOR = "Kim, M. and Kim, H. B. and Moon, J. and Choi, J. and Kim, S. T.",
TITLE = "Do You Remember? Dense Video Captioning with Cross-Modal Memory
Retrieval",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13894--13904",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238544"}
@inproceedings{bb243651,
AUTHOR = "Xu, J. and Huang, Y. F. and Hou, J. L. and Chen, G. and Zhang, Y. and Feng, R. and Xie, W.",
TITLE = "Retrieval-Augmented Egocentric Video Captioning",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13525--13536",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238545"}
@inproceedings{bb243652,
AUTHOR = "Malakan, Z. M. and Hassan, G. M. and Mian, A.",
TITLE = "Sequential Image Storytelling Model Based on Transformer Attention
Pooling",
BOOKTITLE = IVCNZ23,
YEAR = "2023",
PAGES = "1--6",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238546"}
@inproceedings{bb243653,
AUTHOR = "Sakaino, H.",
TITLE = "Unseen and Adverse Outdoor Scenes Recognition Through Event-based
Captions",
BOOKTITLE = VCL23,
YEAR = "2023",
PAGES = "3596--3603",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238547"}
@inproceedings{bb243654,
AUTHOR = "Ma, Z. Y. and Zhang, Z. Q. and Chen, Y. X. and Qi, Z. A. and Luo, Y. M. and Li, Z. K. and Yuan, C. F. and Li, B. and Qie, X. and Shan, Y. and Hu, W. M.",
TITLE = "Order-Prompted Tag Sequence Generation for Video Tagging",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "15635--15644",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238548"}
@inproceedings{bb243655,
AUTHOR = "Bulat, A. and Sanchez, E. and Martinez, B. and Tzimiropoulos, G.",
TITLE = "{ReGen}: A good Generative zero-shot video classifier should be
Rewarded",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "13477--13487",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238549"}
@inproceedings{bb243656,
AUTHOR = "Shen, Y. J. and Gu, X. and Xu, K. and Fan, H. and Wen, L. Y. and Zhang, L.",
TITLE = "Accurate and Fast Compressed Video Captioning",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "15512--15521",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238550"}
@inproceedings{bb243657,
AUTHOR = "Lin, W. and Jin, T. and Wang, Y. and Pan, W. W. and Li, L. J. and Cheng, X. Z. and Zhao, Z.",
TITLE = "Exploring Group Video Captioning with Efficient Relational
Approximation",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "15235--15244",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238551"}
@inproceedings{bb243658,
AUTHOR = "Damaceno, R. J. P. and Cesar, R. M.",
TITLE = "An End-to-end Deep Learning Approach for Video Captioning Through
Mobile Devices",
BOOKTITLE = CIARP23,
YEAR = "2023",
PAGES = "I:715--729",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238552"}
@inproceedings{bb243659,
AUTHOR = "Munusamy, H. and Sekhar, C. C.",
TITLE = "Multi-Modal Hierarchical Attention-Based Dense Video Captioning",
BOOKTITLE = ICIP23,
YEAR = "2023",
PAGES = "475--479",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238553"}
@inproceedings{bb243660,
AUTHOR = "Chen, K. X. and Di, Q. J. and Lu, Y. and Wang, H. Z.",
TITLE = "Semantic Learning Network for Controllable Video Captioning",
BOOKTITLE = ICIP23,
YEAR = "2023",
PAGES = "880--884",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238554"}
@inproceedings{bb243661,
AUTHOR = "Nadeem, A. and Hilton, A. and Dawes, R. and Thomas, G. and Mustafa, A.",
TITLE = "{SEM-POS}: Grammatically and Semantically Correct Video Captioning",
BOOKTITLE = MULA23,
YEAR = "2023",
PAGES = "2606--2616",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238555"}
@inproceedings{bb243662,
AUTHOR = "Ullah, N. and Mohanta, P. P.",
TITLE = "Thinking Hallucination for Video Captioning",
BOOKTITLE = ACCV22,
YEAR = "2022",
PAGES = "IV:623--640",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238556"}
@inproceedings{bb243663,
AUTHOR = "Seo, P. H. and Nagrani, A. and Arnab, A. and Schmid, C.",
TITLE = "End-to-end Generative Pretraining for Multimodal Video Captioning",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "17938--17947",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238557"}
@inproceedings{bb243664,
AUTHOR = "Lin, K. and Li, L. J. and Lin, C. C. and Ahmed, F. and Gan, Z. and Liu, Z. C. and Lu, Y. and Wang, L. J.",
TITLE = "{SwinBERT}: End-to-End Transformers with Sparse Attention for Video
Captioning",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "17928--17937",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238558"}
@inproceedings{bb243665,
AUTHOR = "Shi, Y. and Yang, X. and Xu, H. Y. and Yuan, C. F. and Li, B. and Hu, W. M. and Zha, Z. J.",
TITLE = "{EMScore}: Evaluating Video Captioning via Coarse-Grained and
Fine-Grained Embedding Matching",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "17908--17917",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238559"}
@inproceedings{bb243666,
AUTHOR = "Chen, S. X. and Jiang, Y. G.",
TITLE = "Motion Guided Region Message Passing for Video Captioning",
BOOKTITLE = ICCV21,
YEAR = "2021",
PAGES = "1523--1532",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238560"}
@inproceedings{bb243667,
AUTHOR = "Joshi, P. and Saharia, C. and Singh, V. and Gautam, D. and Ramakrishnan, G. and Jyothi, P.",
TITLE = "A Tale of Two Modalities for Video Captioning",
BOOKTITLE = MMVAMTC19,
YEAR = "2019",
PAGES = "3708--3712",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238561"}
@inproceedings{bb243668,
AUTHOR = "Wang, T. and Zhang, R. M. and Lu, Z. C. and Zheng, F. and Cheng, R. and Luo, P.",
TITLE = "End-to-End Dense Video Captioning with Parallel Decoding",
BOOKTITLE = ICCV21,
YEAR = "2021",
PAGES = "6827--6837",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238562"}
@inproceedings{bb243669,
AUTHOR = "Yang, B. and Zou, Y. X.",
TITLE = "Visual Oriented Encoder: Integrating Multimodal and Multi-Scale
Contexts for Video Captioning",
BOOKTITLE = ICPR21,
YEAR = "2021",
PAGES = "188--195",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238563"}
@inproceedings{bb243670,
AUTHOR = "Perez Martin, J. and Bustos, B. and Perez, J.",
TITLE = "Attentive Visual Semantic Specialized Network for Video Captioning",
BOOKTITLE = ICPR21,
YEAR = "2021",
PAGES = "5767--5774",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238564"}
@inproceedings{bb243671,
AUTHOR = "Olivastri, S. and Singh, G. and Cuzzolin, F.",
TITLE = "End-to-End Video Captioning",
BOOKTITLE = HVU19,
YEAR = "2019",
PAGES = "1474--1482",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238565"}
@inproceedings{bb243672,
AUTHOR = "Li, L. and Gong, B.",
TITLE = "End-to-End Video Captioning With Multitask Reinforcement Learning",
BOOKTITLE = WACV19,
YEAR = "2019",
PAGES = "339--348",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238566"}
@inproceedings{bb243673,
AUTHOR = "Wang, B. and Ma, L. and Zhang, W. and Liu, W.",
TITLE = "Reconstruction Network for Video Captioning",
BOOKTITLE = CVPR18,
YEAR = "2018",
PAGES = "7622--7631",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238567"}
@inproceedings{bb243674,
AUTHOR = "Li, Y. and Yao, T. and Pan, Y. and Chao, H. and Mei, T.",
TITLE = "Jointly Localizing and Describing Events for Dense Video Captioning",
BOOKTITLE = CVPR18,
YEAR = "2018",
PAGES = "7492--7500",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238568"}
@inproceedings{bb243675,
AUTHOR = "Wu, X. and Li, G. and Cao, Q. and Ji, Q. and Lin, L.",
TITLE = "Interpretable Video Captioning via Trajectory Structured Localization",
BOOKTITLE = CVPR18,
YEAR = "2018",
PAGES = "6829--6837",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238569"}
@inproceedings{bb243676,
AUTHOR = "Wang, X. and Chen, W. and Wu, J. and Wang, Y. and Wang, W. Y.",
TITLE = "Video Captioning via Hierarchical Reinforcement Learning",
BOOKTITLE = CVPR18,
YEAR = "2018",
PAGES = "4213--4222",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238570"}
@inproceedings{bb243677,
AUTHOR = "Zhou, L. and Zhou, Y. and Corso, J. J. and Socher, R. and Xiong, C.",
TITLE = "End-to-End Dense Video Captioning with Masked Transformer",
BOOKTITLE = CVPR18,
YEAR = "2018",
PAGES = "8739--8748",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238571"}
@inproceedings{bb243678,
AUTHOR = "Yang, D. and Yuan, C.",
TITLE = "Hierarchical Context Encoding for Events Captioning in Videos",
BOOKTITLE = ICIP18,
YEAR = "2018",
PAGES = "1288--1292",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238572"}
@inproceedings{bb243679,
AUTHOR = "Shen, Z. Q. and Li, J. G. and Su, Z. and Li, M. J. and Chen, Y. R. and Jiang, Y. G. and Xue, X. Y.",
TITLE = "Weakly Supervised Dense Video Captioning",
BOOKTITLE = CVPR17,
YEAR = "2017",
PAGES = "5159--5167",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238573"}
@inproceedings{bb243680,
AUTHOR = "Baraldi, L. and Grana, C. and Cucchiara, R.",
TITLE = "Hierarchical Boundary-Aware Neural Encoder for Video Captioning",
BOOKTITLE = CVPR17,
YEAR = "2017",
PAGES = "3185--3194",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238574"}
@inproceedings{bb243681,
AUTHOR = "Pan, P. B. and Xu, Z. W. and Yang, Y. and Wu, F. and Zhuang, Y. T.",
TITLE = "Hierarchical Recurrent Neural Encoder for Video Representation with
Application to Captioning",
BOOKTITLE = CVPR16,
YEAR = "2016",
PAGES = "1029--1038",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238575"}
@inproceedings{bb243682,
AUTHOR = "Yu, H. N. and Wang, J. and Huang, Z. H. and Yang, Y. and Xu, W.",
TITLE = "Video Paragraph Captioning Using Hierarchical Recurrent Neural
Networks",
BOOKTITLE = CVPR16,
YEAR = "2016",
PAGES = "4584--4593",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238576"}
@inproceedings{bb243683,
AUTHOR = "Shin, A. and Ohnishi, K. and Harada, T.",
TITLE = "Beyond caption to narrative: Video captioning with multiple sentences",
BOOKTITLE = ICIP16,
YEAR = "2016",
PAGES = "3364--3368",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238577"}
@inproceedings{bb243684,
AUTHOR = "Valio, F. B. and Pedrini, H. and Leite, N. J.",
TITLE = "Fast Rotation-Invariant Video Caption Detection Based on Visual Rhythm",
BOOKTITLE = CIARP11,
YEAR = "2011",
PAGES = "157--164",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT238578"}
@article{bb243685,
AUTHOR = "Vakali, A. and Hacid, M. S. and Elmagarmid, A.",
TITLE = "{MPEG-7} based description schemes for multi-level video content
classification",
JOURNAL = IVC,
VOLUME = "22",
YEAR = "2004",
NUMBER = "5",
MONTH = may,
PAGES = "367--378",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238579"}
@article{bb243686,
AUTHOR = "Fonseca, P. M. and Pereira, F.",
TITLE = "Automatic video summarization based on {MPEG-7} descriptions",
JOURNAL = SP:IC,
VOLUME = "19",
YEAR = "2004",
NUMBER = "8",
MONTH = sep,
PAGES = "685--699",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238580"}
@article{bb243687,
AUTHOR = "Mrak, M. and Calic, J. and Kondoz, A. M.",
TITLE = "Fast analysis of scalable video for adaptive browsing interfaces",
JOURNAL = CVIU,
VOLUME = "113",
YEAR = "2009",
NUMBER = "3",
MONTH = mar,
PAGES = "425--434",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238581"}
@inproceedings{bb243688,
AUTHOR = "Calic, J. and Mrak, M. and Kondoz, A. M.",
TITLE = "Flexible generation of video summaries from layered video bit-streams",
BOOKTITLE = ICIP08,
YEAR = "2008",
PAGES = "2516--2519",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238582"}
@article{bb243689,
AUTHOR = "Herranz, L. and Martinez, J. M.",
TITLE = "An integrated approach to summarization and adaptation using
{H.264/MPEG-4} {SVC}",
JOURNAL = SP:IC,
VOLUME = "24",
YEAR = "2009",
NUMBER = "6",
MONTH = jul,
PAGES = "499--509",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238583"}
@article{bb243690,
AUTHOR = "Herranz, L. and Martinez, J. M.",
TITLE = "On the use of hierarchical prediction structures for efficient summary
generation of {H.264/AVC} bitstreams",
JOURNAL = SP:IC,
VOLUME = "24",
YEAR = "2009",
NUMBER = "8",
MONTH = sep,
PAGES = "615--629",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238584"}
@article{bb243691,
AUTHOR = "Matos, N. and Pereira, F.",
TITLE = "Automatic creation and evaluation of {MPEG-7} compliant summary
descriptions for generic audiovisual content",
JOURNAL = SP:IC,
VOLUME = "23",
YEAR = "2008",
NUMBER = "8",
MONTH = sep,
PAGES = "581--598",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238585"}
@article{bb243692,
AUTHOR = "Ren, J. C. and Jiang, J. M. and Feng, Y.",
TITLE = "Activity-driven content adaptation for effective video summarization",
JOURNAL = JVCIR,
VOLUME = "21",
YEAR = "2010",
NUMBER = "8",
MONTH = nov,
PAGES = "930--938",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238586"}
@article{bb243693,
AUTHOR = "Almeida, J. and Leite, N. J. and da Silva Torres, R.",
TITLE = "{VISON}: {VIdeo} Summarization for {ONline} applications",
JOURNAL = PRL,
VOLUME = "33",
YEAR = "2012",
NUMBER = "4",
MONTH = mar,
PAGES = "397--409",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238587"}
@inproceedings{bb243694,
AUTHOR = "Almeida, J. and Leite, N. J. and da Silva Torres, R.",
TITLE = "Comparison of video sequences with histograms of motion patterns",
BOOKTITLE = ICIP11,
YEAR = "2011",
PAGES = "3673--3676",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238588"}
@inproceedings{bb243695,
AUTHOR = "Almeida, J. and Leite, N. J. and da Silva Torres, R.",
TITLE = "Rapid Cut Detection on Compressed Video",
BOOKTITLE = CIARP11,
YEAR = "2011",
PAGES = "71--78",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238589"}
@article{bb243696,
AUTHOR = "Basavarajaiah, M. and Sharma, P.",
TITLE = "Survey of Compressed Domain Video Summarization Techniques",
JOURNAL = Surveys,
VOLUME = "52",
YEAR = "2019",
NUMBER = "6",
MONTH = oct,
PAGES = "xx-yy",
REVIEWNOTE = "NOTE(review): PAGES is a placeholder; replace with the real article number or page range",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238590"}
@inproceedings{bb243697,
AUTHOR = "Lee, J. H. and Kim, W. Y.",
TITLE = "Video Summarization and Retrieval System Using Face Recognition and
{MPEG-7} Descriptors",
BOOKTITLE = CIVR04,
YEAR = "2004",
PAGES = "170--178",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238591"}
@inproceedings{bb243698,
AUTHOR = "Soysal, M. and Alatan, A. A.",
TITLE = "Combining {MPEG-7} Based Visual Experts for Reaching Semantics",
BOOKTITLE = VLBV03,
YEAR = "2003",
PAGES = "66--75",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238592"}
@inproceedings{bb243699,
AUTHOR = "Divakaran, A. and Peker, K. A. and Radhakrishnan, R. and Xiong, Z. Y. and Cabasson, R.",
TITLE = "Video Summarization using {MPEG-7} Motion Activity and Audio Descriptors",
BOOKTITLE = VideoMining03,
YEAR = "2003",
PAGES = "Chapter 4",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vm3.html#TT238593"}
Last update: Nov 26, 2025 at 20:24:09