@inproceedings{bb242200,
        AUTHOR = "Rudinac, S. and Chua, T.S. and Diaz Ferreyra, N. and Friedland, G. and Gornostaja, T. and Huet, B. and Kaptein, R. and Linden, K. and Moens, M.F. and Peltonen, J. and Redi, M. and Schedl, M. and Shamma, D.A. and Smeaton, A. and Xie, L.X.",
        TITLE = "Rethinking Summarization and Storytelling for Modern Social Multimedia",
        BOOKTITLE = MMMod18,
        YEAR = "2018",
        PAGES = "I: 632-644",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825mms4.html#TT237102"}

@inproceedings{bb242201,
        AUTHOR = "Hori, C. and Hori, T. and Lee, T.Y. and Zhang, Z.M. and Harsham, B. and Hershey, J.R. and Marks, T.K. and Sumi, K.",
        TITLE = "Attention-Based Multimodal Fusion for Video Description",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "4203-4212",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825mms4.html#TT237103"}

@inproceedings{bb242202,
        AUTHOR = "Gupta, R.K. and Yang, Y.P.",
        TITLE = "Leveraging Multi-modal Analyses and Online Knowledge Base for Video
Aboutness Generation",
        BOOKTITLE = ISVC16,
        YEAR = "2016",
        PAGES = "II: 55-64",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825mms4.html#TT237104"}

@inproceedings{bb242203,
        AUTHOR = "Vallet, F. and Essid, S. and Carrive, J. and Richard, G.",
        TITLE = "Robust visual features for the multimodal identification of
unregistered speakers in TV talk-shows",
        BOOKTITLE = ICIP10,
        YEAR = "2010",
        PAGES = "1469-1472",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825mms4.html#TT237105"}

@inproceedings{bb242204,
        AUTHOR = "Bailer, W. and Thallinger, G.",
        TITLE = "A framework for multimedia content abstraction and its application to
rushes exploration",
        BOOKTITLE = CIVR07,
        YEAR = "2007",
        PAGES = "146-153",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825mms4.html#TT237106"}

@inproceedings{bb242205,
        AUTHOR = "Tesic, J. and Smith, J.R.",
        TITLE = "Efficient Summarizing of Multimedia Archives Using Cluster Labeling",
        BOOKTITLE = CIVR06,
        YEAR = "2006",
        PAGES = "518-520",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825mms4.html#TT237107"}

@inproceedings{bb242206,
        AUTHOR = "Mauldin, M.L. and Smith, M.A. and Stevens, S.M. and Wactlar, H.D. and Christel, M.G. and Reddy, D.R.",
        TITLE = "System and method for skimming digital audio/video data",
        BOOKTITLE = US_Patent,
        YEAR = "1997",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237108"}

@article{bb242207,
        AUTHOR = "Almeida, J. and Leite, N.J. and da Silva Torres, R.",
        TITLE = "Online video summarization on compressed domain",
        JOURNAL = JVCIR,
        VOLUME = "24",
        YEAR = "2013",
        NUMBER = "6",
        MONTH = "August",
        PAGES = "729-738",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237109"}

@article{bb242208,
        AUTHOR = "Sreeja, M.U. and Kovoor, B.C.",
        TITLE = "Towards genre-specific frameworks for video summarisation: A survey",
        JOURNAL = JVCIR,
        VOLUME = "62",
        YEAR = "2019",
        PAGES = "340-358",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237110"}

@article{bb242209,
        AUTHOR = "Vivekraj, V.K. and Sen, D. and Raman, B.",
        TITLE = "Video Skimming: Taxonomy and Comprehensive Survey",
        JOURNAL = Surveys,
        VOLUME = "52",
        YEAR = "2019",
        NUMBER = "5",
        MONTH = "October",
        PAGES = "Article No 106",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237111"}

@article{bb242210,
        AUTHOR = "Kumar, K.",
        TITLE = "EVS-DK: Event video skimming using deep keyframe",
        JOURNAL = JVCIR,
        VOLUME = "58",
        YEAR = "2019",
        PAGES = "345-352",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237112"}

@article{bb242211,
        AUTHOR = "Silva, M.M. and Ramos, W.L.S. and Campos, M.F.M. and Nascimento, E.R.",
        TITLE = "A Sparse Sampling-Based Framework for Semantic Fast-Forward of
First-Person Videos",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "4",
        MONTH = "April",
        PAGES = "1438-1444",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237113"}

@inproceedings{bb242212,
        AUTHOR = "Silva, M.M. and Ramos, W.L.S. and Ferreira, J.P.K. and Chamone, F. and Campos, M.F.M. and Nascimento, E.R.",
        TITLE = "A Weighted Sparse Sampling and Smoothing Frame Transition Approach
for Semantic Fast-Forward First-Person Videos",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "2383-2392",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237114"}

@inproceedings{bb242213,
        AUTHOR = "Silva, M.M. and Ramos, W.L.S. and Ferreira, J.P.K. and Campos, M.F.M. and Nascimento, E.R.",
        TITLE = "Towards Semantic Fast-Forward and Stabilized Egocentric Videos",
        BOOKTITLE = Egocentric16,
        YEAR = "2016",
        PAGES = "I: 557-571",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237115"}

@inproceedings{bb242214,
        AUTHOR = "Ramos, W.L.S. and Silva, M.M. and Campos, M.F.M. and Nascimento, E.R.",
        TITLE = "Fast-forward video based on semantic extraction",
        BOOKTITLE = ICIP16,
        YEAR = "2016",
        PAGES = "3334-3338",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237116"}

@inproceedings{bb242215,
        AUTHOR = "Ramos, W.L.S. and Silva, M.M. and Araujo, E. and Marcolino, L.S. and Nascimento, E.R.",
        TITLE = "Straight to the Point: Fast-Forwarding Videos via Reinforcement
Learning Using Textual Data",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10928-10937",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237117"}

@article{bb242216,
        AUTHOR = "Sun, X.Y. and Wang, H.L. and He, B.",
        TITLE = "MABAN: Multi-Agent Boundary-Aware Network for Natural Language Moment
Retrieval",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "5589-5599",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237118"}

@article{bb242217,
        AUTHOR = "Lan, S. and Wang, Z. and Wei, E. and Roy Chowdhury, A.K. and Zhu, Q.",
        TITLE = "Collaborative Multi-Agent Video Fast-Forwarding",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "1041-1054",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237119"}

@inproceedings{bb242218,
        AUTHOR = "Lan, S. and Panda, R. and Zhu, Q. and Roy Chowdhury, A.K.",
        TITLE = "FFNet: Video Fast-Forwarding via Reinforcement Learning",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6771-6780",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237120"}

@inproceedings{bb242219,
        AUTHOR = "Vivekraj, V.K. and Balasubramanian, R. and Sen, D.",
        TITLE = "Vector R-ordering based selection of segments for video skimming",
        BOOKTITLE = ICPR16,
        YEAR = "2016",
        PAGES = "871-876",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237121"}

@inproceedings{bb242220,
        AUTHOR = "Christel, M.G. and Lin, W.H. and Maher, B.",
        TITLE = "Evaluating audio skimming and frame rate acceleration for summarizing
BBC rushes",
        BOOKTITLE = CIVR08,
        YEAR = "2008",
        PAGES = "407-416",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237122"}

@inproceedings{bb242221,
        AUTHOR = "Sundaram, H. and Chang, S.F.",
        TITLE = "Video skims: taxonomies and an optimal generation framework",
        BOOKTITLE = ICIP02,
        YEAR = "2002",
        PAGES = "II: 21-24",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237123"}

@inproceedings{bb242222,
        AUTHOR = "Sundaram, H. and Chang, S.F.",
        TITLE = "Constrained Utility Maximizations for Generating Visual Skims",
        BOOKTITLE = CBAIVL01,
        YEAR = "2001",
        PAGES = "124",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237124"}

@inproceedings{bb242223,
        AUTHOR = "Ma, Y.F. and Zhang, H.J.",
        TITLE = "A model of motion attention for video skimming",
        BOOKTITLE = ICIP02,
        YEAR = "2002",
        PAGES = "I: 129-132",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237125"}

@inproceedings{bb242224,
        AUTHOR = "di Lecce, V. and Dimauro, G. and Guerriero, A. and Impedovo, S. and Pirlo, G. and Salzo, A.",
        TITLE = "Image basic features indexing techniques for video skimming",
        BOOKTITLE = CIAP99,
        YEAR = "1999",
        PAGES = "715-720",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237126"}

@inproceedings{bb242225,
        AUTHOR = "Smith, M.A. and Kanade, T.",
        TITLE = "Video Skimming and Characterization through the Combination of
Image and Language Understanding Techniques",
        BOOKTITLE = CVPR97,
        YEAR = "1997",
        PAGES = "775-781",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237127"}

@inproceedings{bb242226,
        AUTHOR = "Kanade, T. and Smith, M.A.",
        TITLE = "Video Skimming and Characterization through the Combination of
Image and Language Understanding Techniques",
        BOOKTITLE = DARPA97,
        YEAR = "1997",
        PAGES = "357-366",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237127"}

@inproceedings{bb242227,
        AUTHOR = "Kanade, T. and Smith, M.A.",
        TITLE = "Video Skimming and Characterization through the Combination of
Image and Language Understanding Techniques",
        BOOKTITLE = CMU-CS-TR,
        YEAR = "1997",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237127"}

@inproceedings{bb242228,
        AUTHOR = "Smith, M.A. and Kanade, T.",
        TITLE = "Video Skimming for Quick Browsing based on Audio and
Image Characterization",
        BOOKTITLE = CMU-CS-TR,
        YEAR = "1995",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT237128"}

@article{bb242229,
        AUTHOR = "Brostow, G.J. and Fauqueur, J. and Cipolla, R.",
        TITLE = "Semantic object classes in video:
A high-definition ground truth database",
        JOURNAL = PRL,
        VOLUME = "30",
        YEAR = "2009",
        NUMBER = "2",
        MONTH = "January",
        PAGES = "88-97",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237130"}

@inproceedings{bb242230,
        AUTHOR = "Mac Aodha, O. and Brostow, G.J. and Pollefeys, M.",
        TITLE = "Segmenting video into classes of algorithm-suitability",
        BOOKTITLE = CVPR10,
        YEAR = "2010",
        PAGES = "1054-1061",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237131"}

@article{bb242231,
        AUTHOR = "Suresha, M. and Kuppa, S. and Raghukumar, D.S.",
        TITLE = "A study on deep learning spatiotemporal models and feature extraction
techniques for video understanding",
        JOURNAL = MultInfoRetr,
        VOLUME = "9",
        YEAR = "2020",
        NUMBER = "2",
        MONTH = "June",
        PAGES = "81-101",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237132"}

@article{bb242232,
        AUTHOR = "Kavoosifar, M.R. and Apiletti, D. and Baralis, E. and Garza, P. and Huet, B.",
        TITLE = "Effective video hyperlinking by means of enriched feature sets and
monomodal query combinations",
        JOURNAL = MultInfoRetr,
        VOLUME = "9",
        YEAR = "2020",
        NUMBER = "3",
        MONTH = "September",
        PAGES = "215-227",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237133"}

@article{bb242233,
        AUTHOR = "Tang, P.J. and Tan, Y.L. and Li, J.Z. and Tan, B.",
        TITLE = "Translating video into language by enhancing visual and language
representations",
        JOURNAL = JVCIR,
        VOLUME = "72",
        YEAR = "2020",
        PAGES = "102875",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237134"}

@article{bb242234,
        AUTHOR = "Yu, J. and Jiang, X. and Qin, Z. and Zhang, W. and Hu, Y. and Wu, Q.",
        TITLE = "Learning Dual Encoding Model for Adaptive Visual Understanding in
Visual Dialogue",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "220-233",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237135"}

@article{bb242235,
        AUTHOR = "Duan, J.H. and Xu, H. and Lin, X.Z. and Zhu, S.C. and Du, Y.Z.",
        TITLE = "Multi-semantic long-range dependencies capturing for efficient video
representation learning",
        JOURNAL = IVC,
        VOLUME = "104",
        YEAR = "2020",
        PAGES = "103988",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237136"}

@article{bb242236,
        AUTHOR = "Tan, H.L. and Zhu, H.Y. and Lim, J.H. and Tan, C.",
        TITLE = "A comprehensive survey of procedural video datasets",
        JOURNAL = CVIU,
        VOLUME = "202",
        YEAR = "2021",
        PAGES = "103107",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237137"}

@article{bb242237,
        AUTHOR = "Lin, J. and Gan, C. and Wang, K. and Han, S.",
        TITLE = "TSM: Temporal Shift Module for Efficient and Scalable Video
Understanding on Edge Devices",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "5",
        MONTH = "May",
        PAGES = "2760-2774",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237138"}

@inproceedings{bb242238,
        AUTHOR = "Lin, J. and Gan, C. and Han, S.",
        TITLE = "TSM: Temporal Shift Module for Efficient Video Understanding",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "7082-7092",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237139"}

@article{bb242239,
        AUTHOR = "Zhou, W. and Hou, Y. and Ouyang, K.W. and Zhou, S.L.",
        TITLE = "Exploring complementary information of self-supervised pretext tasks
for unsupervised video pre-training",
        JOURNAL = IET-CV,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "3",
        PAGES = "255-265",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237140"}

@article{bb242240,
        AUTHOR = "Li, Z.Q. and Wang, W.M. and Li, Z.Y. and Huang, Y.F. and Sato, Y.",
        TITLE = "Spatio-Temporal Perturbations for Video Attribution",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "4",
        MONTH = "April",
        PAGES = "2043-2056",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237141"}

@article{bb242241,
        AUTHOR = "Tao, L. and Wang, X.T. and Yamasaki, T.",
        TITLE = "An Improved Inter-Intra Contrastive Learning Framework on
Self-Supervised Video Representation",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "8",
        MONTH = "August",
        PAGES = "5266-5280",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237142"}

@article{bb242242,
        AUTHOR = "Huang, L. and Zhang, C. and Zhang, H.Y.",
        TITLE = "Self-Adaptive Training: Bridging Supervised and Self-Supervised
Learning",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "3",
        MONTH = "March",
        PAGES = "1362-1377",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237143"}

@inproceedings{bb242243,
        AUTHOR = "Huang, L. and You, S. and Zheng, M.K. and Wang, F. and Qian, C. and Yamasaki, T.",
        TITLE = "Learning Where to Learn in Cross-View Self-Supervised Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "14431-14440",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237144"}

@article{bb242244,
        AUTHOR = "Hu, Y. and Yin, D.C. and Wang, Y.W. and Chen, Z.Z. and Luo, C.",
        TITLE = "Decomposing style, content, and motion for videos",
        JOURNAL = JVCIR,
        VOLUME = "89",
        YEAR = "2022",
        PAGES = "103686",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237145"}

@article{bb242245,
        AUTHOR = "Hong, M.Y. and Zhang, X.F. and Li, G.R. and Huang, Q.M.",
        TITLE = "Fine-Grained Feature Generation for Generalized Zero-Shot Video
Classification",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "1599-1612",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237146"}

@article{bb242246,
        AUTHOR = "Jin, X. and Feng, R. and Sun, S. and Feng, R. and He, T.Y. and Chen, Z.B.",
        TITLE = "Semantical video coding: Instill static-dynamic clues into structured
bitstream for AI tasks",
        JOURNAL = JVCIR,
        VOLUME = "93",
        YEAR = "2023",
        PAGES = "103816",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237147"}

@article{bb242247,
        AUTHOR = "Schiappa, M.C. and Rawat, Y.S. and Shah, M.",
        TITLE = "Self-Supervised Learning for Videos: A Survey",
        JOURNAL = Surveys,
        VOLUME = "55",
        YEAR = "2023",
        NUMBER = "13s",
        MONTH = "July",
        PAGES = "Article No 288",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237148"}

@article{bb242248,
        AUTHOR = "Yang, X.M. and Xiong, S. and Wu, K.W. and Shan, D.F. and Xie, Z.",
        TITLE = "Attentive spatial-temporal contrastive learning for self-supervised
video representation",
        JOURNAL = IVC,
        VOLUME = "137",
        YEAR = "2023",
        PAGES = "104765",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237149"}

@article{bb242249,
        AUTHOR = "Miao, J.X. and Wei, Y.C. and Wang, X.H. and Yang, Y.",
        TITLE = "Temporal Pixel-Level Semantic Understanding Through the VSPW Dataset",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "9",
        MONTH = "September",
        PAGES = "11297-11308",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237150"}

@article{bb242250,
        AUTHOR = "Hu, D. and Wang, Z. and Nie, F.P. and Wang, R. and Li, X.L.",
        TITLE = "Self-Supervised Learning for Heterogeneous Audiovisual Scene Analysis",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "3534-3545",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237151"}

@article{bb242251,
        AUTHOR = "Namitha, K. and Geetha, M. and Athi, N.",
        TITLE = "An Improved Interaction Estimation and Optimization Method for
Surveillance Video Synopsis",
        JOURNAL = MultMedMag,
        VOLUME = "30",
        YEAR = "2023",
        NUMBER = "3",
        MONTH = "July",
        PAGES = "25-36",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237152"}

@article{bb242252,
        AUTHOR = "Assefa, M. and Jiang, W. and Alemu, K.G. and Yilma, G. and Adhikari, D. and Ayalew, M. and Seid, A.M. and Erbad, A.",
        TITLE = "Actor-Aware Self-Supervised Learning for Semi-Supervised Video
Representation Learning",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = "November",
        PAGES = "6679-6692",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237153"}

@article{bb242253,
        AUTHOR = "Hu, Y.F. and Gao, J.Y. and Xu, C.S.",
        TITLE = "Learning Multi-Expert Distribution Calibration for Long-Tailed Video
Classification",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "555-567",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237154"}

@article{bb242254,
        AUTHOR = "Chen, Z.Y. and Wang, H.L. and Chen, C.W.",
        TITLE = "Self-Supervised Video Representation Learning by Serial Restoration
With Elastic Complexity",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "2235-2248",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237155"}

@article{bb242255,
        AUTHOR = "Chen, Z.L. and Wang, L. and Wang, P. and Gao, P.",
        TITLE = "Question-Aware Global-Local Video Understanding Network for
Audio-Visual Question Answering",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = "May",
        PAGES = "4109-4119",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237156"}

@article{bb242256,
        AUTHOR = "Cao, H.Z. and Xu, Y.C. and Mao, K.Z. and Xie, L.H. and Yin, J.X. and See, S. and Xu, Q.W. and Yang, J.F.",
        TITLE = "Self-Supervised Video Representation Learning by Video Incoherence
Detection",
        JOURNAL = Cyber,
        VOLUME = "54",
        YEAR = "2024",
        NUMBER = "6",
        MONTH = "June",
        PAGES = "3810-3822",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237157"}

@article{bb242257,
        AUTHOR = "Zhang, Z.Q. and Ma, Z.Y. and Yuan, C.F. and Chen, Y.X. and Wang, P. and Qi, Z.A. and Hao, C.L. and Li, B. and Shan, Y. and Hu, W.M. and Maybank, S.J.",
        TITLE = "Chinese Title Generation for Short Videos:
Dataset, Metric and Algorithm",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "7",
        MONTH = "July",
        PAGES = "5192-5208",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237158"}

@article{bb242258,
        AUTHOR = "Bi, S. and Hu, Z.P. and Zhang, H. and Di, J. and Sun, Z.",
        TITLE = "Motion-guided spatiotemporal multitask feature discrimination for
self-supervised video representation learning",
        JOURNAL = PR,
        VOLUME = "155",
        YEAR = "2024",
        PAGES = "110713",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237159"}

@article{bb242259,
        AUTHOR = "Li, D. and Jin, J.D. and Zhang, Y.H. and Zhong, Y.L. and Wu, Y.Y. and Chen, L. and Wang, X. and Luo, B.",
        TITLE = "Semantic-aware frame-event fusion based pattern recognition via large
vision-language models",
        JOURNAL = PR,
        VOLUME = "158",
        YEAR = "2025",
        PAGES = "111080",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237160"}

@article{bb242260,
        AUTHOR = "Wu, W.H. and Wang, X.H. and Luo, H.P. and Wang, J.D. and Yang, Y. and Ouyang, W.L.",
        TITLE = "Cap4Video++: Enhancing Video Understanding With Auxiliary Captions",
        JOURNAL = PAMI,
        VOLUME = "47",
        YEAR = "2025",
        NUMBER = "7",
        MONTH = "July",
        PAGES = "5223-5237",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237161"}

@article{bb242261,
        AUTHOR = "Verma, D. and Roy, D. and Fernando, B.",
        TITLE = "Effectively Leveraging CLIP for Generating Situational Summaries of
Images and Videos",
        JOURNAL = IJCV,
        VOLUME = "133",
        YEAR = "2025",
        NUMBER = "8",
        MONTH = "August",
        PAGES = "5302-5325",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237162"}

@article{bb242262,
        AUTHOR = "Song, X. and Tian, W. and Zhu, Q.Q. and Zhang, X.L.",
        TITLE = "VideoMamba++: Integrating state space model with dual attention for
enhanced video understanding",
        JOURNAL = IVC,
        VOLUME = "161",
        YEAR = "2025",
        PAGES = "105609",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237163"}

@article{bb242263,
        AUTHOR = "Li, T.P. and Wang, H. and Li, Q. and Ni, Z.",
        TITLE = "Vision-Language Relational Transformer for Video-to-Text Generation",
        JOURNAL = MultMed,
        VOLUME = "27",
        YEAR = "2025",
        PAGES = "4584-4596",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237164"}

@inproceedings{bb242264,
        AUTHOR = "Liu, Z.C. and Xu, K.L. and Su, B. and Zou, X. and Peng, Y.X. and Zhou, J.H.",
        TITLE = "STOP: Integrated Spatial-Temporal Dynamic Prompting for Video
Understanding",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "13776-13786",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237165"}

@inproceedings{bb242265,
        AUTHOR = "Wang, Q.H. and Shi, Y.K. and Ou, J. and Chen, R. and Lin, K. and Wang, J.H. and Jiang, B. and Yang, H.T. and Zheng, M. and Tao, X. and Yang, F. and Wan, P.F. and Zhang, D.",
        TITLE = "Koala-36M: A Large-Scale Video Dataset Improving Consistency between
Fine-Grained Conditions and Video Content",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "8428-8437",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237166"}

@inproceedings{bb242266,
        AUTHOR = "Ho, D. and Madden, S.",
        TITLE = "DejaVid: Encoder-Agnostic Learned Temporal Matching for Video
Classification",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "24023-24032",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237167"}

@inproceedings{bb242267,
        AUTHOR = "Manasyan, A. and Seitzer, M. and Radovic, F. and Martius, G. and Zadaianchuk, A.",
        TITLE = "Temporally Consistent Object-Centric Learning by Contrasting Slots",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "5401-5411",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237168"}

@inproceedings{bb242268,
        AUTHOR = "Bigverdi, M. and Luo, Z. and Hsieh, C.Y. and Shen, E. and Chen, D.P. and Shapiro, L.G. and Krishna, R.",
        TITLE = "Perception Tokens Enhance Visual Reasoning in Multimodal Language
Models",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "3836-3845",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237169"}

@inproceedings{bb242269,
        AUTHOR = "Ren, Z.W. and Wei, Y.C. and Guo, X. and Zhao, Y. and Kang, B. and Feng, J.S. and Jin, X.J.",
        TITLE = "VideoWorld: Exploring Knowledge Learning from Unlabeled Videos",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "29029-29039",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237170"}

@inproceedings{bb242270,
        AUTHOR = "Tang, Y.L. and Guo, J.J. and Hua, H. and Liang, S. and Feng, M.Q. and Li, X.Y. and Mao, R. and Huang, C. and Bi, J. and Zhang, Z.L. and Fazli, P. and Xu, C.L.",
        TITLE = "VidComposition: Can MLLMs Analyze Compositions in Compiled Videos?",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "8490-8500",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237171"}

@inproceedings{bb242271,
        AUTHOR = "Kim, K. and Park, G. and Lee, Y. and Yeo, W. and Hwang, S.J.",
        TITLE = "VideoICL: Confidence-based Iterative In-context Learning for
Out-of-Distribution Video Understanding",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "3295-3305",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237172"}

@inproceedings{bb242272,
        AUTHOR = "Huang, Z.P. and Li, X.H. and Li, J.Q. and Wang, J. and Zeng, X.Y. and Liang, C. and Wu, T. and Chen, X. and Li, L. and Wang, L.M.",
        TITLE = "Online Video Understanding: OVBench and VideoChat-Online",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "3328-3338",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237173"}

@inproceedings{bb242273,
        AUTHOR = "Liu, B. and Dong, Y.H. and Wang, Y.Q. and Ma, Z.X. and Tang, Y.S. and Tang, L. and Rao, Y.M. and Ma, W.C. and Krishna, R.",
        TITLE = "Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal
Language Model",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "3783-3792",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237174"}

@inproceedings{bb242274,
        AUTHOR = "Zhao, Y.L. and Zhang, H. and Xie, L. and Hu, T.Y. and Gan, G. and Long, Y. and Hu, Z.Y. and Chen, W.Y. and Li, C.H. and Xu, Z.J. and Wang, C.Y. and Shangguan, Z.Y. and Liang, Z.W. and Liu, Y.X. and Zhao, C. and Cohan, A.",
        TITLE = "MMVU: Measuring Expert-Level Multi-Discipline Video Understanding",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "8475-8489",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237175"}

@inproceedings{bb242275,
        AUTHOR = "Lv, B.X. and Zha, Y.H. and Dai, T. and Yuerong, X. and Chen, K. and Xia, S.T.",
        TITLE = "Adapting Pre-trained 3D Models for Point Cloud Video Understanding
via Cross-frame Spatio-temporal Perception",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "12413-12422",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237176"}

@inproceedings{bb242276,
        AUTHOR = "Ashutosh, K. and Nagarajan, T. and Pavlakos, G. and Kitani, K. and Grauman, K.",
        TITLE = "ExpertAF: Expert Actionable Feedback from Video",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "13582-13594",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237177"}

@inproceedings{bb242277,
        AUTHOR = "Yang, J.W. and Tan, R. and Wu, Q.H. and Zheng, R.J. and Peng, B.L. and Liang, Y. and Gu, Y. and Cai, M. and Ye, S. and Jang, J. and Deng, Y. and Gao, J.F.",
        TITLE = "Magma: A Foundation Model for Multimodal AI Agents",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "14203-14214",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237178"}

@inproceedings{bb242278,
        AUTHOR = "Hu, K. and Gao, F. and Nie, X.H. and Zhou, P. and Tran, S. and Neiman, T. and Wang, L.Y. and Shah, M. and Hamid, R. and Yin, B. and Chilimbi, T.",
        TITLE = "M-LLM Based Video Frame Selection for Efficient Video Understanding",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "13702-13712",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237179"}

@inproceedings{bb242279,
        AUTHOR = "Jung, M. and Xiao, J.B. and Zhang, B.T. and Yao, A.",
        TITLE = "On the Consistency of Video Large Language Models in Temporal
Comprehension",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "13713-13722",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237180"}

@inproceedings{bb242280,
  author    = {Li, C. and Im, E.W. and Fazli, P.},
  title     = {VidHalluc: Evaluating Temporal Hallucinations in Multimodal Large Language Models for Video Understanding},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {13723-13733},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237181}
}

@inproceedings{bb242281,
  author    = {Liu, J. and Han, J. and Liu, L. and Aviles Rivero, A.I. and Jiang, C. and Liu, Z. and Wang, H.S.},
  title     = {Mamba4D: Efficient 4D Point Cloud Video Understanding with Disentangled Spatial-Temporal State Space Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {17626-17636},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237182}
}

@inproceedings{bb242282,
  author    = {Das, S. and Mujavarsheik, B. and Lyngkhoi, R.E.Z. and Saha, S. and Maurya, A.},
  title     = {Deciphering the Complaint Aspects: Towards an Aspect-Based Complaint Identification Model with Video Complaint Dataset in Finance},
  booktitle = WACV25,
  year      = {2025},
  pages     = {7195-7204},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237183}
}

@inproceedings{bb242283,
  author    = {Liu, H. and Nakashima, Y. and Babaguchi, N.},
  title     = {Paladin: Understanding Video Intentions in Political Advertisement Videos},
  booktitle = WACV25,
  year      = {2025},
  pages     = {8239-8248},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237184}
}

@inproceedings{bb242284,
  author    = {Bae, K. and Ahn, G. and Kim, Y. and Choi, J.},
  title     = {DEVIAS: Learning Disentangled Video Representations of Action and Scene},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXVIII: 431-448},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237185}
}

@inproceedings{bb242285,
  author    = {Salehi, M. and Dorkenwald, M. and Thoker, F.M. and Gavves, E. and Snoek, C.G.M. and Asano, Y.M.},
  title     = {Sigma: Sinkhorn-guided Masked Video Modeling},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XXIV: 293-312},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237186}
}

@inproceedings{bb242286,
  author    = {Xie, B.Z. and Zhang, S.C. and Zhou, Z. and Li, B. and Zhang, Y.H. and Hessel, J. and Yang, J.K. and Liu, Z.W.},
  title     = {FUNQA: Towards Surprising Video Comprehension},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {I: 39-57},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237187}
}

@inproceedings{bb242287,
  author    = {Choi, M. and Goel, H. and Omama, M. and Yang, Y.H. and Shah, S. and Chinchali, S.},
  title     = {Towards Neuro-symbolic Video Understanding},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXVIII: 220-236},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237188}
}

% The braces around the system name keep its mixed case under styles that
% sentence-case titles.
@inproceedings{bb242288,
        AUTHOR = "Fan, Y. and Ma, X.J. and Wu, R.J. and Du, Y.T. and Li, J.Q. and Gao, Z. and Li, Q.",
        TITLE = "{VideoAgent}: A Memory-augmented Multimodal Agent for Video Understanding",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXII: 75-92",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237189"}

@inproceedings{bb242289,
  author    = {Wang, S.J. and Zhao, Q. and Do, M.Q. and Agarwal, N. and Lee, K. and Sun, C.},
  title     = {VAMOS: Versatile Action Models for Video Understanding},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XII: 142-160},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237190}
}

@inproceedings{bb242290,
  author    = {Lebreton, P. and Le Callet, P. and Birkbeck, N. and Wang, Y.L. and Adsumilli, B.},
  title     = {A Dataset for Understanding Open UGC Video Datasets},
  booktitle = ICIP24,
  year      = {2024},
  pages     = {165-171},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237191}
}

@inproceedings{bb242291,
  author    = {Wu, J.T. and Mo, S.T. and Atito, S. and Feng, Z.H. and Kittler, J.V. and Husain, S.S. and Awais, M.},
  title     = {Masked Momentum Contrastive Learning for Semantic Understanding by Observation},
  booktitle = ICIP24,
  year      = {2024},
  pages     = {263-269},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237192}
}

@inproceedings{bb242292,
  author    = {Yun, H. and Ahn, J. and Kim, M. and Kim, E.S.},
  title     = {Compositional Video Understanding with Spatiotemporal Structure-based Transformers},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {18751-18760},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237193}
}

@inproceedings{bb242293,
        AUTHOR = "Papalampidi, P. and Koppula, S. and Pathak, S. and Chiu, J. and Heyward, J. and Patraucean, V. and Shen, J.J. and Miech, A. and Zisserman, A. and Nematzadeh, A.",
        TITLE = "A Simple Recipe for Contrastively Pre-Training Video-First Encoders
Beyond 16 Frames",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "14386-14397",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237194"}

@inproceedings{bb242294,
  author    = {Wang, A.D. and Wu, B. and Chen, S. and Chen, Z.F. and Guan, H.T. and Lee, W.N. and Li, L.E. and Gan, C.},
  title     = {SOK-Bench: A Situated Video Reasoning Benchmark with Aligned Open-World Knowledge},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13384-13394},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237195}
}

@inproceedings{bb242295,
  author    = {Zhong, Y. and Baghel, B.K.},
  title     = {Multimodal Understanding of Memes with Fair Explanations},
  booktitle = MULA24,
  year      = {2024},
  pages     = {2007-2017},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237196}
}

@inproceedings{bb242296,
  author    = {Sheng, D. and Chen, D.D. and Tan, Z.T. and Liu, Q. and Chu, Q. and Bao, J.M. and Gong, T. and Liu, B. and Xu, S.W. and Yu, N.H.},
  title     = {Towards More Unified In-Context Visual Understanding},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13362-13372},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237197}
}

% The braces around the model name keep its mixed case under styles that
% sentence-case titles.
@inproceedings{bb242297,
        AUTHOR = "Ma, F. and Jin, X.J. and Wang, H. and Xian, Y.C. and Feng, J.S. and Yang, Y.",
        TITLE = "{Vista-LLaMA}: Reducing Hallucination in Video Language Models via
Equal Distance to Visual Tokens",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "13151-13160",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT237198"}

@inproceedings{bb242298,
  author    = {Tan, C.L. and Lai, J.H. and Zheng, W.S. and Hu, J.F.},
  title     = {Siamese Learning with Joint Alignment and Regression for Weakly-Supervised Video Paragraph Grounding},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13569-13580},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237199}
}

@inproceedings{bb242299,
  author    = {Jin, P. and Takanobu, R. and Zhang, W. and Cao, X.C. and Yuan, L.},
  title     = {Chat-UniVi: Unified Visual Representation Empowers Large Language Models with Image and Video Understanding},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13700-13710},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT237200}
}

Last update: Oct 6, 2025 at 14:07:43