@inproceedings{bb206300,
        AUTHOR = "Gupta, R. K. and Yang, Y. P.",
        TITLE = "Leveraging Multi-modal Analyses and Online Knowledge Base for Video Aboutness Generation",
        BOOKTITLE = ISVC16,
        VOLUME = "II",
        YEAR = "2016",
        PAGES = "55--64",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825mms4.html#TT201401"}

@inproceedings{bb206301,
        AUTHOR = "Vallet, F. and Essid, S. and Carrive, J. and Richard, G.",
        TITLE = "Robust visual features for the multimodal identification of unregistered speakers in {TV} talk-shows",
        BOOKTITLE = ICIP10,
        YEAR = "2010",
        PAGES = "1469--1472",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825mms4.html#TT201402"}

@inproceedings{bb206302,
        AUTHOR = "Bailer, W. and Thallinger, G.",
        TITLE = "A framework for multimedia content abstraction and its application to rushes exploration",
        BOOKTITLE = CIVR07,
        YEAR = "2007",
        PAGES = "146--153",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825mms4.html#TT201403"}

@inproceedings{bb206303,
        AUTHOR = "Tesic, J. and Smith, J. R.",
        TITLE = "Efficient Summarizing of Multimedia Archives Using Cluster Labeling",
        BOOKTITLE = CIVR06,
        YEAR = "2006",
        PAGES = "518--520",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825mms4.html#TT201404"}

@misc{bb206304,
        AUTHOR = "Mauldin, M. L. and Smith, M. A. and Stevens, S. M. and Wactlar, H. D. and Christel, M. G. and Reddy, D. R.",
        TITLE = "System and method for skimming digital audio/video data",
        HOWPUBLISHED = US_Patent,
        YEAR = "1997",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201405"}

@article{bb206305,
        AUTHOR = "Li, Y. and Narayanan, S. S. and Kuo, C. C. J.",
        TITLE = "Adaptive Speaker Identification with Audio-Visual Cues for Movie Content Analysis",
        JOURNAL = PRL,
        VOLUME = "25",
        YEAR = "2004",
        NUMBER = "7",
        MONTH = may,
        PAGES = "777--791",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201406"}

@article{bb206306,
        AUTHOR = "Li, Y. and Narayanan, S. S. and Kuo, C. C. J.",
        TITLE = "Content-Based Movie Analysis and Indexing Based on Audio-Visual Cues",
        JOURNAL = CirSysVideo,
        VOLUME = "14",
        YEAR = "2004",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "1073--1085",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201407"}

@inproceedings{bb206307,
        AUTHOR = "Li, Y. and Narayanan, S. S. and Kuo, C. C. J.",
        TITLE = "Movie Content Analysis, Indexing and Skimming Via Multimodal Information",
        BOOKTITLE = VideoMining03,
        YEAR = "2003",
        NOTE = "Chapter 5",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201408"}

@article{bb206308,
        AUTHOR = "Li, Y. and Kuo, C. C. J.",
        TITLE = "A robust video scene extraction approach to movie content abstraction",
        JOURNAL = IJIST,
        VOLUME = "13",
        YEAR = "2003",
        NUMBER = "5",
        PAGES = "236--244",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201409"}

@article{bb206309,
        AUTHOR = "Almeida, J. and Leite, N. J. and da Silva Torres, R.",
        TITLE = "Online video summarization on compressed domain",
        JOURNAL = JVCIR,
        VOLUME = "24",
        YEAR = "2013",
        NUMBER = "6",
        MONTH = aug,
        PAGES = "729--738",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201410"}

@article{bb206310,
        AUTHOR = "Sreeja, M. U. and Kovoor, B. C.",
        TITLE = "Towards genre-specific frameworks for video summarisation: A survey",
        JOURNAL = JVCIR,
        VOLUME = "62",
        YEAR = "2019",
        PAGES = "340--358",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201411"}

@article{bb206311,
        AUTHOR = "Vivekraj, V. K. and Sen, D. and Raman, B.",
        TITLE = "Video Skimming: Taxonomy and Comprehensive Survey",
        JOURNAL = Surveys,
        VOLUME = "52",
        YEAR = "2019",
        NUMBER = "5",
        MONTH = oct,
        PAGES = "Article No 106",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201412"}

@article{bb206312,
        AUTHOR = "Kumar, K.",
        TITLE = "{EVS-DK}: Event video skimming using deep keyframe",
        JOURNAL = JVCIR,
        VOLUME = "58",
        YEAR = "2019",
        PAGES = "345--352",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201413"}

@article{bb206313,
        AUTHOR = "Silva, M. M. and Ramos, W. L. S. and Campos, M. F. M. and Nascimento, E. R.",
        TITLE = "A Sparse Sampling-Based Framework for Semantic Fast-Forward of First-Person Videos",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "1438--1444",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201414"}

@inproceedings{bb206314,
        AUTHOR = "Silva, M. M. and Ramos, W. L. S. and Ferreira, J. P. K. and Chamone, F. and Campos, M. F. M. and Nascimento, E. R.",
        TITLE = "A Weighted Sparse Sampling and Smoothing Frame Transition Approach for Semantic Fast-Forward First-Person Videos",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "2383--2392",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201415"}

@inproceedings{bb206315,
        AUTHOR = "Silva, M. M. and Ramos, W. L. S. and Ferreira, J. P. K. and Campos, M. F. M. and Nascimento, E. R.",
        TITLE = "Towards Semantic Fast-Forward and Stabilized Egocentric Videos",
        BOOKTITLE = Egocentric16,
        VOLUME = "I",
        YEAR = "2016",
        PAGES = "557--571",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201416"}

@inproceedings{bb206316,
        AUTHOR = "Ramos, W. L. S. and Silva, M. M. and Campos, M. F. M. and Nascimento, E. R.",
        TITLE = "Fast-forward video based on semantic extraction",
        BOOKTITLE = ICIP16,
        YEAR = "2016",
        PAGES = "3334--3338",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201417"}

@inproceedings{bb206317,
        AUTHOR = "Ramos, W. L. S. and Silva, M. M. and Araujo, E. and Marcolino, L. S. and Nascimento, E. R.",
        TITLE = "Straight to the Point: Fast-Forwarding Videos via Reinforcement Learning Using Textual Data",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10928--10937",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201418"}

@article{bb206318,
        AUTHOR = "Sun, X. Y. and Wang, H. and He, B.",
        TITLE = "{MABAN}: Multi-Agent Boundary-Aware Network for Natural Language Moment Retrieval",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "5589--5599",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201419"}

@inproceedings{bb206319,
        AUTHOR = "Lan, S. and Panda, R. and Zhu, Q. and Roy Chowdhury, A. K.",
        TITLE = "{FFNet}: Video Fast-Forwarding via Reinforcement Learning",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6771--6780",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201420"}

@inproceedings{bb206320,
        AUTHOR = "Vivekraj, V. K. and Balasubramanian, R. and Sen, D.",
        TITLE = "Vector {R-ordering} based selection of segments for video skimming",
        BOOKTITLE = ICPR16,
        YEAR = "2016",
        PAGES = "871--876",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201421"}

@inproceedings{bb206321,
        AUTHOR = "Christel, M. G. and Lin, W. H. and Maher, B.",
        TITLE = "Evaluating audio skimming and frame rate acceleration for summarizing {BBC} rushes",
        BOOKTITLE = CIVR08,
        YEAR = "2008",
        PAGES = "407--416",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201422"}

@inproceedings{bb206322,
        AUTHOR = "Sundaram, H. and Chang, S. F.",
        TITLE = "Video skims: taxonomies and an optimal generation framework",
        BOOKTITLE = ICIP02,
        VOLUME = "II",
        YEAR = "2002",
        PAGES = "21--24",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201423"}

@inproceedings{bb206323,
        AUTHOR = "Sundaram, H. and Chang, S. F.",
        TITLE = "Constrained Utility Maximizations for Generating Visual Skims",
        BOOKTITLE = CBAIVL01,
        YEAR = "2001",
        PAGES = "124",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201424"}

@inproceedings{bb206324,
        AUTHOR = "Ma, Y. F. and Zhang, H. J.",
        TITLE = "A model of motion attention for video skimming",
        BOOKTITLE = ICIP02,
        VOLUME = "I",
        YEAR = "2002",
        PAGES = "129--132",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201425"}

@inproceedings{bb206325,
        AUTHOR = "di Lecce, V. and Dimauro, G. and Guerriero, A. and Impedovo, S. and Pirlo, G. and Salzo, A.",
        TITLE = "Image basic features indexing techniques for video skimming",
        BOOKTITLE = CIAP99,
        YEAR = "1999",
        PAGES = "715--720",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201426"}

@inproceedings{bb206326,
        AUTHOR = "Smith, M. A. and Kanade, T.",
        TITLE = "Video Skimming and Characterization through the Combination of Image and Language Understanding Techniques",
        BOOKTITLE = CVPR97,
        YEAR = "1997",
        PAGES = "775--781",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201427"}

@inproceedings{bb206327,
        AUTHOR = "Kanade, T. and Smith, M. A.",
        TITLE = "Video Skimming and Characterization through the Combination of Image and Language Understanding Techniques",
        BOOKTITLE = DARPA97,
        YEAR = "1997",
        PAGES = "357--366",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201427"}

@techreport{bb206328,
        AUTHOR = "Kanade, T. and Smith, M. A.",
        TITLE = "Video Skimming and Characterization through the Combination of Image and Language Understanding Techniques",
        INSTITUTION = CMU-CS-TR,
        YEAR = "1997",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201427"}

@techreport{bb206329,
        AUTHOR = "Smith, M. A. and Kanade, T.",
        TITLE = "Video Skimming for Quick Browsing based on Audio and Image Characterization",
        INSTITUTION = CMU-CS-TR,
        YEAR = "1995",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201428"}

@article{bb206330,
        AUTHOR = "Brostow, G. J. and Fauqueur, J. and Cipolla, R.",
        TITLE = "Semantic object classes in video: A high-definition ground truth database",
        JOURNAL = PRL,
        VOLUME = "30",
        YEAR = "2009",
        NUMBER = "2",
        MONTH = jan,
        PAGES = "88--97",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201430"}

@inproceedings{bb206331,
        AUTHOR = "Mac Aodha, O. and Brostow, G. J. and Pollefeys, M.",
        TITLE = "Segmenting video into classes of algorithm-suitability",
        BOOKTITLE = CVPR10,
        YEAR = "2010",
        PAGES = "1054--1061",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201431"}

@article{bb206332,
        AUTHOR = "Suresha, M. and Kuppa, S. and Raghukumar, D. S.",
        TITLE = "A study on deep learning spatiotemporal models and feature extraction techniques for video understanding",
        JOURNAL = MultInfoRetr,
        VOLUME = "9",
        YEAR = "2020",
        NUMBER = "2",
        MONTH = jun,
        PAGES = "81--101",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201432"}

@article{bb206333,
        AUTHOR = "Kavoosifar, M. R. and Apiletti, D. and Baralis, E. and Garza, P. and Huet, B.",
        TITLE = "Effective video hyperlinking by means of enriched feature sets and monomodal query combinations",
        JOURNAL = MultInfoRetr,
        VOLUME = "9",
        YEAR = "2020",
        NUMBER = "3",
        MONTH = sep,
        PAGES = "215--227",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201433"}

@article{bb206334,
        AUTHOR = "Tang, P. J. and Tan, Y. L. and Li, J. Z. and Tan, B.",
        TITLE = "Translating video into language by enhancing visual and language representations",
        JOURNAL = JVCIR,
        VOLUME = "72",
        YEAR = "2020",
        PAGES = "102875",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201434"}

@article{bb206335,
        AUTHOR = "Yu, J. and Jiang, X. and Qin, Z. and Zhang, W. and Hu, Y. and Wu, Q.",
        TITLE = "Learning Dual Encoding Model for Adaptive Visual Understanding in Visual Dialogue",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "220--233",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201435"}

@article{bb206336,
        AUTHOR = "Duan, J. H. and Xu, H. and Lin, X. Z. and Zhu, S. C. and Du, Y. Z.",
        TITLE = "Multi-semantic long-range dependencies capturing for efficient video representation learning",
        JOURNAL = IVC,
        VOLUME = "104",
        YEAR = "2020",
        PAGES = "103988",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201436"}

@article{bb206337,
        AUTHOR = "Tan, H. L. and Zhu, H. Y. and Lim, J. H. and Tan, C.",
        TITLE = "A comprehensive survey of procedural video datasets",
        JOURNAL = CVIU,
        VOLUME = "202",
        YEAR = "2021",
        PAGES = "103107",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201437"}

@article{bb206338,
        AUTHOR = "Lin, J. and Gan, C. and Wang, K. and Han, S.",
        TITLE = "{TSM}: Temporal Shift Module for Efficient and Scalable Video Understanding on Edge Devices",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "5",
        MONTH = may,
        PAGES = "2760--2774",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201438"}

@inproceedings{bb206339,
        AUTHOR = "Lin, J. and Gan, C. and Han, S.",
        TITLE = "{TSM}: Temporal Shift Module for Efficient Video Understanding",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "7082--7092",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201439"}

@article{bb206340,
        AUTHOR = "Zhou, W. and Hou, Y. and Ouyang, K. W. and Zhou, S. L.",
        TITLE = "Exploring complementary information of self-supervised pretext tasks for unsupervised video pre-training",
        JOURNAL = IET-CV,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "3",
        PAGES = "255--265",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201440"}

@article{bb206341,
        AUTHOR = "Li, Z. Q. and Wang, W. M. and Li, Z. Y. and Huang, Y. F. and Sato, Y.",
        TITLE = "Spatio-Temporal Perturbations for Video Attribution",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2043--2056",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201441"}

@article{bb206342,
        AUTHOR = "Tao, L. and Wang, X. T. and Yamasaki, T.",
        TITLE = "An Improved Inter-Intra Contrastive Learning Framework on Self-Supervised Video Representation",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "5266--5280",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201442"}

@inproceedings{bb206343,
        AUTHOR = "Huang, L. and You, S. and Zheng, M. K. and Wang, F. and Qian, C. and Yamasaki, T.",
        TITLE = "Learning Where to Learn in Cross-View Self-Supervised Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "14431--14440",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201443"}

@article{bb206344,
        AUTHOR = "Hu, Y. and Yin, D. C. and Wang, Y. and Chen, Z. Z. and Luo, C.",
        TITLE = "Decomposing style, content, and motion for videos",
        JOURNAL = JVCIR,
        VOLUME = "89",
        YEAR = "2022",
        PAGES = "103686",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201444"}

@article{bb206345,
        AUTHOR = "Hong, M. Y. and Zhang, X. F. and Li, G. R. and Huang, Q. M.",
        TITLE = "Fine-Grained Feature Generation for Generalized Zero-Shot Video Classification",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "1599--1612",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201445"}

@article{bb206346,
        AUTHOR = "Jin, X. and Feng, R. and Sun, S. and Feng, R. and He, T. and Chen, Z. B.",
        TITLE = "Semantical video coding: Instill static-dynamic clues into structured bitstream for {AI} tasks",
        JOURNAL = JVCIR,
        VOLUME = "93",
        YEAR = "2023",
        PAGES = "103816",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201446"}

@inproceedings{bb206347,
        AUTHOR = "Zhang, H. L. and Pirsiavash, H. and Liu, X.",
        TITLE = "{MASTAF}: A Model-Agnostic Spatio-Temporal Attention Fusion Network for Few-shot Video Classification",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "2507--2516",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201447"}

@inproceedings{bb206348,
        AUTHOR = "Senocak, A. and Kim, J. and Oh, T. H. and Li, D. Z. and Kweon, I. S.",
        TITLE = "Event-Specific Audio-Visual Fusion Layers: A Simple and New Perspective on Video Understanding",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "2236--2246",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201448"}

@inproceedings{bb206349,
        AUTHOR = "Xia, B. Y. and Wu, W. H. and Wang, H. R. and Su, R. and He, D. L. and Yang, H. and Fan, X. R. and Ouyang, W. L.",
        TITLE = "{NSNet}: Non-saliency Suppression Sampler for Efficient Video Recognition",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXIV",
        YEAR = "2022",
        PAGES = "705--723",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201449"}

@inproceedings{bb206350,
        AUTHOR = "Xia, B. Y. and Wang, Z. H. and Wu, W. H. and Wang, H. R. and Han, J. G.",
        TITLE = "Temporal Saliency Query Network for Efficient Video Recognition",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXIV",
        YEAR = "2022",
        PAGES = "741--759",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201450"}

@inproceedings{bb206351,
        AUTHOR = "Islam, M. M. and Bertasius, G.",
        TITLE = "Long Movie Clip Classification with State-Space Video Models",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXV",
        YEAR = "2022",
        PAGES = "87--104",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201451"}

@inproceedings{bb206352,
        AUTHOR = "Habibian, A. and Ben Yahia, H. and Abati, D. and Gavves, E. and Porikli, F. M.",
        TITLE = "Delta Distillation for Efficient Video Processing",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXV",
        YEAR = "2022",
        PAGES = "213--229",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201452"}

@inproceedings{bb206353,
        AUTHOR = "Li, Z. Z. and Wang, M. M. and Pi, H. J. and Xu, K. and Mei, J. B. and Liu, Y.",
        TITLE = "{E-NeRV}: Expedite Neural Video Representation with Disentangled Spatial-Temporal Context",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXV",
        YEAR = "2022",
        PAGES = "267--284",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201453"}

@inproceedings{bb206354,
        AUTHOR = "Kosman, E. and di Castro, D.",
        TITLE = "{GraphVid}: It only Takes a Few Nodes to Understand a Video",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXV",
        YEAR = "2022",
        PAGES = "195--212",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201454"}

@inproceedings{bb206355,
        AUTHOR = "Ju, C. and Han, T. and Zheng, K. and Zhang, Y. and Xie, W.",
        TITLE = "Prompting Visual-Language Models for Efficient Video Understanding",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXV",
        YEAR = "2022",
        PAGES = "105--124",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201455"}

@inproceedings{bb206356,
        AUTHOR = "Liang, S. X. and Shen, X. and Huang, J. Q. and Hua, X. S.",
        TITLE = "Delving into Details: Synopsis-to-Detail Networks for Video Recognition",
        BOOKTITLE = ECCV22,
        VOLUME = "IV",
        YEAR = "2022",
        PAGES = "262--278",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201456"}

@inproceedings{bb206357,
        AUTHOR = "Ur Rehman, Y. A. and Gao, Y. and Shen, J. J. and de Gusmao, P. P. B. and Lane, N.",
        TITLE = "Federated Self-supervised Learning for Video Understanding",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXI",
        YEAR = "2022",
        PAGES = "506--522",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201457"}

@inproceedings{bb206358,
        AUTHOR = "Dadashzadeh, A. and Whone, A. and Mirmehdi, M.",
        TITLE = "Auxiliary Learning for Self-Supervised Video Representation via Similarity-based Knowledge Distillation",
        BOOKTITLE = L3D-IVU22,
        YEAR = "2022",
        PAGES = "4230--4239",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201458"}

@inproceedings{bb206359,
        AUTHOR = "Li, Y. and Vasconcelos, N. M.",
        TITLE = "Improving Video Model Transfer with Dynamic Representation Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19258--19269",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201459"}

@inproceedings{bb206360,
        AUTHOR = "Guo, S. and Xiong, Z. and Zhong, Y. J. and Wang, L. M. and Guo, X. B. and Han, B. and Huang, W. L.",
        TITLE = "Cross-Architecture Self-supervised Video Representation Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19248--19257",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201460"}

@inproceedings{bb206361,
        AUTHOR = "Xu, X. Y. and Li, Y. L. and Lu, C.",
        TITLE = "Learning to Anticipate Future with Dynamic Context Removal",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12724--12734",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201461"}

@inproceedings{bb206362,
        AUTHOR = "Gadre, S. Y. and Ehsani, K. and Song, S. and Mottaghi, R.",
        TITLE = "Continuous Scene Representations for Embodied {AI}",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "14829--14839",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201462"}

@inproceedings{bb206363,
        AUTHOR = "Liang, C. and Wang, W. G. and Zhou, T. F. and Yang, Y.",
        TITLE = "Visual Abductive Reasoning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15544--15554",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201463"}

@inproceedings{bb206364,
        AUTHOR = "Kinfu, K. A. and Vidal, R.",
        TITLE = "Analysis and Extensions of Adversarial Training for Video Classification",
        BOOKTITLE = RoSe22,
        YEAR = "2022",
        PAGES = "3415--3424",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201464"}

@inproceedings{bb206365,
        AUTHOR = "Xiao, F. and Kundu, K. and Tighe, J. and Modolo, D.",
        TITLE = "Hierarchical Self-supervised Representation Learning for Movie Understanding",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "9717--9726",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201465"}

@inproceedings{bb206366,
        AUTHOR = "Li, L. L. and Zhou, T. F. and Wang, W. G. and Yang, L. and Li, J. W. and Yang, Y.",
        TITLE = "Locality-Aware Inter-and Intra-Video Reconstruction for Self-Supervised Correspondence Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "8709--8720",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201466"}

@inproceedings{bb206367,
        AUTHOR = "Jiang, Y. F. and Gong, X. Y. and Wu, J. and Shi, H. and Yan, Z. C. and Wang, Z. Y.",
        TITLE = "{Auto-X3D}: Ultra-Efficient Video Understanding via Finer-Grained Neural Architecture Search",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2354--2363",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201467"}

@inproceedings{bb206368,
        AUTHOR = "Chen, N. L. and Chu, L. and Pan, H. and Lu, Y. and Wang, W. P.",
        TITLE = "Self-Supervised Image Representation Learning with Geometric Set Consistency",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19270--19280",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201468"}

@inproceedings{bb206369,
        AUTHOR = "Lin, Y. Z. and Guo, X. and Lu, Y.",
        TITLE = "Self-Supervised Video Representation Learning with Meta-Contrastive Network",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "8219--8229",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201469"}

@inproceedings{bb206370,
        AUTHOR = "Guo, X. D. and Guo, X. and Lu, Y.",
        TITLE = "{SSAN}: Separable Self-Attention Network for Video Representation Learning",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "12613--12622",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201470"}

@inproceedings{bb206371,
        AUTHOR = "Yang, X. T. and Fan, H. Q. and Torresani, L. and Davis, L. S. and Wang, H.",
        TITLE = "Beyond Short Clips: End-to-End Video-Level Learning with Collaborative Memories",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "7563--7572",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201471"}

@inproceedings{bb206372,
        AUTHOR = {Wu, C. Y. and Kr{\"a}henb{\"u}hl, P.},
        TITLE = "Towards Long-Form Video Understanding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "1884--1894",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201472"}

@inproceedings{bb206373,
        AUTHOR = "Zhang, C. H. and Gupta, A. and Zisserman, A.",
        TITLE = "Temporal Query Networks for Fine-grained Video Understanding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "4484--4494",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201473"}

@inproceedings{bb206374,
        AUTHOR = "Kangaspunta, J. and Piergiovanni, A. and Jonschkowski, R. and Ryoo, M. and Angelova, A.",
        TITLE = "Adaptive Intermediate Representations for Video Understanding",
        BOOKTITLE = MULA21,
        YEAR = "2021",
        PAGES = "1602--1612",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201474"}

@inproceedings{bb206375,
        AUTHOR = "Duan, H. D. and Zhao, Y. and Xiong, Y. J. and Liu, W. T. and Lin, D.",
        TITLE = "Omni-sourced Webly-supervised Learning for Video Recognition",
        BOOKTITLE = ECCV20,
        VOLUME = "XV",
        YEAR = "2020",
        PAGES = "670--688",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201475"}

@inproceedings{bb206376,
        AUTHOR = "Jha, A. and Kumar, A. and Pande, S. and Banerjee, B. and Chaudhuri, S.",
        TITLE = "{MT-UNET}: A Novel {U-Net} Based Multi-Task Architecture For Visual Scene Understanding",
        BOOKTITLE = ICIP20,
        YEAR = "2020",
        PAGES = "2191--2195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201476"}

@inproceedings{bb206377,
        AUTHOR = "Diba, A. and Fayyaz, M. and Sharma, V. and Paluri, M. and Gall, J. and Stiefelhagen, R. and Van Gool, L. J.",
        TITLE = "Large Scale Holistic Video Understanding",
        BOOKTITLE = ECCV20,
        VOLUME = "V",
        YEAR = "2020",
        PAGES = "593--610",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201477"}

@inproceedings{bb206378,
        AUTHOR = "Pont Tuset, J. and Uijlings, J. and Changpinyo, S. and Soricut, R. and Ferrari, V.",
        TITLE = "Connecting Vision and Language with Localized Narratives",
        BOOKTITLE = ECCV20,
        VOLUME = "V",
        YEAR = "2020",
        PAGES = "647--664",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201478"}

@inproceedings{bb206379,
        AUTHOR = "Hu, A. and Cotter, F. and Mohan, N. and Gurau, C. and Kendall, A.",
        TITLE = "Probabilistic Future Prediction for Video Scene Understanding",
        BOOKTITLE = ECCV20,
        VOLUME = "XVI",
        YEAR = "2020",
        PAGES = "767--785",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201479"}

@inproceedings{bb206380,
        AUTHOR = "Mavroudi, E. and Haro, B. B. and Vidal, R.",
        TITLE = "Representation Learning on Visual-Symbolic Graphs for Video Understanding",
        BOOKTITLE = ECCV20,
        VOLUME = "XXIX",
        YEAR = "2020",
        PAGES = "71--90",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201480"}

@inproceedings{bb206381,
        AUTHOR = "Sener, F. and Singhania, D. and Yao, A.",
        TITLE = "Temporal Aggregate Representations for Long-range Video Understanding",
        BOOKTITLE = ECCV20,
        VOLUME = "XVI",
        YEAR = "2020",
        PAGES = "154--171",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201481"}

@inproceedings{bb206382,
        AUTHOR = "Tosi, F. and Aleotti, F. and Ramirez, P. Z. and Poggi, M. and Salti, S. and di Stefano, L. and Mattoccia, S.",
        TITLE = "Distilled Semantics for Comprehensive Scene Understanding from Videos",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "4653--4664",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201482"}

@inproceedings{bb206383,
        AUTHOR = "Piergiovanni, A. J. and Angelova, A. and Ryoo, M. S.",
        TITLE = "Evolving Losses for Unsupervised Video Representation Learning",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "130--139",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201483"}

@inproceedings{bb206384,
        AUTHOR = "Xiong, Y. and Huang, Q. and Guo, L. and Zhou, H. and Zhou, B. and Lin, D.",
        TITLE = "A Graph-Based Framework to Bridge Movies and Synopses",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4591--4600",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201484"}

@inproceedings{bb206385,
        AUTHOR = "Kanehira, A. and Takemoto, K. and Inayoshi, S. and Harada, T.",
        TITLE = "Multimodal Explanations by Predicting Counterfactuality in Videos",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "8586--8594",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201485"}

@inproceedings{bb206386,
  author    = {Kanehira, A. and Harada, T.},
  title     = {Learning to Explain With Complemental Examples},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {8595-8603},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201486}
}

@inproceedings{bb206387,
  author    = {Zhou, L. and Kalantidis, Y. and Chen, X.L. and Corso, J.J. and Rohrbach, M.},
  title     = {Grounded Video Description},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6571-6580},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201487}
}

@inproceedings{bb206388,
  author    = {Liu, X.Y. and Lee, J.Y. and Jin, H.L.},
  title     = {Learning Video Representations From Correspondence Proposals},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {4268-4276},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201488}
}

@inproceedings{bb206389,
  author    = {Xiong, B. and Kalantidis, Y. and Ghadiyaram, D. and Grauman, K.},
  title     = {Less Is More: Learning Highlight Detection From Video Duration},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {1258-1267},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201489}
}

% The acronym MAN is brace-protected so sentence-casing styles do not render it as "Man".
@inproceedings{bb206390,
        AUTHOR = "Zhang, D. and Dai, X. and Wang, X. and Wang, Y.F. and Davis, L.S.",
        TITLE = "{MAN}: Moment Alignment Network for Natural Language Moment Retrieval via
Iterative Graph Adjustment",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "1247-1257",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201490"}

@inproceedings{bb206391,
  author    = {Fan, L. and Huang, W. and Gan, C. and Ermon, S. and Gong, B. and Huang, J.},
  title     = {End-to-End Learning of Motion Representation for Video Understanding},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6016-6025},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201491}
}

@inproceedings{bb206392,
        AUTHOR = "Huang, D. and Ramanathan, V. and Mahajan, D. and Torresani, L. and Paluri, M. and Fei-Fei, L. and Niebles, J.C.",
        TITLE = "What Makes a Video a Video: Analyzing Temporal Information in Video
Understanding Models and Datasets",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "7366-7375",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201492"}

@inproceedings{bb206393,
        AUTHOR = "Mahdisoltani, F. and Memisevic, R. and Fleet, D.J.",
        TITLE = "Hierarchical Video Understanding",
        BOOKTITLE = WiCV-E18,
        YEAR = "2018",
        PAGES = "IV: 659-663",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201493"}

% The dataset name YouTube-8M is brace-protected so sentence-casing styles keep its capitalization.
@inproceedings{bb206394,
        AUTHOR = "Shin, K.S. and Jeon, J. and Lee, S. and Lim, B. and Jeong, M.S. and Nang, J.",
        TITLE = "Approach for Video Classification with Multi-label on {YouTube-8M}
Dataset",
        BOOKTITLE = Large-Scale18,
        YEAR = "2018",
        PAGES = "IV: 317-324",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201494"}

@inproceedings{bb206395,
        AUTHOR = "Skalic, M. and Austin, D.",
        TITLE = "Building A Size Constrained Predictive Models for Video Classification",
        BOOKTITLE = Large-Scale18,
        YEAR = "2018",
        PAGES = "IV: 297-305",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201495"}

@inproceedings{bb206396,
        AUTHOR = "Garg, S.",
        TITLE = "Learning Video Features for Multi-label Classification",
        BOOKTITLE = Large-Scale18,
        YEAR = "2018",
        PAGES = "IV: 325-337",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201496"}

@inproceedings{bb206397,
        AUTHOR = "Cho, C. and Antin, B. and Arora, S. and Ashrafi, S. and Duan, P. and Huynh, D.T. and James, L. and Nguyen, H.T. and Solgi, M. and Than, C.V.",
        TITLE = "Large-Scale Video Classification with Feature Space Augmentation
Coupled with Learned Label Relations and Ensembling",
        BOOKTITLE = Large-Scale18,
        YEAR = "2018",
        PAGES = "IV: 338-346",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201497"}

% The model name NeXtVLAD is brace-protected so sentence-casing styles keep its mixed capitalization.
@inproceedings{bb206398,
        AUTHOR = "Lin, R.C. and Xiao, J. and Fan, J.P.",
        TITLE = "{NeXtVLAD}: An Efficient Neural Network to Aggregate Frame-Level Features
for Large-Scale Video Classification",
        BOOKTITLE = Large-Scale18,
        YEAR = "2018",
        PAGES = "IV: 206-218",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201498"}

% The model name NetVLAD is brace-protected so sentence-casing styles keep its mixed capitalization.
@inproceedings{bb206399,
        AUTHOR = "Tang, Y.Y. and Zhang, X. and Wang, J.W. and Chen, S.X. and Ma, L. and Jiang, Y.G.",
        TITLE = "Non-local {NetVLAD} Encoding for Video Classification",
        BOOKTITLE = Large-Scale18,
        YEAR = "2018",
        PAGES = "IV: 219-228",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT201499"}

Last update: Jun 1, 2023 at 10:05:03