% Bibliography section: entries bb206300 through bb206399.
%
% Conventions used below:
%   - journal / booktitle values are bare macro names (e.g. PAMI, CVPR18).
%     Their string definitions are not part of this section; presumably they
%     are supplied by a companion strings file -- verify before compiling.
%   - bibsource is a non-standard field (ignored by standard styles) recording
%     the web page anchor each record was taken from.
%   - pages may carry a volume prefix such as "II: " or "XXXV:"; the prefix is
%     kept verbatim, only the range separator is normalised to "--".
%   - Braces inside titles protect acronyms and coined names from the
%     sentence-casing that many styles apply.

% ---- Records whose bibsource is applicat825mms4.html ----

@inproceedings{bb206300,
  author    = {Gupta, R.K. and Yang, Y.P.},
  title     = {Leveraging Multi-modal Analyses and Online Knowledge Base for Video Aboutness Generation},
  booktitle = ISVC16,
  year      = {2016},
  pages     = {II: 55--64},
  bibsource = {http://www.visionbib.com/bibliography/applicat825mms4.html#TT201401},
}

@inproceedings{bb206301,
  author    = {Vallet, F. and Essid, S. and Carrive, J. and Richard, G.},
  title     = {Robust visual features for the multimodal identification of unregistered speakers in {TV} talk-shows},
  booktitle = ICIP10,
  year      = {2010},
  pages     = {1469--1472},
  bibsource = {http://www.visionbib.com/bibliography/applicat825mms4.html#TT201402},
}

@inproceedings{bb206302,
  author    = {Bailer, W. and Thallinger, G.},
  title     = {A framework for multimedia content abstraction and its application to rushes exploration},
  booktitle = CIVR07,
  year      = {2007},
  pages     = {146--153},
  bibsource = {http://www.visionbib.com/bibliography/applicat825mms4.html#TT201403},
}

@inproceedings{bb206303,
  author    = {Tesic, J. and Smith, J.R.},
  title     = {Efficient Summarizing of Multimedia Archives Using Cluster Labeling},
  booktitle = CIVR06,
  year      = {2006},
  pages     = {518--520},
  bibsource = {http://www.visionbib.com/bibliography/applicat825mms4.html#TT201404},
}

% ---- Records whose bibsource is applicat825vsk1.html ----

% NOTE(review): the booktitle macro below is US_Patent, so this record looks
% like a patent stored as an inproceedings entry; entry type left unchanged
% because the macro expansion is not visible here.
@inproceedings{bb206304,
  author    = {Mauldin, M.L. and Smith, M.A. and Stevens, S.M. and Wactlar, H.D. and Christel, M.G. and Reddy, D.R.},
  title     = {System and method for skimming digital audio/video data},
  booktitle = US_Patent,
  year      = {1997},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201405},
}

@article{bb206305,
  author    = {Li, Y. and Narayanan, S.S. and Kuo, C.C.J.},
  title     = {Adaptive Speaker Identification with Audio-Visual Cues for Movie Content Analysis},
  journal   = PRL,
  volume    = {25},
  year      = {2004},
  number    = {7},
  month     = may,
  pages     = {777--791},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201406},
}

@article{bb206306,
  author    = {Li, Y. and Narayanan, S.S. and Kuo, C.C.J.},
  title     = {Content-Based Movie Analysis and Indexing Based on Audio-Visual Cues},
  journal   = CirSysVideo,
  volume    = {14},
  year      = {2004},
  number    = {8},
  month     = aug,
  pages     = {1073--1085},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201407},
}

@inproceedings{bb206307,
  author    = {Li, Y. and Narayanan, S.S. and Kuo, C.C.J.},
  title     = {Movie Content Analysis, Indexing and Skimming Via Multimodal Information},
  booktitle = VideoMining03,
  year      = {2003},
  pages     = {Chapter 5},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201408},
}

@article{bb206308,
  author    = {Li, Y. and Kuo, C.C.J.},
  title     = {A robust video scene extraction approach to movie content abstraction},
  journal   = IJIST,
  volume    = {13},
  year      = {2003},
  number    = {5},
  pages     = {236--244},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201409},
}

@article{bb206309,
  author    = {Almeida, J. and Leite, N.J. and da Silva Torres, R.},
  title     = {Online video summarization on compressed domain},
  journal   = JVCIR,
  volume    = {24},
  year      = {2013},
  number    = {6},
  month     = aug,
  pages     = {729--738},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201410},
}

@article{bb206310,
  author    = {Sreeja, M.U. and Kovoor, B.C.},
  title     = {Towards genre-specific frameworks for video summarisation: A survey},
  journal   = JVCIR,
  volume    = {62},
  year      = {2019},
  pages     = {340--358},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201411},
}

@article{bb206311,
  author    = {Vivekraj, V.K. and Sen, D. and Raman, B.},
  title     = {Video Skimming: Taxonomy and Comprehensive Survey},
  journal   = Surveys,
  volume    = {52},
  year      = {2019},
  number    = {5},
  month     = oct,
  pages     = {Article No 106},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201412},
}

@article{bb206312,
  author    = {Kumar, K.},
  title     = {{EVS-DK}: Event video skimming using deep keyframe},
  journal   = JVCIR,
  volume    = {58},
  year      = {2019},
  pages     = {345--352},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201413},
}

@article{bb206313,
  author    = {Silva, M.M. and Ramos, W.L.S. and Campos, M.F.M. and Nascimento, E.R.},
  title     = {A Sparse Sampling-Based Framework for Semantic Fast-Forward of First-Person Videos},
  journal   = PAMI,
  volume    = {43},
  year      = {2021},
  number    = {4},
  month     = apr,
  pages     = {1438--1444},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201414},
}

@inproceedings{bb206314,
  author    = {Silva, M.M. and Ramos, W.L.S. and Ferreira, J.P.K. and Chamone, F. and Campos, M.F.M. and Nascimento, E.R.},
  title     = {A Weighted Sparse Sampling and Smoothing Frame Transition Approach for Semantic Fast-Forward First-Person Videos},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {2383--2392},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201415},
}

@inproceedings{bb206315,
  author    = {Silva, M.M. and Ramos, W.L.S. and Ferreira, J.P.K. and Campos, M.F.M. and Nascimento, E.R.},
  title     = {Towards Semantic Fast-Forward and Stabilized Egocentric Videos},
  booktitle = Egocentric16,
  year      = {2016},
  pages     = {I: 557--571},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201416},
}

@inproceedings{bb206316,
  author    = {Ramos, W.L.S. and Silva, M.M. and Campos, M.F.M. and Nascimento, E.R.},
  title     = {Fast-forward video based on semantic extraction},
  booktitle = ICIP16,
  year      = {2016},
  pages     = {3334--3338},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201417},
}

@inproceedings{bb206317,
  author    = {Ramos, W.L.S. and Silva, M.M. and Araujo, E. and Marcolino, L.S. and Nascimento, E.R.},
  title     = {Straight to the Point: Fast-Forwarding Videos via Reinforcement Learning Using Textual Data},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10928--10937},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201418},
}

@article{bb206318,
  author    = {Sun, X.Y. and Wang, H. and He, B.},
  title     = {{MABAN}: Multi-Agent Boundary-Aware Network for Natural Language Moment Retrieval},
  journal   = IP,
  volume    = {30},
  year      = {2021},
  pages     = {5589--5599},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201419},
}

@inproceedings{bb206319,
  author    = {Lan, S. and Panda, R. and Zhu, Q. and Roy Chowdhury, A.K.},
  title     = {{FFNet}: Video Fast-Forwarding via Reinforcement Learning},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6771--6780},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201420},
}

@inproceedings{bb206320,
  author    = {Vivekraj, V.K. and Balasubramanian, R. and Sen, D.},
  title     = {Vector {R-ordering} based selection of segments for video skimming},
  booktitle = ICPR16,
  year      = {2016},
  pages     = {871--876},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201421},
}

@inproceedings{bb206321,
  author    = {Christel, M.G. and Lin, W.H. and Maher, B.},
  title     = {Evaluating audio skimming and frame rate acceleration for summarizing {BBC} rushes},
  booktitle = CIVR08,
  year      = {2008},
  pages     = {407--416},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201422},
}

@inproceedings{bb206322,
  author    = {Sundaram, H. and Chang, S.F.},
  title     = {Video skims: taxonomies and an optimal generation framework},
  booktitle = ICIP02,
  year      = {2002},
  pages     = {II: 21--24},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201423},
}

@inproceedings{bb206323,
  author    = {Sundaram, H. and Chang, S.F.},
  title     = {Constrained Utility Maximizations for Generating Visual Skims},
  booktitle = CBAIVL01,
  year      = {2001},
  pages     = {124},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201424},
}

% Second author corrected from the misspelling "Zbang" to "Zhang".
@inproceedings{bb206324,
  author    = {Ma, Y.F. and Zhang, H.J.},
  title     = {A model of motion attention for video skimming},
  booktitle = ICIP02,
  year      = {2002},
  pages     = {I: 129--132},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201425},
}

@inproceedings{bb206325,
  author    = {di Lecce, V. and Dimauro, G. and Guerriero, A. and Impedovo, S. and Pirlo, G. and Salzo, A.},
  title     = {Image basic features indexing techniques for video skimming},
  booktitle = CIAP99,
  year      = {1999},
  pages     = {715--720},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201426},
}

% The next three records share one title and one bibsource anchor (TT201427);
% they differ in venue macro and author order.
@inproceedings{bb206326,
  author    = {Smith, M.A. and Kanade, T.},
  title     = {Video Skimming and Characterization through the Combination of Image and Language Understanding Techniques},
  booktitle = CVPR97,
  year      = {1997},
  pages     = {775--781},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201427},
}

@inproceedings{bb206327,
  author    = {Kanade, T. and Smith, M.A.},
  title     = {Video Skimming and Characterization through the Combination of Image and Language Understanding Techniques},
  booktitle = DARPA97,
  year      = {1997},
  pages     = {357--366},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201427},
}

% NOTE(review): booktitle macro CMU-CS-TR suggests this record and the next
% are technical reports stored as inproceedings; entry type left unchanged.
@inproceedings{bb206328,
  author    = {Kanade, T. and Smith, M.A.},
  title     = {Video Skimming and Characterization through the Combination of Image and Language Understanding Techniques},
  booktitle = CMU-CS-TR,
  year      = {1997},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201427},
}

@inproceedings{bb206329,
  author    = {Smith, M.A. and Kanade, T.},
  title     = {Video Skimming for Quick Browsing based on Audio and Image Characterization},
  booktitle = CMU-CS-TR,
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vsk1.html#TT201428},
}

% ---- Records whose bibsource is applicat825vu1.html ----

@article{bb206330,
  author    = {Brostow, G.J. and Fauqueur, J. and Cipolla, R.},
  title     = {Semantic object classes in video: A high-definition ground truth database},
  journal   = PRL,
  volume    = {30},
  year      = {2009},
  number    = {2},
  month     = jan,
  pages     = {88--97},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201430},
}

% First author corrected from "Aodha, O.M.": "Mac" is part of the surname,
% not a middle initial.
@inproceedings{bb206331,
  author    = {Mac Aodha, O. and Brostow, G.J. and Pollefeys, M.},
  title     = {Segmenting video into classes of algorithm-suitability},
  booktitle = CVPR10,
  year      = {2010},
  pages     = {1054--1061},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201431},
}

@article{bb206332,
  author    = {Suresha, M. and Kuppa, S. and Raghukumar, D.S.},
  title     = {A study on deep learning spatiotemporal models and feature extraction techniques for video understanding},
  journal   = MultInfoRetr,
  volume    = {9},
  year      = {2020},
  number    = {2},
  month     = jun,
  pages     = {81--101},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201432},
}

@article{bb206333,
  author    = {Kavoosifar, M.R. and Apiletti, D. and Baralis, E. and Garza, P. and Huet, B.},
  title     = {Effective video hyperlinking by means of enriched feature sets and monomodal query combinations},
  journal   = MultInfoRetr,
  volume    = {9},
  year      = {2020},
  number    = {3},
  month     = sep,
  pages     = {215--227},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201433},
}

@article{bb206334,
  author    = {Tang, P.J. and Tan, Y.L. and Li, J.Z. and Tan, B.},
  title     = {Translating video into language by enhancing visual and language representations},
  journal   = JVCIR,
  volume    = {72},
  year      = {2020},
  pages     = {102875},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201434},
}

@article{bb206335,
  author    = {Yu, J. and Jiang, X. and Qin, Z. and Zhang, W. and Hu, Y. and Wu, Q.},
  title     = {Learning Dual Encoding Model for Adaptive Visual Understanding in Visual Dialogue},
  journal   = IP,
  volume    = {30},
  year      = {2021},
  pages     = {220--233},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201435},
}

@article{bb206336,
  author    = {Duan, J.H. and Xu, H. and Lin, X.Z. and Zhu, S.C. and Du, Y.Z.},
  title     = {Multi-semantic long-range dependencies capturing for efficient video representation learning},
  journal   = IVC,
  volume    = {104},
  year      = {2020},
  pages     = {103988},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201436},
}

@article{bb206337,
  author    = {Tan, H.L. and Zhu, H.Y. and Lim, J.H. and Tan, C.},
  title     = {A comprehensive survey of procedural video datasets},
  journal   = CVIU,
  volume    = {202},
  year      = {2021},
  pages     = {103107},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201437},
}

@article{bb206338,
  author    = {Lin, J. and Gan, C. and Wang, K. and Han, S.},
  title     = {{TSM}: Temporal Shift Module for Efficient and Scalable Video Understanding on Edge Devices},
  journal   = PAMI,
  volume    = {44},
  year      = {2022},
  number    = {5},
  month     = may,
  pages     = {2760--2774},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201438},
}

@inproceedings{bb206339,
  author    = {Lin, J. and Gan, C. and Han, S.},
  title     = {{TSM}: Temporal Shift Module for Efficient Video Understanding},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {7082--7092},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201439},
}

@article{bb206340,
  author    = {Zhou, W. and Hou, Y. and Ouyang, K.W. and Zhou, S.L.},
  title     = {Exploring complementary information of self-supervised pretext tasks for unsupervised video pre-training},
  journal   = IET-CV,
  volume    = {16},
  year      = {2022},
  number    = {3},
  pages     = {255--265},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201440},
}

@article{bb206341,
  author    = {Li, Z.Q. and Wang, W.M. and Li, Z.Y. and Huang, Y.F. and Sato, Y.},
  title     = {Spatio-Temporal Perturbations for Video Attribution},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {4},
  month     = apr,
  pages     = {2043--2056},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201441},
}

@article{bb206342,
  author    = {Tao, L. and Wang, X.T. and Yamasaki, T.},
  title     = {An Improved Inter-Intra Contrastive Learning Framework on Self-Supervised Video Representation},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {8},
  month     = aug,
  pages     = {5266--5280},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201442},
}

@inproceedings{bb206343,
  author    = {Huang, L. and You, S. and Zheng, M.K. and Wang, F. and Qian, C. and Yamasaki, T.},
  title     = {Learning Where to Learn in Cross-View Self-Supervised Learning},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {14431--14440},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201443},
}

@article{bb206344,
  author    = {Hu, Y. and Yin, D.C. and Wang, Y. and Chen, Z.Z. and Luo, C.},
  title     = {Decomposing style, content, and motion for videos},
  journal   = JVCIR,
  volume    = {89},
  year      = {2022},
  pages     = {103686},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201444},
}

@article{bb206345,
  author    = {Hong, M.Y. and Zhang, X.F. and Li, G.R. and Huang, Q.M.},
  title     = {Fine-Grained Feature Generation for Generalized Zero-Shot Video Classification},
  journal   = IP,
  volume    = {32},
  year      = {2023},
  pages     = {1599--1612},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201445},
}

@article{bb206346,
  author    = {Jin, X. and Feng, R. and Sun, S. and Feng, R. and He, T. and Chen, Z.B.},
  title     = {Semantical video coding: Instill static-dynamic clues into structured bitstream for {AI} tasks},
  journal   = JVCIR,
  volume    = {93},
  year      = {2023},
  pages     = {103816},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201446},
}

@inproceedings{bb206347,
  author    = {Zhang, H.L. and Pirsiavash, H. and Liu, X.},
  title     = {{MASTAF}: A Model-Agnostic Spatio-Temporal Attention Fusion Network for Few-shot Video Classification},
  booktitle = WACV23,
  year      = {2023},
  pages     = {2507--2516},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201447},
}

@inproceedings{bb206348,
  author    = {Senocak, A. and Kim, J. and Oh, T.H. and Li, D.Z. and Kweon, I.S.},
  title     = {Event-Specific Audio-Visual Fusion Layers: A Simple and New Perspective on Video Understanding},
  booktitle = WACV23,
  year      = {2023},
  pages     = {2236--2246},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201448},
}

@inproceedings{bb206349,
  author    = {Xia, B.Y. and Wu, W.H. and Wang, H.R. and Su, R. and He, D.L. and Yang, H. and Fan, X.R. and Ouyang, W.L.},
  title     = {{NSNet}: Non-saliency Suppression Sampler for Efficient Video Recognition},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXIV:705--723},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201449},
}

@inproceedings{bb206350,
  author    = {Xia, B.Y. and Wang, Z.H. and Wu, W.H. and Wang, H.R. and Han, J.G.},
  title     = {Temporal Saliency Query Network for Efficient Video Recognition},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXIV:741--759},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201450},
}

@inproceedings{bb206351,
  author    = {Islam, M.M. and Bertasius, G.},
  title     = {Long Movie Clip Classification with State-Space Video Models},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXV:87--104},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201451},
}

@inproceedings{bb206352,
  author    = {Habibian, A. and Yahia, H.B. and Abati, D. and Gavves, E. and Porikli, F.M.},
  title     = {Delta Distillation for Efficient Video Processing},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXV:213--229},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201452},
}

@inproceedings{bb206353,
  author    = {Li, Z.Z. and Wang, M.M. and Pi, H.J. and Xu, K. and Mei, J.B. and Liu, Y.},
  title     = {{E-NeRV}: Expedite Neural Video Representation with Disentangled Spatial-Temporal Context},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXV:267--284},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201453},
}

@inproceedings{bb206354,
  author    = {Kosman, E. and di Castro, D.},
  title     = {{GraphVid}: It only Takes a Few Nodes to Understand a Video},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXV:195--212},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201454},
}

@inproceedings{bb206355,
  author    = {Ju, C. and Han, T. and Zheng, K. and Zhang, Y. and Xie, W.},
  title     = {Prompting Visual-Language Models for Efficient Video Understanding},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXV:105--124},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201455},
}

@inproceedings{bb206356,
  author    = {Liang, S.X. and Shen, X. and Huang, J.Q. and Hua, X.S.},
  title     = {Delving into Details: Synopsis-to-Detail Networks for Video Recognition},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {IV:262--278},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201456},
}

@inproceedings{bb206357,
  author    = {Ur Rehman, Y.A. and Gao, Y. and Shen, J.J. and de Gusmao, P.P.B. and Lane, N.},
  title     = {Federated Self-supervised Learning for Video Understanding},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXI:506--522},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201457},
}

@inproceedings{bb206358,
  author    = {Dadashzadeh, A. and Whone, A. and Mirmehdi, M.},
  title     = {Auxiliary Learning for Self-Supervised Video Representation via Similarity-based Knowledge Distillation},
  booktitle = L3D-IVU22,
  year      = {2022},
  pages     = {4230--4239},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201458},
}

@inproceedings{bb206359,
  author    = {Li, Y. and Vasconcelos, N.M.},
  title     = {Improving Video Model Transfer with Dynamic Representation Learning},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {19258--19269},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201459},
}

@inproceedings{bb206360,
  author    = {Guo, S. and Xiong, Z. and Zhong, Y.J. and Wang, L.M. and Guo, X.B. and Han, B. and Huang, W.L.},
  title     = {Cross-Architecture Self-supervised Video Representation Learning},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {19248--19257},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201460},
}

@inproceedings{bb206361,
  author    = {Xu, X.Y. and Li, Y.L. and Lu, C.},
  title     = {Learning to Anticipate Future with Dynamic Context Removal},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {12724--12734},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201461},
}

@inproceedings{bb206362,
  author    = {Gadre, S.Y. and Ehsani, K. and Song, S. and Mottaghi, R.},
  title     = {Continuous Scene Representations for Embodied {AI}},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {14829--14839},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201462},
}

@inproceedings{bb206363,
  author    = {Liang, C. and Wang, W.G. and Zhou, T.F. and Yang, Y.},
  title     = {Visual Abductive Reasoning},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {15544--15554},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201463},
}

@inproceedings{bb206364,
  author    = {Kinfu, K.A. and Vidal, R.},
  title     = {Analysis and Extensions of Adversarial Training for Video Classification},
  booktitle = RoSe22,
  year      = {2022},
  pages     = {3415--3424},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201464},
}

@inproceedings{bb206365,
  author    = {Xiao, F. and Kundu, K. and Tighe, J. and Modolo, D.},
  title     = {Hierarchical Self-supervised Representation Learning for Movie Understanding},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {9717--9726},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201465},
}

@inproceedings{bb206366,
  author    = {Li, L.L. and Zhou, T.F. and Wang, W.G. and Yang, L. and Li, J.W. and Yang, Y.},
  title     = {Locality-Aware Inter-and Intra-Video Reconstruction for Self-Supervised Correspondence Learning},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {8709--8720},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201466},
}

@inproceedings{bb206367,
  author    = {Jiang, Y.F. and Gong, X.Y. and Wu, J. and Shi, H. and Yan, Z.C. and Wang, Z.Y.},
  title     = {{Auto-X3D}: Ultra-Efficient Video Understanding via Finer-Grained Neural Architecture Search},
  booktitle = WACV22,
  year      = {2022},
  pages     = {2354--2363},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201467},
}

@inproceedings{bb206368,
  author    = {Chen, N.L. and Chu, L. and Pan, H. and Lu, Y. and Wang, W.P.},
  title     = {Self-Supervised Image Representation Learning with Geometric Set Consistency},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {19270--19280},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201468},
}

@inproceedings{bb206369,
  author    = {Lin, Y.Z. and Guo, X. and Lu, Y.},
  title     = {Self-Supervised Video Representation Learning with Meta-Contrastive Network},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {8219--8229},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201469},
}

@inproceedings{bb206370,
  author    = {Guo, X.D. and Guo, X. and Lu, Y.},
  title     = {{SSAN}: Separable Self-Attention Network for Video Representation Learning},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {12613--12622},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201470},
}

@inproceedings{bb206371,
  author    = {Yang, X.T. and Fan, H.Q. and Torresani, L. and Davis, L.S. and Wang, H.},
  title     = {Beyond Short Clips: End-to-End Video-Level Learning with Collaborative Memories},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {7563--7572},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201471},
}

@inproceedings{bb206372,
  author    = {Wu, C.Y. and Krahenbuhl, P.},
  title     = {Towards Long-Form Video Understanding},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {1884--1894},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201472},
}

@inproceedings{bb206373,
  author    = {Zhang, C.H. and Gupta, A. and Zisserman, A.},
  title     = {Temporal Query Networks for Fine-grained Video Understanding},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {4484--4494},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201473},
}

@inproceedings{bb206374,
  author    = {Kangaspunta, J. and Piergiovanni, A. and Jonschkowski, R. and Ryoo, M. and Angelova, A.},
  title     = {Adaptive Intermediate Representations for Video Understanding},
  booktitle = MULA21,
  year      = {2021},
  pages     = {1602--1612},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201474},
}

@inproceedings{bb206375,
  author    = {Duan, H.D. and Zhao, Y. and Xiong, Y.J. and Liu, W.T. and Lin, D.},
  title     = {Omni-sourced Webly-supervised Learning for Video Recognition},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XV:670--688},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201475},
}

@inproceedings{bb206376,
  author    = {Jha, A. and Kumar, A. and Pande, S. and Banerjee, B. and Chaudhuri, S.},
  title     = {{MT-UNET}: A Novel {U-Net} Based Multi-Task Architecture For Visual Scene Understanding},
  booktitle = ICIP20,
  year      = {2020},
  pages     = {2191--2195},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201476},
}

@inproceedings{bb206377,
  author    = {Diba, A. and Fayyaz, M. and Sharma, V. and Paluri, M. and Gall, J. and Stiefelhagen, R. and Van Gool, L.J.},
  title     = {Large Scale Holistic Video Understanding},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {V:593--610},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201477},
}

@inproceedings{bb206378,
  author    = {Pont Tuset, J. and Uijlings, J. and Changpinyo, S. and Soricut, R. and Ferrari, V.},
  title     = {Connecting Vision and Language with Localized Narratives},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {V:647--664},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201478},
}

@inproceedings{bb206379,
  author    = {Hu, A. and Cotter, F. and Mohan, N. and Gurau, C. and Kendall, A.},
  title     = {Probabilistic Future Prediction for Video Scene Understanding},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XVI: 767--785},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201479},
}

@inproceedings{bb206380,
  author    = {Mavroudi, E. and Haro, B.B. and Vidal, R.},
  title     = {Representation Learning on Visual-Symbolic Graphs for Video Understanding},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XXIX: 71--90},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201480},
}

@inproceedings{bb206381,
  author    = {Sener, F. and Singhania, D. and Yao, A.},
  title     = {Temporal Aggregate Representations for Long-range Video Understanding},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XVI: 154--171},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201481},
}

@inproceedings{bb206382,
  author    = {Tosi, F. and Aleotti, F. and Ramirez, P.Z. and Poggi, M. and Salti, S. and di Stefano, L. and Mattoccia, S.},
  title     = {Distilled Semantics for Comprehensive Scene Understanding from Videos},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {4653--4664},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201482},
}

@inproceedings{bb206383,
  author    = {Piergiovanni, A.J. and Angelova, A. and Ryoo, M.S.},
  title     = {Evolving Losses for Unsupervised Video Representation Learning},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {130--139},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201483},
}

@inproceedings{bb206384,
  author    = {Xiong, Y. and Huang, Q. and Guo, L. and Zhou, H. and Zhou, B. and Lin, D.},
  title     = {A Graph-Based Framework to Bridge Movies and Synopses},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4591--4600},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201484},
}

@inproceedings{bb206385,
  author    = {Kanehira, A. and Takemoto, K. and Inayoshi, S. and Harada, T.},
  title     = {Multimodal Explanations by Predicting Counterfactuality in Videos},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {8586--8594},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201485},
}

@inproceedings{bb206386,
  author    = {Kanehira, A. and Harada, T.},
  title     = {Learning to Explain With Complemental Examples},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {8595--8603},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201486},
}

@inproceedings{bb206387,
  author    = {Zhou, L. and Kalantidis, Y. and Chen, X.L. and Corso, J.J. and Rohrbach, M.},
  title     = {Grounded Video Description},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6571--6580},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201487},
}

@inproceedings{bb206388,
  author    = {Liu, X.Y. and Lee, J.Y. and Jin, H.L.},
  title     = {Learning Video Representations From Correspondence Proposals},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {4268--4276},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201488},
}

@inproceedings{bb206389,
  author    = {Xiong, B. and Kalantidis, Y. and Ghadiyaram, D. and Grauman, K.},
  title     = {Less Is More: Learning Highlight Detection From Video Duration},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {1258--1267},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201489},
}

@inproceedings{bb206390,
  author    = {Zhang, D. and Dai, X. and Wang, X. and Wang, Y.F. and Davis, L.S.},
  title     = {{MAN}: Moment Alignment Network for Natural Language Moment Retrieval via Iterative Graph Adjustment},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {1247--1257},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201490},
}

@inproceedings{bb206391,
  author    = {Fan, L. and Huang, W. and Gan, C. and Ermon, S. and Gong, B. and Huang, J.},
  title     = {End-to-End Learning of Motion Representation for Video Understanding},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6016--6025},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201491},
}

@inproceedings{bb206392,
  author    = {Huang, D. and Ramanathan, V. and Mahajan, D. and Torresani, L. and Paluri, M. and Fei Fei, L. and Niebles, J.C.},
  title     = {What Makes a Video a Video: Analyzing Temporal Information in Video Understanding Models and Datasets},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7366--7375},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201492},
}

@inproceedings{bb206393,
  author    = {Mahdisoltani, F. and Memisevic, R. and Fleet, D.J.},
  title     = {Hierarchical Video Understanding},
  booktitle = WiCV-E18,
  year      = {2018},
  pages     = {IV:659--663},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201493},
}

@inproceedings{bb206394,
  author    = {Shin, K.S. and Jeon, J. and Lee, S. and Lim, B. and Jeong, M.S. and Nang, J.},
  title     = {Approach for Video Classification with Multi-label on {YouTube-8M} Dataset},
  booktitle = Large-Scale18,
  year      = {2018},
  pages     = {IV:317--324},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201494},
}

@inproceedings{bb206395,
  author    = {Skalic, M. and Austin, D.},
  title     = {Building A Size Constrained Predictive Models for Video Classification},
  booktitle = Large-Scale18,
  year      = {2018},
  pages     = {IV:297--305},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201495},
}

@inproceedings{bb206396,
  author    = {Garg, S.},
  title     = {Learning Video Features for Multi-label Classification},
  booktitle = Large-Scale18,
  year      = {2018},
  pages     = {IV:325--337},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201496},
}

@inproceedings{bb206397,
  author    = {Cho, C. and Antin, B. and Arora, S. and Ashrafi, S. and Duan, P. and Huynh, D.T. and James, L. and Nguyen, H.T. and Solgi, M. and Than, C.V.},
  title     = {Large-Scale Video Classification with Feature Space Augmentation Coupled with Learned Label Relations and Ensembling},
  booktitle = Large-Scale18,
  year      = {2018},
  pages     = {IV:338--346},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201497},
}

@inproceedings{bb206398,
  author    = {Lin, R.C. and Xiao, J. and Fan, J.P.},
  title     = {{NeXtVLAD}: An Efficient Neural Network to Aggregate Frame-Level Features for Large-Scale Video Classification},
  booktitle = Large-Scale18,
  year      = {2018},
  pages     = {IV:206--218},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201498},
}

@inproceedings{bb206399,
  author    = {Tang, Y.Y. and Zhang, X. and Wang, J.W. and Chen, S.X. and Ma, L. and Jiang, Y.G.},
  title     = {Non-local {NetVLAD} Encoding for Video Classification},
  booktitle = Large-Scale18,
  year      = {2018},
  pages     = {IV:219--228},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT201499},
}