@inproceedings{bb140400,
        AUTHOR = "Chen, Z. and Xie, L. X. and Niu, J. W. and Liu, X. F. and Wei, L. and Tian, Q.",
        TITLE = "Visformer: The Vision-friendly Transformer",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "569--578",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136404"}

@inproceedings{bb140401,
        AUTHOR = "Yao, Z. L. and Cao, Y. and Lin, Y. T. and Liu, Z. and Zhang, Z. and Hu, H.",
        TITLE = "Leveraging Batch Normalization for Vision Transformers",
        BOOKTITLE = NeruArch21,
        YEAR = "2021",
        PAGES = "413--422",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136405"}

@inproceedings{bb140402,
        AUTHOR = "Graham, B. and El Nouby, A. and Touvron, H. and Stock, P. and Joulin, A. and Jegou, H. and Douze, M.",
        TITLE = "{LeViT}: a Vision Transformer in {ConvNet}'s Clothing for Faster Inference",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "12239--12249",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136406"}

@inproceedings{bb140403,
        AUTHOR = "Horvath, J. and Baireddy, S. and Hao, H. X. and Montserrat, D. M. and Delp, E. J.",
        TITLE = "Manipulation Detection in Satellite Images Using Vision Transformer",
        BOOKTITLE = WMF21,
        YEAR = "2021",
        PAGES = "1032--1041",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136407"}

@inproceedings{bb140404,
        AUTHOR = "Horvath, J. and Montserrat, D. M. and Hao, H. X. and Delp, E. J.",
        TITLE = "Manipulation Detection in Satellite Images Using Deep Belief Networks",
        BOOKTITLE = WMF20,
        YEAR = "2020",
        PAGES = "2832--2840",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136408"}

@inproceedings{bb140405,
        AUTHOR = "Beal, J. and Wu, H. Y. and Park, D. H. and Zhai, A. and Kislyuk, D.",
        TITLE = "Billion-Scale Pretraining with Vision Transformers for Multi-Task Visual Representations",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "1431--1440",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136409"}

@article{bb140406,
        AUTHOR = "Hu, H. Q. and Lu, X. F. and Zhang, X. P. and Zhang, T. X. and Sun, G. L.",
        TITLE = "Inheritance Attention Matrix-Based Universal Adversarial Perturbations on Vision Transformers",
        JOURNAL = SPLetters,
        VOLUME = "28",
        YEAR = "2021",
        PAGES = "1923--1927",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136410"}

@article{bb140407,
        AUTHOR = "Xue, Z. X. and Tan, X. and Yu, X. and Liu, B. and Yu, A. and Zhang, P. Q.",
        TITLE = "Deep Hierarchical Vision Transformer for Hyperspectral and {LiDAR} Data Classification",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "3095--3110",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136411"}

@article{bb140408,
        AUTHOR = "Heo, J. and Wang, Y. and Park, J.",
        TITLE = "Occlusion-aware spatial attention transformer for occluded object recognition",
        JOURNAL = PRL,
        VOLUME = "159",
        YEAR = "2022",
        PAGES = "70--76",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136412"}

@article{bb140409,
        AUTHOR = "Yu, X. H. and Wang, J. and Zhao, Y. and Gao, Y. S.",
        TITLE = "{Mix-ViT}: Mixing attentive vision transformer for ultra-fine-grained visual categorization",
        JOURNAL = PR,
        VOLUME = "135",
        YEAR = "2023",
        PAGES = "109131",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136413"}

@article{bb140410,
        AUTHOR = "Wu, G. and Zheng, W. S. and Lu, Y. T. and Tian, Q.",
        TITLE = "{PSLT}: A Light-Weight Vision Transformer With Ladder Self-Attention and Progressive Shift",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "11120--11135",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136414"}

@article{bb140411,
        AUTHOR = "Li, K. C. and Wang, Y. and Zhang, J. H. and Gao, P. and Song, G. and Liu, Y. and Li, H. S. and Qiao, Y.",
        TITLE = "{UniFormer}: Unifying Convolution and Self-Attention for Visual Recognition",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "12581--12600",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136415"}

@article{bb140412,
        AUTHOR = "Li, H. L. and Xue, M. Q. and Song, J. and Zhang, H. F. and Huang, W. Q. and Liang, L. and Song, M. L.",
        TITLE = "Constituent Attention for Vision Transformers",
        JOURNAL = CVIU,
        VOLUME = "237",
        YEAR = "2023",
        PAGES = "103838",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136416"}

@article{bb140413,
        AUTHOR = "Qin, R. and Wang, C. Z. and Wu, Y. M. and Du, H. and Lv, M. Y.",
        TITLE = "A {U-Shaped} Convolution-Aided Transformer with Double Attention for Hyperspectral Image Classification",
        JOURNAL = RS,
        VOLUME = "16",
        YEAR = "2024",
        NUMBER = "2",
        PAGES = "288",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136417"}

@article{bb140414,
        AUTHOR = "Wang, W. X. and Chen, W. and Qiu, Q. and Chen, L. and Wu, B. and Lin, B. B. and He, X. F. and Liu, W.",
        TITLE = "{CrossFormer++}: A Versatile Vision Transformer Hinging on Cross-Scale Attention",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "3123--3136",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136418"}

@article{bb140415,
        AUTHOR = "Zhang, Q. M. and Zhang, J. and Xu, Y. F. and Tao, D. C.",
        TITLE = "Vision Transformer With Quadrangle Attention",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "3608--3624",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136419"}

@article{bb140416,
        AUTHOR = "Huang, L. and Bai, X. Y. and Zeng, J. and Yu, M. Q. and Pang, W. and Wang, K. P.",
        TITLE = "{FAM}: Improving columnar vision transformer with feature attention mechanism",
        JOURNAL = CVIU,
        VOLUME = "242",
        YEAR = "2024",
        PAGES = "103981",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136420"}

@article{bb140417,
        AUTHOR = "Li, M. X. and Yu, W. and Liu, Q. L. and Li, Z. L. and Li, R. and Zhong, B. and Zhang, S. P.",
        TITLE = "Hybrid Transformers With Attention-Guided Spatial Embeddings for Makeup Transfer and Removal",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2876--2890",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136421"}

@inproceedings{bb140418,
        AUTHOR = "Cai, H. and Li, J. and Hu, M. and Gan, C. and Han, S.",
        TITLE = "{EfficientViT}: Lightweight Multi-Scale Attention for High-Resolution Dense Prediction",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "17256--17267",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136422"}

@inproceedings{bb140419,
        AUTHOR = "Ryu, J. and Han, D. Y. and Lim, J. W.",
        TITLE = "{Gramian} Attention Heads are Strong yet Efficient Vision Learners",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "5818--5828",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136423"}

@inproceedings{bb140420,
        AUTHOR = "Xu, R. H. and Zhang, H. and Hu, W. Z. and Zhang, S. L. and Wang, X. Y.",
        TITLE = "{ParCNetV2}: Oversized Kernel with Enhanced Attention",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "5729--5739",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136424"}

@inproceedings{bb140421,
        AUTHOR = "Zhao, B. Y. and Yu, Z. and Lan, S. Y. and Cheng, Y. and Anandkumar, A. and Lao, Y. J. and Alvarez, J. M.",
        TITLE = "Fully Attentional Networks with Self-emerging Token Labeling",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "5562--5572",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136425"}

@inproceedings{bb140422,
        AUTHOR = "Guo, Y. and Stutz, D. and Schiele, B.",
        TITLE = "Robustifying Token Attention for Vision Transformers",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "17511--17522",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136426"}

@inproceedings{bb140423,
        AUTHOR = "Zhao, Y. and Tang, H. D. and Jiang, Y. Y. and A, Y. and Wu, Q. and Wang, J.",
        TITLE = "Parameter-Efficient Vision Transformer with Linear Attention",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "1275--1279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136427"}

@inproceedings{bb140424,
        AUTHOR = "Shi, L. and Huang, H. D. and Song, B. and Tan, M. and Zhao, W. Z. and Xia, T. and Ren, P. J.",
        TITLE = "{TAQ}: {Top-K} Attention-Aware Quantization for Vision Transformers",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "1750--1754",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136428"}

@inproceedings{bb140425,
        AUTHOR = "Baili, N. and Frigui, H.",
        TITLE = "{ADA-VIT}: Attention-Guided Data Augmentation for Vision Transformers",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "385--389",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136429"}

@inproceedings{bb140426,
        AUTHOR = "Ding, M. Y. and Shen, Y. and Fan, L. J. and Chen, Z. F. and Chen, Z. and Luo, P. and Tenenbaum, J. and Gan, C.",
        TITLE = "Visual Dependency Transformers: Dependency Tree Emerges from Reversed Attention",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "14528--14539",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136430"}

@inproceedings{bb140427,
        AUTHOR = "Song, J. C. and Mou, C. and Wang, S. Q. and Ma, S. W. and Zhang, J.",
        TITLE = "Optimization-Inspired Cross-Attention Transformer for Compressive Sensing",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6174--6184",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136431"}

@inproceedings{bb140428,
        AUTHOR = "Hassani, A. and Walton, S. and Li, J. C. and Li, S. and Shi, H.",
        TITLE = "Neighborhood Attention Transformer",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6185--6194",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136432"}

@inproceedings{bb140429,
        AUTHOR = "Liu, Z. J. and Yang, X. Y. and Tang, H. T. and Yang, S. and Han, S.",
        TITLE = "{FlatFormer}: Flattened Window Attention for Efficient Point Cloud Transformer",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "1200--1211",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136433"}

@inproceedings{bb140430,
        AUTHOR = "Pan, X. and Ye, T. Z. and Xia, Z. F. and Song, S. and Huang, G.",
        TITLE = "{Slide-Transformer}: Hierarchical Vision Transformer with Local Self-Attention",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2082--2091",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136434"}

@inproceedings{bb140431,
        AUTHOR = "Zhu, L. and Wang, X. J. and Ke, Z. H. and Zhang, W. and Lau, R.",
        TITLE = "{BiFormer}: Vision Transformer with Bi-Level Routing Attention",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10323--10333",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136435"}

@inproceedings{bb140432,
        AUTHOR = "Long, S. and Zhao, Z. and Pi, J. and Wang, S. S. and Wang, J. D.",
        TITLE = "Beyond Attentive Tokens: Incorporating Token Importance and Diversity for Efficient Vision Transformers",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10334--10343",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136436"}

@inproceedings{bb140433,
        AUTHOR = "Liu, X. Y. and Peng, H. and Zheng, N. X. and Yang, Y. Q. and Hu, H. and Yuan, Y. X.",
        TITLE = "{EfficientViT}: Memory Efficient Vision Transformer with Cascaded Group Attention",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "14420--14430",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136437"}

@inproceedings{bb140434,
        AUTHOR = "You, H. R. and Xiong, Y. and Dai, X. L. and Wu, B. and Zhang, P. Z. and Fan, H. Q. and Vajda, P. and Lin, Y. Y. C.",
        TITLE = "{Castling-ViT}: Compressing Self-Attention via Switching Towards Linear-Angular Attention at Vision Transformer Inference",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "14431--14442",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136438"}

@inproceedings{bb140435,
        AUTHOR = "Grainger, R. and Paniagua, T. and Song, X. and Cuntoor, N. and Lee, M. W. and Wu, T. F.",
        TITLE = "{PaCa-ViT}: Learning Patch-to-Cluster Attention in Vision Transformers",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "18568--18578",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136439"}

@inproceedings{bb140436,
        AUTHOR = "Wei, C. and Duke, B. and Jiang, R. and Aarabi, P. and Taylor, G. W. and Shkurti, F.",
        TITLE = "Sparsifiner: Learning Sparse Instance-Dependent Attention for Efficient Vision Transformers",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "22680--22689",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136440"}

@inproceedings{bb140437,
        AUTHOR = "Bhattacharyya, M. and Chattopadhyay, S. and Nag, S.",
        TITLE = "{DeCAtt}: Efficient Vision Transformers with Decorrelated Attention Heads",
        BOOKTITLE = ECV23,
        YEAR = "2023",
        PAGES = "4695--4699",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136441"}

@inproceedings{bb140438,
        AUTHOR = "Tatsunami, Y. and Taki, M.",
        TITLE = "{RaftMLP}: How Much Can Be Done Without Attention and with Less Spatial Locality?",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "VI:459--475",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136442"}

@inproceedings{bb140439,
        AUTHOR = "Bolya, D. and Fu, C. Y. and Dai, X. L. and Zhang, P. Z. and Hoffman, J.",
        TITLE = "Hydra Attention: Efficient Attention with Many Heads",
        BOOKTITLE = CADK22,
        YEAR = "2022",
        PAGES = "35--49",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136443"}

@inproceedings{bb140440,
        AUTHOR = "Chen, X. Y. and Hu, Q. and Li, K. and Zhong, C. and Wang, G. H.",
        TITLE = "Accumulated Trivial Attention Matters in Vision Transformers on Small Datasets",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "3973--3981",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136444"}

@inproceedings{bb140441,
        AUTHOR = "Lan, H. and Wang, X. and Shen, H. and Liang, P. and Wei, X.",
        TITLE = "Couplformer: Rethinking Vision Transformer with Coupling Attention",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "6464--6473",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136445"}

@inproceedings{bb140442,
        AUTHOR = "Debnath, B. and Po, O. and Chowdhury, F. A. and Chakradhar, S.",
        TITLE = "Cosine Similarity based Few-Shot Video Classifier with Attention-based Aggregation",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "1273--1279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136446"}

@inproceedings{bb140443,
        AUTHOR = "Mari, C. R. and Gonzalez, D. V. and Bou Balust, E.",
        TITLE = "Multi-Scale Transformer-Based Feature Combination for Image Retrieval",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "3166--3170",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136447"}

@inproceedings{bb140444,
        AUTHOR = "Furukawa, R. and Hotta, K.",
        TITLE = "Local Embedding for Axial Attention",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "2586--2590",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136448"}

@inproceedings{bb140445,
        AUTHOR = "Kakogeorgiou, I. and Gidaris, S. and Psomas, B. and Avrithis, Y. and Bursuc, A. and Karantzalos, K. and Komodakis, N.",
        TITLE = "What to Hide from Your Students: Attention-Guided Masked Image Modeling",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXX:300--318",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136449"}

@inproceedings{bb140446,
        AUTHOR = "Ding, M. Y. and Xiao, B. and Codella, N. and Luo, P. and Wang, J. D. and Yuan, L.",
        TITLE = "{DaViT}: Dual Attention Vision Transformers",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXIV:74--92",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136450"}

@inproceedings{bb140447,
        AUTHOR = "Wang, P. C. and Wang, X. and Wang, F. and Lin, M. and Chang, S. N. and Li, H. and Jin, R.",
        TITLE = "{KVT}: {k-NN} Attention for Boosting Vision Transformers",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXIV:285--302",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136451"}

@inproceedings{bb140448,
        AUTHOR = "Rao, Y. M. and Zhao, W. L. and Zhou, J. and Lu, J. W.",
        TITLE = "{AMixer}: Adaptive Weight Mixing for Self-Attention Free Vision Transformers",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXI:50--67",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136452"}

@inproceedings{bb140449,
        AUTHOR = "Li, A. and Jiao, J. and Li, N. and Qi, W. and Xu, W. and Pang, M.",
        TITLE = "Conmw Transformer: A General Vision Transformer Backbone With Merged-Window Attention",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "1551--1555",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136453"}

@inproceedings{bb140450,
        AUTHOR = "Zhang, Q. M. and Xu, Y. F. and Zhang, J. and Tao, D. C.",
        TITLE = "{VSA}: Learning Varied-Size Window Attention in Vision Transformers",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXV:466--483",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136454"}

@inproceedings{bb140451,
        AUTHOR = "Mallick, R. and Benois Pineau, J. and Zemmari, A.",
        TITLE = "I Saw: A Self-Attention Weighted Method for Explanation of Visual Transformers",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "3271--3275",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136455"}

@inproceedings{bb140452,
        AUTHOR = "Song, Z. K. and Yu, J. Q. and Chen, Y. P. P. and Yang, W.",
        TITLE = "Transformer Tracking with Cyclic Shifting Window Attention",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "8781--8790",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136456"}

@inproceedings{bb140453,
        AUTHOR = "Yang, C. L. and Wang, Y. L. and Zhang, J. M. and Zhang, H. and Wei, Z. J. and Lin, Z. and Yuille, A. L.",
        TITLE = "Lite Vision Transformer with Enhanced Self-Attention",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "11988--11998",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136457"}

@inproceedings{bb140454,
        AUTHOR = "Xia, Z. F. and Pan, X. and Song, S. and Li, L. E. and Huang, G.",
        TITLE = "Vision Transformer with Deformable Attention",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "4784--4793",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136458"}

@inproceedings{bb140455,
        AUTHOR = "Yu, T. and Khalitov, R. and Cheng, L. and Yang, Z. R.",
        TITLE = "Paramixer: Parameterizing Mixing Links in Sparse Factors Works Better than Dot-Product Self-Attention",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "681--690",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136459"}

@inproceedings{bb140456,
        AUTHOR = "Cheng, B. and Misra, I. and Schwing, A. G. and Kirillov, A. and Girdhar, R.",
        TITLE = "Masked-attention Mask Transformer for Universal Image Segmentation",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "1280--1289",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136460"}

@inproceedings{bb140457,
        AUTHOR = "Rangrej, S. B. and Srinidhi, C. L. and Clark, J. J.",
        TITLE = "Consistency driven Sequential Transformers Attention Model for Partially Observable Scenes",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "2508--2517",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136461"}

@inproceedings{bb140458,
        AUTHOR = "Chen, C. F. R. and Fan, Q. F. and Panda, R.",
        TITLE = "{CrossViT}: Cross-Attention Multi-Scale Vision Transformer for Image Classification",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "347--356",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136462"}

@inproceedings{bb140459,
        AUTHOR = "Chefer, H. and Gur, S. and Wolf, L. B.",
        TITLE = "Generic Attention-model Explainability for Interpreting Bi-Modal and Encoder-Decoder Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "387--396",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136463"}

@inproceedings{bb140460,
        AUTHOR = "Xu, W. J. and Xu, Y. F. and Chang, T. and Tu, Z. W.",
        TITLE = "Co-Scale Conv-Attentional Image Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "9961--9970",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136464"}

@inproceedings{bb140461,
        AUTHOR = "Yang, G. L. and Tang, H. and Ding, M. L. and Sebe, N. and Ricci, E.",
        TITLE = "Transformer-Based Attention Networks for Continuous Pixel-Wise Prediction",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "16249--16259",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136465"}

@inproceedings{bb140462,
        AUTHOR = "Kim, K. and Wu, B. C. and Dai, X. L. and Zhang, P. Z. and Yan, Z. C. and Vajda, P. and Kim, S.",
        TITLE = "Rethinking the Self-Attention in Vision Transformers",
        BOOKTITLE = ECV21,
        YEAR = "2021",
        PAGES = "3065--3069",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136466"}

@article{bb140463,
        AUTHOR = "Selva, J. and Johansen, A. S. and Escalera, S. and Nasrollahi, K. and Moeslund, T. B. and Clapes, A.",
        TITLE = "Video Transformers: A Survey",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "12922--12943",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136467"}

@article{bb140464,
        AUTHOR = "Zhang, Z. C. and Chen, Z. D. and Wang, Y. X. and Luo, X. and Xu, X. S.",
        TITLE = "A vision transformer for fine-grained classification by reducing noise and enhancing discriminative information",
        JOURNAL = PR,
        VOLUME = "145",
        YEAR = "2024",
        PAGES = "109979",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136468"}

@article{bb140465,
        AUTHOR = "Xian, K. and Peng, J. and Cao, Z. G. and Zhang, J. M. and Lin, G. S.",
        TITLE = "{ViTA}: Video Transformer Adaptor for Robust Video Depth Estimation",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3302--3316",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136469"}

@inproceedings{bb140466,
        AUTHOR = "Piergiovanni, A. and Kuo, W. C. and Angelova, A.",
        TITLE = "Rethinking Video {ViTs}: Sparse Video Tubes for Joint Image and Video Learning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2214--2224",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136470"}

@inproceedings{bb140467,
        AUTHOR = "Park, J. and Lee, J. Y. and Sohn, K. H.",
        TITLE = "Dual-Path Adaptation from Image to Video Transformers",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2203--2213",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136471"}

@inproceedings{bb140468,
        AUTHOR = "Karim, R. and Zhao, H. and Wildes, R. P. and Siam, M.",
        TITLE = "{MED-VT}: Multiscale Encoder-Decoder Video Transformer with Application to Object Segmentation",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6323--6333",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136472"}

@inproceedings{bb140469,
        AUTHOR = "Yu, L. J. and Cheng, Y. and Sohn, K. and Lezama, J. and Zhang, H. and Chang, H. and Hauptmann, A. G. and Yang, M. H. and Hao, Y. and Essa, I. and Jiang, L.",
        TITLE = "{MAGVIT}: Masked Generative Video Transformer",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10459--10469",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136473"}

@inproceedings{bb140470,
        AUTHOR = "Xing, Z. and Dai, Q. and Hu, H. and Chen, J. J. and Wu, Z. and Jiang, Y. G.",
        TITLE = "{SVFormer}: Semi-supervised Video Transformer for Action Recognition",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "18816--18826",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136474"}

@inproceedings{bb140471,
        AUTHOR = "Xie, F. and Chu, L. and Li, J. H. and Lu, Y. and Ma, C.",
        TITLE = "{VideoTrack}: Learning to Track Objects via Video Transformer",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "22826--22835",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136475"}

@inproceedings{bb140472,
        AUTHOR = "Qiu, Z. W. and Yang, Q. S. and Wang, J. and Feng, H. C. and Han, J. Y. and Ding, E. and Xu, C. and Fu, D. M. and Wang, J. D.",
        TITLE = "{PSVT}: End-to-End Multi-Person {3D} Pose and Shape Estimation with Progressive Video Transformers",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "21254--21263",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136476"}

@inproceedings{bb140473,
        AUTHOR = "Yang, J. and Chen, J. W. and Yanai, K.",
        TITLE = "Transformer-based Cross-modal Recipe Embeddings with Large Batch Training",
        BOOKTITLE = MMMod23,
        YEAR = "2023",
        PAGES = "II: 471--482",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136477"}

@inproceedings{bb140474,
        AUTHOR = "Huang, K. W. and Chen, G. C. F. and Chang, P. W. and Lin, S. C. and Hsu, C. and Thengane, V. and Lin, J. Y. Y.",
        TITLE = "Strong Gravitational Lensing Parameter Estimation with Vision Transformer",
        BOOKTITLE = AI4Space22,
        YEAR = "2022",
        PAGES = "143--153",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136478"}

@inproceedings{bb140475,
        AUTHOR = "Zheng, M. and Luo, J. P.",
        TITLE = "Space-time Video Super-resolution {3D} Transformer",
        BOOKTITLE = MMMod23,
        YEAR = "2023",
        PAGES = "II: 374--385",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136479"}

@inproceedings{bb140476,
        AUTHOR = "Ye, X. and Bilodeau, G. A.",
        TITLE = "{VPTR}: Efficient Transformers for Video Prediction",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "3492--3499",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136480"}

@inproceedings{bb140477,
        AUTHOR = "Liang, Y. X. and Zhou, P. and Zimmermann, R. and Yan, S. C.",
        TITLE = "{DualFormer}: Local-Global Stratified Transformer for Efficient Video Recognition",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXIV:577--595",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136481"}

@inproceedings{bb140478,
        AUTHOR = "Wang, J. and Yang, X. T. and Li, H. D. and Liu, L. and Wu, Z. X. and Jiang, Y. G.",
        TITLE = "Efficient Video Transformers with Spatial-Temporal Token Selection",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:69--86",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136482"}

@inproceedings{bb140479,
        AUTHOR = "Yuan, J. and Barmpoutis, P. and Stathaki, T.",
        TITLE = "Multi-Scale Deformable Transformer Encoder Based Single-Stage Pedestrian Detection",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "2906--2910",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136483"}

@inproceedings{bb140480,
        AUTHOR = "Yun, H. and Lee, S. and Kim, G.",
        TITLE = "Panoramic Vision Transformer for Saliency Detection in 360{$^\circ$} Videos",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:422--439",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136484"}

@inproceedings{bb140481,
        AUTHOR = "Sun, G. X. and Hua, Y. and Hu, G. S. and Robertson, N.",
        TITLE = "{TDViT}: Temporal Dilated Video Transformer for Dense Video Tasks",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:285--301",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136485"}

@inproceedings{bb140482,
        AUTHOR = "Wang, Y. H. and Zhang, J. C. and Li, Z. G. and Zeng, X. and Zhang, Z. and Zhang, D. and Long, Y. and Wang, N.",
        TITLE = "Neural Network-based In-Loop Filter for {CLIC} 2022",
        BOOKTITLE = CLIC22,
        YEAR = "2022",
        PAGES = "1773--1776",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136486"}

@inproceedings{bb140483,
        AUTHOR = "Chang, H. W. and Zhang, H. and Jiang, L. and Liu, C. and Freeman, W. T.",
        TITLE = "{MaskGIT}: Masked Generative Image Transformer",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "11305--11315",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136487"}

@inproceedings{bb140484,
        AUTHOR = "Herzig, R. and Ben Avraham, E. and Mangalam, K. and Bar, A. and Chechik, G. and Rohrbach, A. and Darrell, T. J. and Globerson, A.",
        TITLE = "Object-Region Video Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "3138--3149",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136488"}

@inproceedings{bb140485,
        AUTHOR = "Wang, R. and Chen, D. D. and Wu, Z. and Chen, Y. P. and Dai, X. and Liu, M. C. and Jiang, Y. G. and Zhou, L. and Yuan, L.",
        TITLE = "{BEVT}: {BERT} Pretraining of Video Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "14713--14723",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136489"}

@inproceedings{bb140486,
        AUTHOR = "Wu, C. Y. and Li, Y. and Mangalam, K. and Fan, H. Q. and Xiong, B. and Malik, J. and Feichtenhofer, C.",
        TITLE = "{MeMViT}: Memory-Augmented Multiscale Vision Transformer for Efficient Long-Term Video Recognition",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "13577--13587",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136490"}

@inproceedings{bb140487,
        AUTHOR = "Mangalam, K. and Fan, H. Q. and Li, Y. and Wu, C. Y. and Xiong, B. and Feichtenhofer, C. and Malik, J.",
        TITLE = "Reversible Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10820--10830",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136491"}

@inproceedings{bb140488,
        AUTHOR = "Li, Y. and Wu, C. Y. and Fan, H. Q. and Mangalam, K. and Xiong, B. and Malik, J. and Feichtenhofer, C.",
        TITLE = "{MViTv2}: Improved Multiscale Vision Transformers for Classification and Detection",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "4794--4804",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136492"}

@inproceedings{bb140489,
        AUTHOR = "Ranasinghe, K. and Naseer, M. and Khan, S. and Khan, F. S. and Ryoo, M. S.",
        TITLE = "Self-supervised Video Transformer",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "2864--2874",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136493"}

@inproceedings{bb140490,
        AUTHOR = "Yang, S. and Wang, X. G. and Li, Y. and Fang, Y. X. and Fang, J. and Liu, W. Y. and Zhao, X. and Shan, Y.",
        TITLE = "Temporally Efficient Vision Transformer for Video Instance Segmentation",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "2875--2885",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136494"}

@inproceedings{bb140491,
        AUTHOR = "Liu, Z. and Ning, J. and Cao, Y. and Wei, Y. X. and Zhang, Z. and Lin, S. and Hu, H.",
        TITLE = "Video {Swin} Transformer",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "3192--3201",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136495"}

@inproceedings{bb140492,
        AUTHOR = "Yan, S. and Xiong, X. and Arnab, A. and Lu, Z. C. and Zhang, M. and Sun, C. and Schmid, C.",
        TITLE = "Multiview Transformers for Video Recognition",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "3323--3333",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136496"}

@inproceedings{bb140493,
        AUTHOR = "Shao, R. Z. and Wu, G. and Zhou, Y. M. and Fu, Y. and Fang, L. and Liu, Y. B.",
        TITLE = "{LocalTrans}: A Multiscale Local Transformer Network for Cross-Resolution Homography Estimation",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "14870--14879",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136497"}

@inproceedings{bb140494,
        AUTHOR = "Rombach, R. and Esser, P. and Ommer, B.",
        TITLE = "Geometry-Free View Synthesis: Transformers and no {3D} Priors",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "14336--14346",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136498"}

@inproceedings{bb140495,
        AUTHOR = "Tan, J. and Tang, J. Q. and Wang, L. M. and Wu, G. S.",
        TITLE = "Relaxed Transformer Decoders for Direct Action Proposal Generation",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "13506--13515",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136499"}

@inproceedings{bb140496,
        AUTHOR = "Liu, S. and Fan, H. Q. and Qian, S. S. and Chen, Y. and Ding, W. and Wang, Z. Y.",
        TITLE = "{HiT}: Hierarchical Transformer with Momentum Contrast for Video-Text Retrieval",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "11895--11905",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136500"}

@inproceedings{bb140497,
        AUTHOR = "Truong, T. D. and Duong, C. N. and Vu, T. D. and Pham, H. A. and Raj, B. and Le, N. and Luu, K.",
        TITLE = "The Right to Talk: An Audio-Visual Transformer Approach",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1085--1094",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136501"}

@inproceedings{bb140498,
        AUTHOR = "Weng, W. M. and Zhang, Y. and Xiong, Z. W.",
        TITLE = "Event-based Video Reconstruction Using Transformer",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2543--2552",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136502"}

@inproceedings{bb140499,
        AUTHOR = "Arnab, A. and Dehghani, M. and Heigold, G. and Sun, C. and Lucic, M. and Schmid, C.",
        TITLE = "{ViViT}: A Video Vision Transformer",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "6816--6826",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136503"}

Last update:Apr 18, 2024 at 11:38:49