@inproceedings{bb154600,
        AUTHOR = "Wu, B. and Xu, C. F. and Dai, X. L. and Wan, A. and Zhang, P. Z. and Yan, Z. C. and Tomizuka, M. and Gonzalez, J. and Keutzer, K. and Vajda, P.",
        TITLE = "Visual Transformers: Where Do Transformers Really Belong in Vision Models?",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "579--589",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150518"}

@inproceedings{bb154601,
        AUTHOR = "Qiu, Y. and Yamamoto, S. and Nakashima, K. and Suzuki, R. and Iwata, K. and Kataoka, H. and Satoh, Y.",
        TITLE = "Describing and Localizing Multiple Changes with Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1951--1960",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150519"}

@inproceedings{bb154602,
        AUTHOR = "Song, M. and Choi, J. and Han, B. H.",
        TITLE = "Variable-Rate Deep Image Compression through Spatially-Adaptive Feature Transform",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2360--2369",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150520"}

@inproceedings{bb154603,
        AUTHOR = "Dong, Q. and Tu, Z. W. and Liao, H. F. and Zhang, Y. T. and Mahadevan, V. and Soatto, S.",
        TITLE = "Visual Relationship Detection Using Part-and-Sum Transformers with Composite Queries",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "3530--3539",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150521"}

@inproceedings{bb154604,
        AUTHOR = "Fan, H. Q. and Xiong, B. and Mangalam, K. and Li, Y. H. and Yan, Z. C. and Malik, J. and Feichtenhofer, C.",
        TITLE = "Multiscale Vision Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "6804--6815",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150522"}

@inproceedings{bb154605,
        AUTHOR = "Mahmood, K. and Mahmood, R. and van Dijk, M.",
        TITLE = "On the Robustness of Vision Transformers to Adversarial Examples",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "7818--7827",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150523"}

@inproceedings{bb154606,
        AUTHOR = "Chen, X. L. and Xie, S. and He, K.",
        TITLE = "An Empirical Study of Training Self-Supervised Vision Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "9620--9629",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150524"}

@inproceedings{bb154607,
        AUTHOR = "Yuan, Y. and Weng, X. and Ou, Y. and Kitani, K.",
        TITLE = "{AgentFormer}: Agent-Aware Transformers for Socio-Temporal Multi-Agent Forecasting",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "9793--9803",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150525"}

@inproceedings{bb154608,
        AUTHOR = "Wu, K. and Peng, H. W. and Chen, M. H. and Fu, J. L. and Chao, H. Y.",
        TITLE = "Rethinking and Improving Relative Position Encoding for Vision Transformer",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "10013--10021",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150526"}

@inproceedings{bb154609,
        AUTHOR = "Bhojanapalli, S. and Chakrabarti, A. and Glasner, D. and Li, D. and Unterthiner, T. and Veit, A.",
        TITLE = "Understanding Robustness of Transformers for Image Classification",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "10211--10221",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150527"}

@inproceedings{bb154610,
        AUTHOR = "Yan, B. and Peng, H. and Fu, J. L. and Wang, D. and Lu, H. C.",
        TITLE = "Learning Spatio-Temporal Transformer for Visual Tracking",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "10428--10437",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150528"}

@inproceedings{bb154611,
        AUTHOR = "Voskou, A. and Panousis, K. P. and Kosmopoulos, D. and Metaxas, D. N. and Chatzis, S.",
        TITLE = "Stochastic Transformer Networks with Linear Competing Units: Application to end-to-end {SL} Translation",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "11926--11935",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150529"}

@inproceedings{bb154612,
        AUTHOR = "Ranftl, R. and Bochkovskiy, A. and Koltun, V.",
        TITLE = "Vision Transformers for Dense Prediction",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "12159--12168",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150530"}

@inproceedings{bb154613,
        AUTHOR = "Chen, M. H. and Peng, H. W. and Fu, J. L. and Ling, H. B.",
        TITLE = "{AutoFormer}: Searching Transformers for Visual Recognition",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "12250--12260",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150531"}

@inproceedings{bb154614,
        AUTHOR = "Yuan, K. and Guo, S. P. and Liu, Z. W. and Zhou, A. and Yu, F. W. and Wu, W.",
        TITLE = "Incorporating Convolution Designs into Visual Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "559--568",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150532"}

@inproceedings{bb154615,
        AUTHOR = "Chen, Z. and Xie, L. X. and Niu, J. W. and Liu, X. F. and Wei, L. H. and Tian, Q.",
        TITLE = "Visformer: The Vision-friendly Transformer",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "569--578",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150533"}

@inproceedings{bb154616,
        AUTHOR = "Yao, Z. L. and Cao, Y. and Lin, Y. T. and Liu, Z. and Zhang, Z. and Hu, H.",
        TITLE = "Leveraging Batch Normalization for Vision Transformers",
        BOOKTITLE = NeruArch21,
        YEAR = "2021",
        PAGES = "413--422",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150534"}

@inproceedings{bb154617,
        AUTHOR = "Graham, B. and El Nouby, A. and Touvron, H. and Stock, P. and Joulin, A. and Jegou, H. and Douze, M.",
        TITLE = "{LeViT}: a Vision Transformer in {ConvNet}'s Clothing for Faster Inference",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "12239--12249",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150535"}

@inproceedings{bb154618,
        AUTHOR = "Horvath, J. and Baireddy, S. and Hao, H. X. and Montserrat, D. M. and Delp, E. J.",
        TITLE = "Manipulation Detection in Satellite Images Using Vision Transformer",
        BOOKTITLE = WMF21,
        YEAR = "2021",
        PAGES = "1032--1041",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150536"}

@inproceedings{bb154619,
        AUTHOR = "Horvath, J. and Montserrat, D. M. and Hao, H. X. and Delp, E. J.",
        TITLE = "Manipulation Detection in Satellite Images Using Deep Belief Networks",
        BOOKTITLE = WMF20,
        YEAR = "2020",
        PAGES = "2832--2840",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150537"}

@inproceedings{bb154620,
        AUTHOR = "Beal, J. and Wu, H. Y. and Park, D. H. and Zhai, A. and Kislyuk, D.",
        TITLE = "Billion-Scale Pretraining with Vision Transformers for Multi-Task Visual Representations",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "1431--1440",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150538"}

@article{bb154621,
        AUTHOR = "Jiang, B. and Zhao, K. K. and Tang, J.",
        TITLE = "{RGTransformer}: Region-Graph Transformer for Image Representation and Few-Shot Classification",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "792--796",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150539"}

@article{bb154622,
        AUTHOR = "Kim, B. and Kim, J. and Ye, J. C.",
        TITLE = "Task-Agnostic Vision Transformer for Distributed Learning of Image Processing",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "203--218",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150540"}

@article{bb154623,
        AUTHOR = "Park, S. and Ye, J. C.",
        TITLE = "Multi-Task Distributed Learning Using Vision Transformer With Random Patch Permutation",
        JOURNAL = MedImg,
        VOLUME = "42",
        YEAR = "2023",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "2091--2105",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150541"}

@article{bb154624,
        AUTHOR = "Kim, B. J. and Choi, H. and Jang, H. and Lee, D. G. and Jeong, W. and Kim, S. W.",
        TITLE = "Improved robustness of vision transformers via prelayernorm in patch embedding",
        JOURNAL = PR,
        VOLUME = "141",
        YEAR = "2023",
        PAGES = "109659",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150542"}

@article{bb154625,
        AUTHOR = "Zhou, D. and Hou, Q. and Yang, L. J. and Jin, X. J. and Feng, J. S.",
        TITLE = "Token Selection is a Simple Booster for Vision Transformers",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "12738--12746",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150543"}

@article{bb154626,
        AUTHOR = "Feng, Z. Z. and Zhang, S. L.",
        TITLE = "Efficient Vision Transformer via Token Merger",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "4156--4169",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150544"}

@article{bb154627,
        AUTHOR = "Qian, S. J. and Zhu, Y. and Li, W. B. and Li, M. and Jia, J. Y.",
        TITLE = "What Makes for Good Tokenizers in Vision Transformer?",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "13011--13023",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150545"}

@article{bb154628,
        AUTHOR = "Fu, K. and Yuan, M. Z. and Liu, S. L. and Wang, M.",
        TITLE = "Boosting {Point-BERT} by Multi-Choice Tokens",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "438--447",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150546"}

@article{bb154629,
        AUTHOR = "Yan, F. Y. and Yan, B. and Liang, W. and Pei, M. T.",
        TITLE = "Token labeling-guided multi-scale medical image classification",
        JOURNAL = PRL,
        VOLUME = "178",
        YEAR = "2024",
        PAGES = "28--34",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150547"}

@article{bb154630,
        AUTHOR = "Li, Y. X. and Huang, Y. W. and He, N. and Ma, K. and Zheng, Y. F.",
        TITLE = "Improving vision transformer for medical image classification via token-wise perturbation",
        JOURNAL = JVCIR,
        VOLUME = "98",
        YEAR = "2024",
        PAGES = "104022",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150548"}

@article{bb154631,
        AUTHOR = "Kang, J. Y. and Heo, B. and Choe, J.",
        TITLE = "Improving {ViT} interpretability with patch-level mask prediction",
        JOURNAL = PRL,
        VOLUME = "187",
        YEAR = "2025",
        PAGES = "73--79",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150549"}

@article{bb154632,
        AUTHOR = "Arya, R. K. and Peddi, R. and Srivastava, R.",
        TITLE = "Hyperspectral image classification using hybrid convolutional-based cross-patch retentive network",
        JOURNAL = CVIU,
        VOLUME = "257",
        YEAR = "2025",
        PAGES = "104382",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150550"}

@article{bb154633,
        AUTHOR = "Niu, Y. and Song, Z. C. and Luo, Q. Y. and Chen, G. C. and Ma, M. M. and Li, F.",
        TITLE = "{ATMformer}: An Adaptive Token Merging Vision Transformer for Remote Sensing Image Scene Classification",
        JOURNAL = RS,
        VOLUME = "17",
        YEAR = "2025",
        NUMBER = "4",
        PAGES = "660",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150551"}

@article{bb154634,
        AUTHOR = "Wang, Y. C. and Yang, Y. Z.",
        TITLE = "Efficient Visual Transformer by Learnable Token Merging",
        JOURNAL = PAMI,
        VOLUME = "47",
        YEAR = "2025",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "9597--9608",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150552"}

@inproceedings{bb154635,
        AUTHOR = "Bergner, B. and Lippert, C. and Mahendran, A.",
        TITLE = "Token {Cropr}: Faster {ViTs} for Quite a Few Tasks",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "9740--9750",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150553"}

@inproceedings{bb154636,
        AUTHOR = "Dang, C. X. and Duan, Z. and An, P. and Zhang, X. M. and Hu, X. and Ma, J.",
        TITLE = "{FASTer}: Focal Token Acquiring-and-Scaling Transformer for Long-term {3D} Object Detection",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "17029--17038",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150554"}

@inproceedings{bb154637,
        AUTHOR = "Olszewski, J. and Rymarczyk, D. and Wojcik, P. and Pach, M. and Zielinski, B.",
        TITLE = "{TORE}: Token Recycling in Vision Transformers for Efficient Active Visual Exploration",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "8606--8616",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150555"}

@inproceedings{bb154638,
        AUTHOR = "Eliopoulos, N. J. and Jajal, P. and Davis, J. C. and Liu, G. and Thiravathukal, G. K. and Lu, Y. H.",
        TITLE = "Pruning One More Token is Enough: Leveraging Latency-Workload Non-Linearities for Vision Transformers on the Edge",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "7153--7162",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150556"}

@inproceedings{bb154639,
        AUTHOR = "Koner, R. and Jain, G. and Jain, P. and Tresp, V. and Paul, S.",
        TITLE = "{LookupVIT}: Compressing Visual Information to a Limited Number of Tokens",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "LXXXVI: 322--337",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150557"}

@inproceedings{bb154640,
        AUTHOR = "Jie, S. and Tang, Y. H. and Guo, J. Y. and Deng, Z. H. and Han, K. and Wang, Y. H.",
        TITLE = "Token Compensator: Altering Inference Cost of Vision Transformer Without Re-tuning",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XVI: 76--94",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150558"}

@inproceedings{bb154641,
        AUTHOR = "Huang, W. X. and Shen, Y. H. and Xie, J. and Zhang, B. C. and He, G. Q. and Li, K. and Sun, X. and Lin, S. H.",
        TITLE = "A General and Efficient Training for Transformer via Token Expansion",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "15783--15792",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150559"}

@inproceedings{bb154642,
        AUTHOR = "Wu, J. and Duan, B. and Kang, W. T. and Tang, H. and Yan, Y.",
        TITLE = "Token Transformation Matters: Towards Faithful Post-Hoc Explanation for Vision Transformer",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "10926--10935",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150560"}

@inproceedings{bb154643,
        AUTHOR = "Yu, Q. and Tanaka, M. and Fujiwara, K.",
        TITLE = "Exploring Vision Transformers for {3D} Human Motion-Language Models with Motion Patches",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "937--946",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150561"}

@inproceedings{bb154644,
        AUTHOR = "Yuan, X. and Fei, H. L. and Baek, J.",
        TITLE = "Efficient Transformer Adaptation with Soft Token Merging",
        BOOKTITLE = LargeVM24,
        YEAR = "2024",
        PAGES = "3658--3668",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150562"}

@inproceedings{bb154645,
        AUTHOR = "Xu, X. and Wang, S. and Chen, Y. D. and Zheng, Y. P. and Wei, Z. W. and Liu, J. J.",
        TITLE = "{GTP-ViT}: Efficient Vision Transformers via Graph-based Token Propagation",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "86--95",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150563"}

@inproceedings{bb154646,
        AUTHOR = "Ding, S. R. and Zhao, P. S. and Zhang, X. P. and Qian, R. and Xiong, H. K. and Tian, Q.",
        TITLE = "Prune Spatio-temporal Tokens by Semantic-aware Temporal Accumulation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "16899--16910",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150564"}

@inproceedings{bb154647,
        AUTHOR = "Guo, Y. and Stutz, D. and Schiele, B.",
        TITLE = "Improving Robustness of Vision Transformers by Reducing Sensitivity to Patch Corruptions",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "4108--4118",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150565"}

@inproceedings{bb154648,
        AUTHOR = "Xie, W. and Zhao, Z. and Li, S. Y. and Zuo, B. H. and Wang, Y. G.",
        TITLE = "Nonrigid Object Contact Estimation With Regional Unwrapping Transformer",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "9308--9317",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150566"}

@inproceedings{bb154649,
        AUTHOR = "Nalmpantis, A. and Panagiotopoulos, A. and Gkountouras, J. and Papakostas, K. and Aziz, W.",
        TITLE = "Vision {DiffMask}: Faithful Interpretation of Vision Transformers with Differentiable Patch Masking",
        BOOKTITLE = XAI4CV23,
        YEAR = "2023",
        PAGES = "3756--3763",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150567"}

@inproceedings{bb154650,
        AUTHOR = "Beyer, L. and Izmailov, P. and Kolesnikov, A. and Caron, M. and Kornblith, S. and Zhai, X. H. and Minderer, M. and Tschannen, M. and Alabdulmohsin, I. and Pavetic, F.",
        TITLE = "{FlexiViT}: One Model for All Patch Sizes",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "14496--14506",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150568"}

@inproceedings{bb154651,
        AUTHOR = "Chang, S. N. and Wang, P. and Lin, M. and Wang, F. and Zhang, D. J. H. and Jin, R. and Shou, M. Z.",
        TITLE = "Making Vision Transformers Efficient from A Token Sparsification View",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6195--6205",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150569"}

@inproceedings{bb154652,
        AUTHOR = "Phan, L. and Nguyen, H. T. H. and Warrier, H. and Gupta, Y.",
        TITLE = "Patch Embedding as Local Features: Unifying Deep Local and Global Features via Vision Transformer for Image Retrieval",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "II:204--221",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150570"}

@inproceedings{bb154653,
        AUTHOR = "Liu, Y. and Matsoukas, C. and Strand, F. and Azizpour, H. and Smith, K.",
        TITLE = "{PatchDropout}: Economizing Vision Transformers Using Patch Dropout",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "3942--3951",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150571"}

@inproceedings{bb154654,
        AUTHOR = "Havtorn, J. D. and Royer, A. and Blankevoort, T. and Bejnordi, B. E.",
        TITLE = "{MSViT}: Dynamic Mixed-scale Tokenization for Vision Transformers",
        BOOKTITLE = NIVT23,
        YEAR = "2023",
        PAGES = "838--848",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150572"}

@inproceedings{bb154655,
        AUTHOR = "Haurum, J. B. and Escalera, S. and Taylor, G. W. and Moeslund, T. B.",
        TITLE = "Which Tokens to Use? Investigating Token Reduction in Vision Transformers",
        BOOKTITLE = NIVT23,
        YEAR = "2023",
        PAGES = "773--783",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150573"}

@inproceedings{bb154656,
        AUTHOR = "Ren, S. and Yang, X. Y. and Liu, S. and Wang, X. C.",
        TITLE = "{SG-Former}: Self-guided Transformer with Evolving Token Reallocation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "5980--5991",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150574"}

@inproceedings{bb154657,
        AUTHOR = "Xiao, H. and Zheng, W. Z. and Zhu, Z. and Zhou, J. and Lu, J. W.",
        TITLE = "Token-Label Alignment for Vision Transformers",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "5472--5481",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150575"}

@inproceedings{bb154658,
        AUTHOR = "Popovic, N. and Paudel, D. P. and Probst, T. and Van Gool, L. J.",
        TITLE = "Token-Consistent Dropout For Calibrated Vision Transformers",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "1030--1034",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150576"}

@inproceedings{bb154659,
        AUTHOR = "Wei, S. Y. and Ye, T. Z. and Zhang, S. and Tang, Y. and Liang, J. J.",
        TITLE = "Joint Token Pruning and Squeezing Towards More Aggressive Compression of Vision Transformers",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2092--2101",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150577"}

@inproceedings{bb154660,
        AUTHOR = "Zhang, J. P. and Huang, Y. Z. and Wu, W. B. and Lyu, M. R.",
        TITLE = "Transferable Adversarial Attacks on Vision Transformers with Token Gradient Regularization",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "16415--16424",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150578"}

@inproceedings{bb154661,
        AUTHOR = "Ronen, T. and Levy, O. and Golbert, A.",
        TITLE = "Vision Transformers with Mixed-Resolution Tokenization",
        BOOKTITLE = ECV23,
        YEAR = "2023",
        PAGES = "4613--4622",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150579"}

@inproceedings{bb154662,
        AUTHOR = "Lorenzana, M. B. and Engstrom, C. and Chandra, S. S.",
        TITLE = "Transformer Compressed Sensing Via Global Image Tokens",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "3011--3015",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150580"}

@inproceedings{bb154663,
        AUTHOR = "Fayyaz, M. and Koohpayegani, S. A. and Jafari, F. R. and Sengupta, S. and Joze, H. R. V. and Sommerlade, E. and Pirsiavash, H. and Gall, J.",
        TITLE = "Adaptive Token Sampling for Efficient Vision Transformers",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XI:396--414",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150581"}

@inproceedings{bb154664,
        AUTHOR = "Kong, Z. L. and Dong, P. Y. and Ma, X. L. and Meng, X. and Niu, W. and Sun, M. S. and Shen, X. and Yuan, G. and Ren, B. and Tang, H. and Qin, M. H. and Wang, Y. Z.",
        TITLE = "{SPViT}: Enabling Faster Vision Transformers via Latency-Aware Soft Token Pruning",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XI:620--640",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150582"}

@inproceedings{bb154665,
        AUTHOR = "Fang, J. and Xie, L. X. and Wang, X. G. and Zhang, X. P. and Liu, W. Y. and Tian, Q.",
        TITLE = "{MSG-Transformer}: Exchanging Local Spatial Information by Manipulating Messenger Tokens",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12053--12062",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150583"}

@inproceedings{bb154666,
        AUTHOR = "Yin, H. X. and Vahdat, A. and Alvarez, J. M. and Mallya, A. and Kautz, J. and Molchanov, P.",
        TITLE = "{A-ViT}: Adaptive Tokens for Efficient Vision Transformer",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10799--10808",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150584"}

@inproceedings{bb154667,
        AUTHOR = "Gu, J. D. and Tresp, V. and Qin, Y.",
        TITLE = "Are Vision Transformers Robust to Patch Perturbations?",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XII:404--421",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150585"}

@inproceedings{bb154668,
        AUTHOR = "Li, Z. K. and Ma, L. P. and Chen, M. J. and Xiao, J. R. and Gu, Q. Y.",
        TITLE = "Patch Similarity Aware Data-Free Quantization for Vision Transformers",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XI:154--170",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150586"}

@inproceedings{bb154669,
        AUTHOR = "Yun, S. and Lee, H. and Kim, J. and Shin, J.",
        TITLE = "Patch-level Representation Learning for Self-supervised Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "8344--8353",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150587"}

@inproceedings{bb154670,
        AUTHOR = "Salman, H. and Jain, S. and Wong, E. and Madry, A.",
        TITLE = "Certified Patch Robustness via Smoothed Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15116--15126",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150588"}

@inproceedings{bb154671,
        AUTHOR = "Tang, Y. H. and Han, K. and Wang, Y. H. and Xu, C. and Guo, J. Y. and Xu, C. and Tao, D. C.",
        TITLE = "Patch Slimming for Efficient Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12155--12164",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150589"}

@inproceedings{bb154672,
        AUTHOR = "Chen, Z. Y. and Li, B. and Wu, S. and Xu, J. H. and Ding, S. H. and Zhang, W. Q.",
        TITLE = "Shape Matters: Deformable Patch Attack",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "IV:529--548",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150590"}

@inproceedings{bb154673,
        AUTHOR = "Chen, Z. Y. and Li, B. and Xu, J. H. and Wu, S. and Ding, S. H. and Zhang, W. Q.",
        TITLE = "Towards Practical Certifiable Patch Defense with Vision Transformer",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15127--15137",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150591"}

@inproceedings{bb154674,
        AUTHOR = "Yuan, L. and Chen, Y. P. and Wang, T. and Yu, W. H. and Shi, Y. J. and Jiang, Z. H. and Tay, F. E. H. and Feng, J. S. and Yan, S. C.",
        TITLE = "Tokens-to-Token {ViT}: Training Vision Transformers from Scratch on {ImageNet}",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "538--547",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150592"}

@article{bb154675,
        AUTHOR = "Hu, H. Q. and Lu, X. F. and Zhang, X. P. and Zhang, T. X. and Sun, G. L.",
        TITLE = "Inheritance Attention Matrix-Based Universal Adversarial Perturbations on Vision Transformers",
        JOURNAL = SPLetters,
        VOLUME = "28",
        YEAR = "2021",
        PAGES = "1923--1927",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150593"}

@article{bb154676,
        AUTHOR = "Xue, Z. X. and Tan, X. and Yu, X. and Liu, B. and Yu, A. Z. and Zhang, P. Q.",
        TITLE = "Deep Hierarchical Vision Transformer for Hyperspectral and {LiDAR} Data Classification",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "3095--3110",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150594"}

@article{bb154677,
        AUTHOR = "Heo, J. and Wang, Y. and Park, J.",
        TITLE = "Occlusion-aware spatial attention transformer for occluded object recognition",
        JOURNAL = PRL,
        VOLUME = "159",
        YEAR = "2022",
        PAGES = "70--76",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150595"}

@article{bb154678,
        AUTHOR = "Yu, X. H. and Wang, J. and Zhao, Y. and Gao, Y. S.",
        TITLE = "{Mix-ViT}: Mixing attentive vision transformer for ultra-fine-grained visual categorization",
        JOURNAL = PR,
        VOLUME = "135",
        YEAR = "2023",
        PAGES = "109131",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150596"}

@article{bb154679,
        AUTHOR = "Wu, G. and Zheng, W. S. and Lu, Y. T. and Tian, Q.",
        TITLE = "{PSLT}: A Light-Weight Vision Transformer With Ladder Self-Attention and Progressive Shift",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "11120--11135",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150597"}

@article{bb154680,
        AUTHOR = "Li, K. C. and Wang, Y. and Zhang, J. H. and Gao, P. and Song, G. L. and Liu, Y. and Li, H. S. and Qiao, Y.",
        TITLE = "{UniFormer}: Unifying Convolution and Self-Attention for Visual Recognition",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "12581--12600",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150598"}

@article{bb154681,
        AUTHOR = "Li, H. L. and Xue, M. Q. and Song, J. and Zhang, H. F. and Huang, W. Q. and Liang, L. Y. and Song, M. L.",
        TITLE = "Constituent Attention for Vision Transformers",
        JOURNAL = CVIU,
        VOLUME = "237",
        YEAR = "2023",
        PAGES = "103838",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150599"}

@article{bb154682,
        AUTHOR = "Qin, R. and Wang, C. Z. and Wu, Y. M. and Du, H. and Lv, M. Y.",
        TITLE = "A {U}-Shaped Convolution-Aided Transformer with Double Attention for Hyperspectral Image Classification",
        JOURNAL = RS,
        VOLUME = "16",
        YEAR = "2024",
        NUMBER = "2",
        PAGES = "288",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150600"}

@article{bb154683,
        AUTHOR = "Wang, W. X. and Chen, W. and Qiu, Q. and Chen, L. and Wu, B. X. and Lin, B. B. and He, X. F. and Liu, W.",
        TITLE = "{CrossFormer++}: A Versatile Vision Transformer Hinging on Cross-Scale
Attention",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "3123--3136",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150601"}

@article{bb154684,
        AUTHOR = "Zhang, Q. M. and Zhang, J. and Xu, Y. F. and Tao, D. C.",
        TITLE = "Vision Transformer With Quadrangle Attention",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "3608--3624",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150602"}

@article{bb154685,
        AUTHOR = "Huang, L. and Bai, X. Y. and Zeng, J. and Yu, M. Q. and Pang, W. and Wang, K. P.",
        TITLE = "{FAM}: Improving columnar vision transformer with feature attention
mechanism",
        JOURNAL = CVIU,
        VOLUME = "242",
        YEAR = "2024",
        PAGES = "103981",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150603"}

@article{bb154686,
        AUTHOR = "Li, M. X. and Yu, W. and Liu, Q. L. and Li, Z. L. and Li, R. and Zhong, B. and Zhang, S. P.",
        TITLE = "Hybrid Transformers With Attention-Guided Spatial Embeddings for
Makeup Transfer and Removal",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2876--2890",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150604"}

@article{bb154687,
        AUTHOR = "Nie, X. S. and Jin, H. Y. and Yan, Y. F. and Chen, X. and Zhu, Z. H. and Qi, D. L.",
        TITLE = "{ScopeViT}: Scale-Aware Vision Transformer",
        JOURNAL = PR,
        VOLUME = "153",
        YEAR = "2024",
        PAGES = "110470",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150605"}

@article{bb154688,
        AUTHOR = "Hanyu, T. and Yamazaki, K. and Tran, M. and McCann, R. A. and Liao, H. T. and Rainwater, C. and Adkins, M. and Cothren, J. and Le, N.",
        TITLE = "{AerialFormer}: Multi-Resolution Transformer for Aerial Image
Segmentation",
        JOURNAL = RS,
        VOLUME = "16",
        YEAR = "2024",
        NUMBER = "16",
        PAGES = "2930",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150606"}

@article{bb154689,
        AUTHOR = "Wang, D. Z. and Wei, X. Y. and Chen, C. Y.",
        TITLE = "{CAST}: An innovative framework for Cross-dimensional Attention
Structure in Transformers",
        JOURNAL = PR,
        VOLUME = "159",
        YEAR = "2025",
        PAGES = "111153",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150607"}

@article{bb154690,
        AUTHOR = "van Engelenhoven, A. and Strisciuglio, N. and Talavera, E.",
        TITLE = "{CAST}: Clustering self-Attention using Surrogate Tokens for efficient
transformers",
        JOURNAL = PRL,
        VOLUME = "186",
        YEAR = "2024",
        PAGES = "30--36",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150608"}

@article{bb154691,
        AUTHOR = "Zheng, G. Y. and Zang, B. and Yang, P. H. and Zhang, W. B. and Li, B.",
        TITLE = "{FE-SKViT}: A Feature-Enhanced {ViT} Model with Skip Attention for
Automatic Modulation Recognition",
        JOURNAL = RS,
        VOLUME = "16",
        YEAR = "2024",
        NUMBER = "22",
        PAGES = "4204",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150609"}

@article{bb154692,
        AUTHOR = "Lu, J. C. and Zhang, J. G. and Zhu, X. T. and Feng, J. F. and Xiang, T. and Zhang, L.",
        TITLE = "Softmax-Free Linear Transformers",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "3355--3374",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150610"}

@article{bb154693,
        AUTHOR = "Li, C. H. and Zhang, C. N.",
        TITLE = "Toward a deeper understanding: {RetNet} viewed through Convolution",
        JOURNAL = PR,
        VOLUME = "155",
        YEAR = "2024",
        PAGES = "110625",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150611"}

@article{bb154694,
        AUTHOR = "Liao, H. X. and Li, X. S. and Qin, X. and Wang, W. J. and He, G. D. and Huang, H. J. and Guo, X. and Chun, X. and Zhang, J. Y. and Fu, Y. Q. and Qin, Z. Y.",
        TITLE = "{EPSViTs}: A hybrid architecture for image classification based on
parameter-shared multi-head self-attention",
        JOURNAL = IVC,
        VOLUME = "149",
        YEAR = "2024",
        PAGES = "105130",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150612"}

@article{bb154695,
        AUTHOR = "Sa, J. W. and Ryu, J. and Kim, H.",
        TITLE = "{ECTFormer}: An efficient Conv-Transformer model design for image
recognition",
        JOURNAL = PR,
        VOLUME = "159",
        YEAR = "2025",
        PAGES = "111092",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150613"}

@article{bb154696,
        AUTHOR = "Li, J. F. and Feng, M. L. and Xia, C. Y.",
        TITLE = "{DBCvT}: Double Branch Convolutional Transformer for Medical Image
Classification",
        JOURNAL = PRL,
        VOLUME = "186",
        YEAR = "2024",
        PAGES = "250--257",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150614"}

@article{bb154697,
        AUTHOR = "Liao, Y. and Gao, Y. S. and Zhang, W. C.",
        TITLE = "Dynamic accumulated attention map for interpreting evolution of
decision-making in vision transformer",
        JOURNAL = PR,
        VOLUME = "165",
        YEAR = "2025",
        PAGES = "111607",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150615"}

@article{bb154698,
        AUTHOR = "Shi, Y. L. and Sun, M. W. and Wang, Y. S. and Ma, J. H. and Chen, Z. Q.",
        TITLE = "{EViT}: An Eagle Vision Transformer With Bi-Fovea Self-Attention",
        JOURNAL = Cyber,
        VOLUME = "55",
        YEAR = "2025",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "1288--1300",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150616"}

@article{bb154699,
        AUTHOR = "Long, W. and Chen, Z. Y. and Li, W. T. and Zhang, Y. J. and Yao, H. and Peng, J. X. and Cui, Z. W.",
        TITLE = "Leveraging negative correlation for Full-Range Self-Attention in
Vision Transformers",
        JOURNAL = PR,
        VOLUME = "169",
        YEAR = "2026",
        PAGES = "111899",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150617"}

Last update: Nov 10, 2025 at 14:27:42