@inproceedings{bb150800,
        AUTHOR = "Yu, Q. H. and Wang, H. Y. and Qiao, S. Y. and Collins, M. and Zhu, Y. K. and Adam, H. and Yuille, A. L. and Chen, L. C.",
        TITLE = "k-means Mask Transformer",
        BOOKTITLE = ECCV22,
        VOLUME = "XXIX",
        YEAR = "2022",
        PAGES = "288--307",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146729"}

@inproceedings{bb150801,
        AUTHOR = "Pham, K. and Kafle, K. and Lin, Z. and Ding, Z. H. and Cohen, S. and Tran, Q. and Shrivastava, A.",
        TITLE = "Improving Closed and Open-Vocabulary Attribute Prediction Using Transformers",
        BOOKTITLE = ECCV22,
        VOLUME = "XXV",
        YEAR = "2022",
        PAGES = "201--219",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146730"}

@inproceedings{bb150802,
        AUTHOR = "Yu, W. X. and Zhang, H. and Lan, T. X. and Hu, Y. C. and Yin, D.",
        TITLE = "{CBPT}: A New Backbone for Enhancing Information Transmission of Vision Transformers",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "156--160",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146731"}

@inproceedings{bb150803,
        AUTHOR = "Takeda, M. and Yanai, K.",
        TITLE = "Continual Learning in Vision Transformer",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "616--620",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146732"}

@inproceedings{bb150804,
        AUTHOR = "Zhou, W. L. and Kamata, S. I. and Luo, Z. B. and Xue, X.",
        TITLE = "Rethinking Unified Spectral-Spatial-Based Hyperspectral Image Classification Under {3D} Configuration of Vision Transformer",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "711--715",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146733"}

@inproceedings{bb150805,
        AUTHOR = "Cao, Y. H. and Yu, H. and Wu, J. X.",
        TITLE = "Training Vision Transformers with only 2040 Images",
        BOOKTITLE = ECCV22,
        VOLUME = "XXV",
        YEAR = "2022",
        PAGES = "220--237",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146734"}

@inproceedings{bb150806,
        AUTHOR = "Wang, C. and Xu, H. M. and Zhang, X. and Wang, L. and Zheng, Z. T. and Liu, H. F.",
        TITLE = "Convolutional Embedding Makes Hierarchical Vision Transformer Stronger",
        BOOKTITLE = ECCV22,
        VOLUME = "XX",
        YEAR = "2022",
        PAGES = "739--756",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146735"}

@inproceedings{bb150807,
        AUTHOR = "Wu, B. X. and Gu, J. D. and Li, Z. F. and Cai, D. and He, X. F. and Liu, W.",
        TITLE = "Towards Efficient Adversarial Training on Vision Transformers",
        BOOKTITLE = ECCV22,
        VOLUME = "XIII",
        YEAR = "2022",
        PAGES = "307--325",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146736"}

@inproceedings{bb150808,
        AUTHOR = "Zong, Z. F. and Li, K. C. and Song, G. L. and Wang, Y. and Qiao, Y. and Leng, B. and Liu, Y.",
        TITLE = "Self-slimmed Vision Transformer",
        BOOKTITLE = ECCV22,
        VOLUME = "XI",
        YEAR = "2022",
        PAGES = "432--448",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146737"}

@inproceedings{bb150809,
        AUTHOR = "Fayyaz, M. and Koohpayegani, S. A. and Jafari, F. R. and Sengupta, S. and Joze, H. R. V. and Sommerlade, E. and Pirsiavash, H. and Gall, J.",
        TITLE = "Adaptive Token Sampling for Efficient Vision Transformers",
        BOOKTITLE = ECCV22,
        VOLUME = "XI",
        YEAR = "2022",
        PAGES = "396--414",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146738"}

@inproceedings{bb150810,
        AUTHOR = "Weng, Z. J. and Yang, X. T. and Li, A. and Wu, Z. X. and Jiang, Y. G.",
        TITLE = "Semi-supervised Vision Transformers",
        BOOKTITLE = ECCV22,
        VOLUME = "XXX",
        YEAR = "2022",
        PAGES = "605--620",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146739"}

@inproceedings{bb150811,
        AUTHOR = "Su, T. and Ye, S. and Song, C. Q. and Cheng, J.",
        TITLE = "{Mask-Vit}: an Object Mask Embedding in Vision Transformer for Fine-Grained Visual Classification",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "1626--1630",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146740"}

@inproceedings{bb150812,
        AUTHOR = "Gai, L. and Chen, W. and Gao, R. and Chen, Y. W. and Qiao, X.",
        TITLE = "Using Vision Transformers in {3-D} Medical Image Classifications",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "696--700",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146741"}

@inproceedings{bb150813,
        AUTHOR = "Wu, K. and Zhang, J. and Peng, H. and Liu, M. C. and Xiao, B. and Fu, J. L. and Yuan, L.",
        TITLE = "{TinyViT}: Fast Pretraining Distillation for Small Vision Transformers",
        BOOKTITLE = ECCV22,
        VOLUME = "XXI",
        YEAR = "2022",
        PAGES = "68--85",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146742"}

@inproceedings{bb150814,
        AUTHOR = "Gao, L. and Nie, D. and Li, B. and Ren, X. F.",
        TITLE = "Doubly-Fused {ViT}: Fuse Information from Vision Transformer Doubly with Local Representation",
        BOOKTITLE = ECCV22,
        VOLUME = "XXIII",
        YEAR = "2022",
        PAGES = "744--761",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146743"}

@inproceedings{bb150815,
        AUTHOR = "Yao, T. and Pan, Y. W. and Li, Y. and Ngo, C. W. and Mei, T.",
        TITLE = "{Wave-ViT}: Unifying Wavelet and Transformers for Visual Representation Learning",
        BOOKTITLE = ECCV22,
        VOLUME = "XXV",
        YEAR = "2022",
        PAGES = "328--345",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146744"}

@inproceedings{bb150816,
        AUTHOR = "Yuan, Z. H. and Xue, C. H. and Chen, Y. Q. and Wu, Q. and Sun, G. Y.",
        TITLE = "{PTQ4ViT}: Post-training Quantization for Vision Transformers with Twin Uniform Quantization",
        BOOKTITLE = ECCV22,
        VOLUME = "XII",
        YEAR = "2022",
        PAGES = "191--207",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146745"}

@inproceedings{bb150817,
        AUTHOR = "Kong, Z. L. and Dong, P. Y. and Ma, X. L. and Meng, X. and Niu, W. and Sun, M. S. and Shen, X. and Yuan, G. and Ren, B. and Tang, H. and Qin, M. H. and Wang, Y. Z.",
        TITLE = "{SPViT}: Enabling Faster Vision Transformers via Latency-Aware Soft Token Pruning",
        BOOKTITLE = ECCV22,
        VOLUME = "XI",
        YEAR = "2022",
        PAGES = "620--640",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146746"}

@inproceedings{bb150818,
        AUTHOR = "Pan, J. T. and Bulat, A. and Tan, F. and Zhu, X. T. and Dudziak, L. and Li, H. S. and Tzimiropoulos, G. and Martinez, B.",
        TITLE = "{EdgeViTs}: Competing Light-Weight {CNNs} on Mobile Devices with Vision Transformers",
        BOOKTITLE = ECCV22,
        VOLUME = "XI",
        YEAR = "2022",
        PAGES = "294--311",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146747"}

@inproceedings{bb150819,
        AUTHOR = "Liu, Y. and Mai, S. Q. and Chen, X. N. and Hsieh, C. J. and You, Y.",
        TITLE = "Towards Efficient and Scalable Sharpness-Aware Minimization",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12350--12360",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146748"}

@inproceedings{bb150820,
        AUTHOR = "Ren, P. Z. and Li, C. L. and Wang, G. R. and Xiao, Y. and Du, Q. and Liang, X. D. and Chang, X. J.",
        TITLE = "Beyond Fixation: Dynamic Window Visual Transformer",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "11977--11987",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146749"}

@inproceedings{bb150821,
        AUTHOR = "Fang, J. and Xie, L. X. and Wang, X. G. and Zhang, X. P. and Liu, W. Y. and Tian, Q.",
        TITLE = "{MSG-Transformer}: Exchanging Local Spatial Information by Manipulating Messenger Tokens",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12053--12062",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146750"}

@inproceedings{bb150822,
        AUTHOR = "Sandler, M. and Zhmoginov, A. and Vladymyrov, M. and Jackson, A.",
        TITLE = "Fine-tuning Image Transformers using Learnable Memory",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12145--12154",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146751"}

@inproceedings{bb150823,
        AUTHOR = "Yu, X. and Tang, L. and Rao, Y. M. and Huang, T. J. and Zhou, J. and Lu, J. W.",
        TITLE = "{Point-BERT}: Pre-training {3D} Point Cloud Transformers with Masked Point Modeling",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19291--19300",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146752"}

@inproceedings{bb150824,
        AUTHOR = "Park, C. and Jeong, Y. and Cho, M. and Park, J.",
        TITLE = "Fast Point Transformer",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "16928--16937",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146753"}

@inproceedings{bb150825,
        AUTHOR = "Tu, Z. Z. and Talebi, H. and Zhang, H. and Yang, F. and Milanfar, P. and Bovik, A. and Li, Y. X.",
        TITLE = "{MAXIM}: Multi-Axis {MLP} for Image Processing",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "5759--5770",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146754"}

@inproceedings{bb150826,
        AUTHOR = "Hou, Z. J. and Kung, S. Y.",
        TITLE = "Multi-Dimensional Vision Transformer Compression via Dependency Guided {Gaussian} Process Search",
        BOOKTITLE = EVW22,
        YEAR = "2022",
        PAGES = "3668--3677",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146755"}

@inproceedings{bb150827,
        AUTHOR = "Wang, Y. K. and Chen, X. H. and Cao, L. and Huang, W. B. and Sun, F. C. and Wang, Y. H.",
        TITLE = "Multimodal Token Fusion for Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12176--12185",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146756"}

@inproceedings{bb150828,
        AUTHOR = "Zhang, J. N. and Peng, H. W. and Wu, K. and Liu, M. C. and Xiao, B. and Fu, J. L. and Yuan, L.",
        TITLE = "{MiniViT}: Compressing Vision Transformers with Weight Multiplexing",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12135--12144",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146757"}

@inproceedings{bb150829,
        AUTHOR = "Chen, T. L. and Zhang, Z. Y. and Cheng, Y. and Awadallah, A. and Wang, Z. Y.",
        TITLE = "The Principle of Diversity: Training Stronger Vision Transformers Calls for Reducing All Levels of Redundancy",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12010--12020",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146758"}

@inproceedings{bb150830,
        AUTHOR = "Yin, H. X. and Vahdat, A. and Alvarez, J. M. and Mallya, A. and Kautz, J. and Molchanov, P.",
        TITLE = "{A-ViT}: Adaptive Tokens for Efficient Vision Transformer",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10799--10808",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146759"}

@inproceedings{bb150831,
        AUTHOR = "Lu, J. H. and Zhang, X. S. and Zhao, T. L. and He, X. Y. and Cheng, J.",
        TITLE = "{APRIL}: Finding the {Achilles'} Heel on Privacy for Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10041--10050",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146760"}

@inproceedings{bb150832,
        AUTHOR = "Hatamizadeh, A. and Yin, H. X. and Roth, H. and Li, W. Q. and Kautz, J. and Xu, D. and Molchanov, P.",
        TITLE = "{GradViT}: Gradient Inversion of Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10011--10020",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146761"}

@inproceedings{bb150833,
        AUTHOR = "Zhang, H. F. and Duan, J. R. and Xue, M. Q. and Song, J. and Sun, L. and Song, M. L.",
        TITLE = "Bootstrapping {ViTs}: Towards Liberating Vision Transformers from Pre-training",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "8934--8943",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146762"}

@inproceedings{bb150834,
        AUTHOR = "Chavan, A. and Shen, Z. Q. and Liu, Z. and Liu, Z. and Cheng, K. T. and Xing, E.",
        TITLE = "Vision Transformer Slimming: Multi-Dimension Searching in Continuous Optimization Space",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "4921--4931",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146763"}

@inproceedings{bb150835,
        AUTHOR = "Chen, R. J. and Chen, C. K. and Li, Y. C. and Chen, T. Y. and Trister, A. D. and Krishnan, R. G. and Mahmood, F.",
        TITLE = "Scaling Vision Transformers to Gigapixel Images via Hierarchical Self-Supervised Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "16123--16134",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146764"}

@inproceedings{bb150836,
        AUTHOR = "Zhai, X. H. and Kolesnikov, A. and Houlsby, N. and Beyer, L.",
        TITLE = "Scaling Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "1204--1213",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146765"}

@inproceedings{bb150837,
        AUTHOR = "Guo, J. Y. and Han, K. and Wu, H. and Tang, Y. and Chen, X. H. and Wang, Y. H. and Xu, C.",
        TITLE = "{CMT}: Convolutional Neural Networks Meet Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12165--12175",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146766"}

@inproceedings{bb150838,
        AUTHOR = "Meng, L. C. and Li, H. D. and Chen, B. C. and Lan, S. Y. and Wu, Z. X. and Jiang, Y. G. and Lim, S. N.",
        TITLE = "{AdaViT}: Adaptive Vision Transformers for Efficient Image Recognition",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12299--12308",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146767"}

@inproceedings{bb150839,
        AUTHOR = "Herrmann, C. and Sargent, K. and Jiang, L. and Zabih, R. and Chang, H. and Liu, C. and Krishnan, D. and Sun, D. Q.",
        TITLE = "Pyramid Adversarial Training Improves {ViT} Performance",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "13409--13419",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146768"}

@inproceedings{bb150840,
        AUTHOR = "Li, C. L. and Zhuang, B. and Wang, G. R. and Liang, X. D. and Chang, X. J. and Yang, Y.",
        TITLE = "Automated Progressive Learning for Efficient Training of Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12476--12486",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146769"}

@inproceedings{bb150841,
        AUTHOR = "Pu, M. Y. and Huang, Y. P. and Liu, Y. M. and Guan, Q. J. and Ling, H. B.",
        TITLE = "{EDTER}: Edge Detection with Transformer",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "1392--1402",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146770"}

@inproceedings{bb150842,
        AUTHOR = "Zhu, R. and Li, Z. Q. and Matai, J. and Porikli, F. M. and Chandraker, M.",
        TITLE = "{IRISformer}: Dense Vision Transformers for Single-Image Inverse Rendering in Indoor Scenes",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "2812--2821",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146771"}

@inproceedings{bb150843,
        AUTHOR = "Ermolov, A. and Mirvakhabova, L. and Khrulkov, V. and Sebe, N. and Oseledets, I.",
        TITLE = "Hyperbolic Vision Transformers: Combining Improvements in Metric Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "7399--7409",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146772"}

@inproceedings{bb150844,
        AUTHOR = "Zhang, C. Z. and Zhang, M. Y. and Zhang, S. H. and Jin, D. S. and Zhou, Q. and Cai, Z. A. and Zhao, H. and Liu, X. L. and Liu, Z. W.",
        TITLE = "Delving Deep into the Generalization of Vision Transformers under Distribution Shifts",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "7267--7276",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146773"}

@inproceedings{bb150845,
        AUTHOR = "Hou, Z. and Yu, B. and Tao, D. C.",
        TITLE = "{BatchFormer}: Learning to Explore Sample Relationships for Robust Representation Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "7246--7256",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146774"}

@inproceedings{bb150846,
        AUTHOR = "Zamir, S. W. and Arora, A. and Khan, S. and Hayat, M. and Khan, F. S. and Yang, M. H.",
        TITLE = "Restormer: Efficient Transformer for High-Resolution Image Restoration",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "5718--5729",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146775"}

@inproceedings{bb150847,
        AUTHOR = "Lin, K. and Wang, L. J. and Liu, Z. C.",
        TITLE = "Mesh {Graphormer}",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "12919--12928",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146776"}

@inproceedings{bb150848,
        AUTHOR = "Casey, E. and Perez, V. and Li, Z. R.",
        TITLE = "The Animation Transformer: Visual Correspondence via Segment Matching",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "11303--11312",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146777"}

@inproceedings{bb150849,
        AUTHOR = "Reizenstein, J. and Shapovalov, R. and Henzler, P. and Sbordone, L. and Labatut, P. and Novotny, D.",
        TITLE = "Common Objects in {3D}: Large-Scale Learning and Evaluation of Real-life {3D} Category Reconstruction",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "10881--10891",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146778"}

@inproceedings{bb150850,
        AUTHOR = "Feng, W. X. and Wang, Y. J. and Ma, L. H. and Yuan, Y. and Zhang, C.",
        TITLE = "Temporal Knowledge Consistency for Unsupervised Visual Representation Learning",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "10150--10160",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146779"}

@inproceedings{bb150851,
        AUTHOR = "Wu, H. P. and Xiao, B. and Codella, N. and Liu, M. C. and Dai, X. Y. and Yuan, L. and Zhang, L.",
        TITLE = "{CvT}: Introducing Convolutions to Vision Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "22--31",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146780"}

@inproceedings{bb150852,
        AUTHOR = "Touvron, H. and Cord, M. and Sablayrolles, A. and Synnaeve, G. and Jegou, H.",
        TITLE = "Going deeper with Image Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "32--42",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146781"}

@inproceedings{bb150853,
        AUTHOR = "Zhao, J. W. and Yan, K. and Zhao, Y. F. and Guo, X. W. and Huang, F. Y. and Li, J.",
        TITLE = "Transformer-based Dual Relation Graph for Multi-label Image Recognition",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "163--172",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146782"}

@inproceedings{bb150854,
        AUTHOR = "Pan, Z. Z. and Zhuang, B. and Liu, J. and He, H. Y. and Cai, J. F.",
        TITLE = "Scalable Vision Transformers with Hierarchical Pooling",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "367--376",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146783"}

@inproceedings{bb150855,
        AUTHOR = "Yuan, L. and Chen, Y. P. and Wang, T. and Yu, W. H. and Shi, Y. J. and Jiang, Z. H. and Tay, F. E. H. and Feng, J. S. and Yan, S. C.",
        TITLE = "{Tokens-to-Token ViT}: Training Vision Transformers from Scratch on {ImageNet}",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "538--547",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146784"}

@inproceedings{bb150856,
        AUTHOR = "Wu, B. and Xu, C. F. and Dai, X. L. and Wan, A. and Zhang, P. Z. and Yan, Z. C. and Tomizuka, M. and Gonzalez, J. and Keutzer, K. and Vajda, P.",
        TITLE = "Visual Transformers: Where Do Transformers Really Belong in Vision Models?",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "579--589",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146785"}

@inproceedings{bb150857,
        AUTHOR = "Hu, R. H. and Singh, A.",
        TITLE = "{UniT}: Multimodal Multitask Learning with a Unified Transformer",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1419--1429",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146786"}

@inproceedings{bb150858,
        AUTHOR = "Qiu, Y. and Yamamoto, S. and Nakashima, K. and Suzuki, R. and Iwata, K. and Kataoka, H. and Satoh, Y.",
        TITLE = "Describing and Localizing Multiple Changes with Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1951--1960",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146787"}

@inproceedings{bb150859,
        AUTHOR = "Song, M. and Choi, J. and Han, B. H.",
        TITLE = "Variable-Rate Deep Image Compression through Spatially-Adaptive Feature Transform",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2360--2369",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146788"}

% NOTE(review): first author changed from Shenga to Sheng (believed to be Hualian Sheng per the ICCV 2021 proceedings) -- confirm against the source listing.
@inproceedings{bb150860,
        AUTHOR = "Sheng, H. and Cai, S. and Liu, Y. and Deng, B. and Huang, J. Q. and Hua, X. S. and Zhao, M. J.",
        TITLE = "Improving {3D} Object Detection with Channel-wise Transformer",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2723--2732",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146789"}

@inproceedings{bb150861,
        AUTHOR = "Zhang, P. C. and Dai, X. and Yang, J. W. and Xiao, B. and Yuan, L. and Zhang, L. and Gao, J. F.",
        TITLE = "Multi-Scale Vision {Longformer}: A New Vision Transformer for High-Resolution Image Encoding",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2978--2988",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146790"}

@inproceedings{bb150862,
        AUTHOR = "Dong, Q. and Tu, Z. W. and Liao, H. F. and Zhang, Y. T. and Mahadevan, V. and Soatto, S.",
        TITLE = "Visual Relationship Detection Using Part-and-Sum Transformers with Composite Queries",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "3530--3539",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146791"}

@inproceedings{bb150863,
        AUTHOR = "Fan, H. Q. and Xiong, B. and Mangalam, K. and Li, Y. and Yan, Z. C. and Malik, J. and Feichtenhofer, C.",
        TITLE = "Multiscale Vision Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "6804--6815",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146792"}

@inproceedings{bb150864,
        AUTHOR = "Mahmood, K. and Mahmood, R. and van Dijk, M.",
        TITLE = "On the Robustness of Vision Transformers to Adversarial Examples",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "7818--7827",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146793"}

@inproceedings{bb150865,
        AUTHOR = "Chen, X. L. and Xie, S. and He, K.",
        TITLE = "An Empirical Study of Training Self-Supervised Vision Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "9620--9629",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146794"}

@inproceedings{bb150866,
        AUTHOR = "Yuan, Y. and Weng, X. and Ou, Y. and Kitani, K.",
        TITLE = "{AgentFormer}: Agent-Aware Transformers for Socio-Temporal Multi-Agent Forecasting",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "9793--9803",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146795"}

@inproceedings{bb150867,
        AUTHOR = "Wu, K. and Peng, H. W. and Chen, M. H. and Fu, J. L. and Chao, H. Y.",
        TITLE = "Rethinking and Improving Relative Position Encoding for Vision Transformer",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "10013--10021",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146796"}

@inproceedings{bb150868,
        AUTHOR = "Bhojanapalli, S. and Chakrabarti, A. and Glasner, D. and Li, D. and Unterthiner, T. and Veit, A.",
        TITLE = "Understanding Robustness of Transformers for Image Classification",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "10211--10221",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146797"}

@inproceedings{bb150869,
        AUTHOR = "Yan, B. and Peng, H. and Fu, J. L. and Wang, D. and Lu, H. C.",
        TITLE = "Learning Spatio-Temporal Transformer for Visual Tracking",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "10428--10437",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146798"}

@inproceedings{bb150870,
        AUTHOR = "Heo, B. and Yun, S. and Han, D. Y. and Chun, S. and Choe, J. and Oh, S. J.",
        TITLE = "Rethinking Spatial Dimensions of Vision Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "11916--11925",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146799"}

@inproceedings{bb150871,
        AUTHOR = "Voskou, A. and Panousis, K. P. and Kosmopoulos, D. and Metaxas, D. N. and Chatzis, S.",
        TITLE = "Stochastic Transformer Networks with Linear Competing Units: Application to end-to-end {SL} Translation",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "11926--11935",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146800"}

@inproceedings{bb150872,
        AUTHOR = "Ranftl, R. and Bochkovskiy, A. and Koltun, V.",
        TITLE = "Vision Transformers for Dense Prediction",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "12159--12168",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146801"}

@inproceedings{bb150873,
        AUTHOR = "Chen, M. H. and Peng, H. W. and Fu, J. L. and Ling, H. B.",
        TITLE = "{AutoFormer}: Searching Transformers for Visual Recognition",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "12250--12260",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146802"}

@inproceedings{bb150874,
        AUTHOR = "Yuan, K. and Guo, S. P. and Liu, Z. W. and Zhou, A. and Yu, F. W. and Wu, W.",
        TITLE = "Incorporating Convolution Designs into Visual Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "559--568",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146803"}

@inproceedings{bb150875,
        AUTHOR = "Chen, Z. and Xie, L. X. and Niu, J. W. and Liu, X. F. and Wei, L. H. and Tian, Q.",
        TITLE = "Visformer: The Vision-friendly Transformer",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "569--578",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146804"}

@inproceedings{bb150876,
        AUTHOR = "Yao, Z. L. and Cao, Y. and Lin, Y. T. and Liu, Z. and Zhang, Z. and Hu, H.",
        TITLE = "Leveraging Batch Normalization for Vision Transformers",
        BOOKTITLE = NeruArch21,
        YEAR = "2021",
        PAGES = "413--422",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146805"}

@inproceedings{bb150877,
        AUTHOR = "Graham, B. and El Nouby, A. and Touvron, H. and Stock, P. and Joulin, A. and Jegou, H. and Douze, M.",
        TITLE = "{LeViT}: a Vision Transformer in {ConvNet's} Clothing for Faster Inference",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "12239--12249",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146806"}

@inproceedings{bb150878,
        AUTHOR = "Horvath, J. and Baireddy, S. and Hao, H. X. and Montserrat, D. M. and Delp, E. J.",
        TITLE = "Manipulation Detection in Satellite Images Using Vision Transformer",
        BOOKTITLE = WMF21,
        YEAR = "2021",
        PAGES = "1032--1041",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146807"}

@inproceedings{bb150879,
        AUTHOR = "Horvath, J. and Montserrat, D. M. and Hao, H. X. and Delp, E. J.",
        TITLE = "Manipulation Detection in Satellite Images Using Deep Belief Networks",
        BOOKTITLE = WMF20,
        YEAR = "2020",
        PAGES = "2832--2840",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146808"}

@inproceedings{bb150880,
        AUTHOR = "Beal, J. and Wu, H. Y. and Park, D. H. and Zhai, A. and Kislyuk, D.",
        TITLE = "Billion-Scale Pretraining with Vision Transformers for Multi-Task Visual Representations",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "1431--1440",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT146809"}

@article{bb150881,
        AUTHOR = "Kim, B. and Kim, J. and Ye, J. C.",
        TITLE = "Task-Agnostic Vision Transformer for Distributed Learning of Image Processing",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "203--218",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146810"}

@article{bb150882,
        AUTHOR = "Park, S. and Ye, J. C.",
        TITLE = "Multi-Task Distributed Learning Using Vision Transformer With Random Patch Permutation",
        JOURNAL = MedImg,
        VOLUME = "42",
        YEAR = "2023",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "2091--2105",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146811"}

@article{bb150883,
        AUTHOR = "Kim, B. J. and Choi, H. and Jang, H. and Lee, D. G. and Jeong, W. and Kim, S. W.",
        TITLE = "Improved robustness of vision transformers via prelayernorm in patch embedding",
        JOURNAL = PR,
        VOLUME = "141",
        YEAR = "2023",
        PAGES = "109659",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146812"}

% The braced acronym in TITLE keeps its capitalisation under sentence-casing styles.
@article{bb150884,
        AUTHOR = "Kang, J. Y. and Heo, B. and Choe, J.",
        TITLE = "Improving {ViT} interpretability with patch-level mask prediction",
        JOURNAL = PRL,
        VOLUME = "187",
        YEAR = "2025",
        PAGES = "73--79",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146813"}

% NOTE(review): PAGES holds a single number, presumably an article number rather than a page range -- confirm.
@article{bb150885,
        AUTHOR = "Arya, R. K. and Peddi, R. and Srivastava, R.",
        TITLE = "Hyperspectral image classification using hybrid convolutional-based cross-patch retentive network",
        JOURNAL = CVIU,
        VOLUME = "257",
        YEAR = "2025",
        PAGES = "104382",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146814"}

% The braced token in TITLE keeps its capital D under sentence-casing styles.
@inproceedings{bb150886,
        AUTHOR = "Yu, Q. and Tanaka, M. and Fujiwara, K.",
        TITLE = "Exploring Vision Transformers for {3D} Human Motion-Language Models with Motion Patches",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "937--946",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146815"}

@inproceedings{bb150887,
        AUTHOR = "Guo, Y. and Stutz, D. and Schiele, B.",
        TITLE = "Improving Robustness of Vision Transformers by Reducing Sensitivity to Patch Corruptions",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "4108--4118",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146816"}

% The braced camel-case name in TITLE keeps its inner capital under sentence-casing styles.
@inproceedings{bb150888,
        AUTHOR = "Nalmpantis, A. and Panagiotopoulos, A. and Gkountouras, J. and Papakostas, K. and Aziz, W.",
        TITLE = "Vision {DiffMask}: Faithful Interpretation of Vision Transformers with Differentiable Patch Masking",
        BOOKTITLE = XAI4CV23,
        YEAR = "2023",
        PAGES = "3756--3763",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146817"}

% The braced model name in TITLE keeps its mixed capitalisation under sentence-casing styles.
@inproceedings{bb150889,
        AUTHOR = "Beyer, L. and Izmailov, P. and Kolesnikov, A. and Caron, M. and Kornblith, S. and Zhai, X. H. and Minderer, M. and Tschannen, M. and Alabdulmohsin, I. and Pavetic, F.",
        TITLE = "{FlexiViT}: One Model for All Patch Sizes",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "14496--14506",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146818"}

@inproceedings{bb150890,
        AUTHOR = "Chang, S. N. and Wang, P. and Lin, M. and Wang, F. and Zhang, D. J. H. and Jin, R. and Shou, M. Z.",
        TITLE = "Making Vision Transformers Efficient from A Token Sparsification View",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6195--6205",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146819"}

% NOTE(review): the roman-numeral prefix in PAGES looks like a proceedings volume -- confirm before moving it to a VOLUME field.
@inproceedings{bb150891,
        AUTHOR = "Phan, L. and Nguyen, H. T. H. and Warrier, H. and Gupta, Y.",
        TITLE = "Patch Embedding as Local Features: Unifying Deep Local and Global Features via Vision Transformer for Image Retrieval",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "II:204--221",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146820"}

% The braced method name in TITLE keeps its inner capital under sentence-casing styles.
@inproceedings{bb150892,
        AUTHOR = "Liu, Y. and Matsoukas, C. and Strand, F. and Azizpour, H. and Smith, K.",
        TITLE = "{PatchDropout}: Economizing Vision Transformers Using Patch Dropout",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "3942--3951",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146821"}

% NOTE(review): the roman-numeral prefix in PAGES looks like a proceedings volume -- confirm before moving it to a VOLUME field.
@inproceedings{bb150893,
        AUTHOR = "Gu, J. D. and Tresp, V. and Qin, Y.",
        TITLE = "Are Vision Transformers Robust to Patch Perturbations?",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XII:404--421",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146822"}

% NOTE(review): the roman-numeral prefix in PAGES looks like a proceedings volume -- confirm before moving it to a VOLUME field.
@inproceedings{bb150894,
        AUTHOR = "Li, Z. K. and Ma, L. P. and Chen, M. J. and Xiao, J. R. and Gu, Q. Y.",
        TITLE = "Patch Similarity Aware Data-Free Quantization for Vision Transformers",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XI:154--170",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146823"}

@inproceedings{bb150895,
        AUTHOR = "Yun, S. and Lee, H. and Kim, J. and Shin, J.",
        TITLE = "Patch-level Representation Learning for Self-supervised Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "8344--8353",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146824"}

@inproceedings{bb150896,
        AUTHOR = "Salman, H. and Jain, S. and Wong, E. and Madry, A.",
        TITLE = "Certified Patch Robustness via Smoothed Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15116--15126",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146825"}

% NOTE(review): "Xu, C." appears twice in AUTHOR; presumably two distinct authors sharing initials -- verify against the paper.
@inproceedings{bb150897,
        AUTHOR = "Tang, Y. and Han, K. and Wang, Y. H. and Xu, C. and Guo, J. Y. and Xu, C. and Tao, D. C.",
        TITLE = "Patch Slimming for Efficient Vision Transformers",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12155--12164",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146826"}

% NOTE(review): the roman-numeral prefix in PAGES looks like a proceedings volume -- confirm before moving it to a VOLUME field.
@inproceedings{bb150898,
        AUTHOR = "Chen, Z. Y. and Li, B. and Wu, S. and Xu, J. H. and Ding, S. H. and Zhang, W. Q.",
        TITLE = "Shape Matters: Deformable Patch Attack",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "IV:529--548",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146827"}

@inproceedings{bb150899,
        AUTHOR = "Chen, Z. Y. and Li, B. and Xu, J. H. and Wu, S. and Ding, S. H. and Zhang, W. Q.",
        TITLE = "Towards Practical Certifiable Patch Defense with Vision Transformer",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15127--15137",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT146828"}

Last update: Jun 10, 2025 at 21:16:26