% Vision-transformer bibliography entries bb154100 through bb154199.
% JOURNAL and BOOKTITLE values are bare macro names (ICCV21, CVPR23, PAMI, PR, ...);
% none of them is defined in this portion of the file, so their string
% definitions must be supplied elsewhere before these entries are processed.
% BIBSOURCE is a non-standard field recording the page each entry was taken from.

@inproceedings{bb154100,
  AUTHOR    = "Yuan, Y. and Weng, X. and Ou, Y. and Kitani, K.",
  TITLE     = "AgentFormer: Agent-Aware Transformers for Socio-Temporal Multi-Agent Forecasting",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "9793--9803",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150022"
}

@inproceedings{bb154101,
  AUTHOR    = "Wu, K. and Peng, H.W. and Chen, M.H. and Fu, J.L. and Chao, H.Y.",
  TITLE     = "Rethinking and Improving Relative Position Encoding for Vision Transformer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10013--10021",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150023"
}

@inproceedings{bb154102,
  AUTHOR    = "Bhojanapalli, S. and Chakrabarti, A. and Glasner, D. and Li, D. and Unterthiner, T. and Veit, A.",
  TITLE     = "Understanding Robustness of Transformers for Image Classification",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10211--10221",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150024"
}

@inproceedings{bb154103,
  AUTHOR    = "Yan, B. and Peng, H. and Fu, J.L. and Wang, D. and Lu, H.C.",
  TITLE     = "Learning Spatio-Temporal Transformer for Visual Tracking",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10428--10437",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150025"
}

@inproceedings{bb154104,
  AUTHOR    = "Voskou, A. and Panousis, K.P. and Kosmopoulos, D. and Metaxas, D.N. and Chatzis, S.",
  TITLE     = "Stochastic Transformer Networks with Linear Competing Units: Application to end-to-end SL Translation",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "11926--11935",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150026"
}

@inproceedings{bb154105,
  AUTHOR    = "Ranftl, R. and Bochkovskiy, A. and Koltun, V.",
  TITLE     = "Vision Transformers for Dense Prediction",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "12159--12168",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150027"
}

@inproceedings{bb154106,
  AUTHOR    = "Chen, M.H. and Peng, H.W. and Fu, J.L. and Ling, H.B.",
  TITLE     = "AutoFormer: Searching Transformers for Visual Recognition",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "12250--12260",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150028"
}

@inproceedings{bb154107,
  AUTHOR    = "Yuan, K. and Guo, S.P. and Liu, Z.W. and Zhou, A. and Yu, F.W. and Wu, W.",
  TITLE     = "Incorporating Convolution Designs into Visual Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "559--568",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150029"
}

@inproceedings{bb154108,
  AUTHOR    = "Chen, Z. and Xie, L.X. and Niu, J.W. and Liu, X.F. and Wei, L.H. and Tian, Q.",
  TITLE     = "Visformer: The Vision-friendly Transformer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "569--578",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150030"
}

% NOTE(review): the macro name NeruArch21 below looks like a misspelling
% (NeurArch21?). It is a macro reference, so rename it only together with its
% definition -- confirm against the string definitions before changing.
@inproceedings{bb154109,
  AUTHOR    = "Yao, Z.L. and Cao, Y. and Lin, Y.T. and Liu, Z. and Zhang, Z. and Hu, H.",
  TITLE     = "Leveraging Batch Normalization for Vision Transformers",
  BOOKTITLE = NeruArch21,
  YEAR      = "2021",
  PAGES     = "413--422",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150031"
}

@inproceedings{bb154110,
  AUTHOR    = "Graham, B. and El Nouby, A. and Touvron, H. and Stock, P. and Joulin, A. and Jegou, H. and Douze, M.",
  TITLE     = "LeViT: a Vision Transformer in ConvNet's Clothing for Faster Inference",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "12239--12249",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150032"
}

@inproceedings{bb154111,
  AUTHOR    = "Horvath, J. and Baireddy, S. and Hao, H.X. and Montserrat, D.M. and Delp, E.J.",
  TITLE     = "Manipulation Detection in Satellite Images Using Vision Transformer",
  BOOKTITLE = WMF21,
  YEAR      = "2021",
  PAGES     = "1032--1041",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150033"
}

@inproceedings{bb154112,
  AUTHOR    = "Horvath, J. and Montserrat, D.M. and Hao, H.X. and Delp, E.J.",
  TITLE     = "Manipulation Detection in Satellite Images Using Deep Belief Networks",
  BOOKTITLE = WMF20,
  YEAR      = "2020",
  PAGES     = "2832--2840",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150034"
}

@inproceedings{bb154113,
  AUTHOR    = "Beal, J. and Wu, H.Y. and Park, D.H. and Zhai, A. and Kislyuk, D.",
  TITLE     = "Billion-Scale Pretraining with Vision Transformers for Multi-Task Visual Representations",
  BOOKTITLE = WACV22,
  YEAR      = "2022",
  PAGES     = "1431--1440",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT150035"
}

@article{bb154114,
  AUTHOR    = "Kim, B. and Kim, J. and Ye, J.C.",
  TITLE     = "Task-Agnostic Vision Transformer for Distributed Learning of Image Processing",
  JOURNAL   = IP,
  VOLUME    = "32",
  YEAR      = "2023",
  PAGES     = "203--218",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150036"
}

@article{bb154115,
  AUTHOR    = "Park, S. and Ye, J.C.",
  TITLE     = "Multi-Task Distributed Learning Using Vision Transformer With Random Patch Permutation",
  JOURNAL   = MedImg,
  VOLUME    = "42",
  YEAR      = "2023",
  NUMBER    = "7",
  MONTH     = "July",
  PAGES     = "2091--2105",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150037"
}

@article{bb154116,
  AUTHOR    = "Kim, B.J. and Choi, H. and Jang, H. and Lee, D.G. and Jeong, W. and Kim, S.W.",
  TITLE     = "Improved robustness of vision transformers via prelayernorm in patch embedding",
  JOURNAL   = PR,
  VOLUME    = "141",
  YEAR      = "2023",
  PAGES     = "109659",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150038"
}

@article{bb154117,
  AUTHOR    = "Kang, J.Y. and Heo, B. and Choe, J.",
  TITLE     = "Improving ViT interpretability with patch-level mask prediction",
  JOURNAL   = PRL,
  VOLUME    = "187",
  YEAR      = "2025",
  PAGES     = "73--79",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150039"
}

@article{bb154118,
  AUTHOR    = "Arya, R.K. and Peddi, R. and Srivastava, R.",
  TITLE     = "Hyperspectral image classification using hybrid convolutional-based cross-patch retentive network",
  JOURNAL   = CVIU,
  VOLUME    = "257",
  YEAR      = "2025",
  PAGES     = "104382",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150040"
}

@inproceedings{bb154119,
  AUTHOR    = "Yu, Q. and Tanaka, M. and Fujiwara, K.",
  TITLE     = "Exploring Vision Transformers for 3D Human Motion-Language Models with Motion Patches",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "937--946",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150041"
}

@inproceedings{bb154120,
  AUTHOR    = "Guo, Y. and Stutz, D. and Schiele, B.",
  TITLE     = "Improving Robustness of Vision Transformers by Reducing Sensitivity to Patch Corruptions",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "4108--4118",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150042"
}

@inproceedings{bb154121,
  AUTHOR    = "Nalmpantis, A. and Panagiotopoulos, A. and Gkountouras, J. and Papakostas, K. and Aziz, W.",
  TITLE     = "Vision DiffMask: Faithful Interpretation of Vision Transformers with Differentiable Patch Masking",
  BOOKTITLE = XAI4CV23,
  YEAR      = "2023",
  PAGES     = "3756--3763",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150043"
}

@inproceedings{bb154122,
  AUTHOR    = "Beyer, L. and Izmailov, P. and Kolesnikov, A. and Caron, M. and Kornblith, S. and Zhai, X.H. and Minderer, M. and Tschannen, M. and Alabdulmohsin, I. and Pavetic, F.",
  TITLE     = "FlexiViT: One Model for All Patch Sizes",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14496--14506",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150044"
}

@inproceedings{bb154123,
  AUTHOR    = "Chang, S.N. and Wang, P. and Lin, M. and Wang, F. and Zhang, D.J.H. and Jin, R. and Shou, M.Z.",
  TITLE     = "Making Vision Transformers Efficient from A Token Sparsification View",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "6195--6205",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150045"
}

@inproceedings{bb154124,
  AUTHOR    = "Phan, L. and Nguyen, H.T.H. and Warrier, H. and Gupta, Y.",
  TITLE     = "Patch Embedding as Local Features: Unifying Deep Local and Global Features via Vision Transformer for Image Retrieval",
  BOOKTITLE = ACCV22,
  YEAR      = "2022",
  PAGES     = "II:204--221",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150046"
}

@inproceedings{bb154125,
  AUTHOR    = "Liu, Y. and Matsoukas, C. and Strand, F. and Azizpour, H. and Smith, K.",
  TITLE     = "PatchDropout: Economizing Vision Transformers Using Patch Dropout",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "3942--3951",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150047"
}

@inproceedings{bb154126,
  AUTHOR    = "Gu, J.D. and Tresp, V. and Qin, Y.",
  TITLE     = "Are Vision Transformers Robust to Patch Perturbations?",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XII:404--421",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150048"
}

@inproceedings{bb154127,
  AUTHOR    = "Li, Z.K. and Ma, L.P. and Chen, M.J. and Xiao, J.R. and Gu, Q.Y.",
  TITLE     = "Patch Similarity Aware Data-Free Quantization for Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XI:154--170",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150049"
}

@inproceedings{bb154128,
  AUTHOR    = "Yun, S. and Lee, H. and Kim, J. and Shin, J.",
  TITLE     = "Patch-level Representation Learning for Self-supervised Vision Transformers",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "8344--8353",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150050"
}

@inproceedings{bb154129,
  AUTHOR    = "Salman, H. and Jain, S. and Wong, E. and Madry, A.",
  TITLE     = "Certified Patch Robustness via Smoothed Vision Transformers",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "15116--15126",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150051"
}

@inproceedings{bb154130,
  AUTHOR    = "Tang, Y.H. and Han, K. and Wang, Y.H. and Xu, C. and Guo, J.Y. and Xu, C. and Tao, D.C.",
  TITLE     = "Patch Slimming for Efficient Vision Transformers",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12155--12164",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150052"
}

@inproceedings{bb154131,
  AUTHOR    = "Chen, Z.Y. and Li, B. and Wu, S. and Xu, J.H. and Ding, S.H. and Zhang, W.Q.",
  TITLE     = "Shape Matters: Deformable Patch Attack",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "IV:529--548",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150053"
}

@inproceedings{bb154132,
  AUTHOR    = "Chen, Z.Y. and Li, B. and Xu, J.H. and Wu, S. and Ding, S.H. and Zhang, W.Q.",
  TITLE     = "Towards Practical Certifiable Patch Defense with Vision Transformer",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "15127--15137",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651patvit5.html#TT150054"
}

@article{bb154133,
  AUTHOR    = "Hu, H.Q. and Lu, X.F. and Zhang, X.P. and Zhang, T.X. and Sun, G.L.",
  TITLE     = "Inheritance Attention Matrix-Based Universal Adversarial Perturbations on Vision Transformers",
  JOURNAL   = SPLetters,
  VOLUME    = "28",
  YEAR      = "2021",
  PAGES     = "1923--1927",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150055"
}

@article{bb154134,
  AUTHOR    = "Xue, Z.X. and Tan, X. and Yu, X. and Liu, B. and Yu, A.Z. and Zhang, P.Q.",
  TITLE     = "Deep Hierarchical Vision Transformer for Hyperspectral and LiDAR Data Classification",
  JOURNAL   = IP,
  VOLUME    = "31",
  YEAR      = "2022",
  PAGES     = "3095--3110",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150056"
}

@article{bb154135,
  AUTHOR    = "Heo, J. and Wang, Y. and Park, J.",
  TITLE     = "Occlusion-aware spatial attention transformer for occluded object recognition",
  JOURNAL   = PRL,
  VOLUME    = "159",
  YEAR      = "2022",
  PAGES     = "70--76",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150057"
}

@article{bb154136,
  AUTHOR    = "Yu, X.H. and Wang, J. and Zhao, Y. and Gao, Y.S.",
  TITLE     = "Mix-ViT: Mixing attentive vision transformer for ultra-fine-grained visual categorization",
  JOURNAL   = PR,
  VOLUME    = "135",
  YEAR      = "2023",
  PAGES     = "109131",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150058"
}

@article{bb154137,
  AUTHOR    = "Wu, G. and Zheng, W.S. and Lu, Y.T. and Tian, Q.",
  TITLE     = "PSLT: A Light-Weight Vision Transformer With Ladder Self-Attention and Progressive Shift",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "9",
  MONTH     = "September",
  PAGES     = "11120--11135",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150059"
}

@article{bb154138,
  AUTHOR    = "Li, K.C. and Wang, Y. and Zhang, J.H. and Gao, P. and Song, G.L. and Liu, Y. and Li, H.S. and Qiao, Y.",
  TITLE     = "UniFormer: Unifying Convolution and Self-Attention for Visual Recognition",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "10",
  MONTH     = "October",
  PAGES     = "12581--12600",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150060"
}

@article{bb154139,
  AUTHOR    = "Li, H.L. and Xue, M.Q. and Song, J. and Zhang, H.F. and Huang, W.Q. and Liang, L.Y. and Song, M.L.",
  TITLE     = "Constituent Attention for Vision Transformers",
  JOURNAL   = CVIU,
  VOLUME    = "237",
  YEAR      = "2023",
  PAGES     = "103838",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150061"
}

@article{bb154140,
  AUTHOR    = "Qin, R. and Wang, C.Z. and Wu, Y.M. and Du, H. and Lv, M.Y.",
  TITLE     = "A U-Shaped Convolution-Aided Transformer with Double Attention for Hyperspectral Image Classification",
  JOURNAL   = RS,
  VOLUME    = "16",
  YEAR      = "2024",
  NUMBER    = "2",
  PAGES     = "288",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150062"
}

@article{bb154141,
  AUTHOR    = "Wang, W.X. and Chen, W. and Qiu, Q. and Chen, L. and Wu, B.X. and Lin, B.B. and He, X.F. and Liu, W.",
  TITLE     = "CrossFormer++: A Versatile Vision Transformer Hinging on Cross-Scale Attention",
  JOURNAL   = PAMI,
  VOLUME    = "46",
  YEAR      = "2024",
  NUMBER    = "5",
  MONTH     = "May",
  PAGES     = "3123--3136",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150063"
}

@article{bb154142,
  AUTHOR    = "Zhang, Q.M. and Zhang, J. and Xu, Y.F. and Tao, D.C.",
  TITLE     = "Vision Transformer With Quadrangle Attention",
  JOURNAL   = PAMI,
  VOLUME    = "46",
  YEAR      = "2024",
  NUMBER    = "5",
  MONTH     = "May",
  PAGES     = "3608--3624",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150064"
}

@article{bb154143,
  AUTHOR    = "Huang, L. and Bai, X.Y. and Zeng, J. and Yu, M.Q. and Pang, W. and Wang, K.P.",
  TITLE     = "FAM: Improving columnar vision transformer with feature attention mechanism",
  JOURNAL   = CVIU,
  VOLUME    = "242",
  YEAR      = "2024",
  PAGES     = "103981",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150065"
}

@article{bb154144,
  AUTHOR    = "Li, M.X. and Yu, W. and Liu, Q.L. and Li, Z.L. and Li, R. and Zhong, B. and Zhang, S.P.",
  TITLE     = "Hybrid Transformers With Attention-Guided Spatial Embeddings for Makeup Transfer and Removal",
  JOURNAL   = CirSysVideo,
  VOLUME    = "34",
  YEAR      = "2024",
  NUMBER    = "4",
  MONTH     = "April",
  PAGES     = "2876--2890",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150066"
}

@article{bb154145,
  AUTHOR    = "Nie, X.S. and Jin, H.Y. and Yan, Y.F. and Chen, X. and Zhu, Z.H. and Qi, D.L.",
  TITLE     = "ScopeViT: Scale-Aware Vision Transformer",
  JOURNAL   = PR,
  VOLUME    = "153",
  YEAR      = "2024",
  PAGES     = "110470",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150067"
}

@article{bb154146,
  AUTHOR    = "Hanyu, T. and Yamazaki, K. and Tran, M. and McCann, R.A. and Liao, H.T. and Rainwater, C. and Adkins, M. and Cothren, J. and Le, N.",
  TITLE     = "AerialFormer: Multi-Resolution Transformer for Aerial Image Segmentation",
  JOURNAL   = RS,
  VOLUME    = "16",
  YEAR      = "2024",
  NUMBER    = "16",
  PAGES     = "2930",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150068"
}

@article{bb154147,
  AUTHOR    = "Wang, D.Z. and Wei, X.Y. and Chen, C.Y.",
  TITLE     = "CAST: An innovative framework for Cross-dimensional Attention Structure in Transformers",
  JOURNAL   = PR,
  VOLUME    = "159",
  YEAR      = "2025",
  PAGES     = "111153",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150069"
}

@article{bb154148,
  AUTHOR    = "van Engelenhoven, A. and Strisciuglio, N. and Talavera, E.",
  TITLE     = "CAST: Clustering self-Attention using Surrogate Tokens for efficient transformers",
  JOURNAL   = PRL,
  VOLUME    = "186",
  YEAR      = "2024",
  PAGES     = "30--36",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150070"
}

@article{bb154149,
  AUTHOR    = "Zheng, G.Y. and Zang, B. and Yang, P.H. and Zhang, W.B. and Li, B.",
  TITLE     = "FE-SKViT: A Feature-Enhanced ViT Model with Skip Attention for Automatic Modulation Recognition",
  JOURNAL   = RS,
  VOLUME    = "16",
  YEAR      = "2024",
  NUMBER    = "22",
  PAGES     = "4204",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150071"
}

@article{bb154150,
  AUTHOR    = "Lu, J.C. and Zhang, J.G. and Zhu, X.T. and Feng, J.F. and Xiang, T. and Zhang, L.",
  TITLE     = "Softmax-Free Linear Transformers",
  JOURNAL   = IJCV,
  VOLUME    = "132",
  YEAR      = "2024",
  NUMBER    = "8",
  MONTH     = "August",
  PAGES     = "3355--3374",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150072"
}

@article{bb154151,
  AUTHOR    = "Li, C.H. and Zhang, C.N.",
  TITLE     = "Toward a deeper understanding: RetNet viewed through Convolution",
  JOURNAL   = PR,
  VOLUME    = "155",
  YEAR      = "2024",
  PAGES     = "110625",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150073"
}

@article{bb154152,
  AUTHOR    = "Liao, H.X. and Li, X.S. and Qin, X. and Wang, W.J. and He, G.D. and Huang, H.J. and Guo, X. and Chun, X. and Zhang, J.Y. and Fu, Y.Q. and Qin, Z.Y.",
  TITLE     = "EPSViTs: A hybrid architecture for image classification based on parameter-shared multi-head self-attention",
  JOURNAL   = IVC,
  VOLUME    = "149",
  YEAR      = "2024",
  PAGES     = "105130",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150074"
}

@article{bb154153,
  AUTHOR    = "Sa, J.W. and Ryu, J. and Kim, H.",
  TITLE     = "ECTFormer: An efficient Conv-Transformer model design for image recognition",
  JOURNAL   = PR,
  VOLUME    = "159",
  YEAR      = "2025",
  PAGES     = "111092",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150075"
}

@article{bb154154,
  AUTHOR    = "Li, J.F. and Feng, M.L. and Xia, C.Y.",
  TITLE     = "DBCvT: Double Branch Convolutional Transformer for Medical Image Classification",
  JOURNAL   = PRL,
  VOLUME    = "186",
  YEAR      = "2024",
  PAGES     = "250--257",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150076"
}

@article{bb154155,
  AUTHOR    = "Liao, Y. and Gao, Y.S. and Zhang, W.C.",
  TITLE     = "Dynamic accumulated attention map for interpreting evolution of decision-making in vision transformer",
  JOURNAL   = PR,
  VOLUME    = "165",
  YEAR      = "2025",
  PAGES     = "111607",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150077"
}

@article{bb154156,
  AUTHOR    = "Shi, Y.L. and Sun, M.W. and Wang, Y.S. and Ma, J.H. and Chen, Z.Q.",
  TITLE     = "EViT: An Eagle Vision Transformer With Bi-Fovea Self-Attention",
  JOURNAL   = Cyber,
  VOLUME    = "55",
  YEAR      = "2025",
  NUMBER    = "3",
  MONTH     = "March",
  PAGES     = "1288--1300",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150078"
}

@article{bb154157,
  AUTHOR    = "Long, W. and Chen, Z.Y. and Li, W.T. and Zhang, Y.J. and Yao, H. and Peng, J.X. and Cui, Z.W.",
  TITLE     = "Leveraging negative correlation for Full-Range Self-Attention in Vision Transformers",
  JOURNAL   = PR,
  VOLUME    = "169",
  YEAR      = "2026",
  PAGES     = "111899",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150079"
}

@inproceedings{bb154158,
  AUTHOR    = "Zhu, J.C. and Chen, X.L. and He, K. and LeCun, Y. and Liu, Z.",
  TITLE     = "Transformers without Normalization",
  BOOKTITLE = CVPR25,
  YEAR      = "2025",
  PAGES     = "14901--14911",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150080"
}

@inproceedings{bb154159,
  AUTHOR    = "Peng, Z.L. and Huang, Y. and Xu, Z.Q. and Tang, F.L. and Hu, M. and Yang, X.K. and Shen, W.",
  TITLE     = "Star with Bilinear Mapping",
  BOOKTITLE = CVPR25,
  YEAR      = "2025",
  PAGES     = "25292--25302",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150081"
}

@inproceedings{bb154160,
  AUTHOR    = "Nottebaum, M. and Dunnhofer, M. and Micheloni, C.",
  TITLE     = "LowFormer: Hardware Efficient Design for Convolutional Transformer Backbones",
  BOOKTITLE = WACV25,
  YEAR      = "2025",
  PAGES     = "7008--7018",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150082"
}

@inproceedings{bb154161,
  AUTHOR    = "Chowdhury, A.R. and Diddigi, R.B. and Prabuchandran, K.J. and Tripathi, A.M.",
  TITLE     = "Bandit-based Attention Mechanism in Vision Transformers",
  BOOKTITLE = WACV25,
  YEAR      = "2025",
  PAGES     = "9597--9606",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150083"
}

@inproceedings{bb154162,
  AUTHOR    = "Alam, Q.M. and Tarchoun, B. and Alouani, I. and Abu Ghazaleh, N.",
  TITLE     = "Adversarial Attention Deficit: Fooling Deformable Vision Transformers with Collaborative Adversarial Patches",
  BOOKTITLE = WACV25,
  YEAR      = "2025",
  PAGES     = "7123--7132",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150084"
}

@inproceedings{bb154163,
  AUTHOR    = "Ren, S. and Zhou, D. and He, S.F. and Feng, J.S. and Wang, X.C.",
  TITLE     = "Shunted Self-Attention via Multi-Scale Token Aggregation",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "10843--10852",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150085"
}

@inproceedings{bb154164,
  AUTHOR    = "Qiang, Y. and Li, C.Y. and Khanduri, P. and Zhu, D.X.",
  TITLE     = "Fairness-aware Vision Transformer via Debiased Self-attention",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XXXVII: 358--376",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150086"
}

@inproceedings{bb154165,
  AUTHOR    = "Gong, H.H. and Dong, M.J. and Ma, S.Q. and Camtepe, S. and Nepal, S. and Xu, C.",
  TITLE     = "Random Entangled Tokens for Adversarially Robust Vision Transformer",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "24554--24563",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150087"
}

@inproceedings{bb154166,
  AUTHOR    = "Lee, S. and Choi, J. and Kim, H.W.J.",
  TITLE     = "Multi-Criteria Token Fusion with One-Step-Ahead Attention for Efficient Vision Transformers",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "15741--15750",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150088"
}

@inproceedings{bb154167,
  AUTHOR    = "Zhang, S.X. and Liu, H.P. and Lin, S. and He, K.",
  TITLE     = "You Only Need Less Attention at Each Stage in Vision Transformers",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "6057--6066",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150089"
}

@inproceedings{bb154168,
  AUTHOR    = "Li, L. and Wei, Z. and Dong, P. and Luo, W.H. and Xue, W. and Liu, Q.F. and Guo, Y.",
  TITLE     = "Attnzero: Efficient Attention Discovery for Vision Transformers",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "V: 20--37",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150090"
}

@inproceedings{bb154169,
  AUTHOR    = "Bao Long, N.H. and Zhang, C.Y. and Shi, Y.Z. and Hirakawa, T. and Yamashita, T. and Matsui, T. and Fujiyoshi, H.",
  TITLE     = "Debiformer: Vision Transformer with Deformable Agent Bi-level Routing Attention",
  BOOKTITLE = ACCV24,
  YEAR      = "2024",
  PAGES     = "X: 445--462",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150091"
}

@inproceedings{bb154170,
  AUTHOR    = "Yang, X. and Yuan, L.Z. and Wilber, K. and Sharma, A. and Gu, X.Y. and Qiao, S.Y. and Debats, S. and Wang, H.S. and Adam, H. and Sirotenko, M. and Chen, L.C.",
  TITLE     = "PolyMaX: General Dense Prediction with Mask Transformer",
  BOOKTITLE = WACV24,
  YEAR      = "2024",
  PAGES     = "1039--1050",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150092"
}

@inproceedings{bb154171,
  AUTHOR    = "Nie, X.S. and Chen, X. and Jin, H.Y. and Zhu, Z.H. and Yan, Y.F. and Qi, D.L.",
  TITLE     = "Triplet Attention Transformer for Spatiotemporal Predictive Learning",
  BOOKTITLE = WACV24,
  YEAR      = "2024",
  PAGES     = "7021--7030",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150093"
}

@inproceedings{bb154172,
  AUTHOR    = "Cai, H. and Li, J. and Hu, M. and Gan, C. and Han, S.",
  TITLE     = "EfficientViT: Lightweight Multi-Scale Attention for High-Resolution Dense Prediction",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "17256--17267",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150094"
}

@inproceedings{bb154173,
  AUTHOR    = "Ryu, J. and Han, D.Y. and Lim, J.W.",
  TITLE     = "Gramian Attention Heads are Strong yet Efficient Vision Learners",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5818--5828",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150095"
}

@inproceedings{bb154174,
  AUTHOR    = "Xu, R.H. and Zhang, H. and Hu, W.Z. and Zhang, S.L. and Wang, X.Y.",
  TITLE     = "ParCNetV2: Oversized Kernel with Enhanced Attention",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5729--5739",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150096"
}

@inproceedings{bb154175,
  AUTHOR    = "Zhao, B.Y. and Yu, Z. and Lan, S.Y. and Cheng, Y.T. and Anandkumar, A. and Lao, Y.J. and Alvarez, J.M.",
  TITLE     = "Fully Attentional Networks with Self-emerging Token Labeling",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5562--5572",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150097"
}

@inproceedings{bb154176,
  AUTHOR    = "Guo, Y. and Stutz, D. and Schiele, B.",
  TITLE     = "Robustifying Token Attention for Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "17511--17522",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150098"
}

@inproceedings{bb154177,
  AUTHOR    = "Zhao, Y.P. and Tang, H.D. and Jiang, Y.Y. and A, Y. and Wu, Q. and Wang, J.",
  TITLE     = "Parameter-Efficient Vision Transformer with Linear Attention",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "1275--1279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150099"
}

@inproceedings{bb154178,
  AUTHOR    = "Shi, L. and Huang, H.D. and Song, B. and Tan, M. and Zhao, W.Z. and Xia, T. and Ren, P.J.",
  TITLE     = "TAQ: Top-K Attention-Aware Quantization for Vision Transformers",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "1750--1754",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150100"
}

@inproceedings{bb154179,
  AUTHOR    = "Baili, N. and Frigui, H.",
  TITLE     = "ADA-VIT: Attention-Guided Data Augmentation for Vision Transformers",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "385--389",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150101"
}

@inproceedings{bb154180,
  AUTHOR    = "Ding, M.Y. and Shen, Y.K. and Fan, L.J. and Chen, Z.F. and Chen, Z. and Luo, P. and Tenenbaum, J. and Gan, C.",
  TITLE     = "Visual Dependency Transformers: Dependency Tree Emerges from Reversed Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14528--14539",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150102"
}

@inproceedings{bb154181,
  AUTHOR    = "Song, J.C. and Mou, C. and Wang, S.Q. and Ma, S.W. and Zhang, J.",
  TITLE     = "Optimization-Inspired Cross-Attention Transformer for Compressive Sensing",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "6174--6184",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150103"
}

@inproceedings{bb154182,
  AUTHOR    = "Hassani, A. and Walton, S. and Li, J.C. and Li, S. and Shi, H.",
  TITLE     = "Neighborhood Attention Transformer",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "6185--6194",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150104"
}

@inproceedings{bb154183,
  AUTHOR    = "Liu, Z.J. and Yang, X.Y. and Tang, H.T. and Yang, S. and Han, S.",
  TITLE     = "FlatFormer: Flattened Window Attention for Efficient Point Cloud Transformer",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "1200--1211",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150105"
}

@inproceedings{bb154184,
  AUTHOR    = "Pan, X. and Ye, T.Z. and Xia, Z.F. and Song, S. and Huang, G.",
  TITLE     = "Slide-Transformer: Hierarchical Vision Transformer with Local Self-Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2082--2091",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150106"
}

@inproceedings{bb154185,
  AUTHOR    = "Zhu, L. and Wang, X.J. and Ke, Z.H. and Zhang, W. and Lau, R.",
  TITLE     = "BiFormer: Vision Transformer with Bi-Level Routing Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "10323--10333",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150107"
}

@inproceedings{bb154186,
  AUTHOR    = "Long, S. and Zhao, Z. and Pi, J. and Wang, S.S. and Wang, J.D.",
  TITLE     = "Beyond Attentive Tokens: Incorporating Token Importance and Diversity for Efficient Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "10334--10343",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150108"
}

@inproceedings{bb154187,
  AUTHOR    = "Liu, X.Y. and Peng, H. and Zheng, N.X. and Yang, Y.Q. and Hu, H. and Yuan, Y.X.",
  TITLE     = "EfficientViT: Memory Efficient Vision Transformer with Cascaded Group Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14420--14430",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150109"
}

@inproceedings{bb154188,
  AUTHOR    = "You, H.R. and Xiong, Y. and Dai, X.L. and Wu, B. and Zhang, P.Z. and Fan, H.Q. and Vajda, P. and Lin, Y.Y.C.",
  TITLE     = "Castling-ViT: Compressing Self-Attention via Switching Towards Linear-Angular Attention at Vision Transformer Inference",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14431--14442",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150110"
}

@inproceedings{bb154189,
  AUTHOR    = "Grainger, R. and Paniagua, T. and Song, X. and Cuntoor, N. and Lee, M.W. and Wu, T.F.",
  TITLE     = "PaCa-ViT: Learning Patch-to-Cluster Attention in Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "18568--18578",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150111"
}

@inproceedings{bb154190,
  AUTHOR    = "Wei, C. and Duke, B. and Jiang, R. and Aarabi, P. and Taylor, G.W. and Shkurti, F.",
  TITLE     = "Sparsifiner: Learning Sparse Instance-Dependent Attention for Efficient Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "22680--22689",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150112"
}

@inproceedings{bb154191,
  AUTHOR    = "Bhattacharyya, M. and Chattopadhyay, S. and Nag, S.",
  TITLE     = "DeCAtt: Efficient Vision Transformers with Decorrelated Attention Heads",
  BOOKTITLE = ECV23,
  YEAR      = "2023",
  PAGES     = "4695--4699",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150113"
}

@inproceedings{bb154192,
  AUTHOR    = "Zhang, Y. and Chen, D. and Kundu, S. and Li, C.H. and Beerel, P.A.",
  TITLE     = "SAL-ViT: Towards Latency Efficient Private Inference on ViT using Selective Attention Search with a Learnable Softmax Approximation",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5093--5102",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150114"
}

@inproceedings{bb154193,
  AUTHOR    = "Yeganeh, Y. and Farshad, A. and Weinberger, P. and Ahmadi, S.A. and Adeli, E. and Navab, N.",
  TITLE     = "Transformers Pay Attention to Convolutions Leveraging Emerging Properties of ViTs by Dual Attention-Image Network",
  BOOKTITLE = CVAMD23,
  YEAR      = "2023",
  PAGES     = "2296--2307",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150115"
}

@inproceedings{bb154194,
  AUTHOR    = "Zheng, J.H. and Yang, L.Q. and Li, Y.Y. and Yang, K. and Wang, Z.Y. and Zhou, J.",
  TITLE     = "Lightweight Vision Transformer with Spatial and Channel Enhanced Self-Attention",
  BOOKTITLE = REDLCV23,
  YEAR      = "2023",
  PAGES     = "1484--1488",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150116"
}

% NOTE(review): the first two author names below ("Hyeon Woo, N." and "Yu Ji, K.")
% may have surname and given name swapped -- verify against the paper before editing.
@inproceedings{bb154195,
  AUTHOR    = "Hyeon Woo, N. and Yu Ji, K. and Heo, B. and Han, D.Y. and Oh, S.J. and Oh, T.H.",
  TITLE     = "Scratching Visual Transformer's Back with Uniform Attention",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5784--5795",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150117"
}

@inproceedings{bb154196,
  AUTHOR    = "Zhang, H.K. and Hu, W.Z. and Wang, X.Y.",
  TITLE     = "Fcaformer: Forward Cross Attention in Hybrid Vision Transformer",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "6037--6046",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150118"
}

@inproceedings{bb154197,
  AUTHOR    = "Zeng, W.X. and Li, M. and Xiong, W.J. and Tong, T. and Lu, W.J. and Tan, J. and Wang, R.S. and Huang, R.",
  TITLE     = "MPCViT: Searching for Accurate and Efficient MPC-Friendly Vision Transformer with Heterogeneous Attention",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5029--5040",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150119"
}

@inproceedings{bb154198,
  AUTHOR    = "Psomas, B. and Kakogeorgiou, I. and Karantzalos, K. and Avrithis, Y.",
  TITLE     = "Keep It SimPool: Who Said Supervised Transformers Suffer from Attention Deficit?",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5327--5337",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150120"
}

@inproceedings{bb154199,
  AUTHOR    = "Han, D.C. and Pan, X. and Han, Y.Z. and Song, S. and Huang, G.",
  TITLE     = "FLatten Transformer: Vision Transformer using Focused Linear Attention",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5938--5948",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT150121"
}