% Vision-transformer bibliography entries bb146000 through bb146099.
% One field per line; page ranges use the BibTeX en-dash form (--) and MONTH
% uses the standard three-letter month macros.
% Unquoted BOOKTITLE / JOURNAL values (CVPR22, ICCV21, PAMI, PR, ...) are string
% macros; their definitions are not part of this section and must be loaded
% before these entries are processed.

% ---- Entries whose BIBSOURCE is pattern651vit2.html ----

@inproceedings{bb146000,
  AUTHOR    = "Ermolov, A. and Mirvakhabova, L. and Khrulkov, V. and Sebe, N. and Oseledets, I.",
  TITLE     = "Hyperbolic Vision Transformers: Combining Improvements in Metric Learning",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "7399--7409",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141932"
}

@inproceedings{bb146001,
  AUTHOR    = "Zhang, C.Z. and Zhang, M.Y. and Zhang, S.H. and Jin, D.S. and Zhou, Q. and Cai, Z.A. and Zhao, H. and Liu, X.L. and Liu, Z.W.",
  TITLE     = "Delving Deep into the Generalization of Vision Transformers under Distribution Shifts",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "7267--7276",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141933"
}

@inproceedings{bb146002,
  AUTHOR    = "Hou, Z. and Yu, B. and Tao, D.C.",
  TITLE     = "BatchFormer: Learning to Explore Sample Relationships for Robust Representation Learning",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "7246--7256",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141934"
}

@inproceedings{bb146003,
  AUTHOR    = "Zamir, S.W. and Arora, A. and Khan, S. and Hayat, M. and Khan, F.S. and Yang, M.H.",
  TITLE     = "Restormer: Efficient Transformer for High-Resolution Image Restoration",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "5718--5729",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141935"
}

@inproceedings{bb146004,
  AUTHOR    = "Lin, K. and Wang, L.J. and Liu, Z.C.",
  TITLE     = "Mesh Graphormer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "12919--12928",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141936"
}

@inproceedings{bb146005,
  AUTHOR    = "Casey, E. and Perez, V. and Li, Z.R.",
  TITLE     = "The Animation Transformer: Visual Correspondence via Segment Matching",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "11303--11312",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141937"
}

@inproceedings{bb146006,
  AUTHOR    = "Reizenstein, J. and Shapovalov, R. and Henzler, P. and Sbordone, L. and Labatut, P. and Novotny, D.",
  TITLE     = "Common Objects in 3D: Large-Scale Learning and Evaluation of Real-life 3D Category Reconstruction",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10881--10891",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141938"
}

@inproceedings{bb146007,
  AUTHOR    = "Feng, W.X. and Wang, Y.J. and Ma, L.H. and Yuan, Y. and Zhang, C.",
  TITLE     = "Temporal Knowledge Consistency for Unsupervised Visual Representation Learning",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10150--10160",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141939"
}

@inproceedings{bb146008,
  AUTHOR    = "Wu, H.P. and Xiao, B. and Codella, N. and Liu, M.C. and Dai, X.Y. and Yuan, L. and Zhang, L.",
  TITLE     = "CvT: Introducing Convolutions to Vision Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "22--31",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141940"
}

@inproceedings{bb146009,
  AUTHOR    = "Touvron, H. and Cord, M. and Sablayrolles, A. and Synnaeve, G. and Jegou, H.",
  TITLE     = "Going deeper with Image Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "32--42",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141941"
}

@inproceedings{bb146010,
  AUTHOR    = "Zhao, J.W. and Yan, K. and Zhao, Y.F. and Guo, X.W. and Huang, F.Y. and Li, J.",
  TITLE     = "Transformer-based Dual Relation Graph for Multi-label Image Recognition",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "163--172",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141942"
}

@inproceedings{bb146011,
  AUTHOR    = "Pan, Z.Z. and Zhuang, B. and Liu, J. and He, H.Y. and Cai, J.F.",
  TITLE     = "Scalable Vision Transformers with Hierarchical Pooling",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "367--376",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141943"
}

@inproceedings{bb146012,
  AUTHOR    = "Yuan, L. and Chen, Y.P. and Wang, T. and Yu, W.H. and Shi, Y.J. and Jiang, Z.H. and Tay, F.E.H. and Feng, J.S. and Yan, S.C.",
  TITLE     = "Tokens-to-Token ViT: Training Vision Transformers from Scratch on ImageNet",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "538--547",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141944"
}

@inproceedings{bb146013,
  AUTHOR    = "Wu, B. and Xu, C.F. and Dai, X.L. and Wan, A. and Zhang, P.Z. and Yan, Z.C. and Tomizuka, M. and Gonzalez, J. and Keutzer, K. and Vajda, P.",
  TITLE     = "Visual Transformers: Where Do Transformers Really Belong in Vision Models?",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "579--589",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141945"
}

@inproceedings{bb146014,
  AUTHOR    = "Hu, R.H. and Singh, A.",
  TITLE     = "UniT: Multimodal Multitask Learning with a Unified Transformer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1419--1429",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141946"
}

@inproceedings{bb146015,
  AUTHOR    = "Qiu, Y. and Yamamoto, S. and Nakashima, K. and Suzuki, R. and Iwata, K. and Kataoka, H. and Satoh, Y.",
  TITLE     = "Describing and Localizing Multiple Changes with Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1951--1960",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141947"
}

@inproceedings{bb146016,
  AUTHOR    = "Song, M. and Choi, J. and Han, B.H.",
  TITLE     = "Variable-Rate Deep Image Compression through Spatially-Adaptive Feature Transform",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2360--2369",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141948"
}

% NOTE(review): the first surname below ("Shenga") may be a scraping artifact
% (e.g. a fused affiliation marker); confirm against the published paper.
@inproceedings{bb146017,
  AUTHOR    = "Shenga, H. and Cai, S. and Liu, Y. and Deng, B. and Huang, J.Q. and Hua, X.S. and Zhao, M.J.",
  TITLE     = "Improving 3D Object Detection with Channel-wise Transformer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2723--2732",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141949"
}

@inproceedings{bb146018,
  AUTHOR    = "Zhang, P.C. and Dai, X. and Yang, J.W. and Xiao, B. and Yuan, L. and Zhang, L. and Gao, J.F.",
  TITLE     = "Multi-Scale Vision Longformer: A New Vision Transformer for High-Resolution Image Encoding",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2978--2988",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141950"
}

@inproceedings{bb146019,
  AUTHOR    = "Dong, Q. and Tu, Z.W. and Liao, H.F. and Zhang, Y.T. and Mahadevan, V. and Soatto, S.",
  TITLE     = "Visual Relationship Detection Using Part-and-Sum Transformers with Composite Queries",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "3530--3539",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141951"
}

@inproceedings{bb146020,
  AUTHOR    = "Fan, H.Q. and Xiong, B. and Mangalam, K. and Li, Y. and Yan, Z.C. and Malik, J. and Feichtenhofer, C.",
  TITLE     = "Multiscale Vision Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "6804--6815",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141952"
}

@inproceedings{bb146021,
  AUTHOR    = "Mahmood, K. and Mahmood, R. and van Dijk, M.",
  TITLE     = "On the Robustness of Vision Transformers to Adversarial Examples",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "7818--7827",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141953"
}

@inproceedings{bb146022,
  AUTHOR    = "Chen, X.L. and Xie, S. and He, K.",
  TITLE     = "An Empirical Study of Training Self-Supervised Vision Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "9620--9629",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141954"
}

@inproceedings{bb146023,
  AUTHOR    = "Yuan, Y. and Weng, X. and Ou, Y. and Kitani, K.",
  TITLE     = "AgentFormer: Agent-Aware Transformers for Socio-Temporal Multi-Agent Forecasting",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "9793--9803",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141955"
}

@inproceedings{bb146024,
  AUTHOR    = "Wu, K. and Peng, H.W. and Chen, M.H. and Fu, J.L. and Chao, H.Y.",
  TITLE     = "Rethinking and Improving Relative Position Encoding for Vision Transformer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10013--10021",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141956"
}

@inproceedings{bb146025,
  AUTHOR    = "Bhojanapalli, S. and Chakrabarti, A. and Glasner, D. and Li, D. and Unterthiner, T. and Veit, A.",
  TITLE     = "Understanding Robustness of Transformers for Image Classification",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10211--10221",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141957"
}

@inproceedings{bb146026,
  AUTHOR    = "Yan, B. and Peng, H. and Fu, J.L. and Wang, D. and Lu, H.C.",
  TITLE     = "Learning Spatio-Temporal Transformer for Visual Tracking",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10428--10437",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141958"
}

@inproceedings{bb146027,
  AUTHOR    = "Heo, B. and Yun, S. and Han, D.Y. and Chun, S. and Choe, J. and Oh, S.J.",
  TITLE     = "Rethinking Spatial Dimensions of Vision Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "11916--11925",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141959"
}

@inproceedings{bb146028,
  AUTHOR    = "Voskou, A. and Panousis, K.P. and Kosmopoulos, D. and Metaxas, D.N. and Chatzis, S.",
  TITLE     = "Stochastic Transformer Networks with Linear Competing Units: Application to end-to-end SL Translation",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "11926--11935",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141960"
}

@inproceedings{bb146029,
  AUTHOR    = "Ranftl, R. and Bochkovskiy, A. and Koltun, V.",
  TITLE     = "Vision Transformers for Dense Prediction",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "12159--12168",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141961"
}

@inproceedings{bb146030,
  AUTHOR    = "Chen, M.H. and Peng, H.W. and Fu, J.L. and Ling, H.B.",
  TITLE     = "AutoFormer: Searching Transformers for Visual Recognition",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "12250--12260",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141962"
}

@inproceedings{bb146031,
  AUTHOR    = "Yuan, K. and Guo, S.P. and Liu, Z.W. and Zhou, A. and Yu, F.W. and Wu, W.",
  TITLE     = "Incorporating Convolution Designs into Visual Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "559--568",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141963"
}

@inproceedings{bb146032,
  AUTHOR    = "Chen, Z. and Xie, L.X. and Niu, J.W. and Liu, X.F. and Wei, L. and Tian, Q.",
  TITLE     = "Visformer: The Vision-friendly Transformer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "569--578",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141964"
}

@inproceedings{bb146033,
  AUTHOR    = "Yao, Z.L. and Cao, Y. and Lin, Y.T. and Liu, Z. and Zhang, Z. and Hu, H.",
  TITLE     = "Leveraging Batch Normalization for Vision Transformers",
  BOOKTITLE = NeruArch21,
  YEAR      = "2021",
  PAGES     = "413--422",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141965"
}

@inproceedings{bb146034,
  AUTHOR    = "Graham, B. and El Nouby, A. and Touvron, H. and Stock, P. and Joulin, A. and Jegou, H. and Douze, M.",
  TITLE     = "LeViT: a Vision Transformer in ConvNet's Clothing for Faster Inference",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "12239--12249",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141966"
}

@inproceedings{bb146035,
  AUTHOR    = "Horvath, J. and Baireddy, S. and Hao, H.X. and Montserrat, D.M. and Delp, E.J.",
  TITLE     = "Manipulation Detection in Satellite Images Using Vision Transformer",
  BOOKTITLE = WMF21,
  YEAR      = "2021",
  PAGES     = "1032--1041",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141967"
}

@inproceedings{bb146036,
  AUTHOR    = "Horvath, J. and Montserrat, D.M. and Hao, H.X. and Delp, E.J.",
  TITLE     = "Manipulation Detection in Satellite Images Using Deep Belief Networks",
  BOOKTITLE = WMF20,
  YEAR      = "2020",
  PAGES     = "2832--2840",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141968"
}

@inproceedings{bb146037,
  AUTHOR    = "Beal, J. and Wu, H.Y. and Park, D.H. and Zhai, A. and Kislyuk, D.",
  TITLE     = "Billion-Scale Pretraining with Vision Transformers for Multi-Task Visual Representations",
  BOOKTITLE = WACV22,
  YEAR      = "2022",
  PAGES     = "1431--1440",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT141969"
}

% ---- Entries whose BIBSOURCE is pattern651atvit4.html ----

@article{bb146038,
  AUTHOR    = "Hu, H.Q. and Lu, X.F. and Zhang, X.P. and Zhang, T.X. and Sun, G.L.",
  TITLE     = "Inheritance Attention Matrix-Based Universal Adversarial Perturbations on Vision Transformers",
  JOURNAL   = SPLetters,
  VOLUME    = "28",
  YEAR      = "2021",
  PAGES     = "1923--1927",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141970"
}

@article{bb146039,
  AUTHOR    = "Xue, Z.X. and Tan, X. and Yu, X. and Liu, B. and Yu, A. and Zhang, P.Q.",
  TITLE     = "Deep Hierarchical Vision Transformer for Hyperspectral and LiDAR Data Classification",
  JOURNAL   = IP,
  VOLUME    = "31",
  YEAR      = "2022",
  PAGES     = "3095--3110",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141971"
}

@article{bb146040,
  AUTHOR    = "Heo, J. and Wang, Y. and Park, J.",
  TITLE     = "Occlusion-aware spatial attention transformer for occluded object recognition",
  JOURNAL   = PRL,
  VOLUME    = "159",
  YEAR      = "2022",
  PAGES     = "70--76",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141972"
}

@article{bb146041,
  AUTHOR    = "Yu, X.H. and Wang, J. and Zhao, Y. and Gao, Y.S.",
  TITLE     = "Mix-ViT: Mixing attentive vision transformer for ultra-fine-grained visual categorization",
  JOURNAL   = PR,
  VOLUME    = "135",
  YEAR      = "2023",
  PAGES     = "109131",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141973"
}

@article{bb146042,
  AUTHOR    = "Wu, G. and Zheng, W.S. and Lu, Y.T. and Tian, Q.",
  TITLE     = "PSLT: A Light-Weight Vision Transformer With Ladder Self-Attention and Progressive Shift",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "9",
  MONTH     = sep,
  PAGES     = "11120--11135",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141974"
}

@article{bb146043,
  AUTHOR    = "Li, K.C. and Wang, Y. and Zhang, J.H. and Gao, P. and Song, G. and Liu, Y. and Li, H.S. and Qiao, Y.",
  TITLE     = "UniFormer: Unifying Convolution and Self-Attention for Visual Recognition",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "10",
  MONTH     = oct,
  PAGES     = "12581--12600",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141975"
}

@article{bb146044,
  AUTHOR    = "Li, H.L. and Xue, M.Q. and Song, J. and Zhang, H.F. and Huang, W.Q. and Liang, L. and Song, M.L.",
  TITLE     = "Constituent Attention for Vision Transformers",
  JOURNAL   = CVIU,
  VOLUME    = "237",
  YEAR      = "2023",
  PAGES     = "103838",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141976"
}

@article{bb146045,
  AUTHOR    = "Qin, R. and Wang, C.Z. and Wu, Y.M. and Du, H. and Lv, M.Y.",
  TITLE     = "A U-Shaped Convolution-Aided Transformer with Double Attention for Hyperspectral Image Classification",
  JOURNAL   = RS,
  VOLUME    = "16",
  YEAR      = "2024",
  NUMBER    = "2",
  PAGES     = "288",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141977"
}

@article{bb146046,
  AUTHOR    = "Wang, W.X. and Chen, W. and Qiu, Q. and Chen, L. and Wu, B. and Lin, B.B. and He, X.F. and Liu, W.",
  TITLE     = "CrossFormer++: A Versatile Vision Transformer Hinging on Cross-Scale Attention",
  JOURNAL   = PAMI,
  VOLUME    = "46",
  YEAR      = "2024",
  NUMBER    = "5",
  MONTH     = may,
  PAGES     = "3123--3136",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141978"
}

@article{bb146047,
  AUTHOR    = "Zhang, Q.M. and Zhang, J. and Xu, Y.F. and Tao, D.C.",
  TITLE     = "Vision Transformer With Quadrangle Attention",
  JOURNAL   = PAMI,
  VOLUME    = "46",
  YEAR      = "2024",
  NUMBER    = "5",
  MONTH     = may,
  PAGES     = "3608--3624",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141979"
}

@article{bb146048,
  AUTHOR    = "Huang, L. and Bai, X.Y. and Zeng, J. and Yu, M.Q. and Pang, W. and Wang, K.P.",
  TITLE     = "FAM: Improving columnar vision transformer with feature attention mechanism",
  JOURNAL   = CVIU,
  VOLUME    = "242",
  YEAR      = "2024",
  PAGES     = "103981",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141980"
}

@article{bb146049,
  AUTHOR    = "Li, M.X. and Yu, W. and Liu, Q.L. and Li, Z.L. and Li, R. and Zhong, B. and Zhang, S.P.",
  TITLE     = "Hybrid Transformers With Attention-Guided Spatial Embeddings for Makeup Transfer and Removal",
  JOURNAL   = CirSysVideo,
  VOLUME    = "34",
  YEAR      = "2024",
  NUMBER    = "4",
  MONTH     = apr,
  PAGES     = "2876--2890",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141981"
}

@article{bb146050,
  AUTHOR    = "Nie, X.S. and Jin, H.Y. and Yan, Y.F. and Chen, X. and Zhu, Z.H. and Qi, D.L.",
  TITLE     = "ScopeViT: Scale-Aware Vision Transformer",
  JOURNAL   = PR,
  VOLUME    = "153",
  YEAR      = "2024",
  PAGES     = "110470",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141982"
}

@article{bb146051,
  AUTHOR    = "Hanyu, T. and Yamazaki, K. and Tran, M. and McCann, R.A. and Liao, H.T. and Rainwater, C. and Adkins, M. and Cothren, J. and Le, N.",
  TITLE     = "AerialFormer: Multi-Resolution Transformer for Aerial Image Segmentation",
  JOURNAL   = RS,
  VOLUME    = "16",
  YEAR      = "2024",
  NUMBER    = "16",
  PAGES     = "2930",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141983"
}

@inproceedings{bb146052,
  AUTHOR    = "Yang, X. and Yuan, L.Z. and Wilber, K. and Sharma, A. and Gu, X.Y. and Qiao, S.Y. and Debats, S. and Wang, H.S. and Adam, H. and Sirotenko, M. and Chen, L.C.",
  TITLE     = "PolyMaX: General Dense Prediction with Mask Transformer",
  BOOKTITLE = WACV24,
  YEAR      = "2024",
  PAGES     = "1039--1050",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141984"
}

@inproceedings{bb146053,
  AUTHOR    = "Nie, X.S. and Chen, X. and Jin, H.Y. and Zhu, Z.H. and Yan, Y.F. and Qi, D.L.",
  TITLE     = "Triplet Attention Transformer for Spatiotemporal Predictive Learning",
  BOOKTITLE = WACV24,
  YEAR      = "2024",
  PAGES     = "7021--7030",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141985"
}

@inproceedings{bb146054,
  AUTHOR    = "Cai, H. and Li, J. and Hu, M. and Gan, C. and Han, S.",
  TITLE     = "EfficientViT: Lightweight Multi-Scale Attention for High-Resolution Dense Prediction",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "17256--17267",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141986"
}

@inproceedings{bb146055,
  AUTHOR    = "Ryu, J. and Han, D.Y. and Lim, J.W.",
  TITLE     = "Gramian Attention Heads are Strong yet Efficient Vision Learners",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5818--5828",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141987"
}

@inproceedings{bb146056,
  AUTHOR    = "Xu, R.H. and Zhang, H. and Hu, W.Z. and Zhang, S.L. and Wang, X.Y.",
  TITLE     = "ParCNetV2: Oversized Kernel with Enhanced Attention",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5729--5739",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141988"
}

@inproceedings{bb146057,
  AUTHOR    = "Zhao, B.Y. and Yu, Z. and Lan, S.Y. and Cheng, Y. and Anandkumar, A. and Lao, Y.J. and Alvarez, J.M.",
  TITLE     = "Fully Attentional Networks with Self-emerging Token Labeling",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5562--5572",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141989"
}

@inproceedings{bb146058,
  AUTHOR    = "Guo, Y. and Stutz, D. and Schiele, B.",
  TITLE     = "Robustifying Token Attention for Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "17511--17522",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141990"
}

@inproceedings{bb146059,
  AUTHOR    = "Zhao, Y. and Tang, H.D. and Jiang, Y.Y. and A, Y. and Wu, Q. and Wang, J.",
  TITLE     = "Parameter-Efficient Vision Transformer with Linear Attention",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "1275--1279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141991"
}

@inproceedings{bb146060,
  AUTHOR    = "Shi, L. and Huang, H.D. and Song, B. and Tan, M. and Zhao, W.Z. and Xia, T. and Ren, P.J.",
  TITLE     = "TAQ: Top-K Attention-Aware Quantization for Vision Transformers",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "1750--1754",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141992"
}

@inproceedings{bb146061,
  AUTHOR    = "Baili, N. and Frigui, H.",
  TITLE     = "ADA-VIT: Attention-Guided Data Augmentation for Vision Transformers",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "385--389",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141993"
}

@inproceedings{bb146062,
  AUTHOR    = "Ding, M.Y. and Shen, Y. and Fan, L.J. and Chen, Z.F. and Chen, Z. and Luo, P. and Tenenbaum, J. and Gan, C.",
  TITLE     = "Visual Dependency Transformers: Dependency Tree Emerges from Reversed Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14528--14539",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141994"
}

@inproceedings{bb146063,
  AUTHOR    = "Song, J.C. and Mou, C. and Wang, S.Q. and Ma, S.W. and Zhang, J.",
  TITLE     = "Optimization-Inspired Cross-Attention Transformer for Compressive Sensing",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "6174--6184",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141995"
}

@inproceedings{bb146064,
  AUTHOR    = "Hassani, A. and Walton, S. and Li, J.C. and Li, S. and Shi, H.",
  TITLE     = "Neighborhood Attention Transformer",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "6185--6194",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141996"
}

@inproceedings{bb146065,
  AUTHOR    = "Liu, Z.J. and Yang, X.Y. and Tang, H.T. and Yang, S. and Han, S.",
  TITLE     = "FlatFormer: Flattened Window Attention for Efficient Point Cloud Transformer",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "1200--1211",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141997"
}

@inproceedings{bb146066,
  AUTHOR    = "Pan, X. and Ye, T.Z. and Xia, Z.F. and Song, S. and Huang, G.",
  TITLE     = "Slide-Transformer: Hierarchical Vision Transformer with Local Self-Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2082--2091",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141998"
}

@inproceedings{bb146067,
  AUTHOR    = "Zhu, L. and Wang, X.J. and Ke, Z.H. and Zhang, W. and Lau, R.",
  TITLE     = "BiFormer: Vision Transformer with Bi-Level Routing Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "10323--10333",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT141999"
}

@inproceedings{bb146068,
  AUTHOR    = "Long, S. and Zhao, Z. and Pi, J. and Wang, S.S. and Wang, J.D.",
  TITLE     = "Beyond Attentive Tokens: Incorporating Token Importance and Diversity for Efficient Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "10334--10343",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142000"
}

@inproceedings{bb146069,
  AUTHOR    = "Liu, X.Y. and Peng, H. and Zheng, N.X. and Yang, Y.Q. and Hu, H. and Yuan, Y.X.",
  TITLE     = "EfficientViT: Memory Efficient Vision Transformer with Cascaded Group Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14420--14430",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142001"
}

@inproceedings{bb146070,
  AUTHOR    = "You, H.R. and Xiong, Y. and Dai, X.L. and Wu, B. and Zhang, P.Z. and Fan, H.Q. and Vajda, P. and Lin, Y.Y.C.",
  TITLE     = "Castling-ViT: Compressing Self-Attention via Switching Towards Linear-Angular Attention at Vision Transformer Inference",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14431--14442",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142002"
}

@inproceedings{bb146071,
  AUTHOR    = "Grainger, R. and Paniagua, T. and Song, X. and Cuntoor, N. and Lee, M.W. and Wu, T.F.",
  TITLE     = "PaCa-ViT: Learning Patch-to-Cluster Attention in Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "18568--18578",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142003"
}

@inproceedings{bb146072,
  AUTHOR    = "Wei, C. and Duke, B. and Jiang, R. and Aarabi, P. and Taylor, G.W. and Shkurti, F.",
  TITLE     = "Sparsifiner: Learning Sparse Instance-Dependent Attention for Efficient Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "22680--22689",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142004"
}

@inproceedings{bb146073,
  AUTHOR    = "Bhattacharyya, M. and Chattopadhyay, S. and Nag, S.",
  TITLE     = "DeCAtt: Efficient Vision Transformers with Decorrelated Attention Heads",
  BOOKTITLE = ECV23,
  YEAR      = "2023",
  PAGES     = "4695--4699",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142005"
}

@inproceedings{bb146074,
  AUTHOR    = "Tatsunami, Y. and Taki, M.",
  TITLE     = "RaftMLP: How Much Can Be Done Without Attention and with Less Spatial Locality?",
  BOOKTITLE = ACCV22,
  YEAR      = "2022",
  PAGES     = "VI:459--475",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142006"
}

@inproceedings{bb146075,
  AUTHOR    = "Bolya, D. and Fu, C.Y. and Dai, X.L. and Zhang, P.Z. and Hoffman, J.",
  TITLE     = "Hydra Attention: Efficient Attention with Many Heads",
  BOOKTITLE = CADK22,
  YEAR      = "2022",
  PAGES     = "35--49",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142007"
}

@inproceedings{bb146076,
  AUTHOR    = "Chen, X.Y. and Hu, Q. and Li, K. and Zhong, C. and Wang, G.H.",
  TITLE     = "Accumulated Trivial Attention Matters in Vision Transformers on Small Datasets",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "3973--3981",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142008"
}

@inproceedings{bb146077,
  AUTHOR    = "Lan, H. and Wang, X. and Shen, H. and Liang, P. and Wei, X.",
  TITLE     = "Couplformer: Rethinking Vision Transformer with Coupling Attention",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "6464--6473",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142009"
}

% NOTE(review): BOOKTITLE below is the quoted literal "ICPR22", whereas every
% other entry in this section uses an unquoted string macro. Left unchanged
% because no ICPR22 macro definition is visible here; confirm and unquote if
% such a macro exists.
@inproceedings{bb146078,
  AUTHOR    = "Debnath, B. and Po, O. and Chowdhury, F.A. and Chakradhar, S.",
  TITLE     = "Cosine Similarity based Few-Shot Video Classifier with Attention-based Aggregation",
  BOOKTITLE = "ICPR22",
  YEAR      = "2022",
  PAGES     = "1273--1279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142010"
}

@inproceedings{bb146079,
  AUTHOR    = "Mari, C.R. and Gonzalez, D.V. and Bou Balust, E.",
  TITLE     = "Multi-Scale Transformer-Based Feature Combination for Image Retrieval",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "3166--3170",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142011"
}

@inproceedings{bb146080,
  AUTHOR    = "Furukawa, R. and Hotta, K.",
  TITLE     = "Local Embedding for Axial Attention",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "2586--2590",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142012"
}

@inproceedings{bb146081,
  AUTHOR    = "Ding, M.Y. and Xiao, B. and Codella, N. and Luo, P. and Wang, J.D. and Yuan, L.",
  TITLE     = "DaViT: Dual Attention Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:74--92",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142013"
}

@inproceedings{bb146082,
  AUTHOR    = "Wang, P.C. and Wang, X. and Wang, F. and Lin, M. and Chang, S.N. and Li, H. and Jin, R.",
  TITLE     = "KVT: k-NN Attention for Boosting Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:285--302",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142014"
}

@inproceedings{bb146083,
  AUTHOR    = "Rao, Y.M. and Zhao, W.L. and Zhou, J. and Lu, J.W.",
  TITLE     = "AMixer: Adaptive Weight Mixing for Self-Attention Free Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXI:50--67",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142015"
}

@inproceedings{bb146084,
  AUTHOR    = "Li, A. and Jiao, J. and Li, N. and Qi, W. and Xu, W. and Pang, M.",
  TITLE     = "Conmw Transformer: A General Vision Transformer Backbone With Merged-Window Attention",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "1551--1555",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142016"
}

@inproceedings{bb146085,
  AUTHOR    = "Zhang, Q.M. and Xu, Y.F. and Zhang, J. and Tao, D.C.",
  TITLE     = "VSA: Learning Varied-Size Window Attention in Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXV:466--483",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142017"
}

@inproceedings{bb146086,
  AUTHOR    = "Mallick, R. and Benois Pineau, J. and Zemmari, A.",
  TITLE     = "I Saw: A Self-Attention Weighted Method for Explanation of Visual Transformers",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "3271--3275",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142018"
}

@inproceedings{bb146087,
  AUTHOR    = "Song, Z.K. and Yu, J.Q. and Chen, Y.P.P. and Yang, W.",
  TITLE     = "Transformer Tracking with Cyclic Shifting Window Attention",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "8781--8790",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142019"
}

@inproceedings{bb146088,
  AUTHOR    = "Yang, C.L. and Wang, Y.L. and Zhang, J.M. and Zhang, H. and Wei, Z.J. and Lin, Z. and Yuille, A.L.",
  TITLE     = "Lite Vision Transformer with Enhanced Self-Attention",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "11988--11998",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142020"
}

@inproceedings{bb146089,
  AUTHOR    = "Xia, Z.F. and Pan, X. and Song, S. and Li, L.E. and Huang, G.",
  TITLE     = "Vision Transformer with Deformable Attention",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "4784--4793",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142021"
}

@inproceedings{bb146090,
  AUTHOR    = "Yu, T. and Khalitov, R. and Cheng, L. and Yang, Z.R.",
  TITLE     = "Paramixer: Parameterizing Mixing Links in Sparse Factors Works Better than Dot-Product Self-Attention",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "681--690",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142022"
}

@inproceedings{bb146091,
  AUTHOR    = "Cheng, B. and Misra, I. and Schwing, A.G. and Kirillov, A. and Girdhar, R.",
  TITLE     = "Masked-attention Mask Transformer for Universal Image Segmentation",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "1280--1289",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142023"
}

@inproceedings{bb146092,
  AUTHOR    = "Rangrej, S.B. and Srinidhi, C.L. and Clark, J.J.",
  TITLE     = "Consistency driven Sequential Transformers Attention Model for Partially Observable Scenes",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "2508--2517",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142024"
}

@inproceedings{bb146093,
  AUTHOR    = "Chen, C.F.R. and Fan, Q.F. and Panda, R.",
  TITLE     = "CrossViT: Cross-Attention Multi-Scale Vision Transformer for Image Classification",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "347--356",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142025"
}

@inproceedings{bb146094,
  AUTHOR    = "Chefer, H. and Gur, S. and Wolf, L.B.",
  TITLE     = "Generic Attention-model Explainability for Interpreting Bi-Modal and Encoder-Decoder Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "387--396",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142026"
}

@inproceedings{bb146095,
  AUTHOR    = "Xu, W.J. and Xu, Y.F. and Chang, T. and Tu, Z.W.",
  TITLE     = "Co-Scale Conv-Attentional Image Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "9961--9970",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142027"
}

@inproceedings{bb146096,
  AUTHOR    = "Yang, G.L. and Tang, H. and Ding, M.L. and Sebe, N. and Ricci, E.",
  TITLE     = "Transformer-Based Attention Networks for Continuous Pixel-Wise Prediction",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "16249--16259",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142028"
}

@inproceedings{bb146097,
  AUTHOR    = "Kim, K. and Wu, B.C. and Dai, X.L. and Zhang, P.Z. and Yan, Z.C. and Vajda, P. and Kim, S.",
  TITLE     = "Rethinking the Self-Attention in Vision Transformers",
  BOOKTITLE = ECV21,
  YEAR      = "2021",
  PAGES     = "3065--3069",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT142029"
}

% ---- Entries whose BIBSOURCE is pattern651vidt3.html ----

@article{bb146098,
  AUTHOR    = "Selva, J. and Johansen, A.S. and Escalera, S. and Nasrollahi, K. and Moeslund, T.B. and Clapes, A.",
  TITLE     = "Video Transformers: A Survey",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "11",
  MONTH     = nov,
  PAGES     = "12922--12943",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT142030"
}

@article{bb146099,
  AUTHOR    = "Zhang, Z.C. and Chen, Z.D. and Wang, Y.X. and Luo, X. and Xu, X.S.",
  TITLE     = "A vision transformer for fine-grained classification by reducing noise and enhancing discriminative information",
  JOURNAL   = PR,
  VOLUME    = "145",
  YEAR      = "2024",
  PAGES     = "109979",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT142031"
}