% Vision-transformer bibliography entries bb153800 through bb153899.
% JOURNAL and BOOKTITLE values such as PAMI, IJCV or CVPR25 are bare macro
% names; their string definitions are not visible in this section and are
% presumably supplied elsewhere -- confirm before using this section alone.

@article{bb153800,
  AUTHOR = "Jiang, K. and Peng, P. and Lian, Y.Z. and Xu, W.S.",
  TITLE = "The encoding method of position embeddings in vision transformer",
  JOURNAL = JVCIR,
  VOLUME = "89",
  YEAR = "2022",
  PAGES = "103664",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149723"}

@article{bb153801,
  AUTHOR = "Han, K. and Wang, Y.H. and Chen, H.T. and Chen, X.H. and Guo, J.Y. and Liu, Z.H. and Tang, Y.H. and Xiao, A. and Xu, C.J. and Xu, Y.X. and Yang, Z.H. and Zhang, Y. and Tao, D.C.",
  TITLE = "A Survey on Vision Transformer",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "87-110",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149724"}

@article{bb153802,
  AUTHOR = "Hou, Q. and Jiang, Z.H. and Yuan, L. and Cheng, M.M. and Yan, S.C. and Feng, J.S.",
  TITLE = "Vision Permutator: A Permutable MLP-Like Architecture for Visual Recognition",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "1328-1334",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149725"}

@article{bb153803,
  AUTHOR = "Yu, W.H. and Si, C.Y. and Zhou, P. and Luo, M. and Zhou, Y.C. and Feng, J.S. and Yan, S.C. and Wang, X.C.",
  TITLE = "MetaFormer Baselines for Vision",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "2",
  MONTH = "February",
  PAGES = "896-912",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149726"}

@inproceedings{bb153804,
  AUTHOR = "Yu, W.H. and Luo, M. and Zhou, P. and Si, C.Y. and Zhou, Y.C. and Wang, X.C. and Feng, J.S. and Yan, S.C.",
  TITLE = "MetaFormer is Actually What You Need for Vision",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "10809-10819",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149727"}

@article{bb153805,
  AUTHOR = "Zhou, D. and Hou, Q. and Yang, L.J. and Jin, X.J. and Feng, J.S.",
  TITLE = "Token Selection is a Simple Booster for Vision Transformers",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "11",
  MONTH = "November",
  PAGES = "12738-12746",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149728"}

@article{bb153806,
  AUTHOR = "Yuan, L. and Hou, Q. and Jiang, Z.H. and Feng, J.S. and Yan, S.C.",
  TITLE = "VOLO: Vision Outlooker for Visual Recognition",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "5",
  MONTH = "May",
  PAGES = "6575-6586",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149729"}

@article{bb153807,
  AUTHOR = "Wu, Y.H. and Liu, Y. and Zhan, X. and Cheng, M.M.",
  TITLE = "P2T: Pyramid Pooling Transformer for Scene Understanding",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "11",
  MONTH = "November",
  PAGES = "12760-12771",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149730"}

@article{bb153808,
  AUTHOR = "Wang, H. and Du, Y.T. and Zhang, Y.B. and Li, S. and Zhang, L.",
  TITLE = "One-Stage Visual Relationship Referring With Transformers and Adaptive Message Passing",
  JOURNAL = IP,
  VOLUME = "32",
  YEAR = "2023",
  PAGES = "190-202",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149731"}

@article{bb153809,
  AUTHOR = "Kiya, H. and Iijima, R. and Maungmaung, A. and Kinoshita, Y.",
  TITLE = "Image and Model Transformation with Secret Key for Vision Transformer",
  JOURNAL = IEICE,
  VOLUME = "E106-D",
  YEAR = "2023",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "2-11",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149732"}

@article{bb153810,
  AUTHOR = "Li, Y. and Chen, K. and Sun, S.L. and He, C.",
  TITLE = "Multi-scale homography estimation based on dual feature aggregation transformer",
  JOURNAL = IET-IPR,
  VOLUME = "17",
  YEAR = "2023",
  NUMBER = "5",
  PAGES = "1403-1416",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149733"}

@article{bb153811,
  AUTHOR = "Wang, G.Q. and Chen, H. and Chen, L. and Zhuang, Y. and Zhang, S.H. and Zhang, T. and Dong, H. and Gao, P.",
  TITLE = "P2FEViT: Plug-and-Play CNN Feature Embedded Hybrid Vision Transformer for Remote Sensing Image Classification",
  JOURNAL = RS,
  VOLUME = "15",
  YEAR = "2023",
  NUMBER = "7",
  PAGES = "1773",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149734"}

@article{bb153812,
  AUTHOR = "Zhang, Q.M. and Xu, Y.F. and Zhang, J. and Tao, D.C.",
  TITLE = "ViTAEv2: Vision Transformer Advanced by Exploring Inductive Bias for Image Recognition and Beyond",
  JOURNAL = IJCV,
  VOLUME = "131",
  YEAR = "2023",
  NUMBER = "5",
  MONTH = "May",
  PAGES = "1141-1162",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149735"}

% NOTE(review): issue set to 9 / September; pages 3509-3536 of volume 132
% cannot belong to issue 1 (issue 3 of this volume covers 689-709) -- confirm
% against the publisher record.
@article{bb153813,
  AUTHOR = "Zhang, J.N. and Li, X.T. and Wang, Y.B. and Wang, C.J. and Yang, Y.B. and Liu, Y. and Tao, D.C.",
  TITLE = "EATFormer: Improving Vision Transformer Inspired by Evolutionary Algorithm",
  JOURNAL = IJCV,
  VOLUME = "132",
  YEAR = "2024",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "3509-3536",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149736"}

@article{bb153814,
  AUTHOR = "Fan, X.Y. and Liu, H.J.",
  TITLE = "FlexFormer: Flexible Transformer for efficient visual recognition",
  JOURNAL = PRL,
  VOLUME = "169",
  YEAR = "2023",
  PAGES = "95-101",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149737"}

@article{bb153815,
  AUTHOR = "Cho, S. and Hong, S. and Kim, S.",
  TITLE = "CATs++: Boosting Cost Aggregation With Convolutions and Transformers",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "7174-7194",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149738"}

@inproceedings{bb153816,
  AUTHOR = "Yue, X.Y. and Sun, S.Y. and Kuang, Z.H. and Wei, M. and Torr, P.H.S. and Zhang, W. and Lin, D.",
  TITLE = "Vision Transformer with Progressive Sampling",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "377-386",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149739"}

@article{bb153817,
  AUTHOR = "Feng, Z.Z. and Zhang, S.L.",
  TITLE = "Efficient Vision Transformer via Token Merger",
  JOURNAL = IP,
  VOLUME = "32",
  YEAR = "2023",
  PAGES = "4156-4169",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149740"}

@article{bb153818,
  AUTHOR = "Huang, X.Y. and Liu, F. and Cui, Y.H. and Chen, P. and Li, L.L. and Li, P.F.",
  TITLE = "Faster and Better: A Lightweight Transformer Network for Remote Sensing Scene Classification",
  JOURNAL = RS,
  VOLUME = "15",
  YEAR = "2023",
  NUMBER = "14",
  PAGES = "3645",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149741"}

@article{bb153819,
  AUTHOR = "Zhao, J.X. and Jiao, L.C. and Wang, C. and Liu, X. and Liu, F. and Li, L.L. and Ma, M. and Yang, S.Y.",
  TITLE = "Knowledge Guided Evolutionary Transformer for Remote Sensing Scene Classification",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "10368-10384",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149742"}

@article{bb153820,
  AUTHOR = "Zhang, D. and Ma, W.P. and Jiao, L.C. and Liu, X. and Yang, Y.T. and Liu, F.",
  TITLE = "Multiple Hierarchical Cross-Scale Transformer for Remote Sensing Scene Classification",
  JOURNAL = RS,
  VOLUME = "17",
  YEAR = "2025",
  NUMBER = "1",
  PAGES = "42",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149743"}

@article{bb153821,
  AUTHOR = "Yao, T. and Li, Y. and Pan, Y.W. and Wang, Y. and Zhang, X.P. and Mei, T.",
  TITLE = "Dual Vision Transformer",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "10870-10882",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149744"}

@article{bb153822,
  AUTHOR = "Rao, Y.M. and Liu, Z. and Zhao, W.L. and Zhou, J. and Lu, J.W.",
  TITLE = "Dynamic Spatial Sparsification for Efficient Vision Transformers and Convolutional Neural Networks",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "10883-10897",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149745"}

@article{bb153823,
  AUTHOR = "Li, J. and Liu, Z. and Li, L. and Lin, J.Q. and Yao, J. and Tu, J.",
  TITLE = "Multi-view convolutional vision transformer for 3D object recognition",
  JOURNAL = JVCIR,
  VOLUME = "95",
  YEAR = "2023",
  PAGES = "103906",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149746"}

@article{bb153824,
  AUTHOR = "Shang, J.H. and Li, X. and Kahatapitiya, K. and Lee, Y.C. and Ryoo, M.S.",
  TITLE = "StARformer: Transformer With State-Action-Reward Representations for Robot Learning",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "11",
  MONTH = "November",
  PAGES = "12862-12877",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149747"}

@inproceedings{bb153825,
  AUTHOR = "Shang, J.H. and Kahatapitiya, K. and Li, X. and Ryoo, M.S.",
  TITLE = "StARformer: Transformer with State-Action-Reward Representations for Visual Reinforcement Learning",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXIX:462-479",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149748"}

@article{bb153826,
  AUTHOR = "Duan, H.R. and Long, Y. and Wang, S.D. and Zhang, H.F. and Willcocks, C.G. and Shao, L.",
  TITLE = "Dynamic Unary Convolution in Transformers",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "11",
  MONTH = "November",
  PAGES = "12747-12759",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149749"}

@article{bb153827,
  AUTHOR = "Qian, S.J. and Zhu, Y. and Li, W.B. and Li, M. and Jia, J.Y.",
  TITLE = "What Makes for Good Tokenizers in Vision Transformer?",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "11",
  MONTH = "November",
  PAGES = "13011-13023",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149750"}

@article{bb153828,
  AUTHOR = "Sun, W.X. and Qin, Z. and Deng, H. and Wang, J.Y. and Zhang, Y. and Zhang, K. and Barnes, N. and Birchfield, S. and Kong, L.P. and Zhong, Y.R.",
  TITLE = "Vicinity Vision Transformer",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "12635-12649",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149751"}

@article{bb153829,
  AUTHOR = "Cao, C.J. and Dong, Q.L. and Fu, Y.W.",
  TITLE = "ZITS++: Image Inpainting by Improving the Incremental Transformer on Structural Priors",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "12667-12684",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149752"}

@article{bb153830,
  AUTHOR = "Fang, Y.X. and Wang, X.G. and Wu, R. and Liu, W.Y.",
  TITLE = "What Makes for Hierarchical Vision Transformer?",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "12714-12720",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149753"}

@article{bb153831,
  AUTHOR = "Liu, J. and Guo, H.R. and He, Y. and Li, H.L.",
  TITLE = "Vision Transformer-Based Ensemble Learning for Hyperspectral Image Classification",
  JOURNAL = RS,
  VOLUME = "15",
  YEAR = "2023",
  NUMBER = "21",
  PAGES = "5208",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149754"}

@article{bb153832,
  AUTHOR = "Lin, M.B. and Chen, M.Z. and Zhang, Y.X. and Shen, C.H. and Ji, R.R. and Cao, L.J.",
  TITLE = "Super Vision Transformer",
  JOURNAL = IJCV,
  VOLUME = "131",
  YEAR = "2023",
  NUMBER = "12",
  MONTH = "December",
  PAGES = "3136-3151",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149755"}

@article{bb153833,
  AUTHOR = "Li, Z.Y. and Gao, S.H. and Cheng, M.M.",
  TITLE = "SERE: Exploring Feature Self-Relation for Self-Supervised Transformer",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "12",
  MONTH = "December",
  PAGES = "15619-15631",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149756"}

@article{bb153834,
  AUTHOR = "Yuan, Y.H. and Liang, W.C. and Ding, H.H. and Liang, Z.H. and Zhang, C. and Hu, H.",
  TITLE = "Expediting Large-Scale Vision Transformer for Dense Prediction Without Fine-Tuning",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "250-266",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149757"}

@article{bb153835,
  AUTHOR = "Jiao, J. and Tang, Y.M. and Lin, K.Y. and Gao, Y.P. and Ma, A.J. and Wang, Y.W. and Zheng, W.S.",
  TITLE = "DilateFormer: Multi-Scale Dilated Transformer for Visual Recognition",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "8906-8919",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149758"}

@article{bb153836,
  AUTHOR = "Fu, K. and Yuan, M.Z. and Liu, S.L. and Wang, M.",
  TITLE = "Boosting Point-BERT by Multi-Choice Tokens",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "438-447",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149759"}

@article{bb153837,
  AUTHOR = "Ghosal, S.S. and Li, Y.X.",
  TITLE = "Are Vision Transformers Robust to Spurious Correlations?",
  JOURNAL = IJCV,
  VOLUME = "132",
  YEAR = "2024",
  NUMBER = "3",
  MONTH = "March",
  PAGES = "689-709",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149760"}

@article{bb153838,
  AUTHOR = "Yan, F.Y. and Yan, B. and Liang, W. and Pei, M.T.",
  TITLE = "Token labeling-guided multi-scale medical image classification",
  JOURNAL = PRL,
  VOLUME = "178",
  YEAR = "2024",
  PAGES = "28-34",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149761"}

@article{bb153839,
  AUTHOR = "Li, Y.X. and Huang, Y.W. and He, N. and Ma, K. and Zheng, Y.F.",
  TITLE = "Improving vision transformer for medical image classification via token-wise perturbation",
  JOURNAL = JVCIR,
  VOLUME = "98",
  YEAR = "2024",
  PAGES = "104022",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149762"}

@article{bb153840,
  AUTHOR = "Nguyen, H. and Kim, C. and Li, F.",
  TITLE = "Space-time recurrent memory network",
  JOURNAL = CVIU,
  VOLUME = "241",
  YEAR = "2024",
  PAGES = "103943",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149763"}

@inproceedings{bb153841,
  AUTHOR = "Kheldouni, A. and Boumhidi, J.",
  TITLE = "A Study of Bidirectional Encoder Representations from Transformers for Sequential Recommendations",
  BOOKTITLE = ISCV22,
  YEAR = "2022",
  PAGES = "1-5",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149764"}

@article{bb153842,
  AUTHOR = "Xiao, Q. and Zhang, Y. and Yang, Q.",
  TITLE = "Selective Random Walk for Transfer Learning in Heterogeneous Label Spaces",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "4476-4488",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149765"}

@article{bb153843,
  AUTHOR = "Akkaya, I.B. and Kathiresan, S.S. and Arani, E. and Zonooz, B.",
  TITLE = "Enhancing performance of vision transformers on small datasets through local inductive bias incorporation",
  JOURNAL = PR,
  VOLUME = "153",
  YEAR = "2024",
  PAGES = "110510",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149766"}

@article{bb153844,
  AUTHOR = "Yao, T. and Li, Y. and Pan, Y.W. and Mei, T.",
  TITLE = "HIRI-ViT: Scaling Vision Transformer With High Resolution Inputs",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "6431-6442",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149767"}

@article{bb153845,
  AUTHOR = "Xu, G.Y. and Ye, J.Y. and Liu, X.Y. and Wen, X.B. and Li, Y. and Wang, J.J.",
  TITLE = "LV-Adapter: Adapting Vision Transformers for Visual Classification with Linear-layers and Vectors",
  JOURNAL = CVIU,
  VOLUME = "246",
  YEAR = "2024",
  PAGES = "104049",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149768"}

@article{bb153846,
  AUTHOR = "Yan, L.Q. and Yan, R.X. and Chai, B. and Geng, G.H. and Zhou, P. and Gao, J.",
  TITLE = "DM-GAN: CNN hybrid vits for training GANs under limited data",
  JOURNAL = PR,
  VOLUME = "156",
  YEAR = "2024",
  PAGES = "110810",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149769"}

@article{bb153847,
  AUTHOR = "Feng, Q.H. and Li, P.Y. and Lu, Z.X. and Li, C.Z. and Wang, Z. and Liu, Z.Q. and Duan, C.H. and Huang, F. and Weng, J. and Yu, P.S.",
  TITLE = "EViT: Privacy-Preserving Image Retrieval via Encrypted Vision Transformer in Cloud Computing",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "7467-7483",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149770"}

@article{bb153848,
  AUTHOR = "Wang, H.Y. and Ma, S.M. and Dong, L. and Huang, S. and Zhang, D.D. and Wei, F.",
  TITLE = "DeepNet: Scaling Transformers to 1,000 Layers",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "6761-6774",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149771"}

@article{bb153849,
  AUTHOR = "Papa, L. and Russo, P. and Amerini, I. and Zhou, L.P.",
  TITLE = "A Survey on Efficient Vision Transformers: Algorithms, Techniques, and Performance Benchmarking",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "12",
  MONTH = "December",
  PAGES = "7682-7700",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149772"}

@article{bb153850,
  AUTHOR = "Hu, S.C. and Shen, L. and Zhang, Y. and Chen, Y.X. and Tao, D.C.",
  TITLE = "On Transforming Reinforcement Learning With Transformers: The Development Trajectory",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "12",
  MONTH = "December",
  PAGES = "8580-8599",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149773"}

@article{bb153851,
  AUTHOR = "Xu, R.S. and Chen, C.J. and Tu, Z.Z. and Yang, M.H.",
  TITLE = "V2X-ViTv2: Improved Vision Transformers for Vehicle-to-Everything Cooperative Perception",
  JOURNAL = PAMI,
  VOLUME = "47",
  YEAR = "2025",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "650-662",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149774"}

@inproceedings{bb153852,
  AUTHOR = "Xu, R.S. and Xiang, H. and Tu, Z.Z. and Xia, X. and Yang, M.H. and Ma, J.Q.",
  TITLE = "V2X-ViT: Vehicle-to-Everything Cooperative Perception with Vision Transformer",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXIX:107-124",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149775"}

@inproceedings{bb153853,
  AUTHOR = "Xiang, H. and Zheng, Z.L. and Xia, X. and Xu, R.S. and Gao, L. and Zhou, Z.W. and Han, X. and Ji, X. and Li, M.X. and Meng, Z.L. and Jin, L. and Lei, M.Y. and Ma, Z.Y. and He, Z.H. and Ma, H.X. and Yuan, Y.S. and Zhao, Y.Q. and Ma, J.Q.",
  TITLE = "V2X-Real: A Large-scale Dataset for Vehicle-to-everything Cooperative Perception",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "LII: 455-470",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149776"}

@inproceedings{bb153854,
  AUTHOR = "Xiang, H. and Xu, R.S. and Ma, J.Q.",
  TITLE = "HM-ViT: Hetero-modal Vehicle-to-Vehicle Cooperative Perception with Vision Transformer",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "284-295",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149777"}

@article{bb153855,
  AUTHOR = "Ma, X. and Zhang, Z. and Yu, R. and Ji, Z. and Li, M.C. and Zhang, Y.H. and Chen, Q.",
  TITLE = "SAVE: Encoding spatial interactions for vision transformers",
  JOURNAL = IVC,
  VOLUME = "152",
  YEAR = "2024",
  PAGES = "105312",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149778"}

@article{bb153856,
  AUTHOR = "Xiao, J. and Li, Z.K. and Li, J.Q. and Yang, L.W. and Gu, Q.Y.",
  TITLE = "BinaryViT: Toward Efficient and Accurate Binary Vision Transformers",
  JOURNAL = CirSysVideo,
  VOLUME = "35",
  YEAR = "2025",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "195-206",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149779"}

@article{bb153857,
  AUTHOR = "Mao, C.X. and Li, J. and Hu, T. and Zhao, X.Y.",
  TITLE = "CMVT: ConVit Transformer Network Recombined with Convolutional Layer",
  JOURNAL = IJIG,
  VOLUME = "25",
  YEAR = "2025",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "2450060",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149780"}

@article{bb153858,
  AUTHOR = "Niu, Y. and Song, Z.C. and Luo, Q.Y. and Chen, G.C. and Ma, M.M. and Li, F.",
  TITLE = "ATMformer: An Adaptive Token Merging Vision Transformer for Remote Sensing Image Scene Classification",
  JOURNAL = RS,
  VOLUME = "17",
  YEAR = "2025",
  NUMBER = "4",
  PAGES = "660",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149781"}

@article{bb153859,
  AUTHOR = "Zhong, Y.S. and Huang, Y. and Hu, J.W. and Zhang, Y.X. and Ji, R.R.",
  TITLE = "Towards Accurate Post-Training Quantization of Vision Transformers via Error Reduction",
  JOURNAL = PAMI,
  VOLUME = "47",
  YEAR = "2025",
  NUMBER = "4",
  MONTH = "April",
  PAGES = "2676-2692",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149782"}

@article{bb153860,
  AUTHOR = "Tian, R. and Wu, Z.X. and Dai, Q. and Goldblum, M. and Hu, H. and Jiang, Y.G.",
  TITLE = "The Role of ViT Design and Training in Robustness to Common Corruptions",
  JOURNAL = MultMed,
  VOLUME = "27",
  YEAR = "2025",
  PAGES = "1374-1385",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149783"}

@article{bb153861,
  AUTHOR = "Li, X.T. and Jiao, L.C. and Liu, F. and Yang, S.Y. and Zhu, H. and Liu, X. and Li, L.L. and Ma, W.P.",
  TITLE = "Adaptive Complex Wavelet Informed Transformer Operator",
  JOURNAL = MultMed,
  VOLUME = "27",
  YEAR = "2025",
  PAGES = "3513-3526",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149784"}

@article{bb153862,
  AUTHOR = "Pan, Y.W. and Li, Y. and Yao, T. and Ngo, C.W. and Mei, T.",
  TITLE = "Stream-ViT: Learning Streamlined Convolutions in Vision Transformer",
  JOURNAL = MultMed,
  VOLUME = "27",
  YEAR = "2025",
  PAGES = "3755-3765",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149785"}

@article{bb153863,
  AUTHOR = "Wen, T. and Wang, H. and Wang, L.G.",
  TITLE = "Dual-Branch Spatial-Spectral Transformer with Similarity Propagation for Hyperspectral Image Classification",
  JOURNAL = RS,
  VOLUME = "17",
  YEAR = "2025",
  NUMBER = "14",
  PAGES = "2386",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149786"}

@article{bb153864,
  AUTHOR = "Yu, C. and Chen, T. and Gan, Z.X.",
  TITLE = "Taylor-Series-Expansion-Based Vision Transformer Models",
  JOURNAL = PAMI,
  VOLUME = "47",
  YEAR = "2025",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "8213-8230",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149787"}

@inproceedings{bb153865,
  AUTHOR = "Bergner, B. and Lippert, C. and Mahendran, A.",
  TITLE = "Token Cropr: Faster ViTs for Quite a Few Tasks",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "9740-9750",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149788"}

@article{bb153866,
  AUTHOR = "Kong, Z.L. and Xu, D.K. and Li, Z.G. and Dong, P.Y. and Tang, H. and Wang, Y.Z. and Mukherjee, S.",
  TITLE = "AutoViT: Achieving Real-Time Vision Transformers on Mobile via Latency-aware Coarse-to-Fine Search",
  JOURNAL = IJCV,
  VOLUME = "133",
  YEAR = "2025",
  NUMBER = "9",
  MONTH = "September",
  PAGES = "6170-6186",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149789"}

@article{bb153867,
  AUTHOR = "Tan, C. and Gao, Z.Y. and Li, S.Y. and Li, S.Z.",
  TITLE = "SimVPv2: Towards Simple Yet Powerful Spatiotemporal Predictive Learning",
  JOURNAL = MultMed,
  VOLUME = "27",
  YEAR = "2025",
  PAGES = "5170-5184",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149790"}

@inproceedings{bb153868,
  AUTHOR = "Tan, C. and Gao, Z.Y. and Wu, L.R. and Xu, Y.J. and Xia, J. and Li, S.Y. and Li, S.Z.",
  TITLE = "Temporal Attention Unit: Towards Efficient Spatiotemporal Predictive Learning",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "18770-18782",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149791"}

@inproceedings{bb153869,
  AUTHOR = "He, X. and Quan, Y.H. and Xu, R. and Ji, H.",
  TITLE = "A Universal Scale-Adaptive Deformable Transformer for Image Restoration across Diverse Artifacts",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "12731-12741",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149792"}

@inproceedings{bb153870,
  AUTHOR = "Fixelle, J.",
  TITLE = "Hypergraph Vision Transformers: Images are More than Nodes, More than Edges",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "9751-9761",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149793"}

@inproceedings{bb153871,
  AUTHOR = "Zhang, W. and Zhang, B.P. and Teng, Z. and Luo, W.X. and Zou, J. and Fan, J.P.",
  TITLE = "Less Attention is More: Prompt Transformer for Generalized Category Discovery",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "30322-30331",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149794"}

@inproceedings{bb153872,
  AUTHOR = "Ahmed, S. and Arafat, A.A. and Najafi, D. and Mahmood, A. and Rizve, M.N. and Nahian, M.A. and Zhou, R.Y. and Angizi, S. and Rakin, A.S.",
  TITLE = "DeepCompress-ViT: Rethinking Model Compression to Enhance Efficiency of Vision Transformers at the Edge",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "30147-30156",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149795"}

@inproceedings{bb153873,
  AUTHOR = "Bouniot, Q. and Redko, I. and Mallasto, A. and Laclau, C. and Struckmeier, O. and Arndt, K. and Heinonen, M. and Kyrki, V. and Kaski, S.",
  TITLE = "From Alexnet to Transformers: Measuring the Non-linearity of Deep Neural Networks with Affine Optimal Transport",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "25250-25260",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149796"}

@inproceedings{bb153874,
  AUTHOR = "Fan, Q.H. and Huang, H.B. and He, R.",
  TITLE = "Breaking the Low-Rank Dilemma of Linear Attention",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "25271-25280",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149797"}

@inproceedings{bb153875,
  AUTHOR = "Wang, Z.Q. and Xia, X.B. and Chen, R. and Yu, D.D. and Wang, C.H. and Gong, M.M. and Liu, T.L.",
  TITLE = "LaVin-DiT: Large Vision Diffusion Transformer",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "20060-20070",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149798"}

@inproceedings{bb153876,
  AUTHOR = "Zhou, Y. and Xu, Q.S. and Cui, J. and Zhou, J. and Zhang, J. and Hong, R.C. and Zhang, H.W.",
  TITLE = "CARE Transformer: Mobile-Friendly Linear Visual Transformer via Decoupled Dual Interaction",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "20135-20145",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149799"}

% NOTE(review): the page range below starts and ends on the same page; the
% end page looks truncated -- confirm against the proceedings before citing.
@inproceedings{bb153877,
  AUTHOR = "Miao, Z.C. and Chen, W. and Qiu, Q.",
  TITLE = "Coeff-Tuning: A Graph Filter Subspace View for Tuning Attention-Based Large Models",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "20146-20146",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149800"}

@inproceedings{bb153878,
  AUTHOR = "Yu, H. and Jiang, T. and Jia, S. and Yan, S.N. and Liu, S.N. and Qian, H.L. and Li, G.H. and Dong, S.T. and Yuan, C.",
  TITLE = "ComRoPE: Scalable and Robust Rotary Position Embedding Parameterized by Trainable Commuting Angle Matrices",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "4508-4517",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149801"}

@inproceedings{bb153879,
  AUTHOR = "Sun, Y.W. and Ochiai, H. and Wu, Z.R. and Lin, S. and Kanai, R.",
  TITLE = "Associative Transformer",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "4518-4527",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149802"}

@inproceedings{bb153880,
  AUTHOR = "Chen, G.L. and Fu, T.W. and Chen, H.W. and Teng, W.B. and Xiao, H.Y. and Zhao, Y.J.",
  TITLE = "RDD: Robust Feature Detector and Descriptor using Deformable Transformer",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "6394-6403",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149803"}

@inproceedings{bb153881,
  AUTHOR = "Chen, L.Y. and Meyer, G.P. and Zhang, Z. and Wolff, E.M. and Vernaza, P.",
  TITLE = "Flash3D: Super-scaling Point Transformers through Joint Hardware-Geometry Locality",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "6595-6604",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149804"}

@inproceedings{bb153882,
  AUTHOR = "Dang, C.X. and Duan, Z. and An, P. and Zhang, X.M. and Hu, X. and Ma, J.",
  TITLE = "FASTer: Focal Token Acquiring-and-Scaling Transformer for Long-term 3D Object Detection",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "17029-17038",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149805"}

@inproceedings{bb153883,
  AUTHOR = "Pardyl, A. and Kurzejamski, G. and Olszewski, J. and Trzcinski, T. and Zielinski, B.",
  TITLE = "Beyond Grids: Exploring Elastic Input Sampling for Vision Transformers",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "8536-8545",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149806"}

@inproceedings{bb153884,
  AUTHOR = "Zhong, Y. and Zhou, Y.",
  TITLE = "Rethinking Low-Rank Adaptation in Vision: Exploring Head-Level Responsiveness across Diverse Tasks",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "7787-7796",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149807"}

@inproceedings{bb153885,
  AUTHOR = "Pipoli, V. and Bolelli, F. and Sarto, S. and Cornia, M. and Baraldi, L. and Grana, C. and Cucchiara, R. and Ficarra, E.",
  TITLE = "Semantically Conditioned Prompts for Visual Recognition Under Missing Modality Scenarios",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "4968-4977",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149808"}

@inproceedings{bb153886,
  AUTHOR = "Go, J. and Ryu, J.",
  TITLE = "Channel Propagation Networks for Refreshable Vision Transformer",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "1353-1362",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149809"}

@inproceedings{bb153887,
  AUTHOR = "Olszewski, J. and Rymarczyk, D. and Wojcik, P. and Pach, M. and Zielinski, B.",
  TITLE = "TORE: Token Recycling in Vision Transformers for Efficient Active Visual Exploration",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "8606-8616",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149810"}

@inproceedings{bb153888,
  AUTHOR = "Eliopoulos, N.J. and Jajal, P. and Davis, J.C. and Liu, G. and Thiruvathukal, G.K. and Lu, Y.H.",
  TITLE = "Pruning One More Token is Enough: Leveraging Latency-Workload Non-Linearities for Vision Transformers on the Edge",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "7153-7162",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149811"}

@inproceedings{bb153889,
  AUTHOR = "Nauen, T.C. and Palacio, S. and Raue, F. and Dengel, A.",
  TITLE = "Which Transformer to Favor: A Comparative Analysis of Efficiency in Vision Transformers",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "6955-6966",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149812"}

@inproceedings{bb153890,
  AUTHOR = "Tai, Y.S. and Wu, A.Y.A.",
  TITLE = "AMP-ViT: Optimizing Vision Transformer Efficiency with Adaptive Mixed-Precision Post-Training Quantization",
  BOOKTITLE = WACV25,
  YEAR = "2025",
  PAGES = "6828-6837",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149813"}

@inproceedings{bb153891,
  AUTHOR = "Chen, P.Q. and Yu, L. and Wan, Y. and Zhang, Y.J. and Wang, J. and Zhong, L. and Chen, J.D. and Yang, M.",
  TITLE = "Ecomatcher: Efficient Clustering Oriented Matcher for Detector-free Image Matching",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "LXVIII: 344-360",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149814"}

@inproceedings{bb153892,
  AUTHOR = "Wang, H.Q. and Zhang, T. and Salzmann, M.",
  TITLE = "Sinder: Repairing the Singular Defects of Dinov2",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "VII: 20-35",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149815"}

@inproceedings{bb153893,
  AUTHOR = "Suri, S. and Walmer, M. and Gupta, K. and Shrivastava, A.",
  TITLE = "Lift: A Surprisingly Simple Lightweight Feature Transform for Dense Vit Descriptors",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "VII: 110-128",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149816"}

@inproceedings{bb153894,
  AUTHOR = "Pan, Z.Z. and Liu, J. and He, H.Y. and Cai, J.F. and Zhuang, B.",
  TITLE = "Stitched VITS are Flexible Vision Backbones",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "XLI: 258-274",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149817"}

@inproceedings{bb153895,
  AUTHOR = "Kim, D.H. and Heo, B. and Han, D.Y.",
  TITLE = "Densenets Reloaded: Paradigm Shift Beyond Resnets and VITS",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "III: 395-415",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149818"}

@inproceedings{bb153896,
  AUTHOR = "Zhang, C. and Cheng, J. and Li, Q.X.",
  TITLE = "An Optimal Control View of Lora and Binary Controller Design for Vision Transformers",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "LIII: 144-160",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149819"}

@inproceedings{bb153897,
  AUTHOR = "Koner, R. and Jain, G. and Jain, P. and Tresp, V. and Paul, S.",
  TITLE = "LookupVIT: Compressing Visual Information to a Limited Number of Tokens",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "LXXXVI: 322-337",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149820"}

@inproceedings{bb153898,
  AUTHOR = "Zhang, T. and Bai, J. and Lu, Z. and Lian, D.Z. and Wang, G. and Wang, X.C. and Xia, S.T.",
  TITLE = "Parameter-efficient and Memory-efficient Tuning for Vision Transformer: A Disentangled Approach",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "XLV: 346-363",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149821"}

@inproceedings{bb153899,
  AUTHOR = "Wang, H.Y. and Tang, H. and Jiang, L. and Shi, S.S. and Naeem, M.F. and Li, H.S. and Schiele, B. and Wang, L.W.",
  TITLE = "Git: Towards Generalist Vision Transformer Through Universal Language Interface",
  BOOKTITLE = ECCV24,
  YEAR = "2024",
  PAGES = "XXIX: 55-73",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149822"}