% NOTE(review): the source record carried the placeholder PAGES = "xx-yy".
% It is dropped here so that no bogus page range is printed; add the real
% page or article number once verified against the publisher.
@article{bb141600,
        AUTHOR = "Wu, J. J. and Wei, Z. Q. and Zhang, J. P. and Zhang, Y. S. and Jia, D. N. and Yin, B. and Yu, Y. C.",
        TITLE = "Full-Coupled Convolutional Transformer for Surface-Based Duct Refractivity Inversion",
        JOURNAL = RS,
        VOLUME = "14",
        YEAR = "2022",
        NUMBER = "17",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137593"}

@article{bb141601,
        AUTHOR = "Jiang, K. and Peng, P. and Lian, Y. and Xu, W. S.",
        TITLE = "The encoding method of position embeddings in vision transformer",
        JOURNAL = JVCIR,
        VOLUME = "89",
        YEAR = "2022",
        PAGES = "103664",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137594"}

@article{bb141602,
        AUTHOR = "Han, K. and Wang, Y. H. and Chen, H. T. and Chen, X. H. and Guo, J. Y. and Liu, Z. H. and Tang, Y. and Xiao, A. and Xu, C. J. and Xu, Y. X. and Yang, Z. H. and Zhang, Y. and Tao, D. C.",
        TITLE = "A Survey on Vision Transformer",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "87--110",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137595"}

@article{bb141603,
        AUTHOR = "Hou, Q. and Jiang, Z. H. and Yuan, L. and Cheng, M. M. and Yan, S. C. and Feng, J. S.",
        TITLE = "Vision Permutator: A Permutable {MLP}-Like Architecture for Visual Recognition",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "1328--1334",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137596"}

@article{bb141604,
        AUTHOR = "Yu, W. H. and Si, C. Y. and Zhou, P. and Luo, M. and Zhou, Y. C. and Feng, J. S. and Yan, S. C. and Wang, X. C.",
        TITLE = "{MetaFormer} Baselines for Vision",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "896--912",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137597"}

@inproceedings{bb141605,
        AUTHOR = "Yu, W. H. and Luo, M. and Zhou, P. and Si, C. Y. and Zhou, Y. C. and Wang, X. C. and Feng, J. S. and Yan, S. C.",
        TITLE = "{MetaFormer} is Actually What You Need for Vision",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10809--10819",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137598"}

@article{bb141606,
        AUTHOR = "Zhou, D. and Hou, Q. and Yang, L. J. and Jin, X. J. and Feng, J. S.",
        TITLE = "Token Selection is a Simple Booster for Vision Transformers",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "12738--12746",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137599"}

@article{bb141607,
        AUTHOR = "Yuan, L. and Hou, Q. and Jiang, Z. H. and Feng, J. S. and Yan, S. C.",
        TITLE = "{VOLO}: Vision Outlooker for Visual Recognition",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "5",
        MONTH = may,
        PAGES = "6575--6586",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137600"}

@inproceedings{bb141608,
        AUTHOR = "Ren, S. and Zhou, D. and He, S. F. and Feng, J. S. and Wang, X. C.",
        TITLE = "Shunted Self-Attention via Multi-Scale Token Aggregation",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10843--10852",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137601"}

@article{bb141609,
        AUTHOR = "Wu, Y. H. and Liu, Y. and Zhan, X. and Cheng, M. M.",
        TITLE = "{P2T}: Pyramid Pooling Transformer for Scene Understanding",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "12760--12771",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137602"}

@article{bb141610,
        AUTHOR = "Li, Y. and Yao, T. and Pan, Y. W. and Mei, T.",
        TITLE = "Contextual Transformer Networks for Visual Recognition",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "1489--1500",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137603"}

@article{bb141611,
        AUTHOR = "Wang, H. and Du, Y. and Zhang, Y. and Li, S. and Zhang, L.",
        TITLE = "One-Stage Visual Relationship Referring With Transformers and Adaptive Message Passing",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "190--202",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137604"}

@article{bb141612,
        AUTHOR = "Kim, B. and Kim, J. and Ye, J. C.",
        TITLE = "Task-Agnostic Vision Transformer for Distributed Learning of Image Processing",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "203--218",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137605"}

@article{bb141613,
        AUTHOR = "Park, S. and Ye, J. C.",
        TITLE = "Multi-Task Distributed Learning Using Vision Transformer With Random Patch Permutation",
        JOURNAL = MedImg,
        VOLUME = "42",
        YEAR = "2023",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "2091--2105",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137606"}

% Last author's surname was truncated to "Kinoshit" in the source record;
% corrected to "Kinoshita".
@article{bb141614,
        AUTHOR = "Kiya, H. and Iijima, R. and Maungmaung, A. and Kinoshita, Y.",
        TITLE = "Image and Model Transformation with Secret Key for Vision Transformer",
        JOURNAL = IEICE,
        VOLUME = "E106-D",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "2--11",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137607"}

@article{bb141615,
        AUTHOR = "Zhang, H. F. and Mao, F. and Xue, M. Q. and Fang, G. F. and Feng, Z. L. and Song, J. and Song, M. L.",
        TITLE = "Knowledge Amalgamation for Object Detection With Transformers",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "2093--2106",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137608"}

@article{bb141616,
        AUTHOR = "Li, Y. and Chen, K. and Sun, S. L. and He, C.",
        TITLE = "Multi-scale homography estimation based on dual feature aggregation transformer",
        JOURNAL = IET-IPR,
        VOLUME = "17",
        YEAR = "2023",
        NUMBER = "5",
        PAGES = "1403--1416",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137609"}

@article{bb141617,
        AUTHOR = "Wang, G. Q. and Chen, H. and Chen, L. and Zhuang, Y. and Zhang, S. H. and Zhang, T. and Dong, H. and Gao, P.",
        TITLE = "{P2FEViT}: Plug-and-Play {CNN} Feature Embedded Hybrid Vision Transformer for Remote Sensing Image Classification",
        JOURNAL = RS,
        VOLUME = "15",
        YEAR = "2023",
        NUMBER = "7",
        PAGES = "1773",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137610"}

@article{bb141618,
        AUTHOR = "Zhang, Q. M. and Xu, Y. F. and Zhang, J. and Tao, D. C.",
        TITLE = "{ViTAEv2}: Vision Transformer Advanced by Exploring Inductive Bias for Image Recognition and Beyond",
        JOURNAL = IJCV,
        VOLUME = "131",
        YEAR = "2023",
        NUMBER = "5",
        MONTH = may,
        PAGES = "1141--1162",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137611"}

@article{bb141619,
        AUTHOR = "Fan, X. and Liu, H. J.",
        TITLE = "{FlexFormer}: Flexible Transformer for efficient visual recognition",
        JOURNAL = PRL,
        VOLUME = "169",
        YEAR = "2023",
        PAGES = "95--101",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137612"}

@article{bb141620,
        AUTHOR = "Cho, S. and Hong, S. and Kim, S.",
        TITLE = "{CATs++}: Boosting Cost Aggregation With Convolutions and Transformers",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "7174--7194",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137613"}

@article{bb141621,
        AUTHOR = "Kim, B. J. and Choi, H. and Jang, H. and Lee, D. G. and Jeong, W. and Kim, S. W.",
        TITLE = "Improved robustness of vision transformers via prelayernorm in patch embedding",
        JOURNAL = PR,
        VOLUME = "141",
        YEAR = "2023",
        PAGES = "109659",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137614"}

@article{bb141622,
        AUTHOR = "Wang, Z. W. and Wang, C. Y. and Xu, X. W. and Zhou, J. and Lu, J. W.",
        TITLE = "Quantformer: Learning Extremely Low-Precision Vision Transformers",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "8813--8826",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137615"}

@article{bb141623,
        AUTHOR = "Sun, S. Y. and Yue, X. Y. and Zhao, H. S. and Torr, P. H. S. and Bai, S.",
        TITLE = "Patch-Based Separable Transformer for Visual Recognition",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "9241--9247",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137616"}

@inproceedings{bb141624,
        AUTHOR = "Yue, X. Y. and Sun, S. Y. and Kuang, Z. H. and Wei, M. and Torr, P. H. S. and Zhang, W. and Lin, D.",
        TITLE = "Vision Transformer with Progressive Sampling",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "377--386",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137617"}

@article{bb141625,
        AUTHOR = "Peng, Z. L. and Guo, Z. H. and Huang, W. and Wang, Y. W. and Xie, L. X. and Jiao, J. B. and Tian, Q. and Ye, Q. X.",
        TITLE = "Conformer: Local Features Coupling Global Representations for Recognition and Detection",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "9454--9468",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137618"}

@inproceedings{bb141626,
        AUTHOR = "Peng, Z. L. and Huang, W. and Gu, S. Z. and Xie, L. X. and Wang, Y. and Jiao, J. B. and Ye, Q. X.",
        TITLE = "Conformer: Local Features Coupling Global Representations for Visual Recognition",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "357--366",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137619"}

@article{bb141627,
        AUTHOR = "Feng, Z. Z. and Zhang, S. L.",
        TITLE = "Efficient Vision Transformer via Token Merger",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "4156--4169",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137620"}

@article{bb141628,
        AUTHOR = "Yang, J. H. and Li, X. Y. and Zheng, M. and Wang, Z. H. and Zhu, Y. Q. and Guo, X. Q. and Yuan, Y. C. and Chai, Z. and Jiang, S. Q.",
        TITLE = "{MemBridge}: Video-Language Pre-Training With Memory-Augmented Inter-Modality Bridge",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "4073--4087",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137621"}

@article{bb141629,
        AUTHOR = "Wang, D. L. and Chen, Y. and Naz, B. and Sun, L. and Li, B. Z.",
        TITLE = "Spatial-Aware Transformer ({SAT}): Enhancing Global Modeling in Transformer Segmentation for Remote Sensing Images",
        JOURNAL = RS,
        VOLUME = "15",
        YEAR = "2023",
        NUMBER = "14",
        PAGES = "3607",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137622"}

@article{bb141630,
        AUTHOR = "Huang, X. Y. and Liu, F. and Cui, Y. H. and Chen, P. and Li, L. L. and Li, P. F.",
        TITLE = "Faster and Better: A Lightweight Transformer Network for Remote Sensing Scene Classification",
        JOURNAL = RS,
        VOLUME = "15",
        YEAR = "2023",
        NUMBER = "14",
        PAGES = "3645",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137623"}

@article{bb141631,
        AUTHOR = "Yao, T. and Li, Y. and Pan, Y. W. and Wang, Y. and Zhang, X. P. and Mei, T.",
        TITLE = "Dual Vision Transformer",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "10870--10882",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137624"}

@article{bb141632,
        AUTHOR = "Rao, Y. M. and Liu, Z. and Zhao, W. L. and Zhou, J. and Lu, J. W.",
        TITLE = "Dynamic Spatial Sparsification for Efficient Vision Transformers and Convolutional Neural Networks",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "10883--10897",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137625"}

@article{bb141633,
        AUTHOR = "Li, J. and Liu, Z. and Li, L. and Lin, J. Q. and Yao, J. and Tu, J.",
        TITLE = "Multi-view convolutional vision transformer for {3D} object recognition",
        JOURNAL = JVCIR,
        VOLUME = "95",
        YEAR = "2023",
        PAGES = "103906",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137626"}

@article{bb141634,
        AUTHOR = "Shang, J. H. and Li, X. and Kahatapitiya, K. and Lee, Y. C. and Ryoo, M. S.",
        TITLE = "{StARformer}: Transformer With State-Action-Reward Representations for Robot Learning",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "12862--12877",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137627"}

% NOTE(review): PAGES keeps its "XXIX:" prefix from the source record --
% presumably the proceedings volume; confirm before moving it to VOLUME.
@inproceedings{bb141635,
        AUTHOR = "Shang, J. H. and Kahatapitiya, K. and Li, X. and Ryoo, M. S.",
        TITLE = "{StARformer}: Transformer with State-Action-Reward Representations for Visual Reinforcement Learning",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXIX:462--479",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137628"}

@article{bb141636,
        AUTHOR = "Duan, H. R. and Long, Y. and Wang, S. D. and Zhang, H. F. and Willcocks, C. G. and Shao, L.",
        TITLE = "Dynamic Unary Convolution in Transformers",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "12747--12759",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137629"}

@article{bb141637,
        AUTHOR = "Chen, S. M. and Hong, Z. M. and Hou, W. J. and Xie, G. S. and Song, Y. B. and Zhao, J. and You, X. G. and Yan, S. C. and Shao, L.",
        TITLE = "{TransZero++}: Cross Attribute-Guided Transformer for Zero-Shot Learning",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "12844--12861",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137630"}

@article{bb141638,
        AUTHOR = "Qian, S. J. and Zhu, Y. and Li, W. B. and Li, M. and Jia, J. Y.",
        TITLE = "What Makes for Good Tokenizers in Vision Transformer?",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "13011--13023",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137631"}

@article{bb141639,
        AUTHOR = "Sun, W. X. and Qin, Z. and Deng, H. and Wang, J. and Zhang, Y. and Zhang, K. and Barnes, N. and Birchfield, S. and Kong, L. P. and Zhong, Y.",
        TITLE = "Vicinity Vision Transformer",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "12635--12649",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137632"}

@article{bb141640,
        AUTHOR = "Cao, C. J. and Dong, Q. and Fu, Y. W.",
        TITLE = "{ZITS++}: Image Inpainting by Improving the Incremental Transformer on Structural Priors",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "12667--12684",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137633"}

@article{bb141641,
        AUTHOR = "Fang, Y. X. and Wang, X. G. and Wu, R. and Liu, W. Y.",
        TITLE = "What Makes for Hierarchical Vision Transformer?",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "12714--12720",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137634"}

@article{bb141642,
        AUTHOR = "Xu, P. and Zhu, X. T. and Clifton, D. A.",
        TITLE = "Multimodal Learning With Transformers: A Survey",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "12113--12132",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137635"}

@article{bb141643,
        AUTHOR = "Liu, J. and Guo, H. R. and He, Y. and Li, H. L.",
        TITLE = "Vision Transformer-Based Ensemble Learning for Hyperspectral Image Classification",
        JOURNAL = RS,
        VOLUME = "15",
        YEAR = "2023",
        NUMBER = "21",
        PAGES = "5208",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137636"}

@article{bb141644,
        AUTHOR = "Lin, M. B. and Chen, M. Z. and Zhang, Y. X. and Shen, C. H. and Ji, R. R. and Cao, L. J.",
        TITLE = "Super Vision Transformer",
        JOURNAL = IJCV,
        VOLUME = "131",
        YEAR = "2023",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "3136--3151",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137637"}

@article{bb141645,
        AUTHOR = "Li, Z. Y. and Gao, S. H. and Cheng, M. M.",
        TITLE = "{SERE}: Exploring Feature Self-Relation for Self-Supervised Transformer",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "15619--15631",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137638"}

@article{bb141646,
        AUTHOR = "Yuan, Y. H. and Liang, W. C. and Ding, H. H. and Liang, Z. H. and Zhang, C. and Hu, H.",
        TITLE = "Expediting Large-Scale Vision Transformer for Dense Prediction Without Fine-Tuning",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "250--266",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137639"}

@article{bb141647,
        AUTHOR = "Jiao, J. and Tang, Y. M. and Lin, K. Y. and Gao, Y. P. and Ma, A. J. and Wang, Y. W. and Zheng, W. S.",
        TITLE = "{DilateFormer}: Multi-Scale Dilated Transformer for Visual Recognition",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "8906--8919",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137640"}

@article{bb141648,
        AUTHOR = "Li, Z. H. and Li, Y. X. and Li, Q. D. and Wang, P. and Guo, D. and Lu, L. and Jin, D. and Zhang, Y. and Hong, Q. Q.",
        TITLE = "{LViT}: Language Meets Vision Transformer in Medical Image Segmentation",
        JOURNAL = MedImg,
        VOLUME = "43",
        YEAR = "2024",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "96--107",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137641"}

@article{bb141649,
        AUTHOR = "Fu, K. and Yuan, M. Z. and Liu, S. L. and Wang, M.",
        TITLE = "Boosting {Point-BERT} by Multi-Choice Tokens",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "438--447",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137642"}

@article{bb141650,
        AUTHOR = "Ghosal, S. S. and Li, Y. X.",
        TITLE = "Are Vision Transformers Robust to Spurious Correlations?",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "689--709",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137643"}

@article{bb141651,
        AUTHOR = "Yan, F. Y. and Yan, B. and Liang, W. and Pei, M. T.",
        TITLE = "Token labeling-guided multi-scale medical image classification",
        JOURNAL = PRL,
        VOLUME = "178",
        YEAR = "2024",
        PAGES = "28--34",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137644"}

@article{bb141652,
        AUTHOR = "Li, Y. X. and Huang, Y. W. and He, N. and Ma, K. and Zheng, Y. F.",
        TITLE = "Improving vision transformer for medical image classification via token-wise perturbation",
        JOURNAL = JVCIR,
        VOLUME = "98",
        YEAR = "2024",
        PAGES = "104022",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137645"}

@article{bb141653,
  author    = {Nguyen, H. and Kim, C. and Li, F.},
  title     = {Space-time recurrent memory network},
  journal   = CVIU,
  volume    = {241},
  year      = {2024},
  pages     = {103943},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137646}
}

@inproceedings{bb141654,
        AUTHOR = "Kheldouni, A. and Boumhidi, J.",
        TITLE = "A Study of Bidirectional Encoder Representations from Transformers for Sequential Recommendations",
        BOOKTITLE = ISCV22,
        YEAR = "2022",
        PAGES = "1--5",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137647"}

@article{bb141655,
        AUTHOR = "Chen, Z. and Bai, C. Y. and Zhu, Y. L. and Lu, X. W.",
        TITLE = "{TUT}: Template-Augmented {U-Net} Transformer for Unsupervised Anomaly Detection",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "780--784",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137648"}

@article{bb141656,
        AUTHOR = "Xiao, Q. and Zhang, Y. and Yang, Q.",
        TITLE = "Selective Random Walk for Transfer Learning in Heterogeneous Label Spaces",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "4476--4488",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137649"}

@article{bb141657,
        AUTHOR = "Zhang, J. S. and Gu, L. F. and Lai, Y. K. and Wang, X. Y. and Li, K.",
        TITLE = "Toward Grouping in Large Scenes With Occlusion-Aware Spatio-Temporal Transformers",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "3919--3929",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137650"}

@article{bb141658,
        AUTHOR = "Akkaya, I. B. and Kathiresan, S. S. and Arani, E. and Zonooz, B.",
        TITLE = "Enhancing performance of vision transformers on small datasets through local inductive bias incorporation",
        JOURNAL = PR,
        VOLUME = "153",
        YEAR = "2024",
        PAGES = "110510",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137651"}

% NOTE(review): BOOKTITLE is a quoted literal here, whereas sibling entries
% use bare @string macros (e.g. CVPR22, WACV24). Left unchanged because no
% CRV23 macro definition is visible; confirm and unquote if one exists.
@inproceedings{bb141659,
        AUTHOR = "Edalati, A. and Hameed, M. G. A. and Mosleh, A.",
        TITLE = "Generalized {Kronecker}-based Adapters for Parameter-efficient Fine-tuning of Vision Transformers",
        BOOKTITLE = "CRV23",
        YEAR = "2023",
        PAGES = "97--104",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137652"}

@inproceedings{bb141660,
        AUTHOR = "Herzig, R. and Abramovich, O. and Ben Avraham, E. and Arbelle, A. and Karlinsky, L. and Shamir, A. and Darrell, T. J. and Globerson, A.",
        TITLE = "{PromptonomyViT}: Multi-Task Prompt Learning Improves Video Transformers using Synthetic Scene Data",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "6789--6801",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137653"}

@inproceedings{bb141661,
        AUTHOR = "Marouf, I. E. and Tartaglione, E. and Lathuiliere, S.",
        TITLE = "Mini but Mighty: Finetuning {ViTs} with Mini Adapters",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "1721--1730",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137654"}

@inproceedings{bb141662,
        AUTHOR = "Kim, G. and Kim, J. and Lee, J. S.",
        TITLE = "Exploring Adversarial Robustness of Vision Transformers in the Spectral Perspective",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "3964--3973",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137655"}

@inproceedings{bb141663,
        AUTHOR = "Xu, X. and Wang, S. and Chen, Y. D. and Zheng, Y. P. and Wei, Z. W. and Liu, J. J.",
        TITLE = "{GTP-ViT}: Efficient Vision Transformers via Graph-based Token Propagation",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "86--95",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137656"}

@inproceedings{bb141664,
        AUTHOR = "Han, Q. and Zhang, G. J. and Huang, J. X. and Gao, P. and Wei, Z. and Lu, S. J.",
        TITLE = "Efficient {MAE} towards Large-Scale Vision Transformers",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "595--604",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137657"}

@inproceedings{bb141665,
        AUTHOR = "Park, J. W. and Kahatapitiya, K. and Kim, D. H. and Sudalairaj, S. and Fan, Q. F. and Ryoo, M. S.",
        TITLE = "Grafting Vision Transformers",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "1134--1143",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137658"}

@inproceedings{bb141666,
        AUTHOR = "Shimizu, S. and Tamaki, T.",
        TITLE = "Joint learning of images and videos with a single Vision Transformer",
        BOOKTITLE = MVA23,
        YEAR = "2023",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137659"}

@inproceedings{bb141667,
        AUTHOR = "Li, K. C. and Wang, Y. and Li, Y. Z. and Wang, Y. and He, Y. and Wang, L. M. and Qiao, Y.",
        TITLE = "Unmasked Teacher: Towards Training-Efficient Video Foundation Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "19891--19903",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137660"}

@inproceedings{bb141668,
        AUTHOR = "Ding, S. R. and Zhao, P. S. and Zhang, X. P. and Qian, R. and Xiong, H. K. and Tian, Q.",
        TITLE = "Prune Spatio-temporal Tokens by Semantic-aware Temporal Accumulation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "16899--16910",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137661"}

@inproceedings{bb141669,
        AUTHOR = "Chen, M. Z. and Lin, M. and Lin, Z. H. and Zhang, Y. X. and Chao, F. and Ji, R. R.",
        TITLE = "{SMMix}: Self-Motivated Image Mixing for Vision Transformers",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "17214--17224",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137662"}

@inproceedings{bb141670,
        AUTHOR = "Kim, D. and Angelova, A. and Kuo, W. C.",
        TITLE = "Contrastive Feature Masking Open-Vocabulary Vision Transformer",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15556--15566",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137663"}

@inproceedings{bb141671,
        AUTHOR = "Zhang, Y. and Chen, D. and Kundu, S. and Li, C. H. and Beerel, P. A.",
        TITLE = "{SAL-ViT}: Towards Latency Efficient Private Inference on {ViT} using Selective Attention Search with a Learnable Softmax Approximation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "5093--5102",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137664"}

@inproceedings{bb141672,
        AUTHOR = "Li, Z. and Gu, Q. Y.",
        TITLE = "{I-ViT}: Integer-only Quantization for Efficient Vision Transformer Inference",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "17019--17029",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137665"}

@inproceedings{bb141673,
        AUTHOR = "Frumkin, N. and Gope, D. and Marculescu, D.",
        TITLE = "Jumping through Local Minima: Quantization in the Loss Landscape of Vision Transformers",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "16932--16942",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137666"}

@inproceedings{bb141674,
        AUTHOR = "Li, Z. and Xiao, J. and Yang, L. and Gu, Q. Y.",
        TITLE = "{RepQ-ViT}: Scale Reparameterization for Post-Training Quantization of Vision Transformers",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "17181--17190",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137667"}

@inproceedings{bb141675,
        AUTHOR = "Havtorn, J. D. and Royer, A. and Blankevoort, T. and Bejnordi, B. E.",
        TITLE = "{MSViT}: Dynamic Mixed-scale Tokenization for Vision Transformers",
        BOOKTITLE = NIVT23,
        YEAR = "2023",
        PAGES = "838--848",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137668"}

@inproceedings{bb141676,
        AUTHOR = "Haurum, J. B. and Escalera, S. and Taylor, G. W. and Moeslund, T. B.",
        TITLE = "Which Tokens to Use? Investigating Token Reduction in Vision Transformers",
        BOOKTITLE = NIVT23,
        YEAR = "2023",
        PAGES = "773--783",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137669"}

@inproceedings{bb141677,
        AUTHOR = "Wang, X. and Chu, X. J. and Han, C. and Zhang, X. Y.",
        TITLE = "{SCSC}: Spatial Cross-scale Convolution Module to Strengthen both {CNNs} and Transformers",
        BOOKTITLE = NIVT23,
        YEAR = "2023",
        PAGES = "731--741",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137670"}

@inproceedings{bb141678,
        AUTHOR = "Chen, Y. H. and Weng, Y. C. and Kao, C. H. and Chien, C. and Chiu, W. C. and Peng, W. H.",
        TITLE = "{TransTIC}: Transferring Transformer-based Image Compression from Human Perception to Machine Perception",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "23240--23250",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT137671"}

@inproceedings{bb141679,
  author    = {Li, Y. and Hu, J. and Wen, Y. and Evangelidis, G. and Salahi, K. and Wang, Y.Z. and Tulyakov, S. and Ren, J.},
  title     = {Rethinking Vision Transformers for MobileNet Size and Speed},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {16843-16854},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137672},
}

@inproceedings{bb141680,
  author    = {Nurgazin, M. and Tu, N.A.},
  title     = {A Comparative Study of Vision Transformer Encoders and Few-shot Learning for Medical Image Classification},
  booktitle = CVAMD23,
  year      = {2023},
  pages     = {2505-2513},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137673},
}

@inproceedings{bb141681,
  author    = {Yeganeh, Y. and Farshad, A. and Weinberger, P. and Ahmadi, S.A. and Adeli, E. and Navab, N.},
  title     = {Transformers Pay Attention to Convolutions Leveraging Emerging Properties of ViTs by Dual Attention-Image Network},
  booktitle = CVAMD23,
  year      = {2023},
  pages     = {2296-2307},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137674},
}

@inproceedings{bb141682,
  author    = {Zheng, J.H. and Yang, L.Q. and Li, Y. and Yang, K. and Wang, Z.Y. and Zhou, J.},
  title     = {Lightweight Vision Transformer with Spatial and Channel Enhanced Self-Attention},
  booktitle = REDLCV23,
  year      = {2023},
  pages     = {1484-1488},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137675},
}

@inproceedings{bb141683,
  author    = {Xie, W. and Zhao, Z. and Li, S.Y. and Zuo, B.H. and Wang, Y.G.},
  title     = {Nonrigid Object Contact Estimation With Regional Unwrapping Transformer},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {9308-9317},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137676},
}

@inproceedings{bb141684,
  author    = {Vasu, P.K.A. and Gabriel, J. and Zhu, J. and Tuzel, O. and Ranjan, A.},
  title     = {FastViT: A Fast Hybrid Vision Transformer using Structural Reparameterization},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5762-5772},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137677},
}

@inproceedings{bb141685,
  author    = {Hyeon Woo, N. and Yu Ji, K. and Heo, B. and Han, D.Y. and Oh, S.J. and Oh, T.H.},
  title     = {Scratching Visual Transformer's Back with Uniform Attention},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5784-5795},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137678},
}

@inproceedings{bb141686,
  author    = {Tang, C. and Zhang, L.L. and Jiang, H.Q. and Xu, J.H. and Cao, T. and Zhang, Q. and Yang, Y.Q. and Wang, Z. and Yang, M.},
  title     = {ElasticViT: Conflict-aware Supernet Training for Deploying Fast Vision Transformer on Diverse Mobile Devices},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5806-5817},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137679},
}

@inproceedings{bb141687,
  author    = {Ren, S. and Yang, X.Y. and Liu, S. and Wang, X.C.},
  title     = {SG-Former: Self-guided Transformer with Evolving Token Reallocation},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5980-5991},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137680},
}

@inproceedings{bb141688,
  author    = {Lin, W.F. and Wu, Z.H. and Chen, J. and Huang, J. and Jin, L.W.},
  title     = {Scale-Aware Modulation Meet Transformer},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5992-6003},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137681},
}

@inproceedings{bb141689,
  author    = {Zhang, H.K. and Hu, W.Z. and Wang, X.Y.},
  title     = {Fcaformer: Forward Cross Attention in Hybrid Vision Transformer},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {6037-6046},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137682},
}

@inproceedings{bb141690,
  author    = {He, Y.F. and Lou, Z.Y. and Zhang, L. and Liu, J. and Wu, W.J. and Zhou, H. and Zhuang, B.},
  title     = {BiViT: Extremely Compressed Binary Vision Transformers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5628-5640},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137683},
}

@inproceedings{bb141691,
  author    = {Dutson, M. and Li, Y. and Gupta, M.},
  title     = {Eventful Transformers: Leveraging Temporal Redundancy in Vision Transformers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {16865-16877},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137684},
}

@inproceedings{bb141692,
  author    = {Wang, Z.Q. and Fang, Y.T. and Cao, J.H. and Zhang, Q. and Wang, Z. and Xu, R.},
  title     = {Masked Spiking Transformer},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {1761-1771},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137685},
}

@inproceedings{bb141693,
  author    = {Peebles, W. and Xie, S.},
  title     = {Scalable Diffusion Models with Transformers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {4172-4182},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137686},
}

@inproceedings{bb141694,
  author    = {Zeng, W.X. and Li, M. and Xiong, W.J. and Tong, T. and Lu, W.J. and Tan, J. and Wang, R.S. and Huang, R.},
  title     = {MPCViT: Searching for Accurate and Efficient MPC-Friendly Vision Transformer with Heterogeneous Attention},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5029-5040},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137687},
}

@inproceedings{bb141695,
  author    = {Mentzer, F. and Agustsson, E. and Tschannen, M.},
  title     = {M2T: Masking Transformers Twice for Faster Decoding},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5317-5326},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137688},
}

@inproceedings{bb141696,
  author    = {Psomas, B. and Kakogeorgiou, I. and Karantzalos, K. and Avrithis, Y.},
  title     = {Keep It SimPool: Who Said Supervised Transformers Suffer from Attention Deficit?},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5327-5337},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137689},
}

@inproceedings{bb141697,
  author    = {Xiao, H. and Zheng, W.Z. and Zhu, Z. and Zhou, J. and Lu, J.W.},
  title     = {Token-Label Alignment for Vision Transformers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5472-5481},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137690},
}

@inproceedings{bb141698,
  author    = {Yu, R.Y. and Wang, Z.N. and Wang, Y.H. and Li, K. and Liu, C. and Duan, H. and Ji, X.Y. and Chen, J.},
  title     = {LaPE: Layer-adaptive Position Embedding for Vision Transformers with Independent Layer Normalization},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5863-5873},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137691},
}

@inproceedings{bb141699,
  author    = {Roy, A. and Verma, V.K. and Voonna, S. and Ghosh, K. and Ghosh, S. and Das, A.},
  title     = {Exemplar-Free Continual Transformer with Convolutions},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5874-5884},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT137692},
}

Last update: Jul 18, 2024 at 20:50:34