% Conference-paper records bb153900 through bb153999 (vision-transformer
% topics, publication years 2023-2025).
% BOOKTITLE values are bare macro names (ECCV24, CVPR24, ...); their string
% definitions are not visible in this part of the file and must be in scope.
% Accented characters are written as BibTeX special characters ({\ss}, {\`A})
% so that 8-bit BibTeX sorts and labels them correctly.

@inproceedings{bb153900,
  AUTHOR    = "Wu, Z.G.Y. and Chen, J.X. and Zhong, H. and Huang, D. and Wang, Y.H.",
  TITLE     = "Adalog: Post-training Quantization for Vision Transformers with Adaptive Logarithm Quantizer",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XXVII: 411-427",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149823"
}

@inproceedings{bb153901,
  AUTHOR    = "Jie, S. and Tang, Y.H. and Guo, J.Y. and Deng, Z.H. and Han, K. and Wang, Y.H.",
  TITLE     = "Token Compensator: Altering Inference Cost of Vision Transformer Without Re-tuning",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XVI: 76-94",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149824"
}

@inproceedings{bb153902,
  AUTHOR    = "Xiao, H. and Zheng, W.Z. and Zuo, S.C. and Gao, P. and Zhou, J. and Lu, J.W.",
  TITLE     = "Spatialformer: Towards Generalizable Vision Transformers with Explicit Spatial Understanding",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XIII: 37-54",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149825"
}

@inproceedings{bb153903,
  AUTHOR    = "Heo, B. and Park, S. and Han, D.Y. and Yun, S.",
  TITLE     = "Rotary Position Embedding for Vision Transformer",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "X: 289-305",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149826"
}

@inproceedings{bb153904,
  AUTHOR    = "Bellitto, G. and Sortino, R. and Spadaro, P. and Palazzo, S. and Salanitri, F.P. and Fiameni, G. and Gavves, E. and Spampinato, C.",
  TITLE     = "Vito: Vision Transformer Optimization Via Knowledge Distillation On Decoders",
  BOOKTITLE = ICIP24,
  YEAR      = "2024",
  PAGES     = "493-499",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149827"
}

@inproceedings{bb153905,
  AUTHOR    = "Gani, H. and Saadi, N. and Hussein, N. and Nandakumar, K.",
  TITLE     = "Multi-Attribute Vision Transformers are Efficient and Robust Learners",
  BOOKTITLE = ICIP24,
  YEAR      = "2024",
  PAGES     = "766-772",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149828"
}

@inproceedings{bb153906,
  AUTHOR    = "Huang, W.X. and Shen, Y.H. and Xie, J. and Zhang, B.C. and He, G.Q. and Li, K. and Sun, X. and Lin, S.H.",
  TITLE     = "A General and Efficient Training for Transformer via Token Expansion",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "15783-15792",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149829"
}

@inproceedings{bb153907,
  AUTHOR    = "Cho, J.H. and Krahenbuhl, P.",
  TITLE     = "Language-Conditioned Detection Transformer",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "16593-16603",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149830"
}

@inproceedings{bb153908,
  AUTHOR    = "Lin, S. and Lyu, P. and Liu, D. and Tang, T. and Liang, X.D. and Song, A. and Chang, X.J.",
  TITLE     = "MLP Can Be a Good Transformer Learner",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "19489-19498",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149831"
}

@inproceedings{bb153909,
  AUTHOR    = "Wang, A. and Chen, H. and Lin, Z.J. and Han, J.G. and Ding, G.G.",
  TITLE     = "Rep ViT: Revisiting Mobile CNN From ViT Perspective",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "15909-15920",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149832"
}

@inproceedings{bb153910,
  AUTHOR    = "Weng, H.H. and Huang, D. and Qiao, Y. and Hu, Z. and Lin, C.Y. and Zhang, T. and Chen, C.L.P.",
  TITLE     = "Desigen: A Pipeline for Controllable Design Template Generation",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "12721-12732",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149833"
}

@inproceedings{bb153911,
  AUTHOR    = "Park, S. and Byun, H.R.",
  TITLE     = "Fair-VPT: Fair Visual Prompt Tuning for Image Classification",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "12268-12278",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149834"
}

@inproceedings{bb153912,
  AUTHOR    = "Xu, H.Y. and Xiang, L. and Ye, H.Y. and Yao, D. and Chu, P.Z. and Li, B.C.",
  TITLE     = "Permutation Equivariance of Transformers and its Applications",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "5987-5996",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149835"
}

@inproceedings{bb153913,
  AUTHOR    = "Zhang, Y.Y. and Ding, X.H. and Gong, K.X. and Ge, Y.X. and Shan, Y. and Yue, X.Y.",
  TITLE     = "Multimodal Pathway: Improve Transformers with Irrelevant Data from Other Modalities",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "6108-6117",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149836"
}

@inproceedings{bb153914,
  AUTHOR    = "Kobayashi, T.",
  TITLE     = "Mean-Shift Feature Transformer",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "6047-6056",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149837"
}

@inproceedings{bb153915,
  AUTHOR    = "Wu, J. and Duan, B. and Kang, W.T. and Tang, H. and Yan, Y.",
  TITLE     = "Token Transformation Matters: Towards Faithful Post-Hoc Explanation for Vision Transformer",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "10926-10935",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149838"
}

@inproceedings{bb153916,
  AUTHOR    = "Shi, X.Y. and Hao, Z.C. and Yu, Z.F.",
  TITLE     = "SpikingResformer: Bridging ResNet and Vision Transformer in Spiking Neural Networks",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "5610-5619",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149839"
}

@inproceedings{bb153917,
  AUTHOR    = "Ye, H.C. and Yu, C. and Ye, P. and Xia, R. and Tang, Y.S. and Lu, J.W. and Chen, T. and Zhang, B.",
  TITLE     = "Once for Both: Single Stage of Importance and Sparsity Search for Vision Transformer Compression",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "5578-5588",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149840"
}

@inproceedings{bb153918,
  AUTHOR    = "Zhang, J. and Herrmann, C. and Hur, J. and Chen, E. and Jampani, V. and Sun, D.Q. and Yang, M.H.",
  TITLE     = "Telling Left from Right: Identifying Geometry-Aware Semantic Correspondence",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "3076-3085",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149841"
}

@inproceedings{bb153919,
  AUTHOR    = "Huang, N.C. and Chang, C.C. and Lin, W.C. and Taka, E. and Marculescu, D. and Wu, K.C.A.",
  TITLE     = "ELSA: Exploiting Layer-wise N:M Sparsity for Vision Transformer Acceleration",
  BOOKTITLE = ECV24,
  YEAR      = "2024",
  PAGES     = "8006-8015",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149842"
}

@inproceedings{bb153920,
  AUTHOR    = "Devulapally, A. and Khan, M.F.F. and Advani, S. and Narayanan, V.",
  TITLE     = "Multi-Modal Fusion of Event and RGB for Monocular Depth Estimation Using a Unified Transformer-based Architecture",
  BOOKTITLE = MULA24,
  YEAR      = "2024",
  PAGES     = "2081-2089",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149843"
}

@inproceedings{bb153921,
  AUTHOR    = "Yang, Z.D. and Li, Z. and Zeng, A.L. and Li, Z.X. and Yuan, C. and Li, Y.",
  TITLE     = "ViTKD: Feature-based Knowledge Distillation for Vision Transformers",
  BOOKTITLE = PBDL24,
  YEAR      = "2024",
  PAGES     = "1379-1388",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149844"
}

@inproceedings{bb153922,
  AUTHOR    = "Mehri, F. and Baghshah, M.S. and Pilehvar, M.T.",
  TITLE     = "LibraGrad: Balancing Gradient Flow for Universally Better Vision Transformer Attributions",
  BOOKTITLE = CVPR25,
  YEAR      = "2025",
  PAGES     = "67-78",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149845"
}

@inproceedings{bb153923,
  AUTHOR    = "Mehri, F. and Fayyaz, M. and Baghshah, M.S. and Pilehvar, M.T.",
  TITLE     = "SkipPLUS: Skip the First Few Layers to Better Explain Vision Transformers",
  BOOKTITLE = FaDE-TCV24,
  YEAR      = "2024",
  PAGES     = "204-215",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149846"
}

@inproceedings{bb153924,
  AUTHOR    = "Jain, S. and Dutta, T.",
  TITLE     = "Towards Understanding and Improving Adversarial Robustness of Vision Transformers",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "24736-24745",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149847"
}

@inproceedings{bb153925,
  AUTHOR    = "Yang, S. and Bai, J. and Gao, K. and Yang, Y. and Li, Y.M. and Xia, S.T.",
  TITLE     = "Not All Prompts Are Secure: A Switchable Backdoor Attack Against Pre-trained Vision Transfomers",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "24431-24441",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149848"
}

@inproceedings{bb153926,
  AUTHOR    = "Steitz, J.M.O. and Roth, S.",
  TITLE     = "Adapters Strike Back",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "23449-23459",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149849"
}

@inproceedings{bb153927,
  AUTHOR    = "Rangwani, H. and Mondal, P. and Mishra, M. and Asokan, A.R. and Babu, R.V.",
  TITLE     = "DeiT-LT: Distillation Strikes Back for Vision Transformer Training on Long-Tailed Datasets",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "23396-23406",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149850"
}

@inproceedings{bb153928,
  AUTHOR    = "Liu, J.Y. and Teshome, W. and Ghimire, S. and Sznaier, M. and Camps, O.",
  TITLE     = "Solving Masked Jigsaw Puzzles with Diffusion Vision Transformers*",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "23009-23018",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149851"
}

@inproceedings{bb153929,
  AUTHOR    = "Kim, M. and Seo, P.H. and Schmid, C. and Cho, M.",
  TITLE     = "Learning Correlation Structures for Vision Transformers",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "18941-18951",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149852"
}

@inproceedings{bb153930,
  AUTHOR    = "Shi, D.",
  TITLE     = "TransNeXt: Robust Foveal Visual Perception for Vision Transformers",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "17773-17783",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149853"
}

@inproceedings{bb153931,
  AUTHOR    = "Agiza, A. and Neseem, M. and Reda, S.",
  TITLE     = "MTLoRA: A Low-Rank Adaptation Approach for Efficient Multi-Task Learning",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "16196-16205",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149854"
}

@inproceedings{bb153932,
  AUTHOR    = "Dong, W. and Zhang, X. and Chen, B. and Yan, D.W. and Lin, Z.J. and Yan, Q. and Wang, P. and Yang, Y.",
  TITLE     = "Low-Rank Rescaled Vision Transformer Fine-Tuning: A Residual Design Approach",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "16101-16110",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149855"
}

@inproceedings{bb153933,
  AUTHOR    = "Wu, J. and Kang, W.T. and Tang, H. and Hong, Y. and Yan, Y.",
  TITLE     = "On the Faithfulness of Vision Transformer Explanations",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "10936-10945",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149856"
}

@inproceedings{bb153934,
  AUTHOR    = "Navaneet, K.L. and Koohpayegani, S.A. and Sleiman, E. and Pirsiavash, H.",
  TITLE     = "SlowFormer: Adversarial Attack on Compute and Energy Consumption of Efficient Vision Transformers",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "24786-24797",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149857"
}

@inproceedings{bb153935,
  AUTHOR    = "Koyun, O.C. and Toreyin, B.U.",
  TITLE     = "HaLViT: Half of the Weights are Enough",
  BOOKTITLE = LargeVM24,
  YEAR      = "2024",
  PAGES     = "3669-3678",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149858"
}

@inproceedings{bb153936,
  AUTHOR    = "Bafghi, R.A. and Harilal, N. and Monteleoni, C. and Raissi, M.",
  TITLE     = "Parameter Efficient Fine-tuning of Self-supervised ViTs without Catastrophic Forgetting",
  BOOKTITLE = LargeVM24,
  YEAR      = "2024",
  PAGES     = "3679-3684",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149859"
}

% NOTE(review): bb153937 repeats the title and BIBSOURCE anchor of bb153936 with
% the author order reversed and different pages; confirm whether it is a second
% publication venue or a duplicate record before citing it.
@inproceedings{bb153937,
  AUTHOR    = "Raissi, M. and Monteleoni, C. and Harilal, N. and Bafghi, R.A.",
  TITLE     = "Parameter Efficient Fine-tuning of Self-supervised ViTs without Catastrophic Forgetting",
  BOOKTITLE = LargeVM24,
  YEAR      = "2024",
  PAGES     = "7864-7869",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149859"
}

@inproceedings{bb153938,
  AUTHOR    = "Yuan, X. and Fei, H.L. and Baek, J.",
  TITLE     = "Efficient Transformer Adaptation with Soft Token Merging",
  BOOKTITLE = LargeVM24,
  YEAR      = "2024",
  PAGES     = "3658-3668",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149860"
}

@inproceedings{bb153939,
  AUTHOR    = "Edalati, A. and Hameed, M.G.A. and Mosleh, A.",
  TITLE     = "Generalized Kronecker-based Adapters for Parameter-efficient Fine-tuning of Vision Transformers",
  BOOKTITLE = CRV23,
  YEAR      = "2023",
  PAGES     = "97-104",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149861"
}

@inproceedings{bb153940,
  AUTHOR    = "Marouf, I.E. and Tartaglione, E. and Lathuiliere, S.",
  TITLE     = "Mini but Mighty: Finetuning ViTs with Mini Adapters",
  BOOKTITLE = WACV24,
  YEAR      = "2024",
  PAGES     = "1721-1730",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149862"
}

@inproceedings{bb153941,
  AUTHOR    = "Kim, G. and Kim, J. and Lee, J.S.",
  TITLE     = "Exploring Adversarial Robustness of Vision Transformers in the Spectral Perspective",
  BOOKTITLE = WACV24,
  YEAR      = "2024",
  PAGES     = "3964-3973",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149863"
}

@inproceedings{bb153942,
  AUTHOR    = "Xu, X. and Wang, S. and Chen, Y.D. and Zheng, Y.P. and Wei, Z.W. and Liu, J.J.",
  TITLE     = "GTP-ViT: Efficient Vision Transformers via Graph-based Token Propagation",
  BOOKTITLE = WACV24,
  YEAR      = "2024",
  PAGES     = "86-95",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149864"
}

@inproceedings{bb153943,
  AUTHOR    = "Han, Q. and Zhang, G.J. and Huang, J.X. and Gao, P. and Wei, Z. and Lu, S.J.",
  TITLE     = "Efficient MAE towards Large-Scale Vision Transformers",
  BOOKTITLE = WACV24,
  YEAR      = "2024",
  PAGES     = "595-604",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149865"
}

@inproceedings{bb153944,
  AUTHOR    = "Park, J.W. and Kahatapitiya, K. and Kim, D.H. and Sudalairaj, S. and Fan, Q.F. and Ryoo, M.S.",
  TITLE     = "Grafting Vision Transformers",
  BOOKTITLE = WACV24,
  YEAR      = "2024",
  PAGES     = "1134-1143",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149866"
}

@inproceedings{bb153945,
  AUTHOR    = "Shimizu, S. and Tamaki, T.",
  TITLE     = "Joint learning of images and videos with a single Vision Transformer",
  BOOKTITLE = MVA23,
  YEAR      = "2023",
  PAGES     = "1-6",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149867"
}

@inproceedings{bb153946,
  AUTHOR    = "Ding, S.R. and Zhao, P.S. and Zhang, X.P. and Qian, R. and Xiong, H.K. and Tian, Q.",
  TITLE     = "Prune Spatio-temporal Tokens by Semantic-aware Temporal Accumulation",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "16899-16910",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149868"
}

@inproceedings{bb153947,
  AUTHOR    = "Chen, M.Z. and Lin, M. and Lin, Z.H. and Zhang, Y.X. and Chao, F. and Ji, R.R.",
  TITLE     = "SMMix: Self-Motivated Image Mixing for Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "17214-17224",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149869"
}

@inproceedings{bb153948,
  AUTHOR    = "Kim, D. and Angelova, A. and Kuo, W.C.",
  TITLE     = "Contrastive Feature Masking Open-Vocabulary Vision Transformer",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "15556-15566",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149870"
}

@inproceedings{bb153949,
  AUTHOR    = "Li, Z.K. and Gu, Q.Y.",
  TITLE     = "I-ViT: Integer-only Quantization for Efficient Vision Transformer Inference",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "17019-17029",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149871"
}

@inproceedings{bb153950,
  AUTHOR    = "Frumkin, N. and Gope, D. and Marculescu, D.",
  TITLE     = "Jumping through Local Minima: Quantization in the Loss Landscape of Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "16932-16942",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149872"
}

@inproceedings{bb153951,
  AUTHOR    = "Li, Z.K. and Xiao, J.R. and Yang, L.W. and Gu, Q.Y.",
  TITLE     = "RepQ-ViT: Scale Reparameterization for Post-Training Quantization of Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "17181-17190",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149873"
}

@inproceedings{bb153952,
  AUTHOR    = "Havtorn, J.D. and Royer, A. and Blankevoort, T. and Bejnordi, B.E.",
  TITLE     = "MSViT: Dynamic Mixed-scale Tokenization for Vision Transformers",
  BOOKTITLE = NIVT23,
  YEAR      = "2023",
  PAGES     = "838-848",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149874"
}

@inproceedings{bb153953,
  AUTHOR    = "Haurum, J.B. and Escalera, S. and Taylor, G.W. and Moeslund, T.B.",
  TITLE     = "Which Tokens to Use? Investigating Token Reduction in Vision Transformers",
  BOOKTITLE = NIVT23,
  YEAR      = "2023",
  PAGES     = "773-783",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149875"
}

@inproceedings{bb153954,
  AUTHOR    = "Wang, X. and Chu, X.J. and Han, C. and Zhang, X.Y.",
  TITLE     = "SCSC: Spatial Cross-scale Convolution Module to Strengthen both CNNs and Transformers",
  BOOKTITLE = NIVT23,
  YEAR      = "2023",
  PAGES     = "731-741",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149876"
}

@inproceedings{bb153955,
  AUTHOR    = "Chen, Y.H. and Weng, Y.C. and Kao, C.H. and Chien, C. and Chiu, W.C. and Peng, W.H.",
  TITLE     = "TransTIC: Transferring Transformer-based Image Compression from Human Perception to Machine Perception",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "23240-23250",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149877"
}

@inproceedings{bb153956,
  AUTHOR    = "Li, Y. and Hu, J. and Wen, Y. and Evangelidis, G. and Salahi, K. and Wang, Y.Z. and Tulyakov, S. and Ren, J.",
  TITLE     = "Rethinking Vision Transformers for MobileNet Size and Speed",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "16843-16854",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149878"
}

@inproceedings{bb153957,
  AUTHOR    = "Nurgazin, M. and Tu, N.A.",
  TITLE     = "A Comparative Study of Vision Transformer Encoders and Few-shot Learning for Medical Image Classification",
  BOOKTITLE = CVAMD23,
  YEAR      = "2023",
  PAGES     = "2505-2513",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149879"
}

@inproceedings{bb153958,
  AUTHOR    = "Xie, W. and Zhao, Z. and Li, S.Y. and Zuo, B.H. and Wang, Y.G.",
  TITLE     = "Nonrigid Object Contact Estimation With Regional Unwrapping Transformer",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "9308-9317",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149880"
}

@inproceedings{bb153959,
  AUTHOR    = "Vasu, P.K.A. and Gabriel, J. and Zhu, J. and Tuzel, O. and Ranjan, A.",
  TITLE     = "FastViT: A Fast Hybrid Vision Transformer using Structural Reparameterization",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5762-5772",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149881"
}

@inproceedings{bb153960,
  AUTHOR    = "Tang, C. and Zhang, L.L. and Jiang, H.Q. and Xu, J.H. and Cao, T. and Zhang, Q. and Yang, Y.Q. and Wang, Z. and Yang, M.",
  TITLE     = "ElasticViT: Conflict-aware Supernet Training for Deploying Fast Vision Transformer on Diverse Mobile Devices",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5806-5817",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149882"
}

@inproceedings{bb153961,
  AUTHOR    = "Ren, S. and Yang, X.Y. and Liu, S. and Wang, X.C.",
  TITLE     = "SG-Former: Self-guided Transformer with Evolving Token Reallocation",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5980-5991",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149883"
}

@inproceedings{bb153962,
  AUTHOR    = "Lin, W.F. and Wu, Z.H. and Chen, J. and Huang, J. and Jin, L.W.",
  TITLE     = "Scale-Aware Modulation Meet Transformer",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5992-6003",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149884"
}

@inproceedings{bb153963,
  AUTHOR    = "He, Y.F. and Lou, Z.Y. and Zhang, L. and Liu, J. and Wu, W.J. and Zhou, H. and Zhuang, B.",
  TITLE     = "BiViT: Extremely Compressed Binary Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5628-5640",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149885"
}

@inproceedings{bb153964,
  AUTHOR    = "Dutson, M. and Li, Y. and Gupta, M.",
  TITLE     = "Eventful Transformers: Leveraging Temporal Redundancy in Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "16865-16877",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149886"
}

@inproceedings{bb153965,
  AUTHOR    = "Wang, Z.Q. and Fang, Y.T. and Cao, J.H. and Zhang, Q. and Wang, Z. and Xu, R.",
  TITLE     = "Masked Spiking Transformer",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "1761-1771",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149887"
}

@inproceedings{bb153966,
  AUTHOR    = "Peebles, W. and Xie, S.",
  TITLE     = "Scalable Diffusion Models with Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "4172-4182",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149888"
}

@inproceedings{bb153967,
  AUTHOR    = "Mentzer, F. and Agustson, E. and Tschannen, M.",
  TITLE     = "M2T: Masking Transformers Twice for Faster Decoding",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5317-5326",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149889"
}

@inproceedings{bb153968,
  AUTHOR    = "Xiao, H. and Zheng, W.Z. and Zhu, Z. and Zhou, J. and Lu, J.W.",
  TITLE     = "Token-Label Alignment for Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5472-5481",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149890"
}

@inproceedings{bb153969,
  AUTHOR    = "Yu, R.Y. and Wang, Z.N. and Wang, Y.H. and Li, K. and Liu, C. and Duan, H.Y. and Ji, X.Y. and Chen, J.",
  TITLE     = "LaPE: Layer-adaptive Position Embedding for Vision Transformers with Independent Layer Normalization",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5863-5873",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149891"
}

@inproceedings{bb153970,
  AUTHOR    = "Roy, A. and Verma, V.K. and Voonna, S. and Ghosh, K. and Ghosh, S. and Das, A.",
  TITLE     = "Exemplar-Free Continual Transformer with Convolutions",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5874-5884",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149892"
}

@inproceedings{bb153971,
  AUTHOR    = "Xu, Y.X. and Li, C. and Li, D. and Sheng, X. and Jiang, F. and Tian, L. and Sirasao, A.",
  TITLE     = "FDViT: Improve the Hierarchical Architecture of Vision Transformer",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5927-5937",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149893"
}

@inproceedings{bb153972,
  AUTHOR    = "Chen, Y.J. and Liu, H.M. and Yin, H.R. and Fan, B.",
  TITLE     = "Building Vision Transformers with Hierarchy Aware Feature Aggregation",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5885-5895",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149894"
}

@inproceedings{bb153973,
  AUTHOR    = "Quetu, V. and Milovanovic, M. and Tartaglione, E.",
  TITLE     = "Sparse Double Descent in Vision Transformers: Real or Phantom Threat?",
  BOOKTITLE = CIAP23,
  YEAR      = "2023",
  PAGES     = "II: 490-502",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149895"
}

@inproceedings{bb153974,
  AUTHOR    = "Ak, K.E. and Lee, G.G. and Xu, Y. and Shen, M.W.",
  TITLE     = "Leveraging Efficient Training and Feature Fusion in Transformers for Multimodal Classification",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "1420-1424",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149896"
}

@inproceedings{bb153975,
  AUTHOR    = "Popovic, N. and Paudel, D.P. and Probst, T. and Van Gool, L.J.",
  TITLE     = "Token-Consistent Dropout For Calibrated Vision Transformers",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "1030-1034",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149897"
}

@inproceedings{bb153976,
  AUTHOR    = "Sajjadi, M.S.M. and Mahendran, A. and Kipf, T. and Pot, E. and Duckworth, D. and Lucic, M. and Greff, K.",
  TITLE     = "RUST: Latent Neural Scene Representations from Unposed Imagery",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "17297-17306",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149898"
}

@inproceedings{bb153977,
  AUTHOR    = "Bowman, B. and Achille, A. and Zancato, L. and Trager, M. and Perera, P. and Paolini, G. and Soatto, S.",
  TITLE     = "{\`A}-la-carte Prompt Tuning (APT): Combining Distinct Data Via Composable Prompting",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14984-14993",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149899"
}

@inproceedings{bb153978,
  AUTHOR    = "Nakhli, R. and Moghadam, P.A. and Mi, H.Y. and Farahani, H. and Baras, A. and Gilks, B. and Bashashati, A.",
  TITLE     = "Sparse Multi-Modal Graph Transformer with Shared-Context Processing for Representation Learning of Giga-pixel Images",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "11547-11557",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149900"
}

@inproceedings{bb153979,
  AUTHOR    = "Gartner, E. and Metz, L. and Andriluka, M. and Freeman, C.D. and Sminchisescu, C.",
  TITLE     = "Transformer-Based Learned Optimization",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "11970-11979",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149901"
}

@inproceedings{bb153980,
  AUTHOR    = "Li, J.C. and Hassani, A. and Walton, S. and Shi, H.",
  TITLE     = "ConvMLP: Hierarchical Convolutional MLPs for Vision",
  BOOKTITLE = WFM23,
  YEAR      = "2023",
  PAGES     = "6307-6316",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149902"
}

@inproceedings{bb153981,
  AUTHOR    = "Walmer, M. and Suri, S. and Gupta, K. and Shrivastava, A.",
  TITLE     = "Teaching Matters: Investigating the Role of Supervision in Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "7486-7496",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149903"
}

@inproceedings{bb153982,
  AUTHOR    = "Wang, S.G. and Xie, T. and Cheng, J. and Zhang, X.C. and Liu, H.J.",
  TITLE     = "MDL-NAS: A Joint Multi-domain Learning Framework for Vision Transformer",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "20094-20104",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149904"
}

@inproceedings{bb153983,
  AUTHOR    = "Ren, S. and Wei, F.Y. and Zhang, Z. and Hu, H.",
  TITLE     = "TinyMIM: An Empirical Study of Distilling MIM Pre-trained Models",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "3687-3697",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149905"
}

@inproceedings{bb153984,
  AUTHOR    = "He, J.F. and Gao, Y. and Zhang, T.Z. and Zhang, Z. and Wu, F.",
  TITLE     = "D2Former: Jointly Learning Hierarchical Detectors and Contextual Descriptors via Agent-Based Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2904-2914",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149906"
}

@inproceedings{bb153985,
  AUTHOR    = "Chen, X.Y. and Liu, Z.J. and Tang, H.T. and Yi, L. and Zhao, H. and Han, S.",
  TITLE     = "SparseViT: Revisiting Activation Sparsity for Efficient High-Resolution Vision Transformer",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2061-2070",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149907"
}

@inproceedings{bb153986,
  AUTHOR    = "Wei, S.Y. and Ye, T.Z. and Zhang, S. and Tang, Y. and Liang, J.J.",
  TITLE     = "Joint Token Pruning and Squeezing Towards More Aggressive Compression of Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2092-2101",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149908"
}

@inproceedings{bb153987,
  AUTHOR    = "Lin, Y.B. and Bertasius, G.",
  TITLE     = "Siamese Vision Transformers are Scalable Audio-Visual Learners",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XIV: 303-321",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149909"
}

@inproceedings{bb153988,
  AUTHOR    = "Lin, Y.B. and Sung, Y.L. and Lei, J. and Bansal, M. and Bertasius, G.",
  TITLE     = "Vision Transformers are Parameter-Efficient Audio-Visual Learners",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2299-2309",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149910"
}

@inproceedings{bb153989,
  AUTHOR    = "Das, R. and Dukler, Y. and Ravichandran, A. and Swaminathan, A.",
  TITLE     = "Learning Expressive Prompting With Residuals for Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "3366-3377",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149911"
}

@inproceedings{bb153990,
  AUTHOR    = "Zheng, M.X. and Lou, Q. and Jiang, L.",
  TITLE     = "TrojViT: Trojan Insertion in Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "4025-4034",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149912"
}

@inproceedings{bb153991,
  AUTHOR    = "Li, Y.X. and Xu, C.",
  TITLE     = "Trade-off between Robustness and Accuracy of Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "7558-7568",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149913"
}

@inproceedings{bb153992,
  AUTHOR    = "Tarasiou, M. and Chavez, E. and Zafeiriou, S.",
  TITLE     = "ViTs for SITS: Vision Transformers for Satellite Image Time Series",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "10418-10428",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149914"
}

@inproceedings{bb153993,
  AUTHOR    = "Yu, Z.Z. and Wu, S. and Fu, Y.G. and Zhang, S. and Lin, Y.Y.C.",
  TITLE     = "Hint-Aug: Drawing Hints from Foundation Vision Transformers towards Boosted Few-shot Parameter-Efficient Tuning",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "11102-11112",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149915"
}

@inproceedings{bb153994,
  AUTHOR    = "Hou, J. and Dai, X.L. and He, Z.J. and Dai, A. and Nie{\ss}ner, M.",
  TITLE     = "Mask3D: Pretraining 2D Vision Transformers by Learning Masked 3D Priors",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "13510-13519",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149916"
}

@inproceedings{bb153995,
  AUTHOR    = "Xu, Z.Z. and Liu, R.K. and Yang, S. and Chai, Z.H. and Yuan, C.",
  TITLE     = "Learning Imbalanced Data with Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "15793-15803",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149917"
}

@inproceedings{bb153996,
  AUTHOR    = "Zhang, J.P. and Huang, Y.Z. and Wu, W.B. and Lyu, M.R.",
  TITLE     = "Transferable Adversarial Attacks on Vision Transformers with Token Gradient Regularization",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "16415-16424",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149918"
}

@inproceedings{bb153997,
  AUTHOR    = "Yang, H. and Yin, H.X. and Shen, M. and Molchanov, P. and Li, H. and Kautz, J.",
  TITLE     = "Global Vision Transformer Pruning with Hessian-Aware Saliency",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "18547-18557",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149919"
}

@inproceedings{bb153998,
  AUTHOR    = "Nakamura, R. and Kataoka, H. and Takashima, S. and Noriega, E.J.M. and Yokota, R. and Inoue, N.",
  TITLE     = "Pre-training Vision Transformers with Very Limited Synthesized Images",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "20303-20312",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149920"
}

@inproceedings{bb153999,
  AUTHOR    = "Takashima, S. and Hayamizu, R. and Inoue, N. and Kataoka, H. and Yokota, R.",
  TITLE     = "Visual Atoms: Pre-Training Vision Transformers with Sinusoidal Waves",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "18579-18588",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT149921"
}