% Conference-paper records bb132300 through bb132399, one entry per record
% and one field per line.
%
% BOOKTITLE values written without quotes (WACV23, CRV22, ECCV22, ICIP22,
% CVPR22, EVW22, EarthVision22) are string macros. Their definitions are not
% visible in this part of the file -- presumably declared elsewhere; confirm
% they are loaded before these records, otherwise the venue comes out empty.
%
% PAGES values such as "XXIV:727--744" carry a roman-numeral prefix before the
% page range -- presumably the proceedings volume; TODO confirm.
% Page ranges use the double hyphen so that styles typeset an en-dash.
%
% BIBSOURCE is a non-standard field holding the visionbib.com anchor recorded
% for each entry; standard styles ignore it.

@inproceedings{bb132300,
  AUTHOR    = "Chen, C. and Zhang, J. and Yang, K. and Peng, K. and Stiefelhagen, R.",
  TITLE     = "Trans4Map: Revisiting Holistic Bird's-Eye-View Mapping from Egocentric Images to Allocentric Semantics with Vision Transformers",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "4002--4011",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128389"
}

@inproceedings{bb132301,
  AUTHOR    = "Lan, H. and Wang, X. and Shen, H. and Liang, P. and Wei, X.",
  TITLE     = "Couplformer: Rethinking Vision Transformer with Coupling Attention",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "6464--6473",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128390"
}

@inproceedings{bb132302,
  AUTHOR    = "Marin, D. and Chang, J.H.R. and Ranjan, A. and Prabhu, A. and Rastegari, M. and Tuzel, O.",
  TITLE     = "Token Pooling in Vision Transformers for Image Classification",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "12--21",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128391"
}

@inproceedings{bb132303,
  AUTHOR    = "Song, C.H. and Yoon, J.Y. and Choi, S. and Avrithis, Y.",
  TITLE     = "Boosting vision transformers for image retrieval",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "107--117",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128392"
}

@inproceedings{bb132304,
  AUTHOR    = "Yang, J. and Liu, J.J. and Xu, N. and Huang, J.Z.",
  TITLE     = "TVT: Transferable Vision Transformer for Unsupervised Domain Adaptation",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "520--530",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128393"
}

@inproceedings{bb132305,
  AUTHOR    = "Lin, K.E. and Yen Chen, L. and Lai, W.S. and Lin, T.Y. and Shih, Y.C. and Ramamoorthi, R.",
  TITLE     = "Vision Transformer for NeRF-Based View Synthesis from a Single Input Image",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "806--815",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128394"
}

@inproceedings{bb132306,
  AUTHOR    = "Saavedra Ruiz, M. and Morin, S. and Paull, L.",
  TITLE     = "Monocular Robot Navigation with Self-Supervised Pretrained Vision Transformers",
  BOOKTITLE = CRV22,
  YEAR      = "2022",
  PAGES     = "197--204",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128395"
}

% NOTE(review): the next two records give BOOKTITLE as the quoted literal
% "ICPR22", while every other venue here is an unquoted macro name. Left as
% found; confirm whether a macro reference was intended.

@inproceedings{bb132307,
  AUTHOR    = "Debnath, B. and Po, O. and Chowdhury, F.A. and Chakradhar, S.",
  TITLE     = "Cosine Similarity based Few-Shot Video Classifier with Attention-based Aggregation",
  BOOKTITLE = "ICPR22",
  YEAR      = "2022",
  PAGES     = "1273--1279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128396"
}

@inproceedings{bb132308,
  AUTHOR    = "Patel, K. and Bur, A.M. and Li, F.J. and Wang, G.H.",
  TITLE     = "Aggregating Global Features into Local Vision Transformer",
  BOOKTITLE = "ICPR22",
  YEAR      = "2022",
  PAGES     = "1141--1147",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128397"
}

@inproceedings{bb132309,
  AUTHOR    = "Shen, Z.Q. and Liu, Z. and Xing, E.",
  TITLE     = "Sliced Recursive Transformer",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:727--744",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128398"
}

@inproceedings{bb132310,
  AUTHOR    = "Shao, Y. and Loy, C.C. and Dai, B.",
  TITLE     = "Transformer with Implicit Edges for Particle-Based Physics Simulation",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XIX:549--564",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128399"
}

@inproceedings{bb132311,
  AUTHOR    = "Wang, W. and Zhang, J. and Cao, Y. and Shen, Y.L. and Tao, D.C.",
  TITLE     = "Towards Data-Efficient Detection Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "IX:88--105",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128400"
}

@inproceedings{bb132312,
  AUTHOR    = "Mari, C.R. and Gonzalez, D.V. and Bou Balust, E.",
  TITLE     = "Multi-Scale Transformer-Based Feature Combination for Image Retrieval",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "3166--3170",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128401"
}

@inproceedings{bb132313,
  AUTHOR    = "Lorenzana, M.B. and Engstrom, C. and Chandra, S.S.",
  TITLE     = "Transformer Compressed Sensing Via Global Image Tokens",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "3011--3015",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128402"
}

@inproceedings{bb132314,
  AUTHOR    = "Furukawa, R. and Hotta, K.",
  TITLE     = "Local Embedding for Axial Attention",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "2586--2590",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128403"
}

@inproceedings{bb132315,
  AUTHOR    = "Lu, X.Y. and Du, S.",
  TITLE     = "NCTR: Neighborhood Consensus Transformer for Feature Matching",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "2726--2730",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128404"
}

@inproceedings{bb132316,
  AUTHOR    = "Jeny, A.A. and Junayed, M.S. and Islam, M.B.",
  TITLE     = "An Efficient End-To-End Image Compression Transformer",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "1786--1790",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128405"
}

@inproceedings{bb132317,
  AUTHOR    = "Shang, J.H. and Kahatapitiya, K. and Li, X. and Ryoo, M.S.",
  TITLE     = "StARformer: Transformer with State-Action-Reward Representations for Visual Reinforcement Learning",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIX:462--479",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128406"
}

@inproceedings{bb132318,
  AUTHOR    = "Kakogeorgiou, I. and Gidaris, S. and Psomas, B. and Avrithis, Y. and Bursuc, A. and Karantzalos, K. and Komodakis, N.",
  TITLE     = "What to Hide from Your Students: Attention-Guided Masked Image Modeling",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXX:300--318",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128407"
}

@inproceedings{bb132319,
  AUTHOR    = "Bai, J.W. and Yuan, L. and Xia, S.T. and Yan, S.C. and Li, Z.F. and Liu, W.",
  TITLE     = "Improving Vision Transformers by Revisiting High-Frequency Components",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:1--18",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128408"
}

@inproceedings{bb132320,
  AUTHOR    = "Ding, M.Y. and Xiao, B. and Codella, N. and Luo, P. and Wang, J.D. and Yuan, L.",
  TITLE     = "DaViT: Dual Attention Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:74--92",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128409"
}

@inproceedings{bb132321,
  AUTHOR    = "Li, K. and Yu, R. and Wang, Z. and Yuan, L. and Song, G. and Chen, J.",
  TITLE     = "Locality Guidance for Improving Vision Transformers on Tiny Datasets",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:110--127",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128410"
}

@inproceedings{bb132322,
  AUTHOR    = "Wang, P. and Wang, X. and Wang, F. and Lin, M. and Chang, S. and Li, H. and Jin, R.",
  TITLE     = "KVT: k-NN Attention for Boosting Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:285--302",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128411"
}

@inproceedings{bb132323,
  AUTHOR    = "Tu, Z.Z. and Talebi, H. and Zhang, H. and Yang, F. and Milanfar, P. and Bovik, A.C. and Li, Y.",
  TITLE     = "MaxViT: Multi-axis Vision Transformer",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:459--479",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128412"
}

@inproceedings{bb132324,
  AUTHOR    = "Yang, R. and Ma, H.L. and Wu, J. and Tang, Y.S. and Xiao, X.F. and Zheng, M. and Li, X.",
  TITLE     = "ScalableViT: Rethinking the Context-Oriented Generalization of Vision Transformer",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:480--496",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128413"
}

@inproceedings{bb132325,
  AUTHOR    = "Touvron, H. and Cord, M. and El Nouby, A. and Verbeek, J. and Jegou, H.",
  TITLE     = "Three Things Everyone Should Know About Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:497--515",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128414"
}

@inproceedings{bb132326,
  AUTHOR    = "Touvron, H. and Cord, M. and Jegou, H.",
  TITLE     = "DeiT III: Revenge of the ViT",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:516--533",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128415"
}

@inproceedings{bb132327,
  AUTHOR    = "Li, Y.H. and Mao, H.Z. and Girshick, R. and He, K.M.",
  TITLE     = "Exploring Plain Vision Transformer Backbones for Object Detection",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "IX:280--296",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128416"
}

@inproceedings{bb132328,
  AUTHOR    = "Yu, Q.H. and Wang, H.Y. and Qiao, S.Y. and Collins, M. and Zhu, Y.K. and Adam, H. and Yuille, A.L. and Chen, L.C.",
  TITLE     = "k-means Mask Transformer",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIX:288--307",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128417"
}

@inproceedings{bb132329,
  AUTHOR    = "Lezama, J. and Chang, H. and Jiang, L. and Essa, I.",
  TITLE     = "Improved Masked Image Generation with Token-Critic",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIII:70--86",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128418"
}

@inproceedings{bb132330,
  AUTHOR    = "Rao, Y.M. and Zhao, W. and Zhou, J. and Lu, J.W.",
  TITLE     = "AMixer: Adaptive Weight Mixing for Self-Attention Free Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXI:50--67",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128419"
}

@inproceedings{bb132331,
  AUTHOR    = "Pham, K. and Kafle, K. and Lin, Z. and Ding, Z.H. and Cohen, S. and Tran, Q. and Shrivastava, A.",
  TITLE     = "Improving Closed and Open-Vocabulary Attribute Prediction Using Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXV:201--219",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128420"
}

@inproceedings{bb132332,
  AUTHOR    = "Yu, W.X. and Zhang, H. and Lan, T.X. and Hu, Y.C. and Yin, D.",
  TITLE     = "CBPT: A New Backbone for Enhancing Information Transmission of Vision Transformers",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "156--160",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128421"
}

@inproceedings{bb132333,
  AUTHOR    = "Takeda, M. and Yanai, K.",
  TITLE     = "Continual Learning in Vision Transformer",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "616--620",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128422"
}

@inproceedings{bb132334,
  AUTHOR    = "Zhou, W.L. and Kamata, S.I. and Luo, Z. and Xue, X.",
  TITLE     = "Rethinking Unified Spectral-Spatial-Based Hyperspectral Image Classification Under 3D Configuration of Vision Transformer",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "711--715",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128423"
}

@inproceedings{bb132335,
  AUTHOR    = "Li, A. and Jiao, J. and Li, N. and Qi, W. and Xu, W. and Pang, M.",
  TITLE     = "Conmw Transformer: A General Vision Transformer Backbone With Merged-Window Attention",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "1551--1555",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128424"
}

@inproceedings{bb132336,
  AUTHOR    = "Li, J. and Zhang, H. and Xie, C.",
  TITLE     = "ViP: Unified Certified Detection and Recovery for Patch Attack with Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXV:573--587",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128425"
}

@inproceedings{bb132337,
  AUTHOR    = "Zhang, Q.M. and Xu, Y.F. and Zhang, J. and Tao, D.C.",
  TITLE     = "VSA: Learning Varied-Size Window Attention in Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXV:466--483",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128426"
}

@inproceedings{bb132338,
  AUTHOR    = "Cao, Y.H. and Yu, H. and Wu, J.X.",
  TITLE     = "Training Vision Transformers with only 2040 Images",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXV:220--237",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128427"
}

@inproceedings{bb132339,
  AUTHOR    = "Wang, C. and Xu, H.M. and Zhang, X. and Wang, L. and Zheng, Z. and Liu, H.F.",
  TITLE     = "Convolutional Embedding Makes Hierarchical Vision Transformer Stronger",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XX:739--756",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128428"
}

@inproceedings{bb132340,
  AUTHOR    = "Wu, B. and Gu, J.D. and Li, Z.F. and Cai, D. and He, X.F. and Liu, W.",
  TITLE     = "Towards Efficient Adversarial Training on Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XIII:307--325",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128429"
}

@inproceedings{bb132341,
  AUTHOR    = "Gu, J.D. and Tresp, V. and Qin, Y.",
  TITLE     = "Are Vision Transformers Robust to Patch Perturbations?",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XII:404--421",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128430"
}

@inproceedings{bb132342,
  AUTHOR    = "Zong, Z. and Li, K. and Song, G. and Wang, Y. and Qiao, Y. and Leng, B. and Liu, Y.",
  TITLE     = "Self-slimmed Vision Transformer",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XI:432--448",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128431"
}

@inproceedings{bb132343,
  AUTHOR    = "Fayyaz, M. and Koohpayegani, S.A. and Jafari, F.R. and Sengupta, S. and Joze, H.R.V. and Sommerlade, E. and Pirsiavash, H. and Gall, J.",
  TITLE     = "Adaptive Token Sampling for Efficient Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XI:396--414",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128432"
}

@inproceedings{bb132344,
  AUTHOR    = "Li, Z.K. and Ma, L.P. and Chen, M.J. and Xiao, J.R. and Gu, Q.Y.",
  TITLE     = "Patch Similarity Aware Data-Free Quantization for Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XI:154--170",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128433"
}

@inproceedings{bb132345,
  AUTHOR    = "Weng, Z.J. and Yang, X.T. and Li, A. and Wu, Z.X. and Jiang, Y.G.",
  TITLE     = "Semi-supervised Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXX:605--620",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128434"
}

@inproceedings{bb132346,
  AUTHOR    = "Mallick, R. and Benois Pineau, J. and Zemmari, A.",
  TITLE     = "I Saw: A Self-Attention Weighted Method for Explanation of Visual Transformers",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "3271--3275",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128435"
}

@inproceedings{bb132347,
  AUTHOR    = "Su, T. and Ye, S. and Song, C.Q. and Cheng, J.",
  TITLE     = "Mask-Vit: an Object Mask Embedding in Vision Transformer for Fine-Grained Visual Classification",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "1626--1630",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128436"
}

@inproceedings{bb132348,
  AUTHOR    = "Gai, L. and Chen, W. and Gao, R. and Chen, Y.W. and Qiao, X.",
  TITLE     = "Using Vision Transformers in 3-D Medical Image Classifications",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "696--700",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128437"
}

@inproceedings{bb132349,
  AUTHOR    = "Wu, K. and Zhang, J. and Peng, H. and Liu, M. and Xiao, B. and Fu, J.L. and Yuan, L.",
  TITLE     = "TinyViT: Fast Pretraining Distillation for Small Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXI:68--85",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128438"
}

@inproceedings{bb132350,
  AUTHOR    = "Gao, L. and Nie, D. and Li, B. and Ren, X.F.",
  TITLE     = "Doubly-Fused ViT: Fuse Information from Vision Transformer Doubly with Local Representation",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIII:744--761",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128439"
}

@inproceedings{bb132351,
  AUTHOR    = "Yao, T. and Pan, Y. and Li, Y. and Ngo, C.W. and Mei, T.",
  TITLE     = "Wave-ViT: Unifying Wavelet and Transformers for Visual Representation Learning",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXV:328--345",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128440"
}

@inproceedings{bb132352,
  AUTHOR    = "Yuan, Z.H. and Xue, C.H. and Chen, Y.Q. and Wu, Q. and Sun, G.",
  TITLE     = "PTQ4ViT: Post-training Quantization for Vision Transformers with Twin Uniform Quantization",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XII:191--207",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128441"
}

@inproceedings{bb132353,
  AUTHOR    = "Kong, Z.L. and Dong, P.Y. and Ma, X.L. and Meng, X. and Niu, W. and Sun, M.S. and Shen, X. and Yuan, G. and Ren, B. and Tang, H. and Qin, M. and Wang, Y.Z.",
  TITLE     = "SPViT: Enabling Faster Vision Transformers via Latency-Aware Soft Token Pruning",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XI:620--640",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128442"
}

@inproceedings{bb132354,
  AUTHOR    = "Pan, J. and Bulat, A. and Tan, F. and Zhu, X.T. and Dudziak, L. and Li, H.S. and Tzimiropoulos, G. and Martinez, B.",
  TITLE     = "EdgeViTs: Competing Light-Weight CNNs on Mobile Devices with Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XI:294--311",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128443"
}

@inproceedings{bb132355,
  AUTHOR    = "Xu, R.S. and Xiang, H. and Tu, Z.Z. and Xia, X. and Yang, M.H. and Ma, J.Q.",
  TITLE     = "V2X-ViT: Vehicle-to-Everything Cooperative Perception with Vision Transformer",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIX:107--124",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128444"
}

@inproceedings{bb132356,
  AUTHOR    = "Liu, Y. and Mai, S.Q. and Chen, X.N. and Hsieh, C.J. and You, Y.",
  TITLE     = "Towards Efficient and Scalable Sharpness-Aware Minimization",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12350--12360",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128445"
}

@inproceedings{bb132357,
  AUTHOR    = "Ren, P.Z. and Li, C. and Wang, G. and Xiao, Y. and Du, Q. and Liang, X.D. and Chang, X.J.",
  TITLE     = "Beyond Fixation: Dynamic Window Visual Transformer",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "11977--11987",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128446"
}

@inproceedings{bb132358,
  AUTHOR    = "Liu, Z. and Hu, H. and Lin, Y.T. and Yao, Z.L. and Xie, Z.D. and Wei, Y.X. and Ning, J. and Cao, Y. and Zhang, Z. and Dong, L. and Wei, F. and Guo, B.",
  TITLE     = "Swin Transformer V2: Scaling Up Capacity and Resolution",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "11999--12009",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128447"
}

@inproceedings{bb132359,
  AUTHOR    = "Bhattacharjee, D. and Zhang, T. and Susstrunk, S. and Salzmann, M.",
  TITLE     = "MuIT: An End-to-End Multitask Learning Transformer",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12021--12031",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128448"
}

@inproceedings{bb132360,
  AUTHOR    = "Fang, J. and Xie, L.X. and Wang, X.G. and Zhang, X.P. and Liu, W.Y. and Tian, Q.",
  TITLE     = "MSG-Transformer: Exchanging Local Spatial Information by Manipulating Messenger Tokens",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12053--12062",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128449"
}

@inproceedings{bb132361,
  AUTHOR    = "Sandler, M. and Zhmoginov, A. and Vladymyrov, M. and Jackson, A.",
  TITLE     = "Fine-tuning Image Transformers using Learnable Memory",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12145--12154",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128450"
}

@inproceedings{bb132362,
  AUTHOR    = "Yu, X. and Tang, L. and Rao, Y.M. and Huang, T.J. and Zhou, J. and Lu, J.W.",
  TITLE     = "Point-BERT: Pre-training 3D Point Cloud Transformers with Masked Point Modeling",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "19291--19300",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128451"
}

@inproceedings{bb132363,
  AUTHOR    = "Park, C. and Jeong, Y. and Cho, M. and Park, J.",
  TITLE     = "Fast Point Transformer",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "16928--16937",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128452"
}

@inproceedings{bb132364,
  AUTHOR    = "Ren, S. and Zhou, D. and He, S.F. and Feng, J.S. and Wang, X.C.",
  TITLE     = "Shunted Self-Attention via Multi-Scale Token Aggregation",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "10843--10852",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128453"
}

@inproceedings{bb132365,
  AUTHOR    = "Zeng, W. and Jin, S. and Liu, W.T. and Qian, C. and Luo, P. and Ouyang, W.L. and Wang, X.G.",
  TITLE     = "Not All Tokens Are Equal: Human-centric Visual Analysis via Token Clustering Transformer",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "11091--11101",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128454"
}

@inproceedings{bb132366,
  AUTHOR    = "Yu, W.H. and Luo, M. and Zhou, P. and Si, C.Y. and Zhou, Y.C. and Wang, X.C. and Feng, J.S. and Yan, S.C.",
  TITLE     = "MetaFormer is Actually What You Need for Vision",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "10809--10819",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128455"
}

@inproceedings{bb132367,
  AUTHOR    = "Xie, Z.D. and Zhang, Z. and Cao, Y. and Lin, Y.T. and Bao, J.M. and Yao, Z.L. and Dai, Q. and Hu, H.",
  TITLE     = "SimMIM: a Simple Framework for Masked Image Modeling",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "9643--9653",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128456"
}

@inproceedings{bb132368,
  AUTHOR    = "Song, Z. and Yu, J.Q. and Chen, Y.P.P. and Yang, W.",
  TITLE     = "Transformer Tracking with Cyclic Shifting Window Attention",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "8781--8790",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128457"
}

@inproceedings{bb132369,
  AUTHOR    = "Tu, Z.Z. and Talebi, H. and Zhang, H. and Yang, F. and Milanfar, P. and Bovik, A. and Li, Y.X.",
  TITLE     = "MAXIM: Multi-Axis MLP for Image Processing",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "5759--5770",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128458"
}

@inproceedings{bb132370,
  AUTHOR    = "Yun, S. and Lee, H. and Kim, J. and Shin, J.",
  TITLE     = "Patch-level Representation Learning for Self-supervised Vision Transformers",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "8344--8353",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128459"
}

@inproceedings{bb132371,
  AUTHOR    = "Hou, Z.J. and Kung, S.Y.",
  TITLE     = "Multi-Dimensional Vision Transformer Compression via Dependency Guided Gaussian Process Search",
  BOOKTITLE = EVW22,
  YEAR      = "2022",
  PAGES     = "3668--3677",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128460"
}

@inproceedings{bb132372,
  AUTHOR    = "Zhang, G.J. and Luo, Z.P. and Yu, Y.C. and Cui, K. and Lu, S.J.",
  TITLE     = "Accelerating DETR Convergence via Semantic-Aligned Matching",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "939--948",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128461"
}

@inproceedings{bb132373,
  AUTHOR    = "Gupta, A. and Narayan, S. and Joseph, K.J. and Khan, S. and Khan, F.S. and Shah, M.",
  TITLE     = "OW-DETR: Open-world Detection Transformer",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "9225--9234",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128462"
}

@inproceedings{bb132374,
  AUTHOR    = "Lou, Q. and Hsu, Y.C. and Uzkent, B. and Hua, T. and Shen, Y. and Jin, H.X.",
  TITLE     = "Lite-MDETR: A Lightweight Multi-Modal Detector",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12196--12205",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128463"
}

@inproceedings{bb132375,
  AUTHOR    = "Li, F. and Zhang, H. and Liu, S. and Guo, J. and Ni, L.M. and Zhang, L.",
  TITLE     = "DN-DETR: Accelerate DETR Training by Introducing Query DeNoising",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "13609--13617",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128464"
}

@inproceedings{bb132376,
  AUTHOR    = "La Bonte, T. and Song, Y. and Wang, X. and Vineet, V. and Joshi, N.",
  TITLE     = "Scaling Novel Object Detection with Weakly Supervised Detection Transformers",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "85--96",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128465"
}

@inproceedings{bb132377,
  AUTHOR    = "Bar, A. and Wang, X. and Kantorov, V. and Reed, C.J. and Herzig, R. and Chechik, G. and Rohrbach, A. and Darrell, T.J. and Globerson, A.",
  TITLE     = "DETReg: Unsupervised Pretraining with Region Priors for Object Detection",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "14585--14595",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128466"
}

@inproceedings{bb132378,
  AUTHOR    = "Salman, H. and Jain, S. and Wong, E. and Madry, A.",
  TITLE     = "Certified Patch Robustness via Smoothed Vision Transformers",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "15116--15126",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128467"
}

@inproceedings{bb132379,
  AUTHOR    = "Wang, Y.K. and Chen, X.H. and Cao, L. and Huang, W.B. and Sun, F.C. and Wang, Y.H.",
  TITLE     = "Multimodal Token Fusion for Vision Transformers",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12176--12185",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128468"
}

@inproceedings{bb132380,
  AUTHOR    = "Tang, Y. and Han, K. and Wang, Y.H. and Xu, C. and Guo, J.Y. and Xu, C. and Tao, D.C.",
  TITLE     = "Patch Slimming for Efficient Vision Transformers",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12155--12164",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128469"
}

@inproceedings{bb132381,
  AUTHOR    = "Zhang, J. and Peng, H. and Wu, K. and Liu, M. and Xiao, B. and Fu, J.L. and Yuan, L.",
  TITLE     = "MiniViT: Compressing Vision Transformers with Weight Multiplexing",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12135--12144",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128470"
}

@inproceedings{bb132382,
  AUTHOR    = "Chen, J.N. and Sun, S. and He, J. and Torr, P.H.S. and Yuille, A.L. and Bai, S.",
  TITLE     = "TransMix: Attend to Mix for Vision Transformers",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12125--12134",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128471"
}

@inproceedings{bb132383,
  AUTHOR    = "Dong, X.Y. and Bao, J. and Chen, D.D. and Zhang, W.M. and Yu, N.H. and Yuan, L. and Chen, D. and Guo, B.",
  TITLE     = "CSWin Transformer: A General Vision Transformer Backbone with Cross-Shaped Windows",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12114--12124",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128472"
}

@inproceedings{bb132384,
  AUTHOR    = "Liu, H. and Jiang, X.H. and Li, X. and Bao, Z.M. and Jiang, D.Q. and Ren, B.",
  TITLE     = "NomMer: Nominate Synergistic Context in Vision Transformer for Visual Recognition",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12063--12072",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128473"
}

@inproceedings{bb132385,
  AUTHOR    = "Chen, T.L. and Zhang, Z.Y. and Cheng, Y. and Awadallah, A. and Wang, Z.Y.",
  TITLE     = "The Principle of Diversity: Training Stronger Vision Transformers Calls for Reducing All Levels of Redundancy",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "12010--12020",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128474"
}

@inproceedings{bb132386,
  AUTHOR    = "Yang, C. and Wang, Y. and Zhang, J.M. and Zhang, H. and Wei, Z.J. and Lin, Z. and Yuille, A.L.",
  TITLE     = "Lite Vision Transformer with Enhanced Self-Attention",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "11988--11998",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128475"
}

@inproceedings{bb132387,
  AUTHOR    = "Yin, H.X. and Vahdat, A. and Alvarez, J.M. and Mallya, A. and Kautz, J. and Molchanov, P.",
  TITLE     = "A-ViT: Adaptive Tokens for Efficient Vision Transformer",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "10799--10808",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128476"
}

@inproceedings{bb132388,
  AUTHOR    = "Lu, J.H. and Zhang, X.S. and Zhao, T.L. and He, X.Y. and Cheng, J.",
  TITLE     = "APRIL: Finding the Achilles' Heel on Privacy for Vision Transformers",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "10041--10050",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128477"
}

@inproceedings{bb132389,
  AUTHOR    = "Hatamizadeh, A. and Yin, H.X. and Roth, H. and Li, W.Q. and Kautz, J. and Xu, D. and Molchanov, P.",
  TITLE     = "GradViT: Gradient Inversion of Vision Transformers",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "10011--10020",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128478"
}

@inproceedings{bb132390,
  AUTHOR    = "Zhang, H. and Duan, J.R. and Xue, M.Q. and Song, J. and Sun, L. and Song, M.L.",
  TITLE     = "Bootstrapping ViTs: Towards Liberating Vision Transformers from Pre-training",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "8934--8943",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128479"
}

@inproceedings{bb132391,
  AUTHOR    = "Chavan, A. and Shen, Z.Q. and Liu, Z. and Liu, Z. and Cheng, K.T. and Xing, E.",
  TITLE     = "Vision Transformer Slimming: Multi-Dimension Searching in Continuous Optimization Space",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "4921--4931",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128480"
}

@inproceedings{bb132392,
  AUTHOR    = "Xia, Z.F. and Pan, X. and Song, S. and Li, L.E. and Huang, G.",
  TITLE     = "Vision Transformer with Deformable Attention",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "4784--4793",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128481"
}

@inproceedings{bb132393,
  AUTHOR    = "Hong, W.X. and Lao, J.W. and Ren, W. and Wang, J. and Chen, J.D. and Chu, W.",
  TITLE     = "Training Object Detectors from Scratch: An Empirical Study in the Era of Vision Transformer",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "4652--4661",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128482"
}

@inproceedings{bb132394,
  AUTHOR    = "Chen, Z.Y. and Li, B. and Wu, S. and Xu, J.H. and Ding, S.H. and Zhang, W.Q.",
  TITLE     = "Shape Matters: Deformable Patch Attack",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "IV:529--548",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128483"
}

@inproceedings{bb132395,
  AUTHOR    = "Chen, Z.Y. and Li, B. and Xu, J.H. and Wu, S. and Ding, S.H. and Zhang, W.Q.",
  TITLE     = "Towards Practical Certifiable Patch Defense with Vision Transformer",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "15127--15137",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128484"
}

@inproceedings{bb132396,
  AUTHOR    = "Chen, R.J. and Chen, C. and Li, Y.C. and Chen, T.Y. and Trister, A.D. and Krishnan, R.G. and Mahmood, F.",
  TITLE     = "Scaling Vision Transformers to Gigapixel Images via Hierarchical Self-Supervised Learning",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "16123--16134",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128485"
}

@inproceedings{bb132397,
  AUTHOR    = "Yang, Z. and Wang, J.Q. and Tang, Y.S. and Chen, K. and Zhao, H.S. and Torr, P.H.S.",
  TITLE     = "LAVT: Language-Aware Vision Transformer for Referring Image Segmentation",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "18134--18144",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128486"
}

@inproceedings{bb132398,
  AUTHOR    = "Scheibenreif, L. and Hanna, J. and Mommert, M. and Borth, D.",
  TITLE     = "Self-supervised Vision Transformers for Land-cover Segmentation and Classification",
  BOOKTITLE = EarthVision22,
  YEAR      = "2022",
  PAGES     = "1421--1430",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128487"
}

@inproceedings{bb132399,
  AUTHOR    = "Zhai, X.H. and Kolesnikov, A. and Houlsby, N. and Beyer, L.",
  TITLE     = "Scaling Vision Transformers",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "1204--1213",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT128488"
}