% Conference-paper entries bb140200 through bb140299. Each BIBSOURCE field
% holds a visionbib.com URL with a per-entry anchor.
% BOOKTITLE values written without quotes (ICCV23, CVPR23, ...) are macro
% references. NOTE(review): their string definitions are not visible in this
% section -- confirm they are declared before these entries are read.
% PAGES values such as "II:490--502" carry a Roman-numeral prefix, presumably
% the proceedings volume -- TODO confirm.

@inproceedings{bb140200,
  AUTHOR    = "Frumkin, N. and Gope, D. and Marculescu, D.",
  TITLE     = "Jumping through Local Minima: Quantization in the Loss Landscape of Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "16932--16942",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136204",
}

@inproceedings{bb140201,
  AUTHOR    = "Li, Z. and Xiao, J. and Yang, L. and Gu, Q.Y.",
  TITLE     = "RepQ-ViT: Scale Reparameterization for Post-Training Quantization of Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "17181--17190",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136205",
}

@inproceedings{bb140202,
  AUTHOR    = "Havtorn, J.D. and Royer, A. and Blankevoort, T. and Bejnordi, B.E.",
  TITLE     = "MSViT: Dynamic Mixed-scale Tokenization for Vision Transformers",
  BOOKTITLE = NIVT23,
  YEAR      = "2023",
  PAGES     = "838--848",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136206",
}

@inproceedings{bb140203,
  AUTHOR    = "Haurum, J.B. and Escalera, S. and Taylor, G.W. and Moeslund, T.B.",
  TITLE     = "Which Tokens to Use? Investigating Token Reduction in Vision Transformers",
  BOOKTITLE = NIVT23,
  YEAR      = "2023",
  PAGES     = "773--783",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136207",
}

@inproceedings{bb140204,
  AUTHOR    = "Wang, X. and Chu, X.J. and Han, C. and Zhang, X.Y.",
  TITLE     = "SCSC: Spatial Cross-scale Convolution Module to Strengthen both CNNs and Transformers",
  BOOKTITLE = NIVT23,
  YEAR      = "2023",
  PAGES     = "731--741",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136208",
}

@inproceedings{bb140205,
  AUTHOR    = "Chen, Y.H. and Weng, Y.C. and Kao, C.H. and Chien, C. and Chiu, W.C. and Peng, W.H.",
  TITLE     = "TransTIC: Transferring Transformer-based Image Compression from Human Perception to Machine Perception",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "23240--23250",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136209",
}

@inproceedings{bb140206,
  AUTHOR    = "Li, Y. and Hu, J. and Wen, Y. and Evangelidis, G. and Salahi, K. and Wang, Y.Z. and Tulyakov, S. and Ren, J.",
  TITLE     = "Rethinking Vision Transformers for MobileNet Size and Speed",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "16843--16854",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136210",
}

@inproceedings{bb140207,
  AUTHOR    = "Nurgazin, M. and Tu, N.A.",
  TITLE     = "A Comparative Study of Vision Transformer Encoders and Few-shot Learning for Medical Image Classification",
  BOOKTITLE = CVAMD23,
  YEAR      = "2023",
  PAGES     = "2505--2513",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136211",
}

@inproceedings{bb140208,
  AUTHOR    = "Yeganeh, Y. and Farshad, A. and Weinberger, P. and Ahmadi, S.A. and Adeli, E. and Navab, N.",
  TITLE     = "Transformers Pay Attention to Convolutions Leveraging Emerging Properties of ViTs by Dual Attention-Image Network",
  BOOKTITLE = CVAMD23,
  YEAR      = "2023",
  PAGES     = "2296--2307",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136212",
}

@inproceedings{bb140209,
  AUTHOR    = "Zheng, J.H. and Yang, L.Q. and Li, Y. and Yang, K. and Wang, Z.Y. and Zhou, J.",
  TITLE     = "Lightweight Vision Transformer with Spatial and Channel Enhanced Self-Attention",
  BOOKTITLE = REDLCV23,
  YEAR      = "2023",
  PAGES     = "1484--1488",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136213",
}

@inproceedings{bb140210,
  AUTHOR    = "Xie, W. and Zhao, Z. and Li, S.Y. and Zuo, B.H. and Wang, Y.G.",
  TITLE     = "Nonrigid Object Contact Estimation With Regional Unwrapping Transformer",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "9308--9317",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136214",
}

@inproceedings{bb140211,
  AUTHOR    = "Vasu, P.K.A. and Gabriel, J. and Zhu, J. and Tuzel, O. and Ranjan, A.",
  TITLE     = "FastViT: A Fast Hybrid Vision Transformer using Structural Reparameterization",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5762--5772",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136215",
}

@inproceedings{bb140212,
  AUTHOR    = "Hyeon Woo, N. and Yu Ji, K. and Heo, B. and Han, D.Y. and Oh, S.J. and Oh, T.H.",
  TITLE     = "Scratching Visual Transformer's Back with Uniform Attention",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5784--5795",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136216",
}

@inproceedings{bb140213,
  AUTHOR    = "Tang, C. and Zhang, L.L. and Jiang, H.Q. and Xu, J.H. and Cao, T. and Zhang, Q. and Yang, Y.Q. and Wang, Z. and Yang, M.",
  TITLE     = "ElasticViT: Conflict-aware Supernet Training for Deploying Fast Vision Transformer on Diverse Mobile Devices",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5806--5817",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136217",
}

@inproceedings{bb140214,
  AUTHOR    = "Ren, S. and Yang, X.Y. and Liu, S. and Wang, X.C.",
  TITLE     = "SG-Former: Self-guided Transformer with Evolving Token Reallocation",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5980--5991",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136218",
}

@inproceedings{bb140215,
  AUTHOR    = "Lin, W.F. and Wu, Z.H. and Chen, J. and Huang, J. and Jin, L.W.",
  TITLE     = "Scale-Aware Modulation Meet Transformer",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5992--6003",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136219",
}

@inproceedings{bb140216,
  AUTHOR    = "Zhang, H.K. and Hu, W.Z. and Wang, X.Y.",
  TITLE     = "FcaFormer: Forward Cross Attention in Hybrid Vision Transformer",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "6037--6046",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136220",
}

@inproceedings{bb140217,
  AUTHOR    = "He, Y.F. and Lou, Z.Y. and Zhang, L. and Liu, J. and Wu, W.J. and Zhou, H. and Zhuang, B.",
  TITLE     = "BiViT: Extremely Compressed Binary Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5628--5640",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136221",
}

@inproceedings{bb140218,
  AUTHOR    = "Dutson, M. and Li, Y. and Gupta, M.",
  TITLE     = "Eventful Transformers: Leveraging Temporal Redundancy in Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "16865--16877",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136222",
}

@inproceedings{bb140219,
  AUTHOR    = "Wang, Z.Q. and Fang, Y.T. and Cao, J.H. and Zhang, Q. and Wang, Z. and Xu, R.",
  TITLE     = "Masked Spiking Transformer",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "1761--1771",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136223",
}

@inproceedings{bb140220,
  AUTHOR    = "Peebles, W. and Xie, S.",
  TITLE     = "Scalable Diffusion Models with Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "4172--4182",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136224",
}

@inproceedings{bb140221,
  AUTHOR    = "Zeng, W.X. and Li, M. and Xiong, W.J. and Tong, T. and Lu, W.J. and Tan, J. and Wang, R.S. and Huang, R.",
  TITLE     = "MPCViT: Searching for Accurate and Efficient MPC-Friendly Vision Transformer with Heterogeneous Attention",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5029--5040",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136225",
}

@inproceedings{bb140222,
  AUTHOR    = "Mentzer, F. and Agustsson, E. and Tschannen, M.",
  TITLE     = "M2T: Masking Transformers Twice for Faster Decoding",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5317--5326",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136226",
}

@inproceedings{bb140223,
  AUTHOR    = "Psomas, B. and Kakogeorgiou, I. and Karantzalos, K. and Avrithis, Y.",
  TITLE     = "Keep It SimPool: Who Said Supervised Transformers Suffer from Attention Deficit?",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5327--5337",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136227",
}

@inproceedings{bb140224,
  AUTHOR    = "Xiao, H. and Zheng, W.Z. and Zhu, Z. and Zhou, J. and Lu, J.W.",
  TITLE     = "Token-Label Alignment for Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5472--5481",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136228",
}

@inproceedings{bb140225,
  AUTHOR    = "Yu, R.Y. and Wang, Z.N. and Wang, Y.H. and Li, K. and Liu, C. and Duan, H. and Ji, X.Y. and Chen, J.",
  TITLE     = "LaPE: Layer-adaptive Position Embedding for Vision Transformers with Independent Layer Normalization",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5863--5873",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136229",
}

@inproceedings{bb140226,
  AUTHOR    = "Roy, A. and Verma, V.K. and Voonna, S. and Ghosh, K. and Ghosh, S. and Das, A.",
  TITLE     = "Exemplar-Free Continual Transformer with Convolutions",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5874--5884",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136230",
}

@inproceedings{bb140227,
  AUTHOR    = "Xu, Y.X. and Li, C. and Li, D. and Sheng, X. and Jiang, F. and Tian, L. and Sirasao, A.",
  TITLE     = "FDViT: Improve the Hierarchical Architecture of Vision Transformer",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5927--5937",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136231",
}

@inproceedings{bb140228,
  AUTHOR    = "Han, D.C. and Pan, X. and Han, Y.Z. and Song, S. and Huang, G.",
  TITLE     = "FLatten Transformer: Vision Transformer using Focused Linear Attention",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5938--5948",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136232",
}

@inproceedings{bb140229,
  AUTHOR    = "Chen, Y.J. and Liu, H.M. and Yin, H.R. and Fan, B.",
  TITLE     = "Building Vision Transformers with Hierarchy Aware Feature Aggregation",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5885--5895",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136233",
}

@inproceedings{bb140230,
  AUTHOR    = "Quetu, V. and Milovanovic, M. and Tartaglione, E.",
  TITLE     = "Sparse Double Descent in Vision Transformers: Real or Phantom Threat?",
  BOOKTITLE = CIAP23,
  YEAR      = "2023",
  PAGES     = "II:490--502",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136234",
}

@inproceedings{bb140231,
  AUTHOR    = "Ak, K.E. and Lee, G.G. and Xu, Y. and Shen, M.W.",
  TITLE     = "Leveraging Efficient Training and Feature Fusion in Transformers for Multimodal Classification",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "1420--1424",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136235",
}

@inproceedings{bb140232,
  AUTHOR    = "Popovic, N. and Paudel, D.P. and Probst, T. and Van Gool, L.J.",
  TITLE     = "Token-Consistent Dropout For Calibrated Vision Transformers",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "1030--1034",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136236",
}

@inproceedings{bb140233,
  AUTHOR    = "Sajjadi, M.S.M. and Mahendran, A. and Kipf, T. and Pot, E. and Duckworth, D. and Lucic, M. and Greff, K.",
  TITLE     = "RUST: Latent Neural Scene Representations from Unposed Imagery",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "17297--17306",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136237",
}

@inproceedings{bb140234,
  AUTHOR    = "Bowman, B. and Achille, A. and Zancato, L. and Trager, M. and Perera, P. and Paolini, G. and Soatto, S.",
  TITLE     = "{\`A}-la-carte Prompt Tuning (APT): Combining Distinct Data Via Composable Prompting",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14984--14993",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136238",
}

@inproceedings{bb140235,
  AUTHOR    = "Nakhli, R. and Moghadam, P.A. and Mi, H.Y. and Farahani, H. and Baras, A. and Gilks, B. and Bashashati, A.",
  TITLE     = "Sparse Multi-Modal Graph Transformer with Shared-Context Processing for Representation Learning of Giga-pixel Images",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "11547--11557",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136239",
}

@inproceedings{bb140236,
  AUTHOR    = "Gartner, E. and Metz, L. and Andriluka, M. and Freeman, C.D. and Sminchisescu, C.",
  TITLE     = "Transformer-Based Learned Optimization",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "11970--11979",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136240",
}

@inproceedings{bb140237,
  AUTHOR    = "Li, J.C. and Hassani, A. and Walton, S. and Shi, H.",
  TITLE     = "ConvMLP: Hierarchical Convolutional MLPs for Vision",
  BOOKTITLE = WFM23,
  YEAR      = "2023",
  PAGES     = "6307--6316",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136241",
}

@inproceedings{bb140238,
  AUTHOR    = "Walmer, M. and Suri, S. and Gupta, K. and Shrivastava, A.",
  TITLE     = "Teaching Matters: Investigating the Role of Supervision in Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "7486--7496",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136242",
}

@inproceedings{bb140239,
  AUTHOR    = "Wang, S.G. and Xie, T. and Cheng, J. and Zhang, X.C. and Liu, H.J.",
  TITLE     = "MDL-NAS: A Joint Multi-domain Learning Framework for Vision Transformer",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "20094--20104",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136243",
}

@inproceedings{bb140240,
  AUTHOR    = "Ko, D. and Choi, J. and Choi, H.K. and On, K.W. and Roh, B. and Kim, H.W.J.",
  TITLE     = "MELTR: Meta Loss Transformer for Learning to Fine-tune Video Foundation Models",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "20105--20115",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136244",
}

@inproceedings{bb140241,
  AUTHOR    = "Ren, S. and Wei, F.Y. and Zhang, Z. and Hu, H.",
  TITLE     = "TinyMIM: An Empirical Study of Distilling MIM Pre-trained Models",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "3687--3697",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136245",
}

@inproceedings{bb140242,
  AUTHOR    = "He, J.F. and Gao, Y. and Zhang, T.Z. and Zhang, Z. and Wu, F.",
  TITLE     = "D2Former: Jointly Learning Hierarchical Detectors and Contextual Descriptors via Agent-Based Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2904--2914",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136246",
}

@inproceedings{bb140243,
  AUTHOR    = "Chen, X.Y. and Liu, Z.J. and Tang, H.T. and Yi, L. and Zhao, H. and Han, S.",
  TITLE     = "SparseViT: Revisiting Activation Sparsity for Efficient High-Resolution Vision Transformer",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2061--2070",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136247",
}

@inproceedings{bb140244,
  AUTHOR    = "Wei, S.Y. and Ye, T.Z. and Zhang, S. and Tang, Y. and Liang, J.J.",
  TITLE     = "Joint Token Pruning and Squeezing Towards More Aggressive Compression of Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2092--2101",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136248",
}

@inproceedings{bb140245,
  AUTHOR    = "Lin, Y.B. and Sung, Y.L. and Lei, J. and Bansal, M. and Bertasius, G.",
  TITLE     = "Vision Transformers are Parameter-Efficient Audio-Visual Learners",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2299--2309",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136249",
}

@inproceedings{bb140246,
  AUTHOR    = "Das, R. and Dukler, Y. and Ravichandran, A. and Swaminathan, A.",
  TITLE     = "Learning Expressive Prompting With Residuals for Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "3366--3377",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136250",
}

@inproceedings{bb140247,
  AUTHOR    = "Zheng, M.X. and Lou, Q. and Jiang, L.",
  TITLE     = "TrojViT: Trojan Insertion in Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "4025--4034",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136251",
}

@inproceedings{bb140248,
  AUTHOR    = "Guo, Y. and Stutz, D. and Schiele, B.",
  TITLE     = "Improving Robustness of Vision Transformers by Reducing Sensitivity to Patch Corruptions",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "4108--4118",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136252",
}

@inproceedings{bb140249,
  AUTHOR    = "Li, Y.X. and Xu, C.",
  TITLE     = "Trade-off between Robustness and Accuracy of Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "7558--7568",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136253",
}

@inproceedings{bb140250,
  AUTHOR    = "Tarasiou, M. and Chavez, E. and Zafeiriou, S.",
  TITLE     = "ViTs for SITS: Vision Transformers for Satellite Image Time Series",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "10418--10428",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136254",
}

@inproceedings{bb140251,
  AUTHOR    = "Yu, Z.Z. and Wu, S. and Fu, Y.G. and Zhang, S. and Lin, Y.Y.C.",
  TITLE     = "Hint-Aug: Drawing Hints from Foundation Vision Transformers towards Boosted Few-shot Parameter-Efficient Tuning",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "11102--11112",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136255",
}

@inproceedings{bb140252,
  AUTHOR    = "Kim, D. and Angelova, A. and Kuo, W.C.",
  TITLE     = "Region-Aware Pretraining for Open-Vocabulary Object Detection with Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "11144--11154",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136256",
}

@inproceedings{bb140253,
  AUTHOR    = "Hou, J. and Dai, X.L. and He, Z.J. and Dai, A. and Nie{\ss}ner, M.",
  TITLE     = "Mask3D: Pretraining 2D Vision Transformers by Learning Masked 3D Priors",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "13510--13519",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136257",
}

@inproceedings{bb140254,
  AUTHOR    = "Xu, Z.Z. and Liu, R. and Yang, S. and Chai, Z. and Yuan, C.",
  TITLE     = "Learning Imbalanced Data with Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "15793--15803",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136258",
}

@inproceedings{bb140255,
  AUTHOR    = "Zhang, J.P. and Huang, Y.Z. and Wu, W.B. and Lyu, M.R.",
  TITLE     = "Transferable Adversarial Attacks on Vision Transformers with Token Gradient Regularization",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "16415--16424",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136259",
}

@inproceedings{bb140256,
  AUTHOR    = "Yang, H. and Yin, H.X. and Shen, M. and Molchanov, P. and Li, H. and Kautz, J.",
  TITLE     = "Global Vision Transformer Pruning with Hessian-Aware Saliency",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "18547--18557",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136260",
}

@inproceedings{bb140257,
  AUTHOR    = "Nakamura, R. and Kataoka, H. and Takashima, S. and Noriega, E.J.M. and Yokota, R. and Inoue, N.",
  TITLE     = "Pre-training Vision Transformers with Very Limited Synthesized Images",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "20303--20312",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136261",
}

@inproceedings{bb140258,
  AUTHOR    = "Takashima, S. and Hayamizu, R. and Inoue, N. and Kataoka, H. and Yokota, R.",
  TITLE     = "Visual Atoms: Pre-Training Vision Transformers with Sinusoidal Waves",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "18579--18588",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136262",
}

@inproceedings{bb140259,
  AUTHOR    = "Kang, D. and Koniusz, P. and Cho, M. and Murray, N.",
  TITLE     = "Distilling Self-Supervised Vision Transformers for Weakly-Supervised Few-Shot Classification and Segmentation",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "19627--19638",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136263",
}

@inproceedings{bb140260,
  AUTHOR    = "Liu, Y.J. and Yang, H.R. and Dong, Z. and Keutzer, K. and Du, L. and Zhang, S.H.",
  TITLE     = "NoisyQuant: Noisy Bias-Enhanced Post-Training Activation Quantization for Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "20321--20330",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136264",
}

@inproceedings{bb140261,
  AUTHOR    = "Park, J. and Johnson, J.",
  TITLE     = "RGB No More: Minimally-Decoded JPEG Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "22334--22346",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136265",
}

@inproceedings{bb140262,
  AUTHOR    = "Yu, C. and Chen, T. and Gan, Z.X. and Fan, J.Y.",
  TITLE     = "Boost Vision Transformer with GPU-Friendly Sparsity and Quantization",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "22658--22668",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136266",
}

@inproceedings{bb140263,
  AUTHOR    = "Bao, F. and Nie, S. and Xue, K.W. and Cao, Y. and Li, C.X. and Su, H. and Zhu, J.",
  TITLE     = "All are Worth Words: A ViT Backbone for Diffusion Models",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "22669--22679",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136267",
}

@inproceedings{bb140264,
  AUTHOR    = "Li, B. and Hu, Y. and Nie, X.C. and Han, C.Y. and Jiang, X.J. and Guo, T.D. and Liu, L.Q.",
  TITLE     = "DropKey for Vision Transformer",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "22700--22709",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136268",
}

@inproceedings{bb140265,
  AUTHOR    = "Lan, S.Y. and Yang, X. and Yu, Z. and Wu, Z. and Alvarez, J.M. and Anandkumar, A.",
  TITLE     = "Vision Transformers are Good Mask Auto-Labelers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "23745--23755",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136269",
}

@inproceedings{bb140266,
  AUTHOR    = "Yu, L. and Xiang, W.",
  TITLE     = "X-Pruner: eXplainable Pruning for Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "24355--24363",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136270",
}

@inproceedings{bb140267,
  AUTHOR    = "Singh, A.",
  TITLE     = "Training Strategies for Vision Transformers for Object Detection",
  BOOKTITLE = WAD23,
  YEAR      = "2023",
  PAGES     = "110--118",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136271",
}

@inproceedings{bb140268,
  AUTHOR    = "Hukkelas, H. and Lindseth, F.",
  TITLE     = "Does Image Anonymization Impact Computer Vision Training?",
  BOOKTITLE = WAD23,
  YEAR      = "2023",
  PAGES     = "140--150",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136272",
}

@inproceedings{bb140269,
  AUTHOR    = "Marnissi, M.A.",
  TITLE     = "Revolutionizing Thermal Imaging: GAN-Based Vision Transformers for Image Enhancement",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "2735--2739",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136273",
}

@inproceedings{bb140270,
  AUTHOR    = "Marnissi, M.A. and Fathallah, A.",
  TITLE     = "GAN-based Vision Transformer for High-Quality Thermal Image Enhancement",
  BOOKTITLE = GCV23,
  YEAR      = "2023",
  PAGES     = "817--825",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136274",
}

@inproceedings{bb140271,
  AUTHOR    = "Scheibenreif, L. and Mommert, M. and Borth, D.",
  TITLE     = "Masked Vision Transformers for Hyperspectral Image Classification",
  BOOKTITLE = EarthVision23,
  YEAR      = "2023",
  PAGES     = "2166--2176",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136275",
}

@inproceedings{bb140272,
  AUTHOR    = "Komorowski, P. and Baniecki, H. and Biecek, P.",
  TITLE     = "Towards Evaluating Explanations of Vision Transformers for Medical Imaging",
  BOOKTITLE = XAI4CV23,
  YEAR      = "2023",
  PAGES     = "3726--3732",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136276",
}

@inproceedings{bb140273,
  AUTHOR    = "Nalmpantis, A. and Panagiotopoulos, A. and Gkountouras, J. and Papakostas, K. and Aziz, W.",
  TITLE     = "Vision DiffMask: Faithful Interpretation of Vision Transformers with Differentiable Patch Masking",
  BOOKTITLE = XAI4CV23,
  YEAR      = "2023",
  PAGES     = "3756--3763",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136277",
}

@inproceedings{bb140274,
  AUTHOR    = "Ronen, T. and Levy, O. and Golbert, A.",
  TITLE     = "Vision Transformers with Mixed-Resolution Tokenization",
  BOOKTITLE = ECV23,
  YEAR      = "2023",
  PAGES     = "4613--4622",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136278",
}

@inproceedings{bb140275,
  AUTHOR    = "Le, P.H.C. and Li, X.",
  TITLE     = "BinaryViT: Pushing Binary Vision Transformers Towards Convolutional Models",
  BOOKTITLE = ECV23,
  YEAR      = "2023",
  PAGES     = "4665--4674",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136279",
}

@inproceedings{bb140276,
  AUTHOR    = "Ma, D. and Zhao, P.F. and Jiao, X.",
  TITLE     = "PerfHD: Efficient ViT Architecture Performance Ranking using Hyperdimensional Computing",
  BOOKTITLE = NAS23,
  YEAR      = "2023",
  PAGES     = "2230--2237",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136280",
}

@inproceedings{bb140277,
  AUTHOR    = "Wang, J. and Alamayreh, O. and Tondi, B. and Barni, M.",
  TITLE     = "Open Set Classification of GAN-based Image Manipulations via a ViT-based Hybrid Architecture",
  BOOKTITLE = WMF23,
  YEAR      = "2023",
  PAGES     = "953--962",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136281",
}

@inproceedings{bb140278,
  AUTHOR    = "Tian, R. and Wu, Z. and Dai, Q. and Hu, H. and Qiao, Y. and Jiang, Y.G.",
  TITLE     = "ResFormer: Scaling ViTs with Multi-Resolution Training",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "22721--22731",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136282",
}

@inproceedings{bb140279,
  AUTHOR    = "Li, Y. and Min, K. and Tripathi, S. and Vasconcelos, N.M.",
  TITLE     = "SViTT: Temporal Learning of Sparse Video-Text Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "18919--18929",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136283",
}

@inproceedings{bb140280,
  AUTHOR    = "Beyer, L. and Izmailov, P. and Kolesnikov, A. and Caron, M. and Kornblith, S. and Zhai, X.H. and Minderer, M. and Tschannen, M. and Alabdulmohsin, I. and Pavetic, F.",
  TITLE     = "FlexiViT: One Model for All Patch Sizes",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14496--14506",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136284",
}

@inproceedings{bb140281,
  AUTHOR    = "Chang, S.N. and Wang, P. and Lin, M. and Wang, F. and Zhang, D.J.H. and Jin, R. and Shou, M.Z.",
  TITLE     = "Making Vision Transformers Efficient from A Token Sparsification View",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "6195--6205",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136285",
}

@inproceedings{bb140282,
  AUTHOR    = "Naeem, M.F. and Khan, M.G.Z.A. and Xian, Y.Q. and Afzal, M.Z. and Stricker, D. and Van Gool, L.J. and Tombari, F.",
  TITLE     = "I2MVFormer: Large Language Model Generated Multi-View Document Supervision for Zero-Shot Image Classification",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "15169--15179",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136286",
}

@inproceedings{bb140283,
  AUTHOR    = "Phan, L. and Nguyen, H.T.H. and Warrier, H. and Gupta, Y.",
  TITLE     = "Patch Embedding as Local Features: Unifying Deep Local and Global Features via Vision Transformer for Image Retrieval",
  BOOKTITLE = ACCV22,
  YEAR      = "2022",
  PAGES     = "II:204--221",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136287",
}

@inproceedings{bb140284,
  AUTHOR    = "Guo, X.D. and Sun, Y. and Zhao, R. and Kuang, L.Q. and Han, X.",
  TITLE     = "SWPT: Spherical Window-based Point Cloud Transformer",
  BOOKTITLE = ACCV22,
  YEAR      = "2022",
  PAGES     = "I:396--412",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136288",
}

@inproceedings{bb140285,
  AUTHOR    = "Wang, W.J. and Chen, G. and Zhou, H.R. and Wang, X.L.",
  TITLE     = "OVPT: Optimal Viewset Pooling Transformer for 3D Object Recognition",
  BOOKTITLE = ACCV22,
  YEAR      = "2022",
  PAGES     = "I:486--503",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136289",
}

@inproceedings{bb140286,
  AUTHOR    = "Kim, D. and Kim, J.",
  TITLE     = "Vision Transformer Compression and Architecture Exploration with Efficient Embedding Space Search",
  BOOKTITLE = ACCV22,
  YEAR      = "2022",
  PAGES     = "III:524--540",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136290",
}

@inproceedings{bb140287,
  AUTHOR    = "Lee, Y.S. and Lee, G. and Ryoo, K. and Go, H. and Park, J. and Kim, S.",
  TITLE     = "Towards Flexible Inductive Bias via Progressive Reparameterization Scheduling",
  BOOKTITLE = VIPriors22,
  YEAR      = "2022",
  PAGES     = "706--720",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136291",
}

@inproceedings{bb140288,
  AUTHOR    = "Amir, S. and Gandelsman, Y. and Bagon, S. and Dekel, T.",
  TITLE     = "On the Effectiveness of ViT Features as Local Semantic Descriptors",
  BOOKTITLE = SelfLearn22,
  YEAR      = "2022",
  PAGES     = "39--55",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136292",
}

@inproceedings{bb140289,
  AUTHOR    = "Deng, X. and Liu, C.B. and Lu, Z.Y.",
  TITLE     = "Recombining Vision Transformer Architecture for Fine-grained Visual Categorization",
  BOOKTITLE = MMMod23,
  YEAR      = "2023",
  PAGES     = "II:127--138",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136293",
}

@inproceedings{bb140290,
  AUTHOR    = "Tonkes, V. and Sabatelli, M.",
  TITLE     = "How Well Do Vision Transformers (VTs) Transfer to the Non-natural Image Domain? An Empirical Study Involving Art Classification",
  BOOKTITLE = VisArt22,
  YEAR      = "2022",
  PAGES     = "234--250",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136294",
}

@inproceedings{bb140291,
  AUTHOR    = "Rangrej, S.B. and Liang, K.J. and Hassner, T. and Clark, J.J.",
  TITLE     = "GliTr: Glimpse Transformers with Spatiotemporal Consistency for Online Action Prediction",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "3402--3412",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136295",
}

@inproceedings{bb140292,
  AUTHOR    = "Liu, Y. and Matsoukas, C. and Strand, F. and Azizpour, H. and Smith, K.",
  TITLE     = "PatchDropout: Economizing Vision Transformers Using Patch Dropout",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "3942--3951",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136296",
}

@inproceedings{bb140293,
  AUTHOR    = "Song, C.H. and Yoon, J.Y. and Choi, S. and Avrithis, Y.",
  TITLE     = "Boosting Vision Transformers for Image Retrieval",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "107--117",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136297",
}

@inproceedings{bb140294,
  AUTHOR    = "Yang, J. and Liu, J.J. and Xu, N. and Huang, J.Z.",
  TITLE     = "TVT: Transferable Vision Transformer for Unsupervised Domain Adaptation",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "520--530",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136298",
}

@inproceedings{bb140295,
  AUTHOR    = "Saavedra Ruiz, M. and Morin, S. and Paull, L.",
  TITLE     = "Monocular Robot Navigation with Self-Supervised Pretrained Vision Transformers",
  BOOKTITLE = CRV22,
  YEAR      = "2022",
  PAGES     = "197--204",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136299",
}

% NOTE(review): BOOKTITLE in the next entry is the quoted literal "ICPR22",
% unlike the bare macros used by every other entry here. Left unchanged --
% confirm whether the ICPR22 macro was intended.
@inproceedings{bb140296,
  AUTHOR    = "Patel, K. and Bur, A.M. and Li, F.J. and Wang, G.H.",
  TITLE     = "Aggregating Global Features into Local Vision Transformer",
  BOOKTITLE = "ICPR22",
  YEAR      = "2022",
  PAGES     = "1141--1147",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136300",
}

@inproceedings{bb140297,
  AUTHOR    = "Shen, Z.Q. and Liu, Z. and Xing, E.",
  TITLE     = "Sliced Recursive Transformer",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:727--744",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136301",
}

@inproceedings{bb140298,
  AUTHOR    = "Shao, Y. and Loy, C.C. and Dai, B.",
  TITLE     = "Transformer with Implicit Edges for Particle-Based Physics Simulation",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XIX:549--564",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136302",
}

@inproceedings{bb140299,
  AUTHOR    = "Wang, W. and Zhang, J. and Cao, Y. and Shen, Y.L. and Tao, D.C.",
  TITLE     = "Towards Data-Efficient Detection Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "IX:88--105",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136303",
}