@inproceedings{bb239700,
        AUTHOR = "Chen, J. H. and Yang, Z. Q. and Xu, H. Y. G. and Zhang, D. D. and Mylonas, G.",
        TITLE = "Multi-Agent Systems for Robotic Autonomy with {LLMs}",
        BOOKTITLE = MultiEmbodied25,
        YEAR = "2025",
        PAGES = "4194--4204",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234618"}

% NOTE(review): BOOKTITLE below is a quoted literal, whereas neighbouring entries use bare string macros (e.g. CVPR25) -- confirm whether a MULA25 macro was intended.
@inproceedings{bb239701,
        AUTHOR = "Singh, K. and Singh, S. and Khanna, M.",
        TITLE = "Trishul: Towards Region Identification and Screen Hierarchy Understanding for Large {VLM} Based {GUI} Agents",
        BOOKTITLE = "MULA25",
        YEAR = "2025",
        PAGES = "170--179",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234619"}

@inproceedings{bb239702,
        AUTHOR = "Ardakani, M. and Malekar, J. and Zand, R.",
        TITLE = "{LLMPi}: Optimizing {LLMs} for High-Throughput on {Raspberry Pi}",
        BOOKTITLE = EDGE25,
        YEAR = "2025",
        PAGES = "6369--6378",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234620"}

% NOTE(review): BOOKTITLE below is a quoted literal, whereas neighbouring entries use bare string macros (e.g. CVPR25) -- confirm whether an AIBench25 macro was intended.
@inproceedings{bb239703,
        AUTHOR = "Villa, A. and Alcazar, J. L. and Soto, A. and Ghanem, B.",
        TITLE = "Behind the Magic, {MERLIM}: Multi-Modal Evaluation Benchmark for Large Image-Language Models",
        BOOKTITLE = "AIBench25",
        YEAR = "2025",
        PAGES = "492--502",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234621"}

@inproceedings{bb239704,
        AUTHOR = "Mei, G. F. and Lin, W. and Riz, L. and Wu, Y. J. and Poiesi, F. and Wang, Y. M.",
        TITLE = "{PerLA}: Perceptive {3D} language assistant",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "14369--14379",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234622"}

@inproceedings{bb239705,
        AUTHOR = "Han, Y. D. and Guo, Q. and Pan, L. Y. and Liu, L. and Guan, Y. and Yang, M.",
        TITLE = "{DynFocus}: Dynamic Cooperative Network Empowers {LLMs} with Video Understanding",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "8512--8522",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234623"}

@inproceedings{bb239706,
        AUTHOR = "Liu, Y. and Liang, Z. Y. and Wang, Y. Z. and Wu, X. F. and Tang, F. L. and He, M. and Li, J. and Liu, Z. and Yang, H. and Lim, S. and Zhao, B.",
        TITLE = "Unveiling the Ignorance of {MLLMs}: Seeing Clearly, Answering Incorrectly",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "9087--9097",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234624"}

@inproceedings{bb239707,
        AUTHOR = "Wang, Z. T. and Hu, S. M. and Zhao, S. Y. and Lin, X. W. and Juefei Xu, F. and Li, Z. and Han, L. and Subramanyam, H. and Chen, L. and Chen, J. and Jiang, N. and Lyu, L. and Ma, S. Q. and Metaxas, D. N. and Jain, A.",
        TITLE = "{MLLM}-as-a-Judge for Image Safety without Human Labeling",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "14657--14666",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234625"}

@inproceedings{bb239708,
        AUTHOR = "Zhu, M. and Tian, Y. Z. and Chen, H. and Zhou, C. and Guo, Q. and Liu, Y. and Yang, M. and Shen, C. H.",
        TITLE = "{SegAgent}: Exploring Pixel Understanding Capabilities in {MLLMs} by Imitating Human Annotator Trajectories",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "3686--3696",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234626"}

@inproceedings{bb239709,
        AUTHOR = "Zhu, L. and Chen, T. R. and Xu, Q. X. and Liu, X. and Ji, D. and Wu, H. Y. and Soh, D. W. and Liu, J.",
        TITLE = "{POPEN}: Preference-Based Optimization and Ensemble for {LVLM}-Based Reasoning Segmentation",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "30231--30240",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234627"}

@inproceedings{bb239710,
        AUTHOR = "Niu, J. and Li, Y. F. and Miao, Z. Y. and Ge, C. J. and Zhou, Y. H. and He, Q. H. and Dong, X. Y. and Duan, H. D. and Ding, S. and Qian, R. and Zhang, P. and Zang, Y. H. and Cao, Y. H. and He, C. H. and Wang, J. Q.",
        TITLE = "{OVO-Bench}: How Far is Your {Video-LLMs} from Real-World Online Video Understanding?",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "18902--18913",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234628"}

@inproceedings{bb239711,
        AUTHOR = "Xue, X. Y. and Lu, Z. and Huang, D. and Wang, Z. D. and Ouyang, W. L. and Bai, L.",
        TITLE = "{ComfyBench}: Benchmarking {LLM}-based Agents in {ComfyUI} for Autonomously Designing Collaborative {AI} Systems",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "24614--24624",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234629"}

@inproceedings{bb239712,
        AUTHOR = "Zhao, Z. and Huo, Y. Q. and Yue, T. T. and Guo, L. T. and Lu, H. Y. and Wang, B. N. and Chen, W. P. and Liu, J.",
        TITLE = "Efficient Motion-Aware Video {MLLM}",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "24159--24168",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234630"}

@inproceedings{bb239713,
        AUTHOR = "Wu, R. H. and Su, W. and Liao, J.",
        TITLE = "{Chat2SVG}: Vector Graphics Generation with Large Language Models and Image Diffusion Models",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "23690--23700",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234631"}

@inproceedings{bb239714,
        AUTHOR = "Yang, S. and Chen, Y. and Tian, Z. and Wang, C. Y. and Li, J. Y. and Yu, B. and Jia, J. Y.",
        TITLE = "{VisionZip}: Longer is Better but Not Necessary in Vision Language Models",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "19792--19802",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234632"}

% NOTE(review): the "-y" suffix in the title may be a transliterated Greek letter -- verify against the published title.
@inproceedings{bb239715,
        AUTHOR = "Xie, J. Y. and Yang, J. T. and Luo, Z. and Cao, Y. and Gao, Q. and Zhang, M. Y. and Hu, W. P.",
        TITLE = "{AdaDARE-y}: Balancing Stability and Plasticity in Multi-modal {LLMs} through Efficient Adaptation",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "19758--19768",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234633"}

@inproceedings{bb239716,
        AUTHOR = "Tao, K. and Qin, C. and You, H. X. and Sui, Y. and Wang, H.",
        TITLE = "{DyCoke}: Dynamic Compression of Tokens for Fast Video Large Language Models",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "18992--19001",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234634"}

@inproceedings{bb239717,
        AUTHOR = "Tao, C. X. and Su, S. Q. and Zhu, X. Z. and Zhang, C. Y. and Chen, Z. and Liu, J. and Wang, W. H. and Lu, L. W. and Huang, G. and Qiao, Y. and Dai, J. F.",
        TITLE = "{HoVLE}: Unleashing the Power of Monolithic Vision-Language Models with Holistic Vision-Language Embedding",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "14559--14569",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234635"}

@inproceedings{bb239718,
        AUTHOR = "Yin, H. and Si, G. Z. and Wang, Z.",
        TITLE = "Lifting the Veil on Visual Information Flow in {MLLMs}: Unlocking Pathways to Faster Inference",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "9382--9391",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234636"}

@inproceedings{bb239719,
        AUTHOR = "Yang, L. R. and Shen, D. and Cai, C. X. and Chen, K. B. and Yang, F. and Gao, T. T. and Zhang, D. and Li, X.",
        TITLE = "{Libra-Merging}: Importance-Redundancy and Pruning-Merging Trade-Off for Acceleration Plug-In in Large Vision-Language Model",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "9402--9412",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234637"}

@inproceedings{bb239720,
        AUTHOR = "Liang, Y. and Wang, Z. W. and Xu, X. W. and Zhou, J. and Lu, J. W.",
        TITLE = "{EfficientLLaVA}: Generalizable Auto-Pruning for Large Vision-language Models",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "9445--9454",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234638"}

@inproceedings{bb239721,
        AUTHOR = "Heo, M. and Chen, M. H. and Huang, D. A. and Liu, S. and Radhakrishnan, S. and Kim, S. J. and Wang, Y. C. A. F. and Hachiuma, R.",
        TITLE = "{Omni-RGPT}: Unifying Image and Video Region-level Understanding via Token Marks",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "3919--3930",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234639"}

@inproceedings{bb239722,
        AUTHOR = "Ouali, Y. and Bulat, A. and Xenos, A. and Zaganidis, A. and Metaxas, I. M. and Martinez, B. and Tzimiropoulos, G.",
        TITLE = "{VladVA}: Discriminative Fine-tuning of {LVLMs}",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "4101--4111",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234640"}

@inproceedings{bb239723,
        AUTHOR = "Schnaus, D. and Araslanov, N. and Cremers, D.",
        TITLE = "It's a (Blind) Match! Towards Vision-Language Correspondence without Parallel Data",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "24983--24992",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234641"}

@inproceedings{bb239724,
        AUTHOR = "Zhao, Y. Q. and Yin, Y. Y. and Li, L. and Lin, M. and Huang, V. S. J. and Chen, S. W. and Chen, W. P. and Yin, B. and Zhou, Z. and Zhang, W. T.",
        TITLE = "Beyond Sight: Towards Cognitive Alignment in {LVLM} via Enriched Visual Knowledge",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "24950--24959",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234642"}

@inproceedings{bb239725,
        AUTHOR = "Ye, X. and Gan, Y. and Huang, X. and Ge, Y. X. and Tang, Y. S.",
        TITLE = "{VoCo-LLaMA}: Towards Vision Compression with Large Language Models",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "29836--29846",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234643"}

@inproceedings{bb239726,
        AUTHOR = "Hu, Y. and Song, Z. K. and Feng, N. and Luo, Y. and Yu, J. Q. and Chen, Y. P. P. and Yang, W.",
        TITLE = "{SF2T}: Self-supervised Fragment Finetuning of {Video-LLMs} for Fine-Grained Understanding",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "29108--29117",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234644"}

@inproceedings{bb239727,
        AUTHOR = "Chen, J. and Zeng, Z. Y. and Lin, Y. Q. and Li, W. and Ma, Z. and Shou, M. Z.",
        TITLE = "Live: Learning Video {LLM} with Streaming Speech Transcription at Scale",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "29083--29095",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234645"}

@inproceedings{bb239728,
        AUTHOR = "Wang, Z. W. and Chen, W. Z. and Yang, L. and Zhou, S. and Zhao, S. and Zhan, H. and Jin, J. C. and Li, L. C. and Shao, Z. and Bu, J. J.",
        TITLE = "{MP-GUI}: Modality Perception with {MLLMs} for {GUI} Understanding",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "29711--29721",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234646"}

@inproceedings{bb239729,
        AUTHOR = "Vayani, A. and Dissanayake, D. and Watawana, H. and Ahsan, N. and Sasikumar, N. and Thawakar, O. and Ademtew, H. B. and Hmaiti, Y. and Kumar, A. and Kuckreja, K. and Maslych, M. and Ghallabi, W. A. and Mihaylov, M. and Qin, C. and Shaker, A. M. and Zhang, M. and Ihsani, M. K. and Esplana, A. and Gokani, M. and Mirkin, S. and Singh, H. and Srivastava, A. and Hamerlik, E. and Izzati, F. A. and Maani, F. A. and Cavada, S. and Chim, J. and Gupta, R. and Manjunath, S. and Zhumakhanova, K. and Rabevohitra, F. H. and Amirudin, A. and Ridzuan, M. and Kareem, D. and More, K. and Li, K. and Shakya, P. and Saad, M. and Ghasemaghaei, A. and Djanibekov, A. and Azizov, D. and Jankovic, B. and Bhatia, N. and Cabrera, A. and Obando Ceron, J. and Otieno, O. and Farestam, F. and Rabbani, M. and Baliah, S. and Sanjeev, S. and Shtanchaev, A. and Fatima, M. and Nguyen, T. and Kareem, A. and Aremu, T. and Xavier, N. and Bhatkal, A. and Toyin, H. and Chadha, A. and Cholakkal, H. and Anwer, R. M. and Felsberg, M. and Laaksonen, J. and Solorio, T. and Choudhury, M. and Laptev, I. and Shah, M. and Khan, S. and Khan, F. S.",
        TITLE = "All Languages Matter: Evaluating {LMMs} on Culturally Diverse 100 Languages",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "19565--19575",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234647"}

@inproceedings{bb239730,
        AUTHOR = "Cao, A. and Wei, X. and Ma, Z. H.",
        TITLE = "{FLAME}: Frozen Large Language Models Enable Data-Efficient Language-Image Pre-training",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "4080--4090",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234648"}

@inproceedings{bb239731,
        AUTHOR = "Bi, J. and Guo, J. J. and Tang, Y. L. and Wen, L. G. B. and Liu, Z. and Wang, B. J. and Xu, C. L.",
        TITLE = "Unveiling Visual Perception in Language Models: An Attention Head Analysis Approach",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "4135--4144",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234649"}

@inproceedings{bb239732,
        AUTHOR = "Li, S. and Hu, Y. C. and Ning, X. F. and Liu, X. H. and Hong, K. and Jia, X. T. and Li, X. and Yan, Y. Q. and Ran, P. and Dai, G. H. and Yan, S. and Yang, H. Z. and Wang, Y.",
        TITLE = "{MBQ}: Modality-Balanced Quantization for Large Vision-Language Models",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "4167--4177",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234650"}

@inproceedings{bb239733,
        AUTHOR = "Liu, Z. and Li, Y. Q. and Nguyen, K. D. and Zhong, Y. and Li, Y.",
        TITLE = "{PAVE}: Patching and Adapting Video Large Language Models",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "3306--3317",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234651"}

@inproceedings{bb239734,
        AUTHOR = "Malakouti, S. and Aghazadeh, A. and Khandelwal, A. and Kovashka, A.",
        TITLE = "Benchmarking {VLMs}' Reasoning About Persuasive Atypical Images",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "4788--4798",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234652"}

@inproceedings{bb239735,
        AUTHOR = "Lee, H. and Seo, G. and Choi, W. and Jung, G. and Song, K. and Jung, J. Y.",
        TITLE = "Enhancing Visual Classification Using Comparative Descriptors",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "5274--5283",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234653"}

@inproceedings{bb239736,
        AUTHOR = "Ee, Y. K. and Zhang, H. and Matyasko, A. and Fernando, B.",
        TITLE = "Deduce and Select Evidences with Language Models for Training-Free Video Goal Inference",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "5937--5947",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234654"}

@inproceedings{bb239737,
        AUTHOR = "Fu, R. and Liu, J. Y. and Chen, X. and Nie, Y. X. and Xiong, W. H.",
        TITLE = "{Scene-LLM}: Extending Language Model for {3D} Visual Reasoning",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "2195--2206",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234655"}

@inproceedings{bb239738,
        AUTHOR = "Awais, M. and Alharthi, A. H. S. A. and Kumar, A. and Cholakkal, H. and Anwer, R. M.",
        TITLE = "{AgroGPT}: Efficient Agricultural Vision-Language Model with Expert Tuning",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "5687--5696",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234656"}

@inproceedings{bb239739,
        AUTHOR = "Kruzhkov, E. and Behnke, S.",
        TITLE = "{LiLMaps}: Learnable Implicit Language Maps",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "7711--7720",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234657"}

@inproceedings{bb239740,
        AUTHOR = "Singh, C. K. and Kumar, D. and Sanap, V. and Sinha, R.",
        TITLE = "{LLM-RSPF}: Large Language Model-Based Robotic System Planning Framework for Domain Specific Use-cases",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "7277--7286",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234658"}

@inproceedings{bb239741,
        AUTHOR = "Sun, L. and Ahuja, C. and Chen, P. and D'Zmura, M. and Batmanghelich, K. and Bontrager, P.",
        TITLE = "Multi-Modal Large Language Models are Effective Vision Learners",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "8617--8626",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234659"}

@inproceedings{bb239742,
        AUTHOR = "Tateno, M. and Yagi, T. and Furuta, R. and Sato, Y.",
        TITLE = "Learning Multiple Object States from Actions via Large Language Models",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "9555--9565",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234660"}

@inproceedings{bb239743,
        AUTHOR = "Bahadir, C. D. and Akar, G. B. and Sabuncu, M. R.",
        TITLE = "{LLM}-Generated Rewrite and Context Modulation for Enhanced Vision Language Models in Digital Pathology",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "327--336",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234661"}

@inproceedings{bb239744,
        AUTHOR = "Chu, X. X. and Su, J. L. and Zhang, B. and Shen, C. H.",
        TITLE = "{VisionLLaMA}: A Unified {LLaMA} Backbone for Vision Tasks",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "LXVI: 1--18",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234662"}

@inproceedings{bb239745,
        AUTHOR = "Long, F. C. and Qiu, Z. F. and Yao, T. and Mei, T.",
        TITLE = "{VideoStudio}: Generating Consistent-content and Multi-scene Videos",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "LX: 468--485",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234663"}

@inproceedings{bb239746,
        AUTHOR = "Kong, X. H. and Chen, J. and Wang, W. G. and Su, H. and Hu, X. L. and Yang, Y. and Liu, S.",
        TITLE = "Controllable Navigation Instruction Generation with Chain of Thought Prompting",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXIX: 37--54",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234664"}

@inproceedings{bb239747,
        AUTHOR = "Zhu, W. Y. C. and Ye, K. and Ke, J. J. and Yu, J. H. and Guibas, L. J. and Milanfar, P. and Yang, F.",
        TITLE = "{ARTVLM}: Attribute Recognition Through Vision-based Prefix Language Modeling",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXVII: 127--145",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234665"}

@inproceedings{bb239748,
        AUTHOR = "Kim, D. and Cho, S. and Kim, S. and Luo, C. and Hong, S.",
        TITLE = "Chameleon: A Data-efficient Generalist for Dense Visual Prediction in the Wild",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXIII: 422--441",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234666"}

@inproceedings{bb239749,
        AUTHOR = "Ke, F. and Cai, Z. X. and Jahangard, S. and Wang, W. Q. and Haghighi, P. D. and Rezatofighi, H.",
        TITLE = "Hydra: A Hyper Agent for Dynamic Compositional Visual Reasoning",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XX: 132--149",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234667"}

@inproceedings{bb239750,
        AUTHOR = "Bao, X. Y. and Sun, S. Y. and Ma, S. L. and Zheng, K. C. and Guo, Y. X. and Zhao, G. S. and Zheng, Y. and Wang, X. G.",
        TITLE = "Cores: Orchestrating the Dance of Reasoning and Segmentation",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XVIII: 187--204",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234668"}

@inproceedings{bb239751,
        AUTHOR = "Liu, Z. and Liu, B. and Wang, J. H. and Dong, Y. H. and Chen, G. Y. and Rao, Y. M. and Krishna, R. and Lu, J. W.",
        TITLE = "Efficient Inference of Vision Instruction-following Models with Elastic Cache",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XVII: 54--69",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234669"}

@inproceedings{bb239752,
        AUTHOR = "Alaluf, Y. and Richardson, E. and Tulyakov, S. and Aberman, K. and Cohen Or, D.",
        TITLE = "{MYVLM}: Personalizing {VLMS} for User-specific Queries",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XIII: 73--91",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234670"}

@inproceedings{bb239753,
        AUTHOR = "Ma, Z. X. and Huang, W. and Zhang, J. and Gupta, T. and Krishna, R.",
        TITLE = "{m\&m's}: A Benchmark to Evaluate Tool-use for multi-step multi-modal Tasks",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "X: 18--34",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234671"}

@inproceedings{bb239754,
        AUTHOR = "Miao, Y. and Engelmann, F. and Vysotska, O. and Zhao, Z. H. and Chai, W. H. and Wang, X. and Li, B. and Hao, S. Y. and Cao, S. D. and Ye, T. and Wang, G. A.",
        TITLE = "See and Think: Embodied Agent in Virtual Environment",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "VIII: 187--204",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234672"}

@inproceedings{bb239755,
        AUTHOR = "Liu, Y. and Duan, H. D. and Zhang, Y. H. and Li, B. and Zhang, S. Y. and Zhao, W. and Yuan, Y. and Wang, J. Q. and He, C. H. and Liu, Z. W. and Chen, K. and Lin, D.",
        TITLE = "{MMBENCH}: Is Your Multi-Modal Model an All-Around Player?",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "VI: 216--233",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234673"}

@inproceedings{bb239756,
        AUTHOR = "Liu, Y. and Ding, P. X. and Huang, S. and Zhang, M. and Zhao, H. and Wang, D. L.",
        TITLE = "{PITE}: Pixel-Temporal Alignment for Large Video-Language Model",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "V: 160--176",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234674"}

@inproceedings{bb239757,
        AUTHOR = "Panagopoulou, A. and Xue, L. and Yu, N. and Li, J. and Li, D. X. and Joty, S. and Xu, R. and Savarese, S. and Xiong, C. M. and Niebles, J. C.",
        TITLE = "X-instructblip: A Framework for Aligning Image, 3d, Audio, Video to {LLMs} and its Emergent Cross-modal Reasoning",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XLV: 177--197",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234675"}

@inproceedings{bb239758,
        AUTHOR = "Mirza, M. J. and Karlinsky, L. and Lin, W. and Doveh, S. and Micorek, J. and Kozinski, M. and Kuehne, H. and Possegger, H.",
        TITLE = "Meta-prompting for Automating Zero-shot Visual Recognition with {LLMs}",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "II: 370--387",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234676"}

@inproceedings{bb239759,
        AUTHOR = "Liu, Z. Y. and Lai, Z. Q. and Gao, Z. W. and Cui, E. and Li, Z. H. and Zhu, X. Z. and Lu, L. W. and Chen, Q. F. and Qiao, Y. and Dai, J. F. and Wang, W. H.",
        TITLE = "{ControlLLM}: Augment Language Models with Tools by Searching on Graphs",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XII: 89--105",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234677"}

@inproceedings{bb239760,
        AUTHOR = "Yao, Y. and Hsu, C. F. and Lin, J. H. and Xie, H. X. and Lin, T. and Huang, Y. N. and Shuai, H. H. and Cheng, W. H.",
        TITLE = "The Fabrication of Reality and Fantasy: Scene Generation with {LLM}-assisted Prompt Interpretation",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXII: 422--438",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234678"}

@inproceedings{bb239761,
        AUTHOR = "Wu, Y. X. and Wang, Y. Z. and Tang, S. X. and Wu, W. H. and He, T. and Ouyang, W. L. and Torr, P. H. S. and Wu, J.",
        TITLE = "Dettoolchain: A New Prompting Paradigm to Unleash Detection Ability of {MLLM}",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXXII: 164--182",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234679"}

@inproceedings{bb239762,
        AUTHOR = "Wang, H. and Ye, Y. J. and Wang, Y. J. and Nie, Y. X. and Huang, C.",
        TITLE = "Elysium: Exploring Object-level Perception in Videos via {MLLM}",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXII: 166--185",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234680"}

@inproceedings{bb239763,
        AUTHOR = "Guo, Z. H. and Xu, R. and Yao, Y. and Cui, J. and Ni, Z. and Ge, C. J. and Chua, T. S. and Liu, Z. Y. and Huang, G.",
        TITLE = "{LLAVA-UHD}: An {LMM} Perceiving Any Aspect Ratio and High-resolution Images",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "LXXXIII: 390--406",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234681"}

@inproceedings{bb239764,
        AUTHOR = "Zhou, G. Z. and Hong, Y. C. and Wang, Z. and Wang, X. E. and Wu, Q.",
        TITLE = "{NAVGPT-2}: Unleashing Navigational Reasoning Capability for Large Vision-language Models",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "VII: 260--278",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234682"}

% NOTE(review): "Ge, Z." is listed twice and "Wei, J. R. Y. H. R." carries five initials -- the author list looks garbled upstream; verify before citing.
@inproceedings{bb239765,
        AUTHOR = "Wei, H. R. and Kong, L. Y. and Chen, J. Y. and Zhao, L. and Ge, Z. and Wei, J. R. Y. H. R. and Wang, T. and Ge, Z. and Zhang, X. Y. and Tao, W. B.",
        TITLE = "Vary: Scaling up the Vision Vocabulary for Large Vision-language Model",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "IV: 408--424",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234683"}

@inproceedings{bb239766,
        AUTHOR = "He, S. T. and Ding, H. H. and Jiang, X. D. and Wen, B.",
        TITLE = "Segpoint: Segment Any Point Cloud via Large Language Model",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXII: 349--367",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234684"}

@inproceedings{bb239767,
        AUTHOR = "Murugesan, B. and Silva Rodriguez, J. and Ben Ayed, I. and Dolz, J.",
        TITLE = "Robust Calibration of Large Vision-language Adapters",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXIV: 147--165",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234685"}

@inproceedings{bb239768,
        AUTHOR = "Xu, R. and Wang, X. L. and Wang, T. and Chen, Y. L. and Pang, J. M. and Lin, D.",
        TITLE = "Pointllm: Empowering Large Language Models to Understand Point Clouds",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXV: 131--147",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234686"}

@inproceedings{bb239769,
        AUTHOR = "Cai, K. W. and Duan, Z. K. and Liu, G. and Fleming, C. and Lu, C. X. X.",
        TITLE = "Self-adapting Large Visual-language Models to Edge Devices Across Visual Modalities",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXVIII: 301--318",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234687"}

@inproceedings{bb239770,
        AUTHOR = "Yu, R. and Yu, W. H. and Wang, X. C.",
        TITLE = "Attention Prompting on Image for Large Vision-language Models",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXX: 251--268",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234688"}

@inproceedings{bb239771,
        AUTHOR = "Luo, Y. L. and An, R. C. and Zou, B. C. and Tang, Y. M. and Liu, J. M. and Zhang, S. H.",
        TITLE = "{LLM} as Dataset Analyst: Subpopulation Structure Discovery with Large Language Model",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXXIII: 235--252",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234689"}

@inproceedings{bb239772,
        AUTHOR = "Huang, Z. J. and Tang, T. and Chen, S. X. and Lin, S. and Jie, Z. Q. and Ma, L. and Wang, G. and Liang, X. D.",
        TITLE = "Making Large Language Models Better Planners with Reasoning-decision Alignment",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXXVI: 73--90",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234690"}

@inproceedings{bb239773,
        AUTHOR = "Zhan, Y. F. and Zhu, Y. and Chen, Z. Y. and Yang, F. and Tang, M. and Wang, J. Q.",
        TITLE = "Griffon: Spelling Out All Object Locations at Any Granularity with Large Language Models",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XLII: 405--422",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234691"}

@inproceedings{bb239774,
        AUTHOR = "Li, Y. W. and Wang, C. Y. and Jia, J. Y.",
        TITLE = "Llama-vid: An Image is Worth 2 Tokens in Large Language Models",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XLVI: 323--340",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234692"}

@inproceedings{bb239775,
        AUTHOR = "Ju, C. and Wang, H. and Cheng, H. Z. and Chen, X. and Zhai, Z. H. and Huang, W. L. and Lan, J. S. and Xiao, S. and Zheng, B.",
        TITLE = "Turbo: Informativity-driven Acceleration Plug-in for Vision-language Large Models",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XLVI: 436--455",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234693"}

@inproceedings{bb239776,
        AUTHOR = "Zhao, Q. and Xu, M. and Gupta, K. and Asthana, A. and Zheng, L. and Gould, S.",
        TITLE = "The First to Know: How Token Distributions Reveal Hidden Knowledge in Large Vision-language Models?",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XLVIII: 127--142",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234694"}

% Braces keep sentence-casing styles from flattening the acronym to "Moai".
@inproceedings{bb239777,
        AUTHOR = "Lee, B.K. and Park, B. and Kim, C.W. and Ro, Y.M.",
        TITLE = "{MoAI}: Mixture of All Intelligence for Large Language and Vision Models",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XLIX: 273-302",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234695"}

@inproceedings{bb239778,
  author    = {Liu, R. and Li, C. and Tang, H.R. and Ge, Y.X. and Shan, Y. and Li, G.},
  title     = {ST-LLM: Large Language Models Are Effective Temporal Learners},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LVII: 1-18},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234696}
}

@inproceedings{bb239779,
  author    = {Cheng, H. and Xiao, E. and Gu, J.D. and Yang, L. and Duan, J. and Zhang, J. and Cao, J.H. and Xu, K.D. and Xu, R.},
  title     = {Unveiling Typographic Deceptions: Insights of the Typographic Vulnerability in Large Vision-language Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LIX: 179-196},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234697}
}

% Braces keep sentence-casing styles from flattening the acronym to "Sphinx".
@inproceedings{bb239780,
        AUTHOR = "Lin, Z. and Liu, D.Y. and Zhang, R.R. and Gao, P. and Qiu, L.T. and Xiao, H. and Qiu, H. and Shao, W.Q. and Chen, K.Q. and Han, J.M. and Huang, S.Y. and Zhang, Y. and He, X.M. and Qiao, Y. and Li, H.S.",
        TITLE = "{SPHINX}: A Mixer of Weights, Visual Embeddings and Image Scales for
Multi-modal Large Language Models",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "LXII: 36-55",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234698"}

@inproceedings{bb239781,
  author    = {Chiquier, M. and Mall, U. and Vondrick, C.},
  title     = {Evolving Interpretable Visual Classifiers with Large Language Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXIV: 183-201},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234699}
}

@inproceedings{bb239782,
  author    = {Chen, L. and Zhao, H.Z. and Liu, T.Y. and Bai, S. and Lin, J.Y. and Zhou, C. and Chang, B.},
  title     = {An Image is Worth 1/2 Tokens After Layer 2: Plug-and-play Inference Acceleration for Large Vision-language Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXXI: 19-35},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234700}
}

@inproceedings{bb239783,
  author    = {Shi, B.F. and Wu, Z.Y. and Mao, M.L. and Wang, X. and Darrell, T.J.},
  title     = {When Do We Not Need Larger Vision Models?},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {VIII: 444-462},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234701}
}

@inproceedings{bb239784,
  author    = {Yu, Q.H. and Shen, X.H. and Chen, L.C.},
  title     = {Towards Open-ended Visual Recognition with Large Language Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XIV: 359-376},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234702}
}

@inproceedings{bb239785,
  author    = {Huang, K. and Zou, H. and Xi, Y. and Wang, B.C. and Xie, Z. and Yu, L.},
  title     = {IVTP: Instruction-guided Visual Token Pruning for Large Vision-language Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XVII: 214-230},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234703}
}

@inproceedings{bb239786,
  author    = {Liu, H.T. and Li, C.Y. and Li, Y.H. and Lee, Y.J.},
  title     = {Improved Baselines with Visual Instruction Tuning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26286-26296},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234704}
}

@inproceedings{bb239787,
  author    = {Azad, S. and Jain, Y. and Garg, R. and Rawat, Y.S. and Vineet, V.},
  title     = {Understanding Depth and Height Perception in Large Visual-Language Models},
  booktitle = WhatNext25,
  year      = {2025},
  pages     = {3611-3620},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234705}
}

@inproceedings{bb239788,
  author    = {Schiappa, M. and Abdullah, R. and Azad, S. and Claypoole, J. and Cogswell, M. and Divakaran, A. and Rawat, Y.},
  title     = {Probing Conceptual Understanding of Large Visual-Language Models},
  booktitle = WhatNext24,
  year      = {2024},
  pages     = {1797-1807},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234706}
}

@inproceedings{bb239789,
        AUTHOR = "Yue, T.T. and Cheng, J. and Guo, L.T. and Dai, X.Y. and Zhao, Z. and He, X.J. and Xiong, G. and Lv, Y.S. and Liu, J.",
        TITLE = "SC-Tune: Unleashing Self-Consistent Referential Comprehension in
Large Vision Language Models",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "13073-13083",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT234707"}

@inproceedings{bb239790,
  author    = {Wu, T.H. and Lian, L. and Gonzalez, J.E. and Li, B. and Darrell, T.J.},
  title     = {Self-Correcting LLM-Controlled Diffusion Models},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {6327-6336},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234708}
}

@inproceedings{bb239791,
  author    = {Zheng, D. and Huang, S. and Zhao, L. and Zhong, Y. and Wang, L.W.},
  title     = {Towards Learning a Generalist Model for Embodied Navigation},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13624-13634},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234709}
}

@inproceedings{bb239792,
  author    = {Singh, S. and Fore, M. and Stamoulis, D.},
  title     = {GeoLLM-Engine: A Realistic Environment for Building Geospatial Copilots},
  booktitle = EarthVision24,
  year      = {2024},
  pages     = {585-594},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234710}
}

@inproceedings{bb239793,
  author    = {Zhang, Y.C. and Qian, S.J. and Peng, B. and Liu, S. and Jia, J.Y.},
  title     = {Prompt Highlighter: Interactive Control for Multi-Modal LLMs},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13215-13224},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234711}
}

@inproceedings{bb239794,
  author    = {Wang, D.K. and Xuan, S.Y. and Zhang, S.L.},
  title     = {LocLLM: Exploiting Generalizable Human Keypoint Localization via Large Language Model},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {614-623},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234712}
}

@inproceedings{bb239795,
  author    = {Liu, H.C. and Zhan, X.H. and Huang, S.L. and Mu, T.J. and Shan, Y.},
  title     = {Programmable Motion Generation for Open-Set Motion Control Tasks},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {1399-1408},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234713}
}

@inproceedings{bb239796,
  author    = {Zhao, L. and Yang, Y. and Zhang, K. and Shao, W.Q. and Zhang, Y.X. and Qiao, Y. and Luo, P. and Ji, R.R.},
  title     = {DiffAgent: Fast and Accurate Text-to-Image API Selection with Large Language Model},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {6390-6399},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234714}
}

@inproceedings{bb239797,
  author    = {Yao, J. and Liu, Y.J. and Dong, Z. and Guo, M.F. and Hu, H. and Keutzer, K. and Du, L. and Zhou, D. and Zhang, S.H.},
  title     = {PromptCoT: Align Prompt Distribution via Adapted Chain-of-Thought},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {7027-7037},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234715}
}

@inproceedings{bb239798,
  author    = {Cai, Z.P. and Mueller, M. and Birkl, R. and Wofk, D. and Tseng, S.Y. and Cheng, J. and Stan, G.B.M. and Lai, V. and Paulitsch, M.},
  title     = {L-MAGIC: Language Model Assisted Generation of Images with Coherence},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {7049-7058},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234716}
}

@inproceedings{bb239799,
  author    = {Li, Y.Y. and Liu, X. and Kag, A. and Hu, J. and Idelbayev, Y. and Sagar, D. and Wang, Y.Z. and Tulyakov, S. and Ren, J.},
  title     = {TextCraftor: Your Text Encoder can be Image Quality Controller},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {7985-7995},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT234717}
}

Last update: Jan 16, 2026 at 20:03:35