Keith Price Bibliography Bibtex Entry (ANCHOR 133100 URL http://dx.doi.org/10.1109/ICPR.2018.8545396 TYPE CONFERENCE PAGES 2422-2427 YEAR 2018 MONTH NIL BIBSOURCE http://www.visionbib.com/bibliography/match607ian2.html#TT129139 VOLUME NIL JOURNAL ICPR18 AUTHOR Jiu, M. and Sahbi, H. and Qi, L. TITLE Deep Context Networks for Image Annotation)


@inproceedings{bb133100,
        AUTHOR = "Jiu, M. and Sahbi, H. and Qi, L.",
        TITLE = "Deep Context Networks for Image Annotation",
        BOOKTITLE = ICPR18,
        YEAR = "2018",
        PAGES = "2422--2427",
        DOI = "10.1109/ICPR.2018.8545396",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT129139"}

@inproceedings{bb133101,
        AUTHOR = "Khatchatoorian, A. G. and Jamzad, M.",
        TITLE = "Post Rectifying Methods to Improve the Accuracy of Image Annotation",
        BOOKTITLE = DICTA17,
        YEAR = "2017",
        PAGES = "1--7",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT129140"}

@inproceedings{bb133102,
        AUTHOR = "Pellegrin, L. and Escalante, H. J. and {Montes y Gomez}, M. and Villegas, M. and Gonzalez, F. A.",
        TITLE = "A Flexible Framework for the Evaluation of Unsupervised Image
Annotation",
        BOOKTITLE = CIARP17,
        YEAR = "2017",
        PAGES = "508--516",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT129141"}

@inproceedings{bb133103,
        AUTHOR = "Tripathi, A. and Gupta, A. and Chaudhary, S. and Lall, B.",
        TITLE = "Image Annotation Using Latent Components and Transmedia Association",
        BOOKTITLE = PReMI17,
        YEAR = "2017",
        PAGES = "493--500",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT129142"}

@inproceedings{bb133104,
        AUTHOR = "Wu, B. Y. and Jia, F. and Liu, W. and Ghanem, B.",
        TITLE = "Diverse Image Annotation",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6194--6202",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT129143"}

@article{bb133105,
        AUTHOR = "Gao, L. L. and Guo, Z. and Zhang, H. W. and Xu, X. and Shen, H. T.",
        TITLE = "Video Captioning With Attention-Based {LSTM} and Semantic Consistency",
        JOURNAL = MultMed,
        VOLUME = "19",
        YEAR = "2017",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2045--2055",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129144"}

@article{bb133106,
        AUTHOR = "Bin, Y. and Yang, Y. and Shen, F. and Xie, N. and Shen, H. T. and Li, X.",
        TITLE = "Describing Video With Attention-Based Bidirectional {LSTM}",
        JOURNAL = Cyber,
        VOLUME = "49",
        YEAR = "2019",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "2631--2641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129145"}

@article{bb133107,
        AUTHOR = "Fu, K. and Jin, J. Q. and Cui, R. P. and Sha, F. and Zhang, C. S.",
        TITLE = "Aligning Where to See and What to Tell: Image Captioning with
Region-Based Attention and Scene-Specific Contexts",
        JOURNAL = PAMI,
        VOLUME = "39",
        YEAR = "2017",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "2321--2334",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129146"}

@article{bb133108,
        AUTHOR = "Xiao, C. M. and Yang, Q. and Xu, X. Q. and Zhang, J. W. and Zhou, F. and Zhang, C. S.",
        TITLE = "Where you edit is what you get: Text-guided image editing with
region-based attention",
        JOURNAL = PR,
        VOLUME = "139",
        YEAR = "2023",
        PAGES = "109458",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129147"}

@article{bb133109,
        AUTHOR = "Nian, F. D. and Li, T. and Wang, Y. and Wu, X. Y. and Ni, B. B. and Xu, C. S.",
        TITLE = "Learning explicit video attributes from mid-level representation for
video captioning",
        JOURNAL = CVIU,
        VOLUME = "163",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "126--138",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129148"}

@article{bb133110,
        AUTHOR = "Ye, S. and Han, J. and Liu, N.",
        TITLE = "Attentive Linear Transformation for Image Captioning",
        JOURNAL = IP,
        VOLUME = "27",
        YEAR = "2018",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "5514--5524",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129149"}

@article{bb133111,
        AUTHOR = "Xian, Y. and Tian, Y.",
        TITLE = "Self-Guiding Multimodal {LSTM}: When We Do Not Have a Perfect Training
Dataset for Image Captioning",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "5241--5252",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129150"}

@article{bb133112,
        AUTHOR = "Peng, Y. Q. and Liu, X. and Wang, W. H. and Zhao, X. S. and Wei, M.",
        TITLE = "Image caption model of double {LSTM} with scene factors",
        JOURNAL = IVC,
        VOLUME = "86",
        YEAR = "2019",
        PAGES = "38--44",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129151"}

@article{bb133113,
        AUTHOR = "Wu, L. and Xu, M. and Wang, J. and Perry, S.",
        TITLE = "Recall What You See Continually Using {GridLSTM} in Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "22",
        YEAR = "2020",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "808--818",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129152"}

@article{bb133114,
        AUTHOR = "Deng, Z. R. and Jiang, Z. Q. and Lan, R. and Huang, W. M. and Luo, X. N.",
        TITLE = "Image captioning using {DenseNet} network and adaptive attention",
        JOURNAL = SP:IC,
        VOLUME = "85",
        YEAR = "2020",
        PAGES = "115836",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129153"}

@article{bb133115,
        AUTHOR = "Ji, J. and Xu, C. and Zhang, X. and Wang, B. and Song, X.",
        TITLE = "Spatio-Temporal Memory Attention for Image Captioning",
        JOURNAL = IP,
        VOLUME = "29",
        YEAR = "2020",
        PAGES = "7615--7628",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129154"}

@article{bb133116,
        AUTHOR = "Che, W. B. and Fan, X. P. and Xiong, R. Q. and Zhao, D. B.",
        TITLE = "Visual Relationship Embedding Network for Image Paragraph Generation",
        JOURNAL = MultMed,
        VOLUME = "22",
        YEAR = "2020",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2307--2320",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129155"}

@article{bb133117,
        AUTHOR = "Zhang, J. and Li, K. K. and Wang, Z.",
        TITLE = "Parallel-fusion {LSTM} with synchronous semantic and visual information
for image captioning",
        JOURNAL = JVCIR,
        VOLUME = "75",
        YEAR = "2021",
        PAGES = "103044",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129156"}

@article{bb133118,
        AUTHOR = "He, S. and Lu, Y. Y. and Chen, S. N.",
        TITLE = "Image Captioning Algorithm Based on Multi-Branch {CNN} and {Bi-LSTM}",
        JOURNAL = IEICE,
        VOLUME = "E104-D",
        YEAR = "2021",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "941--947",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129157"}

@article{bb133119,
        AUTHOR = "Yuan, J. and Zhu, S. and Huang, S. Y. and Zhang, H. W. and Xiao, Y. Q. and Li, Z. Y. and Wang, M.",
        TITLE = "Discriminative Style Learning for Cross-Domain Image Captioning",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "1723--1736",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129158"}

@inproceedings{bb133120,
        AUTHOR = "Zhou, Y. and Zhang, Y. and Hu, Z. Z. and Wang, M.",
        TITLE = "Semi-Autoregressive Transformer for Image Captioning",
        BOOKTITLE = CLVL21,
        YEAR = "2021",
        PAGES = "3132--3136",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129159"}

@article{bb133121,
        AUTHOR = "Lv, G. and Sun, Y. N. and Nian, F. and Zhu, M. F. and Tang, W. L. and Hu, Z. Z.",
        TITLE = "{COME}: {Clip-OCR} and {Master ObjEct} for text image captioning",
        JOURNAL = IVC,
        VOLUME = "136",
        YEAR = "2023",
        PAGES = "104751",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129160"}

@inproceedings{bb133122,
        AUTHOR = "Niu, Z. X. and Zhou, M. and Wang, L. and Gao, X. B. and Hua, G.",
        TITLE = "Hierarchical Multimodal {LSTM} for Dense Visual-Semantic Embedding",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1899--1907",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129161"}

@inproceedings{bb133123,
        AUTHOR = "Tan, Y. H. and Chan, C. S.",
        TITLE = "{phi-LSTM}: A Phrase-Based Hierarchical {LSTM} Model for Image Captioning",
        BOOKTITLE = ACCV16,
        YEAR = "2016",
        PAGES = "V: 101--117",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129162"}

@inproceedings{bb133124,
        AUTHOR = "Wang, M. and Song, L. and Yang, X. K. and Luo, C. F.",
        TITLE = "A parallel-fusion {RNN-LSTM} architecture for image caption generation",
        BOOKTITLE = ICIP16,
        YEAR = "2016",
        PAGES = "4448--4452",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT129163"}

@article{bb133125,
        AUTHOR = "Verma, Y. and Jawahar, C. V.",
        TITLE = "A support vector approach for cross-modal search of images and texts",
        JOURNAL = CVIU,
        VOLUME = "154",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "48--63",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129164"}

@inproceedings{bb133126,
        AUTHOR = "Dutta, A. and Verma, Y. and Jawahar, C. V.",
        TITLE = "Recurrent Image Annotation with Explicit Inter-Label Dependencies",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XXIX: 191--207",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129165"}

@article{bb133127,
        AUTHOR = "Xue, J. F. and Eguchi, K.",
        TITLE = "Video Data Modeling Using Sequential Correspondence Hierarchical
{Dirichlet} Processes",
        JOURNAL = IEICE,
        VOLUME = "E100-D",
        YEAR = "2017",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "33--41",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129166"}

@article{bb133128,
        AUTHOR = "Liu, A. A. and Xu, N. and Wong, Y. K. and Li, J. and Su, Y. T. and Kankanhalli, M.",
        TITLE = "Hierarchical \& multimodal video captioning: Discovering and
transferring multimodal knowledge for vision to language",
        JOURNAL = CVIU,
        VOLUME = "163",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "113--125",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129167"}

@article{bb133129,
        AUTHOR = "Guan, J. N. and Wang, E.",
        TITLE = "Repeated review based image captioning for image evidence review",
        JOURNAL = SP:IC,
        VOLUME = "63",
        YEAR = "2018",
        PAGES = "141--148",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129168"}

@article{bb133130,
        AUTHOR = "Hu, M. and Yang, Y. and Shen, F. and Zhang, L. and Shen, H. T. and Li, X.",
        TITLE = "Robust Web Image Annotation via Exploring Multi-Facet and Structural
Knowledge",
        JOURNAL = IP,
        VOLUME = "26",
        YEAR = "2017",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "4871--4884",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129169"}

@article{bb133131,
        AUTHOR = "Gil Gonzalez, J. and Alvarez Meza, A. and Orozco Gutierrez, A.",
        TITLE = "Learning from multiple annotators using kernel alignment",
        JOURNAL = PRL,
        VOLUME = "116",
        YEAR = "2018",
        PAGES = "150--156",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129170"}

@article{bb133132,
        AUTHOR = "Zheng, H. and Wu, J. H. and Liang, R. and Li, Y. and Li, X. Z.",
        TITLE = "Multi-task learning for captioning images with novel words",
        JOURNAL = IET-CV,
        VOLUME = "13",
        YEAR = "2019",
        NUMBER = "3",
        MONTH = apr,
        PAGES = "294--301",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129171"}

@article{bb133133,
        AUTHOR = "Park, C. C. and Kim, B. and Kim, G.",
        TITLE = "Towards Personalized Image Captioning via Multimodal Memory Networks",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "999--1012",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129172"}

@inproceedings{bb133134,
        AUTHOR = "Park, C. C. and Kim, B. and Kim, G.",
        TITLE = "Attend to You: Personalized Image Captioning with Context Sequence
Memory Networks",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6432--6440",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129173"}

@article{bb133135,
        AUTHOR = "Yang, M. and Zhao, W. and Xu, W. and Feng, Y. and Zhao, Z. and Chen, X. and Lei, K.",
        TITLE = "Multitask Learning for Cross-Domain Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "1047--1061",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129174"}

@article{bb133136,
        AUTHOR = "Yu, N. and Hu, X. and Song, B. and Yang, J. and Zhang, J.",
        TITLE = "Topic-Oriented Image Captioning Based on Order-Embedding",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "2743--2754",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129175"}

@article{bb133137,
        AUTHOR = "Li, X. and Xu, C. and Wang, X. and Lan, W. and Jia, Z. and Yang, G. and Xu, J.",
        TITLE = "{COCO-CN} for Cross-Lingual Image Tagging, Captioning, and Retrieval",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2347--2360",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129176"}

@article{bb133138,
        AUTHOR = "Tian, C. and Tian, M. and Jiang, M. M. and Liu, H. and Deng, D. H.",
        TITLE = "How much do cross-modal related semantics benefit image captioning by
weighting attributes and re-ranking sentences?",
        JOURNAL = PRL,
        VOLUME = "125",
        YEAR = "2019",
        PAGES = "639--645",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129177"}

@article{bb133139,
        AUTHOR = "Niu, Y. and Lu, Z. and Wen, J. and Xiang, T. and Chang, S.",
        TITLE = "Multi-Modal Multi-Scale Deep Learning for Large-Scale Image
Annotation",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "1720--1731",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129178"}

@article{bb133140,
        AUTHOR = "Huang, Y. and Chen, J. and Ouyang, W. and Wan, W. and Xue, Y.",
        TITLE = "Image Captioning With End-to-End Attribute Detection and Subsequent
Attributes Prediction",
        JOURNAL = IP,
        VOLUME = "29",
        YEAR = "2020",
        PAGES = "4013--4026",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129179"}

@article{bb133141,
        AUTHOR = "Zhao, W. and Wu, X. and Luo, J.",
        TITLE = "Cross-Domain Image Captioning via Cross-Modal Retrieval and Model
Adaptation",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "1180--1192",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129180"}

@article{bb133142,
        AUTHOR = "Wang, H. and Du, Y. T. and Zhang, G. X. and Cai, Z. M. and Su, C.",
        TITLE = "Learning Fundamental Visual Concepts Based on Evolved Multi-Edge
Concept Graph",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "4400--4413",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129181"}

@article{bb133143,
        AUTHOR = "Zhang, J. and Mei, K. and Zheng, Y. and Fan, J.",
        TITLE = "Integrating Part of Speech Guidance for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "92--104",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129182"}

@article{bb133144,
        AUTHOR = "Kim, D. J. and Oh, T. H. and Choi, J. and Kweon, I. S.",
        TITLE = "Dense Relational Image Captioning via Multi-Task Triple-Stream
Networks",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "7348--7362",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129183"}

@inproceedings{bb133145,
        AUTHOR = "Kim, D. J. and Choi, J. and Oh, T. H. and Kweon, I. S.",
        TITLE = "Dense Relational Captioning: Triple-Stream Networks for
Relationship-Based Captioning",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "6264--6273",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129184"}

@article{bb133146,
        AUTHOR = "Nguyen, T. S. and Fernando, B.",
        TITLE = "Effective Multimodal Encoding for Image Paragraph Captioning",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "6381--6395",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129185"}

@article{bb133147,
        AUTHOR = "Duan, Y. Q. and Wang, Z. and Li, Y. and Wang, J. Y.",
        TITLE = "Cross-domain multi-style merge for image captioning",
        JOURNAL = CVIU,
        VOLUME = "228",
        YEAR = "2023",
        PAGES = "103617",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129186"}

@article{bb133148,
        AUTHOR = "Wu, X. X. and Li, T.",
        TITLE = "Sentimental Visual Captioning using Multimodal Transformer",
        JOURNAL = IJCV,
        VOLUME = "131",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "1073--1090",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129187"}

@article{bb133149,
        AUTHOR = "Ding, Z. W. and Lan, G. L. and Song, Y. Z. and Yang, Z. W.",
        TITLE = "{SGIR}: Star Graph-Based Interaction for Efficient and Robust
Multimodal Representation",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "4217--4229",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129188"}

@article{bb133150,
        AUTHOR = "Zhao, W. T. and Wu, X. X.",
        TITLE = "Boosting Entity-Aware Image Captioning With Multi-Modal Knowledge
Graph",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "2659--2670",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129189"}

@article{bb133151,
        AUTHOR = "Cao, S. and An, G. and Cen, Y. G. and Yang, Z. Q. and Lin, W. S.",
        TITLE = "{CAST}: Cross-Modal Retrieval and Visual Conditioning for image
captioning",
        JOURNAL = PR,
        VOLUME = "153",
        YEAR = "2024",
        PAGES = "110555",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129190"}

@article{bb133152,
        AUTHOR = "Song, Z. J. and Hu, Z. Z. and Zhou, Y. and Zhao, Y. and Hong, R. C. and Wang, M.",
        TITLE = "Embedded Heterogeneous Attention Transformer for Cross-Lingual Image
Captioning",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "9008--9020",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129191"}

@article{bb133153,
        AUTHOR = "Li, Y. and Ji, J. Y. and Sun, X. S. and Zhou, Y. and Luo, Y. P. and Ji, R. R.",
        TITLE = "M3ixup: A multi-modal data augmentation approach for image captioning",
        JOURNAL = PR,
        VOLUME = "158",
        YEAR = "2025",
        PAGES = "110941",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129192"}

@article{bb133154,
        AUTHOR = "Deng, H. Y. and Xie, Y. S. and Wang, Q. and Wang, J. J. and Ruan, W. J. and Liu, W. and Liu, Y. J.",
        TITLE = "{CDKM}: Common and Distinct Knowledge Mining Network With Content
Interaction for Dense Captioning",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "10462--10473",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129193"}

@article{bb133155,
        AUTHOR = "Zhang, G. Q. and Kan, S. C. and Shi, L. and Xu, W. and An, G. and Cen, Y. G.",
        TITLE = "Cross-scene visual context parsing with large vision-language model",
        JOURNAL = PR,
        VOLUME = "166",
        YEAR = "2025",
        PAGES = "111641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129194"}

@inproceedings{bb133156,
        AUTHOR = "Chen, L. and Li, J. S. and Dong, X. Y. and Zhang, P. and He, C. H. and Wang, J. Q. and Zhao, F. and Lin, D.",
        TITLE = "{ShareGPT4V}: Improving Large Multi-modal Models with Better Captions",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XVII: 370--387",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129195"}

@inproceedings{bb133157,
        AUTHOR = "Jin, B. and Zheng, Y. P. and Li, P. F. and Li, W. and Zheng, Y. H. and Hu, S. and Liu, X. Y. and Zhu, J. and Yan, Z. J. and Sun, H. Y. and Zhan, K. and Jia, P. and Long, X. X. and Chen, Y. L. and Zhao, H.",
        TITLE = "Tod3cap: Towards 3d Dense Captioning in Outdoor Scenes",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XVIII: 367--384",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129196"}

@inproceedings{bb133158,
        AUTHOR = "Kim, M. J. and Lim, H. S. and Lee, S. and Kim, B. and Kim, G.",
        TITLE = "Bi-directional Contextual Attention for 3d Dense Captioning",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XVIII: 385--401",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129197"}

@inproceedings{bb133159,
        AUTHOR = "Zhao, Y. Z. and Liu, Y. and Guo, Z. H. and Wu, W. J. and Gong, C. and Ye, Q. X. and Wan, F.",
        TITLE = "Controlcap: Controllable Region-level Captioning",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXXVIII: 21--38",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129198"}

@inproceedings{bb133160,
        AUTHOR = "Wang, Z. and Jiang, X. Y. and Xiao, J. and Chen, T. and Chen, L.",
        TITLE = "Decap: Towards Generalized Explicit Caption Editing via Diffusion
Mechanism",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XLIII: 365--381",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129199"}

@inproceedings{bb133161,
        AUTHOR = "Mao, S. Q. and Zhang, C. Y. and Su, H. and Song, H. and Shalyminov, I. and Cai, W. D.",
        TITLE = "Controllable Contextualized Image Captioning: Directing the Visual
Narrative Through User-defined Highlights",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "L: 464--481",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129200"}

@inproceedings{bb133162,
        AUTHOR = "Sarto, S. and Cornia, M. and Baraldi, L. and Cucchiara, R.",
        TITLE = "Bridge: Bridging Gaps in Image Captioning Evaluation with Stronger
Visual Cues",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "LXXVIII: 70--87",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129201"}

@inproceedings{bb133163,
        AUTHOR = "Matsuda, K. and Wada, Y. and Sugiura, K.",
        TITLE = "{DENEB}: A Hallucination-robust Automatic Evaluation Metric for Image
Captioning",
        BOOKTITLE = ACCV24,
        YEAR = "2024",
        PAGES = "III: 166--182",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129202"}

@inproceedings{bb133164,
        AUTHOR = "Hu, J. C. and Cavicchioli, R. and Capotondi, A.",
        TITLE = "A Request for Clarity over the End of Sequence Token in the
Self-critical Sequence Training",
        BOOKTITLE = CIAP23,
        YEAR = "2023",
        PAGES = "I:39--50",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129203"}

@inproceedings{bb133165,
        AUTHOR = "Hu, W. Z. and Wang, L. X. and Xu, L. F.",
        TITLE = "Spatial-Semantic Attention for Grounded Image Captioning",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "61--65",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129204"}

@inproceedings{bb133166,
        AUTHOR = "Sharif, N. and Jalwana, M. A. A. K. and Bennamoun, M. and Liu, W. and Shah, S. A. A.",
        TITLE = "Leveraging Linguistically-aware Object Relations and {NASNet} for Image
Captioning",
        BOOKTITLE = IVCNZ20,
        YEAR = "2020",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129205"}

@inproceedings{bb133167,
        AUTHOR = "Kuo, C. W. and Kira, Z.",
        TITLE = "Beyond a Pre-Trained Object Detector: Cross-Modal Textual and Visual
Context for Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17948--17958",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129206"}

@inproceedings{bb133168,
        AUTHOR = "Zhou, M. Y. and Zhou, L. W. and Wang, S. H. and Cheng, Y. and Li, L. J. and Yu, Z. and Liu, J. J.",
        TITLE = "{UC2}: Universal Cross-lingual Cross-modal Vision-and-Language
Pre-training",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "4153--4163",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129207"}

@inproceedings{bb133169,
        AUTHOR = "Laina, I. and Rupprecht, C. and Navab, N.",
        TITLE = "Towards Unsupervised Image Captioning With Shared Multimodal
Embeddings",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "7413--7423",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129208"}

@inproceedings{bb133170,
        AUTHOR = "Akbari, H. and Karaman, S. and Bhargava, S. and Chen, B. and Vondrick, C. and Chang, S. F.",
        TITLE = "Multi-Level Multimodal Common Semantic Space for Image-Phrase Grounding",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "12468--12478",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129209"}

@inproceedings{bb133171,
        AUTHOR = "Chen, T. H. and Liao, Y. H. and Chuang, C. Y. and Hsu, W. T. and Fu, J. and Sun, M.",
        TITLE = "Show, Adapt and Tell:
Adversarial Training of Cross-Domain Image Captioner",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "521--530",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129210"}

@inproceedings{bb133172,
        AUTHOR = "Pini, S. and Cornia, M. and Baraldi, L. and Cucchiara, R.",
        TITLE = "Towards Video Captioning with Naming:
A Novel Dataset and a Multi-modal Approach",
        BOOKTITLE = CIAP17,
        YEAR = "2017",
        PAGES = "II:384--395",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129211"}

% NOTE(review): BOOKTITLE macro CMU-CS-TR and the absence of PAGES suggest this is
% a technical report filed as inproceedings; confirm before changing the entry type.
@inproceedings{bb133173,
        AUTHOR = "Pan, J. Y. and Yang, H. J. and Faloutsos, C.",
        TITLE = "{MMSS}: Graph-based Multi-modal Story-oriented Video Summarization and
Retrieval",
        BOOKTITLE = CMU-CS-TR,
        YEAR = "2004",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129212"}

@inproceedings{bb133174,
        AUTHOR = "Pan, J. Y. and Yang, H. J. and Faloutsos, C. and Duygulu, P.",
        TITLE = "{GCap}: Graph-based Automatic Image Captioning",
        BOOKTITLE = MMDE04,
        YEAR = "2004",
        PAGES = "146",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129213"}

% NOTE(review): same author, title, year and BIBSOURCE anchor as bb133176; the
% BOOKTITLE macro CMU-CS suggests a report or thesis rather than a proceedings
% paper. Confirm before merging the two keys or changing the entry type.
@inproceedings{bb133175,
        AUTHOR = "Pan, J. Y.",
        TITLE = "Advanced Tools for Video and Multimedia Mining",
        BOOKTITLE = CMU-CS,
        YEAR = "2006",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129214"}

% NOTE(review): same author, title, year and BIBSOURCE anchor as bb133175; the
% BOOKTITLE macro Ph.D. suggests a thesis rather than a proceedings paper.
% Confirm before merging the two keys or changing the entry type.
@inproceedings{bb133176,
        AUTHOR = "Pan, J. Y.",
        TITLE = "Advanced Tools for Video and Multimedia Mining",
        BOOKTITLE = Ph.D.,
        YEAR = "2006",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT129214"}

@article{bb133177,
        AUTHOR = "Yu, J. and Li, J. and Yu, Z. and Huang, Q.",
        TITLE = "Multimodal Transformer With Multi-View Visual Representation for
Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "30",
        YEAR = "2020",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "4467--4480",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT129215"}

@article{bb133178,
        AUTHOR = "Zhang, Y. and Shi, X. Y. and Mi, S. and Yang, X.",
        TITLE = "Image captioning with transformer and knowledge graph",
        JOURNAL = PRL,
        VOLUME = "143",
        YEAR = "2021",
        PAGES = "43--49",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT129216"}

@article{bb133179,
        AUTHOR = "Yan, C. G. and Hao, Y. M. and Li, L. and Yin, J. and Liu, A. and Mao, Z. and Chen, Z. Y. and Gao, X. Y.",
        TITLE = "Task-Adaptive Attention for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "43--51",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT129217"}

% NOTE(review): the source carried the placeholder page range xx-yy, which would
% print literally; it is omitted here. Add the real article number or page range
% once it is known.
@article{bb133180,
        AUTHOR = "Ren, Z. H. and Gou, S. P. and Guo, Z. and Mao, S. S. and Li, R. M.",
        TITLE = "A Mask-Guided Transformer Network with Topic Token for Remote Sensing
Image Captioning",
        JOURNAL = RS,
        VOLUME = "14",
        YEAR = "2022",
        NUMBER = "12",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT129218"}

@article{bb133181,
        AUTHOR = "Ji, J. Y. and Ma, Y. W. and Sun, X. S. and Zhou, Y. and Wu, Y. J. and Ji, R. R.",
        TITLE = "Knowing What to Learn: A Metric-Oriented Focal Mechanism for Image
Captioning",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "4321--4335",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT129219"}

@article{bb133182,
        AUTHOR = "Li, X. and Zhang, W. K. and Sun, X. and Gao, X.",
        TITLE = "Semantic-meshed and content-guided transformer for image captioning",
        JOURNAL = IET-CV,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "5",
        PAGES = "431--444",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT129220"}

@article{bb133183,
  author    = {Xian, T.T. and Li, Z.X. and Tang, Z.J. and Ma, H.F.},
  title     = {Adaptive Path Selection for Dynamic Image Captioning},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {9},
  pages     = {5762-5775},
  month     = {September},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129221},
}

@article{bb133184,
  author    = {Cao, S. and An, G. and Zheng, Z.X. and Wang, Z.Y.},
  title     = {Vision-Enhanced and Consensus-Aware Transformer for Image Captioning},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {10},
  pages     = {7005-7018},
  month     = {October},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129222},
}

@article{bb133185,
  author    = {Jiang, W.T. and Zhou, W. and Hu, H.F.},
  title     = {Double-Stream Position Learning Transformer Network for Image
Captioning},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {11},
  pages     = {7706-7718},
  month     = {November},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129223},
}

@article{bb133186,
  author    = {Li, J.C. and Zhou, W. and Wang, K. and Hu, H.F.},
  title     = {Triple-Stream Commonsense Circulation Transformer Network for Image
Captioning},
  journal   = CVIU,
  volume    = {249},
  pages     = {104165},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129224},
}

@article{bb133187,
  author    = {Hu, J.T. and Yang, Y. and Yao, L. and An, Y.Z. and Pan, L.},
  title     = {Position-guided transformer for image captioning},
  journal   = IVC,
  volume    = {128},
  pages     = {104575},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129225},
}

% The model name in the title is brace-protected so that sentence-casing
% bibliography styles keep its mixed-case spelling.
@article{bb133188,
  author    = {Wang, Z.G. and Shi, S. and Zhai, Z.R. and Wu, Y. and Yang, R.},
  title     = {{ArCo}: Attention-reinforced transformer with contrastive learning for
image captioning},
  journal   = IVC,
  volume    = {128},
  pages     = {104570},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129226},
}

@article{bb133189,
  author    = {Li, Z.X. and Wei, J.H. and Huang, F.C. and Ma, H.F.},
  title     = {Modeling graph-structured contexts for image captioning},
  journal   = IVC,
  volume    = {129},
  pages     = {104591},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129227},
}

@article{bb133190,
  author    = {Zhang, J. and Xie, Y.S. and Ding, W.C. and Wang, Z.},
  title     = {Cross on Cross Attention: Deep Fusion Transformer for Image
Captioning},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {8},
  pages     = {4257-4268},
  month     = {August},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129228},
}

@article{bb133191,
  author    = {Lim, J.H. and Chan, C.S.},
  title     = {Mask-guided network for image captioning},
  journal   = PRL,
  volume    = {173},
  pages     = {79-86},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129229},
}

@article{bb133192,
  author    = {Li, Z.X. and Su, Q. and Chen, T.Y.},
  title     = {External knowledge-assisted Transformer for image captioning},
  journal   = IVC,
  volume    = {140},
  pages     = {104864},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129230},
}

@article{bb133193,
  author    = {Chen, J.Q.},
  title     = {Transform, contrast and tell:
Coherent entity-aware multi-image captioning},
  journal   = CVIU,
  volume    = {238},
  pages     = {103878},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129231},
}

% The model name in the title is brace-protected so that sentence-casing
% bibliography styles keep the acronym in upper case.
@article{bb133194,
  author    = {Yang, X.B. and Tian, X. and Wu, J.S. and Yang, X.C. and Ma, S. and Qi, X. and Hou, Z.Q.},
  title     = {{LLAFN-Generator}: Learnable linear-attention with fast-normalization
for large-scale image captioning},
  journal   = CVIU,
  volume    = {248},
  pages     = {104088},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129232},
}

@article{bb133195,
  author    = {Yi, Y.H. and Liang, Y.K. and Kong, D. and Tang, Z.W. and Peng, J.B.},
  title     = {Tag-inferring and tag-guided Transformer for image captioning},
  journal   = IET-CV,
  volume    = {18},
  number    = {6},
  pages     = {801-812},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129233},
}

@inproceedings{bb133196,
  author    = {Song, J.Y. and Pan, R.J. and Zhou, J. and Yang, H.},
  title     = {M-rat: a Multi-grained Retrieval Augmentation Transformer for Image
Captioning},
  booktitle = ACCV24,
  pages     = {III: 185-203},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129234},
}

@inproceedings{bb133197,
  author    = {Caffagni, D. and Barraco, M. and Cornia, M. and Baraldi, L. and Cucchiara, R.},
  title     = {Synthcap: Augmenting Transformers with Synthetic Data for Image
Captioning},
  booktitle = CIAP23,
  pages     = {I:112-123},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129235},
}

% The proceedings title is spelled out in full rather than given as a bare
% abbreviation code, so the rendered reference names the conference.
% NOTE(review): if a string macro for this conference exists elsewhere in the
% database, prefer referencing it here -- confirm.
@inproceedings{bb133198,
  author    = {Lou, L.S. and Lu, K. and Xue, J.},
  title     = {Improved Transformer with Parallel Encoders for Image Captioning},
  booktitle = {International Conference on Pattern Recognition ({ICPR})},
  pages     = {4072-4075},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129236},
}

% The proceedings title is spelled out in full rather than given as a bare
% abbreviation code, and the camel-case name in the title is brace-protected
% so that sentence-casing styles keep its spelling.
% NOTE(review): if a string macro for this conference exists elsewhere in the
% database, prefer referencing it here -- confirm.
@inproceedings{bb133199,
  author    = {Wang, Y.H. and Shang, L.},
  title     = {Generating Spatial-aware Captions for {TextCaps}},
  booktitle = {International Conference on Pattern Recognition ({ICPR})},
  pages     = {379-385},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607tic2.html#TT129237},
}
Last update: Oct 6, 2025 at 14:07:43