@article{bb105300,
  author    = {Srihari, R.K.},
  title     = {Use of Captions and Other Collateral Text in Understanding Photographs},
  journal   = AIR,
  volume    = {8},
  number    = {5-6},
  pages     = {409-430},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101643},
}

@article{bb105301,
  author    = {Srihari, R.K.},
  title     = {Use of Collateral Text in Understanding Photos in Documents},
  journal   = SPIE,
  volume    = {2368},
  pages     = {186-199},
  year      = {1994},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101644},
}

@article{bb105302,
  author    = {Srihari, R.K.},
  title     = {Computational Models for Integrating Linguistic and Visual Information:
               A Survey},
  journal   = AIR,
  volume    = {8},
  number    = {5-6},
  pages     = {349-369},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101645},
}

@inproceedings{bb105303,
  author    = {Srihari, R.K.},
  title     = {Using Linguistic Context for Image Interpretation and Annotation},
  booktitle = {Radius97},
  pages     = {419-427},
  year      = {1997},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101646},
}

% Upstream gives no page numbers for this entry (placeholder "xx"), so the
% pages field is omitted rather than printed literally in the bibliography.
@inproceedings{bb105304,
  author    = {Srihari, R.K.},
  title     = {Linguistic Context In Vision},
  booktitle = Context95,
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101647},
}

@inproceedings{bb105305,
  author    = {Srihari, R.K. and Zhang, Z. and Venkatraman, M. and Chopra, R.},
  title     = {Using Speech Input for Image Interpretation and Annotation},
  booktitle = ARPA96,
  pages     = {501-510},
  year      = {1996},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101648},
}

% NOTE(review): the last co-author's surname is normalised to "Venkatraman" so
% it agrees with entry bb105305; confirm the spelling against the proceedings.
@inproceedings{bb105306,
  author    = {Burhans, D.T. and Chopra, R. and Srihari, R.K. and Govindaraju, V. and Venkatraman, M.},
  title     = {Use of Collateral Text in Image Interpretation},
  booktitle = ARPA94,
  pages     = {II:897-907},
  year      = {1994},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101649},
}

@inproceedings{bb105307,
  author    = {Srihari, R.K. and Burhans, D.T.},
  title     = {Visual Semantics: Extracting Visual Information from
               Text Accompanying Pictures},
  booktitle = AAAI-94,
  pages     = {793-798},
  year      = {1994},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101650},
}

@inproceedings{bb105308,
  author    = {Govindaraju, V. and Srihari, S.N. and Sher, D.B.},
  title     = {Caption-Aided Face Location In Newspaper Photographs},
  booktitle = ICPR92,
  pages     = {I:474-477},
  year      = {1992},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101651},
}

@inproceedings{bb105309,
  author    = {Govindaraju, V. and Srihari, S.N. and Sher, D.B.},
  title     = {A Computational Model for Face Location Based on Cognitive Principles},
  booktitle = AAAI-92,
  pages     = {350-355},
  year      = {1992},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101652},
}

@article{bb105310,
  author    = {Schank, R.C. and Fano, A.},
  title     = {Memory and Expectations in Learning, Language, and Visual Understanding},
  journal   = AIR,
  volume    = {9},
  number    = {4-5},
  pages     = {261-271},
  month     = {October},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101653},
}

@article{bb105311,
  author    = {Wilks, Y.},
  title     = {Language, Vision and Metaphor},
  journal   = AIR,
  volume    = {9},
  number    = {4-5},
  pages     = {273-289},
  month     = {October},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101654},
}

@article{bb105312,
  author    = {Partridge, D.},
  title     = {Language and Vision: A Single Perceptual Mechanism},
  journal   = AIR,
  volume    = {9},
  number    = {4-5},
  pages     = {291-303},
  month     = {October},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101655},
}

@article{bb105313,
  author    = {Marconi, D.},
  title     = {Work on the Integration of Language and Vision at the
               University of Torino},
  journal   = AIR,
  volume    = {10},
  number    = {1-2},
  pages     = {15-20},
  month     = {April},
  year      = {1996},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101656},
}

@article{bb105314,
  author    = {Meini, C. and Paternoster, A.},
  title     = {Understanding Language Through Vision},
  journal   = AIR,
  volume    = {10},
  number    = {1-2},
  pages     = {37-48},
  month     = {April},
  year      = {1996},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101657},
}

@article{bb105315,
  author    = {McKevitt, P. and Guo, C.M.},
  title     = {From Chinese Rooms to Irish Rooms: New Words on Visions for Language},
  journal   = AIR,
  volume    = {10},
  number    = {1-2},
  pages     = {49-63},
  month     = {April},
  year      = {1996},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101658},
}

@article{bb105316,
  author    = {Grumbach, A.},
  title     = {Grounding Symbols into Perceptions},
  journal   = AIR,
  volume    = {10},
  number    = {1-2},
  pages     = {131-146},
  month     = {April},
  year      = {1996},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101659},
}

@article{bb105317,
  author    = {Socher, G. and Sagerer, G.F. and Perona, P.},
  title     = {Bayesian reasoning on qualitative descriptions from images and speech},
  journal   = IVC,
  volume    = {18},
  number    = {2},
  pages     = {155-172},
  month     = {January},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101660},
}

@article{bb105318,
  author    = {Mukerjee, A. and Gupta, K. and Nautiyal, S. and Singh, M.P. and Mishra, N.},
  title     = {Conceptual description of visual scenes from linguistic models},
  journal   = IVC,
  volume    = {18},
  number    = {2},
  pages     = {173-187},
  month     = {January},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101661},
}

@article{bb105319,
  author    = {Arens, M. and Gerber, R. and Nagel, H.H.},
  title     = {Conceptual representations between video signals and natural language
               descriptions},
  journal   = IVC,
  volume    = {26},
  number    = {1},
  pages     = {53-66},
  month     = {January},
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101662},
}

@inproceedings{bb105320,
  author    = {Gerber, R. and Nagel, H.H.},
  title     = {(Mis?-) Using DRT for Generation of Natural Language Text
               from Image Sequences},
  booktitle = ECCV98,
  pages     = {II: 255},
  year      = {1998},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101663},
}

@article{bb105321,
  author    = {Lee, Y.J. and Grauman, K.},
  title     = {Object-Graphs for Context-Aware Visual Category Discovery},
  journal   = PAMI,
  volume    = {34},
  number    = {2},
  pages     = {346-358},
  month     = {February},
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101664},
}

@inproceedings{bb105322,
  author    = {Lee, Y.J. and Grauman, K.},
  title     = {Object-graphs for context-aware category discovery},
  booktitle = CVPR10,
  pages     = {1-8},
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101665},
}

@inproceedings{bb105323,
  author    = {Lee, Y.J. and Grauman, K.},
  title     = {Learning the easy things first: Self-paced visual category discovery},
  booktitle = CVPR11,
  pages     = {1721-1728},
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101666},
}

@article{bb105324,
  author    = {Yu, A. and Grauman, K.},
  title     = {Densifying Supervision for Fine-Grained Visual Comparisons},
  journal   = IJCV,
  volume    = {128},
  number    = {10-11},
  pages     = {2704-2730},
  month     = {November},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101667},
}

@inproceedings{bb105325,
  author    = {Perona, P.},
  title     = {A taxonomy of visual recognition},
  booktitle = VMV04,
  pages     = {187},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101668},
}

% The volume prefix in pages is written as "II:" (no hard-coded "Vol" label),
% matching the other multi-volume proceedings entries in this file.
@inproceedings{bb105326,
  author    = {Takahashi, T. and Nakanishi, S. and Kuno, Y. and Shirai, Y.},
  title     = {Helping Computer Vision by Verbal and Nonverbal Communication},
  booktitle = ICPR98,
  pages     = {II: 1216-1218},
  year      = {1998},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101669},
}

@inproceedings{bb105327,
  author    = {Satoh, S. and Nakamura, Y. and Kanade, T.},
  title     = {Name-It: Naming and Detecting Faces in Video by the Integration
               of Image and Natural Language Processing},
  booktitle = IJCAI97,
  pages     = {1488-1495},
  year      = {1997},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101670},
}

@inproceedings{bb105328,
  author    = {Satoh, S. and Kanade, T.},
  title     = {Name-It: Association Of Face And Name In Video},
  booktitle = CVPR97,
  pages     = {368-373},
  year      = {1997},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101671},
}

@inproceedings{bb105329,
  author    = {Socher, G. and Sagerer, G.F. and Kummert, F. and Fuhr, T.},
  title     = {Talking About 3D Scenes: Integration of Image and Speech Understanding
               in a Hybrid Distributed System},
  booktitle = ICIP96,
  pages     = {II: 809-812},
  year      = {1996},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT101672},
}

@article{bb105330,
  author    = {Kuniyoshi, Y. and Inaba, M. and Inoue, H.},
  title     = {Learning by Watching: Extracting Reusable Task Knowledge from
               Visual Observation of Human Performance},
  journal   = RA,
  volume    = {10},
  pages     = {799-822},
  year      = {1994},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101673},
}

@inproceedings{bb105331,
  author    = {Kuniyoshi, Y. and Inoue, H.},
  title     = {Indexicality and dynamic attention control in qualitative recognition
               of assembly actions},
  booktitle = ECCV92,
  pages     = {874-878},
  year      = {1992},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101674},
}

% Upstream gives no page numbers for this entry (placeholder "xx-yy"), so the
% pages field is omitted rather than printed literally in the bibliography.
@article{bb105332,
  author    = {Porway, J. and Wang, Q.C. and Zhu, S.C.},
  title     = {A Hierarchical and Contextual Model for Aerial Image Parsing},
  journal   = IJCV,
  volume    = {88},
  number    = {2},
  month     = {June},
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101675},
}

@inproceedings{bb105333,
  author    = {Porway, J. and Wang, K. and Yao, B. and Zhu, S.C.},
  title     = {A hierarchical and contextual model for aerial image understanding},
  booktitle = CVPR08,
  pages     = {1-8},
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101676},
}

@inproceedings{bb105334,
  author    = {Si, Z.Z. and Gong, H.F. and Wu, Y.N. and Zhu, S.C.},
  title     = {Learning mixed templates for object recognition},
  booktitle = CVPR09,
  pages     = {272-279},
  year      = {2009},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101677},
}

@article{bb105335,
  author    = {Tu, Z.W. and Bai, X.},
  title     = {Auto-Context and Its Application to High-Level Vision Tasks and 3D
               Brain Image Segmentation},
  journal   = PAMI,
  volume    = {32},
  number    = {10},
  pages     = {1744-1757},
  month     = {October},
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101678},
}

@inproceedings{bb105336,
  author    = {Tu, Z.W.},
  title     = {Auto-context and its application to high-level vision tasks},
  booktitle = CVPR08,
  pages     = {1-8},
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101679},
}

@inproceedings{bb105337,
  author    = {Jones, J. and Hager, G.D. and Khudanpur, S.},
  title     = {Toward Computer Vision Systems That Understand Real-World Assembly
               Processes},
  booktitle = WACV19,
  pages     = {426-434},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101680},
}

@inproceedings{bb105338,
  author    = {Lampert, C.H.},
  title     = {Partitioning of image datasets using discriminative context information},
  booktitle = CVPR08,
  pages     = {1-8},
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101681},
}

@inproceedings{bb105339,
  author    = {Hansen, C. and Henderson, T.C.},
  title     = {Towards the Automatic Generation of Recognition Strategies},
  booktitle = ICCV88,
  pages     = {275-279},
  year      = {1988},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101682},
}

@inproceedings{bb105340,
  author    = {Cantoni, V. and Cei, U. and Ferretti, M. and Lombardi, L.},
  title     = {Towards an Automatic Construction of Object Recognition Strategies},
  booktitle = ICPR88,
  pages     = {I: 371-374},
  year      = {1988},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101683},
}

@inproceedings{bb105341,
  author    = {Cova, G. and Griffini, A. and Lombardi, L.},
  title     = {Object Recognition Strategy in a Multi-Resolution System},
  booktitle = CIAP89,
  pages     = {729-733},
  year      = {1989},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT101684},
}

@article{bb105342,
  author    = {Feng, Y.S. and Lapata, M.},
  title     = {Automatic Caption Generation for News Images},
  journal   = PAMI,
  volume    = {35},
  number    = {4},
  pages     = {797-812},
  month     = {April},
  year      = {2013},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101685},
}

@article{bb105343,
  author    = {Nakayama, H. and Harada, T. and Kuniyoshi, Y.},
  title     = {Dense Sampling Low-Level Statistics of Local Features},
  journal   = IEICE,
  volume    = {E93-D},
  number    = {7},
  pages     = {1727-1736},
  month     = {July},
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101686},
}

@inproceedings{bb105344,
  author    = {Kuniyoshi, Y. and Harada, T. and Nakayama, H.},
  title     = {Dense Sampling Low-Level Statistics of Local Features},
  booktitle = CIVR09,
  pages     = {Article No 17},
  year      = {2009},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101686},
}

@inproceedings{bb105345,
  author    = {Nakayama, H. and Harada, T. and Kuniyoshi, Y.},
  title     = {Global Gaussian approach for scene categorization using information
               geometry},
  booktitle = CVPR10,
  pages     = {2336-2343},
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101687},
}

@inproceedings{bb105346,
  author    = {Nakayama, H. and Harada, T. and Kuniyoshi, Y.},
  title     = {AI Goggles: Real-time Description and Retrieval in the Real World with
               Online Learning},
  booktitle = CRV09,
  pages     = {184-191},
  year      = {2009},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101688},
}

@inproceedings{bb105347,
  author    = {Ushiku, Y. and Yamaguchi, M. and Mukuta, Y. and Harada, T.},
  title     = {Common Subspace for Model and Similarity:
               Phrase Learning for Caption Generation from Images},
  booktitle = ICCV15,
  pages     = {2668-2676},
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101689},
}

@inproceedings{bb105348,
  author    = {Jin, J. and Nakayama, H.},
  title     = {Annotation order matters:
               Recurrent Image Annotator for arbitrary length image tagging},
  booktitle = ICPR16,
  pages     = {2452-2457},
  year      = {2016},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101690},
}

@inproceedings{bb105349,
  author    = {Harada, T. and Nakayama, H. and Kuniyoshi, Y.},
  title     = {Improving Local Descriptors by Embedding Global and Local Spatial
               Information},
  booktitle = ECCV10,
  pages     = {IV: 736-749},
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101691},
}

% Upstream gives no page numbers for this entry (placeholder "xx-yy"), so the
% pages field is omitted rather than printed literally in the bibliography.
@inproceedings{bb105350,
  author    = {Nakayama, H. and Harada, T. and Kuniyoshi, Y.},
  title     = {Evaluation of dimensionality reduction methods for image
               auto-annotation},
  booktitle = BMVC10,
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101692},
}

@article{bb105351,
  author    = {Verma, Y. and Jawahar, C.V.},
  title     = {A support vector approach for cross-modal search of images and texts},
  journal   = CVIU,
  volume    = {154},
  number    = {1},
  pages     = {48-63},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101693},
}

@inproceedings{bb105352,
  author    = {Dutta, A. and Verma, Y. and Jawahar, C.V.},
  title     = {Recurrent Image Annotation with Explicit Inter-Label Dependencies},
  booktitle = ECCV20,
  pages     = {XXIX: 191-207},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101694},
}

@article{bb105353,
  author    = {Xue, J.F. and Eguchi, K.},
  title     = {Video Data Modeling Using Sequential Correspondence Hierarchical
               Dirichlet Processes},
  journal   = IEICE,
  volume    = {E100-D},
  number    = {1},
  pages     = {33-41},
  month     = {January},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101695},
}

@article{bb105354,
  author    = {Tariq, A. and Foroosh, H.},
  title     = {A Context-Driven Extractive Framework for Generating Realistic Image
               Descriptions},
  journal   = IP,
  volume    = {26},
  number    = {2},
  pages     = {619-632},
  month     = {February},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101696},
}

@article{bb105355,
  author    = {Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.},
  title     = {Show and Tell: Lessons Learned from the 2015 MSCOCO Image Captioning
               Challenge},
  journal   = PAMI,
  volume    = {39},
  number    = {4},
  pages     = {652-663},
  month     = {April},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101697},
}

@inproceedings{bb105356,
  author    = {Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.},
  title     = {Show and tell: A neural image caption generator},
  booktitle = CVPR15,
  pages     = {3156-3164},
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101698},
}

@article{bb105357,
  author    = {Gao, L.L. and Guo, Z. and Zhang, H.W. and Xu, X. and Shen, H.T.},
  title     = {Video Captioning With Attention-Based LSTM and Semantic Consistency},
  journal   = MultMed,
  volume    = {19},
  number    = {9},
  pages     = {2045-2055},
  month     = {September},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101699},
}

@article{bb105358,
  author    = {Hu, M. and Yang, Y. and Shen, F. and Zhang, L. and Shen, H.T. and Li, X.},
  title     = {Robust Web Image Annotation via Exploring Multi-Facet and Structural
               Knowledge},
  journal   = IP,
  volume    = {26},
  number    = {10},
  pages     = {4871-4884},
  month     = {October},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101700},
}

@article{bb105359,
  author    = {Bin, Y. and Yang, Y. and Shen, F. and Xie, N. and Shen, H.T. and Li, X.},
  title     = {Describing Video With Attention-Based Bidirectional LSTM},
  journal   = Cyber,
  volume    = {49},
  number    = {7},
  pages     = {2631-2641},
  month     = {July},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101701},
}

@article{bb105360,
  author    = {Wang, J.Y. and Zhu, X.T. and Gong, S.G.},
  title     = {Discovering visual concept structure with sparse and incomplete tags},
  journal   = AI,
  volume    = {250},
  number    = {1},
  pages     = {16-36},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101702},
}

@article{bb105361,
  author    = {Kilickaya, M. and Akkus, B.K. and Cakici, R. and Erdem, A. and Erdem, E. and Ikizler Cinbis, N.},
  title     = {Data-driven image captioning via salient region discovery},
  journal   = IET-CV,
  volume    = {11},
  number    = {6},
  pages     = {398-406},
  month     = {September},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101703},
}

@article{bb105362,
  author    = {Fu, K. and Jin, J.Q. and Cui, R.P. and Sha, F. and Zhang, C.S.},
  title     = {Aligning Where to See and What to Tell: Image Captioning with
               Region-Based Attention and Scene-Specific Contexts},
  journal   = PAMI,
  volume    = {39},
  number    = {12},
  pages     = {2321-2334},
  month     = {December},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101704},
}

% The ampersand in the title is escaped: a bare one is a LaTeX alignment
% character and breaks typesetting of the bibliography.
@article{bb105363,
  author    = {Liu, A.A. and Xu, N. and Wong, Y. and Li, J. and Su, Y.T. and Kankanhalli, M.},
  title     = {Hierarchical \& multimodal video captioning: Discovering and
               transferring multimodal knowledge for vision to language},
  journal   = CVIU,
  volume    = {163},
  number    = {1},
  pages     = {113-125},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101705},
}

@article{bb105364,
  author    = {Nian, F.D. and Li, T. and Wang, Y. and Wu, X.Y. and Ni, B.B. and Xu, C.S.},
  title     = {Learning explicit video attributes from mid-level representation for
               video captioning},
  journal   = CVIU,
  volume    = {163},
  number    = {1},
  pages     = {126-138},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101706},
}

@article{bb105365,
  author    = {He, X.D. and Deng, L.},
  title     = {Deep Learning for Image-to-Text Generation: A Technical Overview},
  journal   = SPMag,
  volume    = {34},
  number    = {6},
  pages     = {109-116},
  month     = {November},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101707},
}

@article{bb105366,
  author    = {Deng, L. and He, X.D.},
  title     = {Deep Learning for Image-to-Text Generation: A Technical Overview},
  journal   = SPMag,
  volume    = {35},
  number    = {1},
  pages     = {178},
  month     = {January},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101707},
}

@article{bb105367,
  author    = {Li, L.H. and Tang, S. and Zhang, Y.D. and Deng, L.X. and Tian, Q.},
  title     = {GLA: Global-Local Attention for Image Description},
  journal   = MultMed,
  volume    = {20},
  number    = {3},
  pages     = {726-737},
  month     = {March},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101708},
}

@article{bb105368,
  author    = {Guan, J.N. and Wang, E.},
  title     = {Repeated review based image captioning for image evidence review},
  journal   = SP:IC,
  volume    = {63},
  pages     = {141-148},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101709},
}

@article{bb105369,
  author    = {Lu, X. and Wang, B. and Zheng, X. and Li, X.},
  title     = {Exploring Models and Data for Remote Sensing Image Caption Generation},
  journal   = GeoRS,
  volume    = {56},
  number    = {4},
  pages     = {2183-2195},
  month     = {April},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101710},
}

@article{bb105370,
  author    = {Cheng, Q. and Zhang, Q. and Fu, P. and Tu, C.H. and Li, S.},
  title     = {A survey and analysis on automatic image annotation},
  journal   = PR,
  volume    = {79},
  pages     = {242-259},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101711},
}

@article{bb105371,
  author    = {Ben Rejeb, I. and Ouni, S. and Barhoumi, W. and Zagrouba, E.},
  title     = {Fuzzy VA-Files for multi-label image annotation based on visual content
               of regions},
  journal   = SIViP,
  volume    = {12},
  number    = {5},
  pages     = {877-884},
  month     = {July},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101712},
}

@article{bb105372,
  author    = {Helmy, T.},
  title     = {A Generic Framework for Semantic Annotation of Images},
  journal   = IJIG,
  volume    = {18},
  number    = {3},
  pages     = {Article 1850013},
  month     = {July},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101713},
}

@article{bb105373,
  author    = {Wu, C.L. and Wei, Y. and Chu, X.L. and Su, F. and Wang, L.},
  title     = {Modeling visual and word-conditional semantic attention for image
               captioning},
  journal   = SP:IC,
  volume    = {67},
  pages     = {100-107},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101714},
}

@article{bb105374,
  author    = {Ye, S. and Han, J. and Liu, N.},
  title     = {Attentive Linear Transformation for Image Captioning},
  journal   = IP,
  volume    = {27},
  number    = {11},
  pages     = {5514-5524},
  month     = {November},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101715},
}

@article{bb105375,
  author    = {Zhang, M. and Yang, Y. and Zhang, H. and Ji, Y. and Shen, H.T. and Chua, T.},
  title     = {More is Better: Precise and Detailed Image Captioning Using Online
               Positive Recall and Missing Concepts Mining},
  journal   = IP,
  volume    = {28},
  number    = {1},
  pages     = {32-44},
  month     = {January},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101716},
}

@article{bb105376,
  author    = {Hu, J. and Lam, K.M. and Lou, P. and Liu, Q. and Deng, W.P.},
  title     = {Can a machine have two systems for recognition, like human beings?},
  journal   = JVCIR,
  volume    = {56},
  pages     = {275-286},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101717},
}

@article{bb105377,
  author    = {Bhagat, P.K. and Choudhary, P.},
  title     = {Image annotation: Then and now},
  journal   = IVC,
  volume    = {80},
  pages     = {1-23},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101718},
}

@article{bb105378,
  author    = {Gil Gonzalez, J. and Alvarez Meza, A. and Orozco Gutierrez, A.},
  title     = {Learning from multiple annotators using kernel alignment},
  journal   = PRL,
  volume    = {116},
  pages     = {150-156},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101719},
}

@article{bb105379,
  author    = {Bazrafkan, S. and Javidnia, H. and Corcoran, P.},
  title     = {Latent space mapping for generation of object elements with
               corresponding data annotation},
  journal   = PRL,
  volume    = {116},
  pages     = {179-186},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101720},
}

@article{bb105380,
  author    = {Gella, S. and Keller, F. and Lapata, M.},
  title     = {Disambiguating Visual Verbs},
  journal   = PAMI,
  volume    = {41},
  number    = {2},
  pages     = {311-322},
  month     = {February},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101721},
}

@article{bb105381,
  author    = {Xu, N. and Liu, A.A. and Liu, J. and Nie, W.Z. and Su, Y.T.},
  title     = {Scene graph captioner:
               Image captioning based on structural visual representation},
  journal   = JVCIR,
  volume    = {58},
  pages     = {477-485},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101722},
}

@article{bb105382,
  author    = {Niu, Y. and Lu, Z. and Wen, J. and Xiang, T. and Chang, S.},
  title     = {Multi-Modal Multi-Scale Deep Learning for Large-Scale Image
               Annotation},
  journal   = IP,
  volume    = {28},
  number    = {4},
  pages     = {1720-1731},
  month     = {April},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101723},
}

@article{bb105383,
  author    = {Jiu, M.Y. and Sahbi, H.},
  title     = {Deep representation design from deep kernel networks},
  journal   = PR,
  volume    = {88},
  pages     = {447-457},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101724},
}

@article{bb105384,
  author    = {He, X.W. and Shi, B.G. and Bai, X. and Xia, G.S. and Zhang, Z.X. and Dong, W.S.},
  title     = {Image Caption Generation with Part of Speech Guidance},
  journal   = PRL,
  volume    = {119},
  pages     = {229-237},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101725},
}

@article{bb105385,
  author    = {Park, C.C. and Kim, B. and Kim, G.},
  title     = {Towards Personalized Image Captioning via Multimodal Memory Networks},
  journal   = PAMI,
  volume    = {41},
  number    = {4},
  pages     = {999-1012},
  month     = {April},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101726},
}

@inproceedings{bb105386,
  author    = {Park, C.C. and Kim, B. and Kim, G.},
  title     = {Attend to You: Personalized Image Captioning with Context Sequence
               Memory Networks},
  booktitle = CVPR17,
  pages     = {6432-6440},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT101727},
}

@article{bb105387,
  AUTHOR    = {Xiao, X.Y. and Wang, L.F. and Ding, K. and Xiang, S.M. and Pan, C.},
  TITLE     = {Dense semantic embedding network for image captioning},
  JOURNAL   = PR,
  VOLUME    = {90},
  YEAR      = {2019},
  PAGES     = {285-296},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101728},
}

@article{bb105388,
  AUTHOR    = {Yang, M. and Zhao, W. and Xu, W. and Feng, Y. and Zhao, Z. and Chen, X. and Lei, K.},
  TITLE     = {Multitask Learning for Cross-Domain Image Captioning},
  JOURNAL   = MultMed,
  VOLUME    = {21},
  NUMBER    = {4},
  YEAR      = {2019},
  MONTH     = {April},
  PAGES     = {1047-1061},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101729},
}

@article{bb105389,
  AUTHOR    = {Foumani, S.N.M. and Nickabadi, A.},
  TITLE     = {A probabilistic topic model using deep visual word representation for simultaneous image classification and annotation},
  JOURNAL   = JVCIR,
  VOLUME    = {59},
  YEAR      = {2019},
  PAGES     = {195-203},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101730},
}

@article{bb105390,
  AUTHOR    = {Zhang, X.R. and Wang, X. and Tang, X. and Zhou, H.Y. and Li, C.},
  TITLE     = {Description Generation for Remote Sensing Images Using Attribute Attention Mechanism},
  JOURNAL   = RS,
  VOLUME    = {11},
  NUMBER    = {6},
  YEAR      = {2019},
  PAGES     = {xx-yy},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101731},
}

@article{bb105391,
  AUTHOR    = {Zheng, H. and Wu, J.H. and Liang, R. and Li, Y. and Li, X.Z.},
  TITLE     = {Multi-task learning for captioning images with novel words},
  JOURNAL   = IET-CV,
  VOLUME    = {13},
  NUMBER    = {3},
  YEAR      = {2019},
  MONTH     = {April},
  PAGES     = {294-301},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101732},
}

@article{bb105392,
  AUTHOR    = {Yu, N. and Hu, X. and Song, B. and Yang, J. and Zhang, J.},
  TITLE     = {Topic-Oriented Image Captioning Based on Order-Embedding},
  JOURNAL   = IP,
  VOLUME    = {28},
  NUMBER    = {6},
  YEAR      = {2019},
  MONTH     = {June},
  PAGES     = {2743-2754},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101733},
}

@article{bb105393,
  AUTHOR    = {Ding, S.T. and Qu, S. and Xi, Y.L. and Sangaiah, A.K. and Wan, S.H.},
  TITLE     = {Image caption generation with high-level image features},
  JOURNAL   = PRL,
  VOLUME    = {123},
  YEAR      = {2019},
  PAGES     = {89-95},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101734},
}

@article{bb105394,
  AUTHOR    = {Liu, X.X. and Xu, Q.Y. and Wang, N.},
  TITLE     = {A survey on deep neural network-based image captioning},
  JOURNAL   = VC,
  VOLUME    = {35},
  NUMBER    = {3},
  YEAR      = {2019},
  MONTH     = {March},
  PAGES     = {445-470},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101735},
}

@article{bb105395,
  AUTHOR    = {Hossain, M.Z. and Sohel, F. and Shiratuddin, M.F. and Laga, H.},
  TITLE     = {A Comprehensive Survey of Deep Learning for Image Captioning},
  JOURNAL   = Surveys,
  VOLUME    = {51},
  NUMBER    = {6},
  YEAR      = {2019},
  MONTH     = {February},
  PAGES     = {Article No 118},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101736},
}

@article{bb105396,
        AUTHOR = "Peng, Y.Q. and Liu, X. and Wang, W.H. and Zhao, X.S. and Wei, M.",
        TITLE = "Image caption model of double {LSTM} with scene factors",
        JOURNAL = IVC,
        VOLUME = "86",
        YEAR = "2019",
        PAGES = "38-44",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT101737"}

@article{bb105397,
  AUTHOR    = {Zhang, J.J. and Wu, Q. and Zhang, J. and Shen, C.H. and Lu, J.F. and Wu, Q.A.},
  TITLE     = {Heritage image annotation via collective knowledge},
  JOURNAL   = PR,
  VOLUME    = {93},
  YEAR      = {2019},
  PAGES     = {204-214},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101738},
}

@article{bb105398,
  AUTHOR    = {Verma, Y.},
  TITLE     = {Diverse image annotation with missing labels},
  JOURNAL   = PR,
  VOLUME    = {93},
  YEAR      = {2019},
  PAGES     = {470-484},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101739},
}

@article{bb105399,
  AUTHOR    = {Markatopoulou, F. and Mezaris, V. and Patras, I.},
  TITLE     = {Implicit and Explicit Concept Relations in Deep Neural Networks for Multi-Label Video/Image Annotation},
  JOURNAL   = CirSysVideo,
  VOLUME    = {29},
  NUMBER    = {6},
  YEAR      = {2019},
  MONTH     = {June},
  PAGES     = {1631-1644},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT101740},
}

Last update: Sep 19, 2021 at 21:11:01