@inproceedings{bb104200,
        AUTHOR = "Vedaldi, A. and Mahendran, S. and Tsogkas, S. and Maji, S. and Girshick, R. and Kannala, J.H. and Rahtu, E. and Kokkinos, I. and Blaschko, M.B. and Weiss, D. and Taskar, B. and Simonyan, K. and Saphra, N. and Mohamed, S.",
        TITLE = "Understanding Objects in Detail with Fine-Grained Attributes",
        BOOKTITLE = CVPR14,
        YEAR = "2014",
        PAGES = "3622--3629",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605fg1.html#TT100590"}

@inproceedings{bb104201,
        AUTHOR = "Zhang, N. and Donahue, J. and Girshick, R. and Darrell, T.J.",
        TITLE = "Part-Based {R-CNNs} for Fine-Grained Category Detection",
        BOOKTITLE = ECCV14,
        YEAR = "2014",
        PAGES = "I: 834--849",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605fg1.html#TT100591"}

@inproceedings{bb104202,
        AUTHOR = "Kanan, C.",
        TITLE = "Fine-grained object recognition with {Gnostic Fields}",
        BOOKTITLE = WACV14,
        YEAR = "2014",
        PAGES = "23--30",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605fg1.html#TT100592"}

@inproceedings{bb104203,
        AUTHOR = "Ordonez, V. and Jagadeesh, V. and Di, W. and Bhardwaj, A. and Piramuthu, R.",
        TITLE = "Furniture-geek: Understanding fine-grained furniture attributes from
freely associated text and tags",
        BOOKTITLE = WACV14,
        YEAR = "2014",
        PAGES = "317--324",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605fg1.html#TT100593"}

@article{bb104204,
        AUTHOR = "Taylor, S.L. and Dahl, D.A. and Lipshutz, M. and Weir, C. and Norton, L.M. and Nilson, R.W. and Linebarger, M.C.",
        TITLE = "Integrating Natural-Language Understanding with
Document Structure-Analysis",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1994",
        NUMBER = "2-3",
        PAGES = "255--276",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100594"}

@article{bb104205,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing: Theory",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "247--250",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100595"}

@book{bb104206,
  author    = {McKevitt, P.},
  title     = {Integration of Natural-Language and Vision Processing: Theory},
  publisher = {Springer},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT100595},
}

@article{bb104207,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Intelligent Multimedia",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "2-3",
        MONTH = jun,
        PAGES = "77--80",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100596"}

@article{bb104208,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
More Computational Models and Systems",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "345--348",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100597"}

@article{bb104209,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Computational Models and Systems",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1994",
        NUMBER = "2-3",
        PAGES = "99--104",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100598"}

@book{bb104210,
  author    = {McKevitt, P.},
  title     = {Integration of Natural-Language and Vision Processing: Computational Models and Systems},
  publisher = {Kluwer},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT100598},
}

@article{bb104211,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Grounding Representations",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "7--13",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100599"}

@book{bb104212,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural Language and Vision Processing",
        PUBLISHER = "Kluwer",
        YEAR = "1996",
        MONTH = sep,
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100600"}

@article{bb104213,
        AUTHOR = "Siskind, J.M.",
        TITLE = "Grounding Language in Perception",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "371--391",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100601"}

@article{bb104214,
        AUTHOR = "Srihari, R.K.",
        TITLE = "Automatic-Indexing and Content-Based Retrieval of Captioned Images",
        JOURNAL = Computer,
        VOLUME = "28",
        YEAR = "1995",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "49--56",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100602"}

@article{bb104215,
        AUTHOR = "Srihari, R.K.",
        TITLE = "Use of Captions and Other Collateral Text in Understanding Photographs",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "409--430",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100603"}

@article{bb104216,
        AUTHOR = "Srihari, R.K.",
        TITLE = "Use of Collateral Text in Understanding Photos in Documents",
        JOURNAL = SPIE,
        VOLUME = "2368",
        YEAR = "1994",
        PAGES = "186--199",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100604"}

@article{bb104217,
        AUTHOR = "Srihari, R.K.",
        TITLE = "Computational Models for Integrating Linguistic and Visual Information:
A Survey",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "349--369",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100605"}

@inproceedings{bb104218,
        AUTHOR = "Srihari, R.K.",
        TITLE = "Using Linguistic Context for Image Interpretation and Annotation",
        BOOKTITLE = "Radius97",
        YEAR = "1997",
        PAGES = "419--427",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100606"}

@inproceedings{bb104219,
  author    = {Srihari, R.K.},
  title     = {Linguistic Context In Vision},
  booktitle = Context95,
  year      = {1995},
  pages     = {xx},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT100607},
}

@inproceedings{bb104220,
        AUTHOR = "Srihari, R.K. and Zhang, Z. and Venkatraman, M. and Chopra, R.",
        TITLE = "Using Speech Input for Image Interpretation and Annotation",
        BOOKTITLE = ARPA96,
        YEAR = "1996",
        PAGES = "501--510",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100608"}

@inproceedings{bb104221,
        AUTHOR = "Burhans, D.T. and Chopra, R. and Srihari, R.K. and Govindaraju, V. and Venkataraman, M.",
        TITLE = "Use of Collateral Text in Image Interpretation",
        BOOKTITLE = ARPA94,
        YEAR = "1994",
        PAGES = "II:897--907",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100609"}

@inproceedings{bb104222,
        AUTHOR = "Srihari, R.K. and Burhans, D.T.",
        TITLE = "Visual Semantics: Extracting Visual Information from
Text Accompanying Pictures",
        BOOKTITLE = AAAI-94,
        YEAR = "1994",
        PAGES = "793--798",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100610"}

@inproceedings{bb104223,
        AUTHOR = "Govindaraju, V. and Srihari, S.N. and Sher, D.B.",
        TITLE = "Caption-Aided Face Location In Newspaper Photographs",
        BOOKTITLE = ICPR92,
        YEAR = "1992",
        PAGES = "I:474--477",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100611"}

@inproceedings{bb104224,
        AUTHOR = "Govindaraju, V. and Srihari, S.N. and Sher, D.B.",
        TITLE = "A Computational Model for Face Location Based on Cognitive Principles",
        BOOKTITLE = AAAI-92,
        YEAR = "1992",
        PAGES = "350--355",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100612"}

@article{bb104225,
        AUTHOR = "Schank, R.C. and Fano, A.",
        TITLE = "Memory and Expectations in Learning, Language, and Visual Understanding",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "261--271",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100613"}

@article{bb104226,
        AUTHOR = "Wilks, Y.",
        TITLE = "Language, Vision and Metaphor",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "273--289",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100614"}

@article{bb104227,
        AUTHOR = "Partridge, D.",
        TITLE = "Language and Vision: A Single Perceptual Mechanism",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "291--303",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100615"}

@article{bb104228,
        AUTHOR = "Marconi, D.",
        TITLE = "Work on the Integration of Language and Vision at the
{University of Torino}",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "15--20",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100616"}

@article{bb104229,
        AUTHOR = "Meini, C. and Paternoster, A.",
        TITLE = "Understanding Language Through Vision",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "37--48",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100617"}

@article{bb104230,
        AUTHOR = "McKevitt, P. and Guo, C.M.",
        TITLE = "From {Chinese} Rooms to {Irish} Rooms: New Words on Visions for Language",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "49--63",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100618"}

@article{bb104231,
        AUTHOR = "Grumbach, A.",
        TITLE = "Grounding Symbols into Perceptions",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "131--146",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100619"}

@article{bb104232,
        AUTHOR = "Socher, G. and Sagerer, G.F. and Perona, P.",
        TITLE = "Bayesian reasoning on qualitative descriptions from images and speech",
        JOURNAL = IVC,
        VOLUME = "18",
        YEAR = "2000",
        NUMBER = "2",
        MONTH = jan,
        PAGES = "155--172",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100620"}

@article{bb104233,
        AUTHOR = "Mukerjee, A. and Gupta, K. and Nautiyal, S. and Singh, M.P. and Mishra, N.",
        TITLE = "Conceptual description of visual scenes from linguistic models",
        JOURNAL = IVC,
        VOLUME = "18",
        YEAR = "2000",
        NUMBER = "2",
        MONTH = jan,
        PAGES = "173--187",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100621"}

@article{bb104234,
        AUTHOR = "Arens, M. and Gerber, R. and Nagel, H.H.",
        TITLE = "Conceptual representations between video signals and natural language
descriptions",
        JOURNAL = IVC,
        VOLUME = "26",
        YEAR = "2008",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "53--66",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100622"}

@inproceedings{bb104235,
        AUTHOR = "Gerber, R. and Nagel, H.H.",
        TITLE = "(Mis?-) Using {DRT} for Generation of Natural Language Text
from Image Sequences",
        BOOKTITLE = ECCV98,
        YEAR = "1998",
        PAGES = "II: 255",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100623"}

@article{bb104236,
        AUTHOR = "Lee, Y.J. and Grauman, K.",
        TITLE = "Object-Graphs for Context-Aware Visual Category Discovery",
        JOURNAL = PAMI,
        VOLUME = "34",
        YEAR = "2012",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "346--358",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100624"}

@inproceedings{bb104237,
        AUTHOR = "Lee, Y.J. and Grauman, K.",
        TITLE = "Object-graphs for context-aware category discovery",
        BOOKTITLE = CVPR10,
        YEAR = "2010",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100625"}

@inproceedings{bb104238,
        AUTHOR = "Lee, Y.J. and Grauman, K.",
        TITLE = "Learning the easy things first: Self-paced visual category discovery",
        BOOKTITLE = CVPR11,
        YEAR = "2011",
        PAGES = "1721--1728",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100626"}

@article{bb104239,
        AUTHOR = "Yu, A. and Grauman, K.",
        TITLE = "Densifying Supervision for Fine-Grained Visual Comparisons",
        JOURNAL = IJCV,
        VOLUME = "128",
        YEAR = "2020",
        NUMBER = "10-11",
        MONTH = nov,
        PAGES = "2704--2730",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100627"}

@inproceedings{bb104240,
  author    = {Perona, P.},
  title     = {A taxonomy of visual recognition},
  booktitle = VMV04,
  year      = {2004},
  pages     = {187},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT100628},
}

@inproceedings{bb104241,
        AUTHOR = "Takahashi, T. and Nakanishi, S. and Kuno, Y. and Shirai, Y.",
        TITLE = "Helping Computer Vision by Verbal and Nonverbal Communication",
        BOOKTITLE = ICPR98,
        YEAR = "1998",
        PAGES = "Vol II: 1216--1218",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100629"}

@inproceedings{bb104242,
        AUTHOR = "Satoh, S. and Nakamura, Y. and Kanade, T.",
        TITLE = "{Name-It}: Naming and Detecting Faces in Video by the Integration
of Image and Natural Language Processing",
        BOOKTITLE = IJCAI97,
        YEAR = "1997",
        PAGES = "1488--1495",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100630"}

@inproceedings{bb104243,
        AUTHOR = "Satoh, S. and Kanade, T.",
        TITLE = "{Name-It}: Association Of Face And Name In Video",
        BOOKTITLE = CVPR97,
        YEAR = "1997",
        PAGES = "368--373",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100631"}

@inproceedings{bb104244,
        AUTHOR = "Socher, G. and Sagerer, G.F. and Kummert, F. and Fuhr, T.",
        TITLE = "Talking About {3D} Scenes: Integration of Image and Speech Understanding
in a Hybrid Distributed System",
        BOOKTITLE = ICIP96,
        YEAR = "1996",
        PAGES = "II: 809--812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT100632"}

@article{bb104245,
        AUTHOR = "Kuniyoshi, Y. and Inaba, M. and Inoue, H.",
        TITLE = "Learning by Watching: Extracting Reusable Task Knowledge from
Visual Observation of Human Performance",
        JOURNAL = RA,
        VOLUME = "10",
        YEAR = "1994",
        PAGES = "799--822",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100633"}

@inproceedings{bb104246,
        AUTHOR = "Kuniyoshi, Y. and Inoue, H.",
        TITLE = "Indexicality and dynamic attention control in qualitative recognition
of assembly actions",
        BOOKTITLE = ECCV92,
        YEAR = "1992",
        PAGES = "874--878",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100634"}

@article{bb104247,
        AUTHOR = "Porway, J. and Wang, Q.C. and Zhu, S.C.",
        TITLE = "A Hierarchical and Contextual Model for Aerial Image Parsing",
        JOURNAL = IJCV,
        VOLUME = "88",
        YEAR = "2010",
        NUMBER = "2",
        MONTH = jun,
        PAGES = "xx-yy",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100635"}

@inproceedings{bb104248,
        AUTHOR = "Porway, J. and Wang, K. and Yao, B. and Zhu, S.C.",
        TITLE = "A hierarchical and contextual model for aerial image understanding",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100636"}

@inproceedings{bb104249,
        AUTHOR = "Si, Z.Z. and Gong, H.F. and Wu, Y.N. and Zhu, S.C.",
        TITLE = "Learning mixed templates for object recognition",
        BOOKTITLE = CVPR09,
        YEAR = "2009",
        PAGES = "272--279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100637"}

@article{bb104250,
        AUTHOR = "Tu, Z.W. and Bai, X.",
        TITLE = "Auto-Context and Its Application to High-Level Vision Tasks and {3D}
Brain Image Segmentation",
        JOURNAL = PAMI,
        VOLUME = "32",
        YEAR = "2010",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "1744--1757",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100638"}

@inproceedings{bb104251,
        AUTHOR = "Tu, Z.W.",
        TITLE = "Auto-context and its application to high-level vision tasks",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100639"}

@inproceedings{bb104252,
        AUTHOR = "Jones, J. and Hager, G.D. and Khudanpur, S.",
        TITLE = "Toward Computer Vision Systems That Understand Real-World Assembly
Processes",
        BOOKTITLE = WACV19,
        YEAR = "2019",
        PAGES = "426--434",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100640"}

@inproceedings{bb104253,
        AUTHOR = "Lampert, C.H.",
        TITLE = "Partitioning of image datasets using discriminative context information",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100641"}

@inproceedings{bb104254,
        AUTHOR = "Hansen, C. and Henderson, T.C.",
        TITLE = "Towards the Automatic Generation of Recognition Strategies",
        BOOKTITLE = ICCV88,
        YEAR = "1988",
        PAGES = "275--279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100642"}

@inproceedings{bb104255,
        AUTHOR = "Cantoni, V. and Cei, U. and Ferretti, M. and Lombardi, L.",
        TITLE = "Towards an Automatic Construction of Object Recognition Strategies",
        BOOKTITLE = ICPR88,
        YEAR = "1988",
        PAGES = "I: 371--374",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100643"}

@inproceedings{bb104256,
        AUTHOR = "Cova, G. and Griffini, A. and Lombardi, L.",
        TITLE = "Object Recognition Strategy in a Multi-Resolution System",
        BOOKTITLE = CIAP89,
        YEAR = "1989",
        PAGES = "729--733",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT100644"}

@article{bb104257,
        AUTHOR = "Feng, Y.S. and Lapata, M.",
        TITLE = "Automatic Caption Generation for News Images",
        JOURNAL = PAMI,
        VOLUME = "35",
        YEAR = "2013",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "797--812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100645"}

@article{bb104258,
        AUTHOR = "Nakayama, H. and Harada, T. and Kuniyoshi, Y.",
        TITLE = "Dense Sampling Low-Level Statistics of Local Features",
        JOURNAL = IEICE,
        VOLUME = "E93-D",
        YEAR = "2010",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "1727--1736",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100646"}

@inproceedings{bb104259,
  author    = {Kuniyoshi, Y. and Harada, T. and Nakayama, H.},
  title     = {Dense Sampling Low-Level Statistics of Local Features},
  booktitle = CIVR09,
  year      = {2009},
  pages     = {Article No 17},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT100646},
}

@inproceedings{bb104260,
        AUTHOR = "Nakayama, H. and Harada, T. and Kuniyoshi, Y.",
        TITLE = "Global {Gaussian} approach for scene categorization using information
geometry",
        BOOKTITLE = CVPR10,
        YEAR = "2010",
        PAGES = "2336--2343",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100647"}

@inproceedings{bb104261,
        AUTHOR = "Nakayama, H. and Harada, T. and Kuniyoshi, Y.",
        TITLE = "{AI} Goggles: Real-time Description and Retrieval in the Real World with
Online Learning",
        BOOKTITLE = CRV09,
        YEAR = "2009",
        PAGES = "184--191",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100648"}

@inproceedings{bb104262,
        AUTHOR = "Ushiku, Y. and Yamaguchi, M. and Mukuta, Y. and Harada, T.",
        TITLE = "Common Subspace for Model and Similarity:
Phrase Learning for Caption Generation from Images",
        BOOKTITLE = ICCV15,
        YEAR = "2015",
        PAGES = "2668--2676",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100649"}

@inproceedings{bb104263,
        AUTHOR = "Jin, J. and Nakayama, H.",
        TITLE = "Annotation order matters:
Recurrent Image Annotator for arbitrary length image tagging",
        BOOKTITLE = ICPR16,
        YEAR = "2016",
        PAGES = "2452--2457",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100650"}

@inproceedings{bb104264,
        AUTHOR = "Harada, T. and Nakayama, H. and Kuniyoshi, Y.",
        TITLE = "Improving Local Descriptors by Embedding Global and Local Spatial
Information",
        BOOKTITLE = ECCV10,
        YEAR = "2010",
        PAGES = "IV: 736--749",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100651"}

@inproceedings{bb104265,
  author    = {Nakayama, H. and Harada, T. and Kuniyoshi, Y.},
  title     = {Evaluation of dimensionality reduction methods for image auto-annotation},
  booktitle = BMVC10,
  year      = {2010},
  pages     = {xx-yy},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT100652},
}

@article{bb104266,
        AUTHOR = "Verma, Y. and Jawahar, C.V.",
        TITLE = "A support vector approach for cross-modal search of images and texts",
        JOURNAL = CVIU,
        VOLUME = "154",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "48--63",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100653"}

@inproceedings{bb104267,
        AUTHOR = "Dutta, A. and Verma, Y. and Jawahar, C.V.",
        TITLE = "Recurrent Image Annotation with Explicit Inter-Label Dependencies",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XXIX: 191--207",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100654"}

@article{bb104268,
        AUTHOR = "Xue, J.F. and Eguchi, K.",
        TITLE = "Video Data Modeling Using Sequential Correspondence Hierarchical
{Dirichlet} Processes",
        JOURNAL = IEICE,
        VOLUME = "E100-D",
        YEAR = "2017",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "33--41",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100655"}

@article{bb104269,
        AUTHOR = "Tariq, A. and Foroosh, H.",
        TITLE = "A Context-Driven Extractive Framework for Generating Realistic Image
Descriptions",
        JOURNAL = IP,
        VOLUME = "26",
        YEAR = "2017",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "619--632",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100656"}

@article{bb104270,
        AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
        TITLE = "Show and Tell: Lessons Learned from the 2015 {MSCOCO} Image Captioning
Challenge",
        JOURNAL = PAMI,
        VOLUME = "39",
        YEAR = "2017",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "652--663",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100657"}

@inproceedings{bb104271,
        AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
        TITLE = "Show and tell: A neural image caption generator",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "3156--3164",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100658"}

@article{bb104272,
        AUTHOR = "Gao, L.L. and Guo, Z. and Zhang, H.W. and Xu, X. and Shen, H.T.",
        TITLE = "Video Captioning With Attention-Based {LSTM} and Semantic Consistency",
        JOURNAL = MultMed,
        VOLUME = "19",
        YEAR = "2017",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2045--2055",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100659"}

@article{bb104273,
        AUTHOR = "Hu, M. and Yang, Y. and Shen, F. and Zhang, L. and Shen, H.T. and Li, X.",
        TITLE = "Robust Web Image Annotation via Exploring Multi-Facet and Structural
Knowledge",
        JOURNAL = IP,
        VOLUME = "26",
        YEAR = "2017",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "4871--4884",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100660"}

@article{bb104274,
        AUTHOR = "Bin, Y. and Yang, Y. and Shen, F. and Xie, N. and Shen, H.T. and Li, X.",
        TITLE = "Describing Video With Attention-Based Bidirectional {LSTM}",
        JOURNAL = Cyber,
        VOLUME = "49",
        YEAR = "2019",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "2631--2641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100661"}

@article{bb104275,
        AUTHOR = "Wang, J.Y. and Zhu, X.T. and Gong, S.G.",
        TITLE = "Discovering visual concept structure with sparse and incomplete tags",
        JOURNAL = AI,
        VOLUME = "250",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "16--36",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100662"}

@article{bb104276,
        AUTHOR = "Kilickaya, M. and Akkus, B.K. and Cakici, R. and Erdem, A. and Erdem, E. and Ikizler Cinbis, N.",
        TITLE = "Data-driven image captioning via salient region discovery",
        JOURNAL = IET-CV,
        VOLUME = "11",
        YEAR = "2017",
        NUMBER = "6",
        MONTH = sep,
        PAGES = "398--406",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100663"}

@article{bb104277,
        AUTHOR = "Fu, K. and Jin, J.Q. and Cui, R.P. and Sha, F. and Zhang, C.S.",
        TITLE = "Aligning Where to See and What to Tell: Image Captioning with
Region-Based Attention and Scene-Specific Contexts",
        JOURNAL = PAMI,
        VOLUME = "39",
        YEAR = "2017",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "2321--2334",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100664"}

@article{bb104278,
        AUTHOR = "Liu, A.A. and Xu, N. and Wong, Y. and Li, J. and Su, Y.T. and Kankanhalli, M.",
        TITLE = "Hierarchical \& multimodal video captioning: Discovering and
transferring multimodal knowledge for vision to language",
        JOURNAL = CVIU,
        VOLUME = "163",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "113--125",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100665"}

@article{bb104279,
        AUTHOR = "Nian, F.D. and Li, T. and Wang, Y. and Wu, X.Y. and Ni, B.B. and Xu, C.S.",
        TITLE = "Learning explicit video attributes from mid-level representation for
video captioning",
        JOURNAL = CVIU,
        VOLUME = "163",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "126--138",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100666"}

@article{bb104280,
        AUTHOR = "He, X.D. and Deng, L.",
        TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
        JOURNAL = SPMag,
        VOLUME = "34",
        YEAR = "2017",
        NUMBER = "6",
        MONTH = nov,
        PAGES = "109--116",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100667"}

@article{bb104281,
        AUTHOR = "Deng, L. and He, X.D.",
        TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
        JOURNAL = SPMag,
        VOLUME = "35",
        YEAR = "2018",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "178",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100667"}

@article{bb104282,
        AUTHOR = "Li, L.H. and Tang, S. and Zhang, Y.D. and Deng, L.X. and Tian, Q.",
        TITLE = "{GLA}: Global-Local Attention for Image Description",
        JOURNAL = MultMed,
        VOLUME = "20",
        YEAR = "2018",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "726--737",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100668"}

@article{bb104283,
        AUTHOR = "Guan, J.N. and Wang, E.",
        TITLE = "Repeated review based image captioning for image evidence review",
        JOURNAL = SP:IC,
        VOLUME = "63",
        YEAR = "2018",
        PAGES = "141--148",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100669"}

@article{bb104284,
        AUTHOR = "Lu, X. and Wang, B. and Zheng, X. and Li, X.",
        TITLE = "Exploring Models and Data for Remote Sensing Image Caption Generation",
        JOURNAL = GeoRS,
        VOLUME = "56",
        YEAR = "2018",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2183--2195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100670"}

@article{bb104285,
        AUTHOR = "Cheng, Q. and Zhang, Q. and Fu, P. and Tu, C.H. and Li, S.",
        TITLE = "A survey and analysis on automatic image annotation",
        JOURNAL = PR,
        VOLUME = "79",
        YEAR = "2018",
        PAGES = "242--259",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100671"}

@article{bb104286,
        AUTHOR = "Ben Rejeb, I. and Ouni, S. and Barhoumi, W. and Zagrouba, E.",
        TITLE = "Fuzzy {VA-Files} for multi-label image annotation based on visual content
of regions",
        JOURNAL = SIViP,
        VOLUME = "12",
        YEAR = "2018",
        NUMBER = "5",
        MONTH = jul,
        PAGES = "877--884",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100672"}

@article{bb104287,
        AUTHOR = "Helmy, T.",
        TITLE = "A Generic Framework for Semantic Annotation of Images",
        JOURNAL = IJIG,
        VOLUME = "18",
        YEAR = "2018",
        NUMBER = "3",
        MONTH = jul,
        PAGES = "Article 1850013",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100673"}

@article{bb104288,
        AUTHOR = "Wu, C. L. and Wei, Y. and Chu, X. L. and Su, F. and Wang, L.",
        TITLE = "Modeling visual and word-conditional semantic attention for image captioning",
        JOURNAL = SP:IC,
        VOLUME = "67",
        YEAR = "2018",
        PAGES = "100--107",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100674"}

@article{bb104289,
        AUTHOR = "Ye, S. and Han, J. and Liu, N.",
        TITLE = "Attentive Linear Transformation for Image Captioning",
        JOURNAL = IP,
        VOLUME = "27",
        YEAR = "2018",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "5514--5524",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100675"}

@article{bb104290,
        AUTHOR = "Zhang, M. and Yang, Y. and Zhang, H. and Ji, Y. and Shen, H. T. and Chua, T.",
        TITLE = "More is Better: Precise and Detailed Image Captioning Using Online Positive Recall and Missing Concepts Mining",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "32--44",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100676"}

@article{bb104291,
        AUTHOR = "Hu, J. and Lam, K. M. and Lou, P. and Liu, Q. and Deng, W. P.",
        TITLE = "Can a machine have two systems for recognition, like human beings?",
        JOURNAL = JVCIR,
        VOLUME = "56",
        YEAR = "2018",
        PAGES = "275--286",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100677"}

@article{bb104292,
        AUTHOR = "Bhagat, P. K. and Choudhary, P.",
        TITLE = "Image annotation: Then and now",
        JOURNAL = IVC,
        VOLUME = "80",
        YEAR = "2018",
        PAGES = "1--23",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100678"}

@article{bb104293,
        AUTHOR = "Gil Gonzalez, J. and Alvarez Meza, A. and Orozco Gutierrez, A.",
        TITLE = "Learning from multiple annotators using kernel alignment",
        JOURNAL = PRL,
        VOLUME = "116",
        YEAR = "2018",
        PAGES = "150--156",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100679"}

@article{bb104294,
        AUTHOR = "Bazrafkan, S. and Javidnia, H. and Corcoran, P.",
        TITLE = "Latent space mapping for generation of object elements with corresponding data annotation",
        JOURNAL = PRL,
        VOLUME = "116",
        YEAR = "2018",
        PAGES = "179--186",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100680"}

@article{bb104295,
        AUTHOR = "Gella, S. and Keller, F. and Lapata, M.",
        TITLE = "Disambiguating Visual Verbs",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "311--322",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100681"}

@article{bb104296,
        AUTHOR = "Xu, N. and Liu, A. A. and Liu, J. and Nie, W. Z. and Su, Y. T.",
        TITLE = "Scene graph captioner: Image captioning based on structural visual representation",
        JOURNAL = JVCIR,
        VOLUME = "58",
        YEAR = "2019",
        PAGES = "477--485",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100682"}

@article{bb104297,
        AUTHOR = "Niu, Y. and Lu, Z. and Wen, J. and Xiang, T. and Chang, S.",
        TITLE = "Multi-Modal Multi-Scale Deep Learning for Large-Scale Image Annotation",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "1720--1731",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100683"}

@article{bb104298,
        AUTHOR = "Jiu, M. Y. and Sahbi, H.",
        TITLE = "Deep representation design from deep kernel networks",
        JOURNAL = PR,
        VOLUME = "88",
        YEAR = "2019",
        PAGES = "447--457",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100684"}

@article{bb104299,
        AUTHOR = "He, X. W. and Shi, B. G. and Bai, X. and Xia, G. S. and Zhang, Z. X. and Dong, W. S.",
        TITLE = "Image Caption Generation with Part of Speech Guidance",
        JOURNAL = PRL,
        VOLUME = "119",
        YEAR = "2019",
        PAGES = "229--237",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT100685"}

Last update: Jun 14, 2021 at 09:20:36