@inproceedings{bb284200,
  author    = {Abbas, J. and Dagli, C.K. and Huang, T.S.},
  title     = {A Multimodality Framework for Creating Speaker/Non-Speaker Profile
               Databases for Real-World Video},
  booktitle = SLAM07,
  pages     = {1-8},
  year      = {2007},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278886},
}

@inproceedings{bb284201,
  author    = {Monaci, G. and Vandergheynst, P.},
  title     = {Audiovisual Gestalts},
  booktitle = PercOrg06,
  pages     = {200},
  year      = {2006},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278887},
}

@inproceedings{bb284202,
  author    = {Zhu, Z.G. and Li, W.H. and Molina, E. and Wolberg, G.},
  title     = {LDV Sensing and Processing for Remote Hearing in a Multimodal
               Surveillance System},
  booktitle = MSCSAS07,
  pages     = {1-2},
  year      = {2007},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278888},
}

@inproceedings{bb284203,
  author    = {Zhu, Z.G. and Li, W.H. and Wolberg, G.},
  title     = {Integrating LDV Audio and IR Video for Remote Multimodal Surveillance},
  booktitle = OTCBVS05,
  pages     = {III: 10-10},
  year      = {2005},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278889},
}

@inproceedings{bb284204,
  author    = {Wu, Z.Y. and Cai, L.H. and Meng, H.},
  title     = {Multi-level Fusion of Audio and Visual Features for Speaker
               Identification},
  booktitle = ICB06,
  pages     = {493-499},
  year      = {2006},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278890},
}

@inproceedings{bb284205,
  author    = {Yang, P. and Yang, Y.C. and Wu, Z.H.},
  title     = {Exploiting Glottal Information in Speaker Recognition Using Parallel
               GMMs},
  booktitle = AVBPA05,
  pages     = {804},
  year      = {2005},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278891},
}

@inproceedings{bb284206,
  author    = {Lei, Z.C.},
  title     = {Combining the Likelihood and the Kullback-Leibler Distance in
               Estimating the Universal Background Model for Speaker Verification
               Using SVM},
  booktitle = ICPR10,
  pages     = {4553-4556},
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278892},
}

@inproceedings{bb284207,
  author    = {Lei, Z.C. and Yang, Y.C. and Wu, Z.H.},
  title     = {An UBM-Based Reference Space for Speaker Recognition},
  booktitle = ICPR06,
  pages     = {IV: 318-321},
  year      = {2006},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278893},
}

@inproceedings{bb284208,
  author    = {Lei, Z.C. and Yang, Y.C. and Wu, Z.H.},
  title     = {Constructing the Discriminative Kernels Using GMM for Text-Independent
               Speaker Identification},
  booktitle = IWBRS05,
  pages     = {165},
  year      = {2005},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278894},
}

@inproceedings{bb284209,
  author    = {Lei, Z.C. and Yang, Y.C. and Wu, Z.H.},
  title     = {Speaker Identification Using the VQ-Based Discriminative Kernels},
  booktitle = AVBPA05,
  pages     = {797},
  year      = {2005},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278895},
}

@inproceedings{bb284210,
  author    = {Li, D.D. and Yang, Y.C. and Wu, Z.H.},
  title     = {Dynamic Bayesian Networks for Audio-Visual Speaker Recognition},
  booktitle = ICB06,
  pages     = {539-545},
  year      = {2006},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278896},
}

@inproceedings{bb284211,
  author    = {Fox, N.A. and O'Mullane, B.A. and Reilly, R.B.},
  title     = {VALID: A New Practical Audio-Visual Database, and Comparative Results},
  booktitle = AVBPA05,
  pages     = {777},
  year      = {2005},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278897},
}

% Online face-image database. Recorded as a misc entry with howpublished,
% because "Online" is a medium, not a publisher, and the item is not a book.
@misc{bb284212,
  author       = {Sharma, P. and Reilly, R.B.},
  title        = {The UCD Colour Face Image Database for Face Detection},
  howpublished = {Online},
  year         = {1998},
  bibsource    = {http://www.visionbib.com/bibliography/people916.html#TT278898},
}

@inproceedings{bb284213,
  author    = {Fox, N.A. and O'Mullane, B.A. and Reilly, R.B.},
  title     = {Audio-Visual Speaker Identification via Adaptive Fusion Using
               Reliability Estimates of Both Modalities},
  booktitle = AVBPA05,
  pages     = {787},
  year      = {2005},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278899},
}

@inproceedings{bb284214,
  author    = {Zhang, D. and Ghobakhlou, A. and Kasabov, N.},
  title     = {An adaptive model of person identification combining speech and image
               information},
  booktitle = ICARCV04,
  pages     = {I: 413-418},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278900},
}

@inproceedings{bb284215,
  author    = {Kratt, J. and Metze, F. and Stiefelhagen, R. and Waibel, A.},
  title     = {Large Vocabulary Audio-Visual Speech Recognition Using the Janus Speech
               Recognition Toolkit},
  booktitle = DAGM04,
  pages     = {488-495},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278901},
}

@inproceedings{bb284216,
  author    = {Hanafiah, Z.M. and Yamazaki, C. and Nakamura, A. and Kuno, Y.},
  title     = {Understanding inexplicit utterances using vision for helper robots},
  booktitle = ICPR04,
  pages     = {IV: 925-928},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278902},
}

@inproceedings{bb284217,
  author    = {Hermann, T. and Henning, T. and Ritter, H.},
  title     = {Gesture Desk an Integrated Multi-modal Gestural Workplace
               for Sonification},
  booktitle = GW03,
  pages     = {369-379},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278903},
}

@inproceedings{bb284218,
  author    = {Merola, G.},
  title     = {The Effects of the Gesture Viewpoint on the Students' Memory of Words
               and Stories},
  booktitle = GW07,
  pages     = {272-281},
  year      = {2007},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278904},
}

@inproceedings{bb284219,
  author    = {Merola, G. and Poggi, I.},
  title     = {Multimodality and Gestures in the Teacher's Communication},
  booktitle = GW03,
  pages     = {101-111},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278905},
}

@inproceedings{bb284220,
  author    = {Kranstedt, A. and Kuhnlein, P. and Wachsmuth, I.},
  title     = {Deixis in Multimodal Human Computer Interaction:
               An Interdisciplinary Approach},
  booktitle = GW03,
  pages     = {112-123},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278906},
}

@inproceedings{bb284221,
  author    = {Saeed, K. and Kozlowski, M.},
  title     = {An Image-Based System for Spoken-Letter Recognition},
  booktitle = CAIP03,
  pages     = {494-502},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278907},
}

@inproceedings{bb284222,
  author    = {Ho, P. and Armington, J.},
  title     = {A Dual-Factor Authentication System Featuring Speaker Verification and
               Token Technology},
  booktitle = AVBPA03,
  pages     = {128-136},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278908},
}

@inproceedings{bb284223,
  author    = {Fox, N.A. and Reilly, R.B.},
  title     = {Audio-Visual Speaker Identification Based on the Use of Dynamic Audio
               and Visual Features},
  booktitle = AVBPA03,
  pages     = {743-751},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278909},
}

@inproceedings{bb284224,
  author    = {Czyz, J. and Bengio, S. and Marcel, C. and Vandendorpe, L.},
  title     = {Scalability Analysis of Audio-Visual Person Identity Verification},
  booktitle = AVBPA03,
  pages     = {752-760},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278910},
}

@inproceedings{bb284225,
  author    = {Bengio, S.},
  title     = {Multimodal Authentication Using Asynchronous HMMs},
  booktitle = AVBPA03,
  pages     = {770-777},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278911},
}

@inproceedings{bb284226,
  author    = {Lucey, S. and Chen, T.H.},
  title     = {Improved Audio-Visual Speaker Recognition via the Use of a Hybrid
               Combination Strategy},
  booktitle = AVBPA03,
  pages     = {929-936},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278912},
}

% WACV 2002 paper on multimodal HCI for crisis management.
% Third author's surname is Kettebekov, the same person credited in bb284228.
@inproceedings{bb284227,
  author    = {Krahnstoever, N. and Schapira, E. and Kettebekov, S. and Sharma, R.},
  title     = {Multimodal human-computer interaction for crisis management systems},
  booktitle = WACV02,
  pages     = {203-207},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278913},
}

@inproceedings{bb284228,
  author    = {Kettebekov, S. and Yeasin, M. and Sharma, R.},
  title     = {Improving continuous gesture recognition with spoken prosody},
  booktitle = CVPR03,
  pages     = {I: 565-570},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278914},
}

@inproceedings{bb284229,
  author    = {Poh, N. and Korczak, J.},
  title     = {Hybrid Biometric Person Authentication Using Face and Voice Features},
  booktitle = AVBPA01,
  pages     = {348},
  year      = {2001},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278915},
}

@inproceedings{bb284230,
  author    = {Nakamura, S.},
  title     = {Fusion of Audio-Visual Information for Integrated Speech Processing},
  booktitle = AVBPA01,
  pages     = {127},
  year      = {2001},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278916},
}

@inproceedings{bb284231,
  author    = {Sullivan, K.P.H. and Pelecanos, J.},
  title     = {Revisiting Carl Bildt's Impostor: Would a Speaker Verification System
               Foil Him?},
  booktitle = AVBPA01,
  pages     = {144},
  year      = {2001},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278917},
}

% Institute memo, not a conference paper, so typed as a technical report.
% NOTE(review): "MIT AIM" is read here as the MIT AI Memo series -- confirm, and
% add the memo number (not recorded in this file) as a number field.
@techreport{bb284232,
  author      = {Geiger, G. and Ezzat, T. and Poggio, T.},
  title       = {Perceptual Evaluation of Video-Realistic Speech},
  type        = {{AI} Memo},
  institution = {Massachusetts Institute of Technology},
  year        = {2003},
  bibsource   = {http://www.visionbib.com/bibliography/people916.html#TT278918},
}

% ICIP 2002 paper on bimodal (audio + visual) fusion for speech recognition.
% Second author's surname is Mersereau.
@inproceedings{bb284233,
  author    = {Zhang, X.Z. and Mersereau, R.M. and Clements, M.},
  title     = {Bimodal fusion in audio-visual speech recognition},
  booktitle = ICIP02,
  pages     = {I: 964-967},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278919},
}

@inproceedings{bb284234,
  author    = {Graf, H.P. and Cosatto, E. and Strom, V. and Huang, F.J.},
  title     = {Visual prosody: facial movements accompanying speech},
  booktitle = AFGR02,
  pages     = {381-386},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278920},
}

% Issued by an institution rather than presented at a conference, so the
% issuing body goes in institution instead of booktitle.
% NOTE(review): UMD is a macro defined outside this chunk, presumably a
% university; this may be a thesis rather than a report -- if so, switch to a
% thesis entry type with school = UMD.
@techreport{bb284235,
  author      = {Qi, Y.},
  title       = {Learning Algorithms for Audio and Video Processing:
                 Independent Component Analysis and Support Vector Machine Based Approaches},
  institution = UMD,
  year        = {2000},
  bibsource   = {http://www.visionbib.com/bibliography/people916.html#TT278921},
}

@inproceedings{bb284236,
  author    = {Nankaku, Y. and Tokuda, K. and Kitamura, T.},
  title     = {Normalized Training for HMM-based Visual Speech Recognition},
  booktitle = ICIP00,
  pages     = {Vol III: 234-237},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278922},
}

@inproceedings{bb284237,
  author    = {Zhang, Y. and Levinson, S. and Huang, T.S.},
  title     = {Speaker Independent Audio-Visual Speech Recognition},
  booktitle = ICME00,
  pages     = {TP8},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278923},
}

@inproceedings{bb284238,
  author    = {Pan, H. and Huang, T.S.},
  title     = {A New Approach to Integrate Audio and Visual Features of Speech},
  booktitle = ICME00,
  pages     = {TP8},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278924},
}

@inproceedings{bb284239,
  author    = {Potamianos, G. and Verma, A. and Neti, C. and Iyengar, G. and Basu, S.},
  title     = {A Cascade Image Transform for Speaker Independent Automatic Speech
               Reading},
  booktitle = ICME00,
  pages     = {TP8},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278925},
}

@inproceedings{bb284240,
  author    = {Pan, H. and Liang, Z.P. and Huang, T.S.},
  title     = {Fusing Audio and Visual Features of Speech},
  booktitle = ICIP00,
  pages     = {Vol III: 214-217},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278926},
}

@inproceedings{bb284241,
  author    = {Faruquie, T.A. and Majumdar, A. and Rajput, N. and Subramaniam, L.V.},
  title     = {Large Vocabulary Audio-visual Speech Recognition Using Active Shape
               Models},
  booktitle = ICPR00,
  pages     = {Vol III: 106-109},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278927},
}

@inproceedings{bb284242,
  author    = {Yu, K. and Jiang, X. and Bunke, H.},
  title     = {Combining Acoustic and Visual Classifiers for the Recognition of Spoken
               Sentences},
  booktitle = ICPR00,
  pages     = {Vol II: 491-494},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278928},
}

@inproceedings{bb284243,
  author    = {Nam, J. and Alghoniemy, M. and Tewfik, A.H.},
  title     = {Audio-visual content-based violent scene characterization},
  booktitle = ICIP98,
  pages     = {I: 353-357},
  year      = {1998},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278929},
}

@inproceedings{bb284244,
  author    = {Luettin, J. and Dupont, S.},
  title     = {Continuous Audio-Visual Speech Recognition},
  booktitle = ECCV98,
  pages     = {II: 657},
  year      = {1998},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278930},
}

@inproceedings{bb284245,
  author    = {Yang, J. and Xiao, J. and Ritter, M.},
  title     = {Automatic Selection of Visemes for Image-based Visual Speech Synthesis},
  booktitle = ICME00,
  pages     = {TP8},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278931},
}

@inproceedings{bb284246,
  author    = {Sharma, R. and Cai, J.Y. and Chakravarthy, S. and Poddar, I. and Sethi, Y.},
  title     = {Exploiting Speech/Gesture Co-occurrence for Improving Continuous
               Gesture Recognition in Weather Narration},
  booktitle = AFGR00,
  pages     = {422-427},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278932},
}

@inproceedings{bb284247,
  author    = {Yamamoto, E. and Nakamura, S. and Shikano, K.},
  title     = {Lip Movement Synthesis from Speech Based on Hidden Markov Models},
  booktitle = AFGR98,
  pages     = {154-159},
  year      = {1998},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278933},
}

@inproceedings{bb284248,
  author    = {Roy, D. and Pentland, A.P.},
  title     = {Automatic spoken affect classification and analysis},
  booktitle = AFGR96,
  pages     = {363-367},
  year      = {1996},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278934},
}

@inproceedings{bb284249,
  author    = {Petajan, E.D.},
  title     = {An Architecture for Automatic Lipreading to Enhance Speech Recognition},
  booktitle = CVPR85,
  pages     = {40-47},
  year      = {1985},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT278935},
}

@article{bb284250,
  author    = {Zotkin, D.N. and Duraiswami, R. and Davis, L.S.},
  title     = {Joint Audio-Visual Tracking Using Particle Filters},
  journal   = JASP,
  volume    = {2002},
  number    = {11},
  pages     = {1154},
  month     = {November},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278936},
}

@article{bb284251,
  author    = {Garg, A. and Pavlovic, V. and Rehg, J.M.},
  title     = {Boosted learning in dynamic Bayesian networks for multimodal speaker
               detection},
  journal   = PIEEE,
  volume    = {91},
  number    = {9},
  pages     = {1355-1369},
  month     = {September},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278937},
}

@inproceedings{bb284252,
  author    = {Garg, A. and Pavlovic, V. and Rehg, J.M.},
  title     = {Audio-visual speaker detection using dynamic Bayesian networks},
  booktitle = AFGR00,
  pages     = {384-390},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278938},
}

@inproceedings{bb284253,
  author    = {Pavlovic, V. and Garg, A. and Rehg, J.M. and Huang, T.S.},
  title     = {Multimodal Speaker Detection using Error Feedback Dynamic Bayesian
               Networks},
  booktitle = CVPR00,
  pages     = {II: 34-41},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278939},
}

@inproceedings{bb284254,
  author    = {Pavlovic, V. and Berry, G. and Huang, T.S.},
  title     = {Integration of Audio/Visual Information for Use in
               Human-Computer Intelligent Interaction},
  booktitle = ICIP97,
  pages     = {I: 121-124},
  year      = {1997},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278940},
}

@inproceedings{bb284255,
  author    = {Choudhury, T. and Rehg, J.M. and Pavlovic, V. and Pentland, A.P.},
  title     = {Boosting and structure learning in dynamic Bayesian networks for
               audio-visual speaker detection},
  booktitle = ICPR02,
  pages     = {III: 789-794},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278941},
}

@inproceedings{bb284256,
  author    = {Pavlovic, V.},
  title     = {Multimodal tracking and classification of audio-visual features},
  booktitle = ICIP98,
  pages     = {I: 343-347},
  year      = {1998},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278942},
}

@inproceedings{bb284257,
  author    = {Rehg, J.M. and Murphy, K.P. and Fieguth, P.W.},
  title     = {Vision-Based Speaker Detection Using Bayesian Networks},
  booktitle = CVPR99,
  pages     = {II: 110-116},
  year      = {1999},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278943},
}

@article{bb284258,
  author    = {Vajaria, H. and Sankar, R. and Kasturi, R.},
  title     = {Exploring Co-Occurence Between Speech and Body Movement for
               Audio-Guided Video Localization},
  journal   = CirSysVideo,
  volume    = {18},
  number    = {11},
  pages     = {1608-1617},
  month     = {November},
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278944},
}

@inproceedings{bb284259,
  author    = {Vajaria, H. and Islam, T. and Sarkar, S. and Sankar, R. and Kasturi, R.},
  title     = {Audio Segmentation and Speaker Localization in Meeting Videos},
  booktitle = ICPR06,
  pages     = {II: 1150-1153},
  year      = {2006},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278945},
}

@article{bb284260,
  author    = {Talantzis, F. and Pnevmatikakis, A. and Constantinides, A.G.},
  title     = {Audio-Visual Active Speaker Tracking in Cluttered Indoors Environments},
  journal   = SMC-B,
  volume    = {39},
  number    = {1},
  pages     = {7-15},
  month     = {February},
  year      = {2009},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278946},
}

% Author order restored to Talantzis, Pnevmatikakis, Constantinides, matching
% bb284260.
% NOTE(review): shares its title and BIBSOURCE anchor with bb284260 -- presumably
% the same paper printed in two issues; confirm before citing both.
@article{bb284261,
  author    = {Talantzis, F. and Pnevmatikakis, A. and Constantinides, A.G.},
  title     = {Audio-Visual Active Speaker Tracking in Cluttered Indoors Environments},
  journal   = SMC-B,
  volume    = {38},
  number    = {3},
  pages     = {799-807},
  month     = {June},
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278946},
}

@article{bb284262,
  author    = {Lee, J.S. and de Simone, F. and Ebrahimi, T.},
  title     = {Efficient video coding based on audio-visual focus of attention},
  journal   = JVCIR,
  volume    = {22},
  number    = {8},
  pages     = {704-711},
  month     = {November},
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278947},
}

@article{bb284263,
  author    = {Blauth, D.A. and Minotto, V.P. and Jung, C.R. and Lee, B. and Kalker, T.},
  title     = {Voice activity detection and speaker localization using audiovisual
               cues},
  journal   = PRL,
  volume    = {33},
  number    = {4},
  pages     = {373-380},
  month     = {March},
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278948},
}

@inproceedings{bb284264,
  author    = {Montazzolli, S. and Jung, C.R. and Gelb, D.},
  title     = {Audiovisual voice activity detection using off-the-shelf cameras},
  booktitle = ICIP15,
  pages     = {3886-3890},
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278949},
}

@article{bb284265,
  author    = {Minotto, V.P. and Jung, C.R. and Lee, B.},
  title     = {Simultaneous-Speaker Voice Activity Detection and Localization Using
               Mid-Fusion of SVM and HMMs},
  journal   = MultMed,
  volume    = {16},
  number    = {4},
  pages     = {1032-1044},
  month     = {June},
  year      = {2014},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278950},
}

@article{bb284266,
  author    = {Qian, X. and Brutti, A. and Lanz, O. and Omologo, M. and Cavallaro, A.},
  title     = {Multi-Speaker Tracking From an Audio-Visual Sensing Device},
  journal   = MultMed,
  volume    = {21},
  number    = {10},
  pages     = {2576-2588},
  month     = {October},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278951},
}

@article{bb284267,
  author    = {Pu, J. and Panagakis, Y. and Pantic, M.},
  title     = {Active Speaker Detection and Localization in Videos Using Low-Rank
               and Kernelized Sparsity},
  journal   = SPLetters,
  volume    = {27},
  pages     = {865-869},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278952},
}

@article{bb284268,
  author    = {Qian, X.Y. and Liu, Q. and Wang, J.D. and Li, H.Z.},
  title     = {Three-Dimensional Speaker Localization: Audio-Refined Visual Scaling
               Factor Estimation},
  journal   = SPLetters,
  volume    = {28},
  pages     = {1405-1409},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278953},
}

@article{bb284269,
  author    = {Ban, Y.T. and Alameda Pineda, X. and Girin, L. and Horaud, R.},
  title     = {Variational Bayesian Inference for Audio-Visual Tracking of Multiple
               Speakers},
  journal   = PAMI,
  volume    = {43},
  number    = {5},
  pages     = {1761-1776},
  month     = {May},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278954},
}

@inproceedings{bb284270,
  author    = {Ban, Y.T. and Girin, L. and Alameda Pineda, X. and Horaud, R.},
  title     = {Exploiting the Complementarity of Audio and Visual Data in
               Multi-speaker Tracking},
  booktitle = CVAVM17,
  pages     = {446-454},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278955},
}

@article{bb284271,
  author    = {Qian, X.Y. and Brutti, A. and Lanz, O. and Omologo, M. and Cavallaro, A.},
  title     = {Audio-Visual Tracking of Concurrent Speakers},
  journal   = MultMed,
  volume    = {24},
  pages     = {942-954},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278956},
}

@article{bb284272,
  author    = {Hu, D. and Wei, Y. and Qian, R. and Lin, W.Y. and Song, R.H. and Wen, J.R.},
  title     = {Class-Aware Sounding Objects Localization via Audiovisual
               Correspondence},
  journal   = PAMI,
  volume    = {44},
  number    = {12},
  pages     = {9844-9859},
  month     = {December},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278957},
}

@article{bb284273,
  author    = {Zheng, A. and Hu, M. and Jiang, B. and Huang, Y. and Yan, Y. and Luo, B.},
  title     = {Adversarial-Metric Learning for Audio-Visual Cross-Modal Matching},
  journal   = MultMed,
  volume    = {24},
  pages     = {338-351},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278958},
}

@article{bb284274,
  author    = {Wang, Y. and Qian, X.H. and Zhou, W.},
  title     = {Transformer-Prompted Network: Efficient Audio-Visual Segmentation via
               Transformer and Prompt Learning},
  journal   = SPLetters,
  volume    = {32},
  pages     = {516-520},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278959},
}

@article{bb284275,
  author    = {Wang, H. and Zha, Z.J. and Li, L. and Chen, X.J. and Luo, J.B.},
  title     = {Semantic and Relation Modulation for Audio-Visual Event Localization},
  journal   = PAMI,
  volume    = {45},
  number    = {6},
  pages     = {7711-7725},
  month     = {June},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278960},
}

@article{bb284276,
  author    = {Garg, R. and Gao, R.H. and Grauman, K.},
  title     = {Visually-Guided Audio Spatialization in Video with Geometry-Aware
               Multi-task Learning},
  journal   = IJCV,
  volume    = {131},
  number    = {10},
  pages     = {2723-2737},
  month     = {October},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278961},
}

@article{bb284277,
  author    = {Wang, J.X. and Li, C.L. and Zheng, A. and Tang, J. and Luo, B.},
  title     = {Looking and Hearing Into Details:
               Dual-Enhanced Siamese Adversarial Network for Audio-Visual Matching},
  journal   = MultMed,
  volume    = {25},
  pages     = {7505-7516},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278962},
}

@article{bb284278,
  author    = {Liu, C. and Li, P. and Zhang, H. and Li, L.C. and Huang, Z. and Wang, D.D. and Yu, X.},
  title     = {BAVS: Bootstrapping Audio-Visual Segmentation by Integrating
               Foundation Knowledge},
  journal   = MultMed,
  volume    = {26},
  pages     = {10015-10028},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278963},
}

@inproceedings{bb284279,
  author    = {Liu, C. and Li, P. and Yang, L.Y. and Wang, D.D. and Li, L.C. and Yu, X.},
  title     = {Robust Audio-Visual Segmentation via Audio-Guided Visual Convergent
               Alignment},
  booktitle = CVPR25,
  pages     = {28922-28931},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278964},
}

@inproceedings{bb284280,
  author    = {Liu, C. and Li, P.P. and Yu, Q. and Sheng, H.W. and Wang, D.D. and Li, L.C. and Yu, X.},
  title     = {Benchmarking Audio Visual Segmentation for Long-Untrimmed Videos},
  booktitle = CVPR24,
  pages     = {22712-22722},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278965},
}

% Journal article with an issue number; month supplied so the entry carries
% both, like the other numbered article entries in this file.
% NOTE(review): December is inferred from issue 12 of what is presumably a
% monthly volume -- confirm against the publisher record.
@article{bb284281,
  author    = {Traa, J. and Smaragdis, P.},
  title     = {A Wrapped Kalman Filter for Azimuthal Speaker Tracking},
  journal   = SPLetters,
  volume    = {20},
  number    = {12},
  pages     = {1257-1260},
  month     = {December},
  year      = {2013},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278966},
}

@article{bb284282,
  author    = {Li, Y. and Liu, H. and Yang, B.},
  title     = {STNet: Deep Audio-Visual Fusion Network for Robust Speaker Tracking},
  journal   = MultMed,
  volume    = {27},
  pages     = {1835-1847},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278967},
}

@article{bb284283,
  author    = {Shi, Z.F. and Wu, Q.B. and Meng, F.M. and Xu, L.F. and Li, H.L.},
  title     = {Cross-Modal Cognitive Consensus Guided Audio-Visual Segmentation},
  journal   = MultMed,
  volume    = {27},
  pages     = {209-223},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278968},
}

@article{bb284284,
  author    = {Senocak, A. and Ryu, H. and Kim, J. and Oh, T.H. and Pfister, H. and Chung, J.S.},
  title     = {Toward Interactive Sound Source Localization:
               Better Align Sight and Sound!},
  journal   = PAMI,
  volume    = {47},
  number    = {9},
  pages     = {7643-7659},
  month     = {September},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278969},
}

@inproceedings{bb284285,
  author    = {Um, S.J. and Kim, D.J. and Lee, S. and Kim, J.U.},
  title     = {Object-aware Sound Source Localization via Audio-Visual Scene
               Understanding},
  booktitle = CVPR25,
  pages     = {8342-8351},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278970},
}

@inproceedings{bb284286,
  author    = {Kim, I.H. and Song, Y. and Park, J. and Kim, W.H. and Kwak, S.},
  title     = {Improving Sound Source Localization with Joint Slot Attention on
               Image and Audio},
  booktitle = CVPR25,
  pages     = {3121-3130},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278971},
}

@inproceedings{bb284287,
  author    = {Liu, C. and Yang, L.Y. and Li, P. and Wang, D.D. and Li, L. and Yu, X.},
  title     = {Dynamic Derivation and Elimination: Audio Visual Segmentation with
               Enhanced Audio Semantics},
  booktitle = CVPR25,
  pages     = {3131-3141},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278972},
}

@inproceedings{bb284288,
  author    = {Ryu, H. and Kim, S. and Chung, J.S. and Senocak, A.},
  title     = {Seeing Speech and Sound: Distinguishing and Locating Audio Sources in
               Visual Scenes},
  booktitle = CVPR25,
  pages     = {13540-13549},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278973},
}

@inproceedings{bb284289,
  author    = {Wang, X.Z. and Cheng, F. and Bertasius, G.},
  title     = {LoCoNet: Long-Short Context Network for Active Speaker Detection},
  booktitle = CVPR24,
  pages     = {18462-18472},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278974},
}

@inproceedings{bb284290,
  author    = {Huang, C. and Tian, Y.P. and Kumar, A. and Xu, C.L.},
  title     = {Egocentric Audio-Visual Object Localization},
  booktitle = CVPR23,
  pages     = {22910-22921},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278975},
}

@inproceedings{bb284291,
  author    = {Nugroho, M.A. and Woo, S. and Lee, S. and Kim, C.},
  title     = {Audio-Visual Glance Network for Efficient Video Recognition},
  booktitle = ICCV23,
  pages     = {10116-10125},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278976},
}

@inproceedings{bb284292,
  author    = {Liu, Y. and Tan, Y. and Lan, H.Y.},
  title     = {Self-Supervised Contrastive Learning for Audio-Visual Action
               Recognition},
  booktitle = ICIP23,
  pages     = {1000-1004},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278977},
}

@inproceedings{bb284293,
  author    = {Mo, S.T. and Morgado, P.},
  title     = {Localizing Visual Sounds the Easy Way},
  booktitle = ECCV22,
  pages     = {XXXVII:218-234},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278978},
}

@inproceedings{bb284294,
  author    = {Xia, Y. and Zhao, Z.},
  title     = {Cross-modal Background Suppression for Audio-Visual Event
               Localization},
  booktitle = CVPR22,
  pages     = {19957-19966},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278979},
}

@inproceedings{bb284295,
  author    = {Jiang, H. and Murdock, C. and Ithapu, V.K.},
  title     = {Egocentric Deep Multi-Channel Audio-Visual Active Speaker
               Localization},
  booktitle = CVPR22,
  pages     = {10534-10542},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278980},
}

@inproceedings{bb284296,
  author    = {Min, K. and Roy, S. and Tripathi, S. and Guha, T. and Majumdar, S.},
  title     = {Learning Long-Term Spatial-Temporal Graphs for Active Speaker Detection},
  booktitle = ECCV22,
  pages     = {XXXV:371-387},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278981},
}

@inproceedings{bb284297,
  author    = {Duan, B. and Tang, H. and Wang, W. and Zong, Z.L. and Yang, G.W. and Yan, Y.},
  title     = {Audio-Visual Event Localization via Recursive Fusion by Joint
               Co-Attention},
  booktitle = WACV21,
  pages     = {4012-4021},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278982},
}

@inproceedings{bb284298,
  author    = {Wu, Y. and Zhu, L.C. and Yan, Y. and Yang, Y.},
  title     = {Dual Attention Matching for Audio-Visual Event Localization},
  booktitle = ICCV19,
  pages     = {6291-6299},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278983},
}

@inproceedings{bb284299,
  author    = {Majumder, S. and Al Halah, Z. and Grauman, K.},
  title     = {Move2Hear: Active Audio-Visual Source Separation},
  booktitle = ICCV21,
  pages     = {275-285},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT278984},
}

Last update: Nov 26, 2025 at 20:24:09