% Bibliography entries bb256500 - bb256599: audio-visual speech and speaker
% recognition, audio-visual speaker tracking, and lip/mouth-region detection.
% Each entry's BIBSOURCE field records the visionbib.com page and anchor it
% was taken from.
%
% Conventions used in this section:
%   - One field per line; field names upper-case; values double-quoted.
%   - BOOKTITLE / JOURNAL values without quotes (ICIP07, PAMI, SMC-B, ...) are
%     string macros. NOTE(review): their definitions are not visible in this
%     section; presumably they live in a companion strings file - confirm it
%     is loaded before this one.
%   - MONTH uses the standard three-letter macros (jan ... dec), unquoted.
%   - Numeric page ranges use a double hyphen; any leading volume label
%     ("III: ", "Vol II: ", "XXXV:") is kept exactly as exported.

% ---- Source page: people916.html ----

@inproceedings{bb256500,
  AUTHOR = "Xin, L. and Tao, J.H. and Tan, T.N.",
  TITLE = "Dynamic Audio-Visual Mapping using Fused Hidden Markov Model Inversion Method",
  BOOKTITLE = ICIP07,
  YEAR = "2007",
  PAGES = "III: 293--296",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251309"
}

@inproceedings{bb256501,
  AUTHOR = "Barzelay, Z. and Schechner, Y.Y.",
  TITLE = "Harmony in Motion",
  BOOKTITLE = CVPR07,
  YEAR = "2007",
  PAGES = "1--8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251310"
}

@inproceedings{bb256502,
  AUTHOR = "O'Donovan, A. and Duraiswami, R. and Neumann, J.",
  TITLE = "Microphone Arrays as Generalized Cameras for Integrated Audio Visual Processing",
  BOOKTITLE = CVPR07,
  YEAR = "2007",
  PAGES = "1--8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251311"
}

@inproceedings{bb256503,
  AUTHOR = "Abbas, J. and Dagli, C.K. and Huang, T.S.",
  TITLE = "A Multimodality Framework for Creating Speaker/Non-Speaker Profile Databases for Real-World Video",
  BOOKTITLE = SLAM07,
  YEAR = "2007",
  PAGES = "1--8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251312"
}

@inproceedings{bb256504,
  AUTHOR = "Kushal, A. and Rahurkar, M. and Fei Fei, L. and Ponce, J. and Huang, T.",
  TITLE = "Audio-Visual Speaker Localization Using Graphical Models",
  BOOKTITLE = ICPR06,
  YEAR = "2006",
  PAGES = "I: 291--294",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251313"
}

@inproceedings{bb256505,
  AUTHOR = "Tsuji, T. and Yamamoto, K. and Ishii, I.",
  TITLE = "Real-time Sound Source Localization Based on Audiovisual Frequency Integration",
  BOOKTITLE = ICPR06,
  YEAR = "2006",
  PAGES = "IV: 322--325",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251314"
}

@inproceedings{bb256506,
  AUTHOR = "Monaci, G. and Vandergheynst, P.",
  TITLE = "Audiovisual Gestalts",
  BOOKTITLE = PercOrg06,
  YEAR = "2006",
  PAGES = "200",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251315"
}

@inproceedings{bb256507,
  AUTHOR = "Zhu, Z.G. and Li, W.H. and Molina, E. and Wolberg, G.",
  TITLE = "LDV Sensing and Processing for Remote Hearing in a Multimodal Surveillance System",
  BOOKTITLE = MSCSAS07,
  YEAR = "2007",
  PAGES = "1--2",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251316"
}

@inproceedings{bb256508,
  AUTHOR = "Zhu, Z.G. and Li, W.H. and Wolberg, G.",
  TITLE = "Integrating LDV Audio and IR Video for Remote Multimodal Surveillance",
  BOOKTITLE = OTCBVS05,
  YEAR = "2005",
  PAGES = "III: 10--10",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251317"
}

@inproceedings{bb256509,
  AUTHOR = "Wu, Z.Y. and Cai, L.H. and Meng, H.",
  TITLE = "Multi-level Fusion of Audio and Visual Features for Speaker Identification",
  BOOKTITLE = ICB06,
  YEAR = "2006",
  PAGES = "493--499",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251318"
}

@inproceedings{bb256510,
  AUTHOR = "Yang, P. and Yang, Y.C. and Wu, Z.H.",
  TITLE = "Exploiting Glottal Information in Speaker Recognition Using Parallel GMMs",
  BOOKTITLE = AVBPA05,
  YEAR = "2005",
  PAGES = "804",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251319"
}

@inproceedings{bb256511,
  AUTHOR = "Lei, Z.C.",
  TITLE = "Combining the Likelihood and the Kullback-Leibler Distance in Estimating the Universal Background Model for Speaker Verification Using SVM",
  BOOKTITLE = ICPR10,
  YEAR = "2010",
  PAGES = "4553--4556",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251320"
}

@inproceedings{bb256512,
  AUTHOR = "Lei, Z.C. and Yang, Y.C. and Wu, Z.H.",
  TITLE = "An UBM-Based Reference Space for Speaker Recognition",
  BOOKTITLE = ICPR06,
  YEAR = "2006",
  PAGES = "IV: 318--321",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251321"
}

@inproceedings{bb256513,
  AUTHOR = "Lei, Z.C. and Yang, Y.C. and Wu, Z.H.",
  TITLE = "Constructing the Discriminative Kernels Using GMM for Text-Independent Speaker Identification",
  BOOKTITLE = IWBRS05,
  YEAR = "2005",
  PAGES = "165",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251322"
}

@inproceedings{bb256514,
  AUTHOR = "Lei, Z.C. and Yang, Y.C. and Wu, Z.H.",
  TITLE = "Speaker Identification Using the VQ-Based Discriminative Kernels",
  BOOKTITLE = AVBPA05,
  YEAR = "2005",
  PAGES = "797",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251323"
}

@inproceedings{bb256515,
  AUTHOR = "Li, D.D. and Yang, Y.C. and Wu, Z.H.",
  TITLE = "Dynamic Bayesian Networks for Audio-Visual Speaker Recognition",
  BOOKTITLE = ICB06,
  YEAR = "2006",
  PAGES = "539--545",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251324"
}

@inproceedings{bb256516,
  AUTHOR = "Fox, N.A. and O'Mullane, B.A. and Reilly, R.B.",
  TITLE = "VALID: A New Practical Audio-Visual Database, and Comparative Results",
  BOOKTITLE = AVBPA05,
  YEAR = "2005",
  PAGES = "777",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251325"
}

@book{bb256517,
  AUTHOR = "Sharma, P. and Reilly, R.B.",
  TITLE = "The UCD Colour Face Image Database for Face Detection",
  PUBLISHER = "Online",
  YEAR = "1998",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251326"
}

@inproceedings{bb256518,
  AUTHOR = "Fox, N.A. and O'Mullane, B.A. and Reilly, R.B.",
  TITLE = "Audio-Visual Speaker Identification via Adaptive Fusion Using Reliability Estimates of Both Modalities",
  BOOKTITLE = AVBPA05,
  YEAR = "2005",
  PAGES = "787",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251327"
}

@inproceedings{bb256519,
  AUTHOR = "Zhang, D. and Ghobakhlou, A. and Kasabov, N.",
  TITLE = "An adaptive model of person identification combining speech and image information",
  BOOKTITLE = ICARCV04,
  YEAR = "2004",
  PAGES = "I: 413--418",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251328"
}

@inproceedings{bb256520,
  AUTHOR = "Kratt, J. and Metze, F. and Stiefelhagen, R. and Waibel, A.",
  TITLE = "Large Vocabulary Audio-Visual Speech Recognition Using the Janus Speech Recognition Toolkit",
  BOOKTITLE = DAGM04,
  YEAR = "2004",
  PAGES = "488--495",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251329"
}

@inproceedings{bb256521,
  AUTHOR = "Hanafiah, Z.M. and Yamazaki, C. and Nakamura, A. and Kuno, Y.",
  TITLE = "Understanding inexplicit utterances using vision for helper robots",
  BOOKTITLE = ICPR04,
  YEAR = "2004",
  PAGES = "IV: 925--928",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251330"
}

@inproceedings{bb256522,
  AUTHOR = "Hermann, T. and Henning, T. and Ritter, H.",
  TITLE = "Gesture Desk an Integrated Multi-modal Gestural Workplace for Sonification",
  BOOKTITLE = GW03,
  YEAR = "2003",
  PAGES = "369--379",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251331"
}

@inproceedings{bb256523,
  AUTHOR = "Merola, G.",
  TITLE = "The Effects of the Gesture Viewpoint on the Students' Memory of Words and Stories",
  BOOKTITLE = GW07,
  YEAR = "2007",
  PAGES = "272--281",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251332"
}

@inproceedings{bb256524,
  AUTHOR = "Merola, G. and Poggi, I.",
  TITLE = "Multimodality and Gestures in the Teacher's Communication",
  BOOKTITLE = GW03,
  YEAR = "2003",
  PAGES = "101--111",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251333"
}

@inproceedings{bb256525,
  AUTHOR = "Kranstedt, A. and Kuhnlein, P. and Wachsmuth, I.",
  TITLE = "Deixis in Multimodal Human Computer Interaction: An Interdisciplinary Approach",
  BOOKTITLE = GW03,
  YEAR = "2003",
  PAGES = "112--123",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251334"
}

@inproceedings{bb256526,
  AUTHOR = "Saeed, K. and Kozlowski, M.",
  TITLE = "An Image-Based System for Spoken-Letter Recognition",
  BOOKTITLE = CAIP03,
  YEAR = "2003",
  PAGES = "494--502",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251335"
}

@inproceedings{bb256527,
  AUTHOR = "Ho, P. and Armington, J.",
  TITLE = "A Dual-Factor Authentication System Featuring Speaker Verification and Token Technology",
  BOOKTITLE = AVBPA03,
  YEAR = "2003",
  PAGES = "128--136",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251336"
}

@inproceedings{bb256528,
  AUTHOR = "Fox, N.A. and Reilly, R.B.",
  TITLE = "Audio-Visual Speaker Identification Based on the Use of Dynamic Audio and Visual Features",
  BOOKTITLE = AVBPA03,
  YEAR = "2003",
  PAGES = "743--751",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251337"
}

@inproceedings{bb256529,
  AUTHOR = "Czyz, J. and Bengio, S. and Marcel, C. and Vandendorpe, L.",
  TITLE = "Scalability Analysis of Audio-Visual Person Identity Verification",
  BOOKTITLE = AVBPA03,
  YEAR = "2003",
  PAGES = "752--760",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251338"
}

@inproceedings{bb256530,
  AUTHOR = "Bengio, S.",
  TITLE = "Multimodal Authentication Using Asynchronous HMMs",
  BOOKTITLE = AVBPA03,
  YEAR = "2003",
  PAGES = "770--777",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251339"
}

@inproceedings{bb256531,
  AUTHOR = "Lucey, S. and Chen, T.H.",
  TITLE = "Improved Audio-Visual Speaker Recognition via the Use of a Hybrid Combination Strategy",
  BOOKTITLE = AVBPA03,
  YEAR = "2003",
  PAGES = "929--936",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251340"
}

@inproceedings{bb256532,
  AUTHOR = "Krahnstoever, N. and Schapira, E. and Kettebekov, S. and Sharma, R.",
  TITLE = "Multimodal human-computer interaction for crisis management systems",
  BOOKTITLE = WACV02,
  YEAR = "2002",
  PAGES = "203--207",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251341"
}

@inproceedings{bb256533,
  AUTHOR = "Kettebekov, S. and Yeasin, M. and Sharma, R.",
  TITLE = "Improving continuous gesture recognition with spoken prosody",
  BOOKTITLE = CVPR03,
  YEAR = "2003",
  PAGES = "I: 565--570",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251342"
}

@inproceedings{bb256534,
  AUTHOR = "Poh, N. and Korczak, J.",
  TITLE = "Hybrid Biometric Person Authentication Using Face and Voice Features",
  BOOKTITLE = AVBPA01,
  YEAR = "2001",
  PAGES = "348",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251343"
}

@inproceedings{bb256535,
  AUTHOR = "Nakamura, S.",
  TITLE = "Fusion of Audio-Visual Information for Integrated Speech Processing",
  BOOKTITLE = AVBPA01,
  YEAR = "2001",
  PAGES = "127",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251344"
}

@inproceedings{bb256536,
  AUTHOR = "Sullivan, K.P.H. and Pelecanos, J.",
  TITLE = "Revisiting Carl Bildt's Impostor: Would a Speaker Verification System Foil Him?",
  BOOKTITLE = AVBPA01,
  YEAR = "2001",
  PAGES = "144",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251345"
}

% NOTE(review): the next entry is typed as inproceedings, but its quoted
% BOOKTITLE "MIT AIM" looks like a report series rather than a proceedings
% title - confirm the entry type against the upstream record.
@inproceedings{bb256537,
  AUTHOR = "Geiger, G. and Ezzat, T. and Poggio, T.",
  TITLE = "Perceptual Evaluation of Video-Realistic Speech",
  BOOKTITLE = "MIT AIM",
  YEAR = "2003",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251346"
}

@inproceedings{bb256538,
  AUTHOR = "Zhang, X.Z. and Mersereau, R.M. and Clements, M.",
  TITLE = "Bimodal fusion in audio-visual speech recognition",
  BOOKTITLE = ICIP02,
  YEAR = "2002",
  PAGES = "I: 964--967",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251347"
}

@inproceedings{bb256539,
  AUTHOR = "Graf, H.P. and Cosatto, E. and Strom, V. and Huang, F.J.",
  TITLE = "Visual prosody: facial movements accompanying speech",
  BOOKTITLE = AFGR02,
  YEAR = "2002",
  PAGES = "381--386",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251348"
}

% NOTE(review): BOOKTITLE below is the macro UMD and the entry has no PAGES;
% this may be a thesis or report rather than a conference paper - confirm.
@inproceedings{bb256540,
  AUTHOR = "Qi, Y.",
  TITLE = "Learning Algorithms for Audio and Video Processing: Independent Component Analysis and Support Vector Machine Based Approaches",
  BOOKTITLE = UMD,
  YEAR = "2000",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251349"
}

@inproceedings{bb256541,
  AUTHOR = "Nankaku, Y. and Tokuda, K. and Kitamura, T.",
  TITLE = "Normalized Training for HMM-based Visual Speech Recognition",
  BOOKTITLE = ICIP00,
  YEAR = "2000",
  PAGES = "Vol III: 234--237",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251350"
}

@inproceedings{bb256542,
  AUTHOR = "Zhang, Y. and Levinson, S. and Huang, T.S.",
  TITLE = "Speaker Independent Audio-Visual Speech Recognition",
  BOOKTITLE = ICME00,
  YEAR = "2000",
  PAGES = "TP8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251351"
}

@inproceedings{bb256543,
  AUTHOR = "Pan, H. and Huang, T.S.",
  TITLE = "A New Approach to Integrate Audio and Visual Features of Speech",
  BOOKTITLE = ICME00,
  YEAR = "2000",
  PAGES = "TP8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251352"
}

@inproceedings{bb256544,
  AUTHOR = "Potamianos, G. and Verma, A. and Neti, C. and Iyengar, G. and Basu, S.",
  TITLE = "A Cascade Image Transform for Speaker Independent Automatic Speech Reading",
  BOOKTITLE = ICME00,
  YEAR = "2000",
  PAGES = "TP8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251353"
}

@inproceedings{bb256545,
  AUTHOR = "Pan, H. and Liang, Z.P. and Huang, T.S.",
  TITLE = "Fusing Audio and Visual Features of Speech",
  BOOKTITLE = ICIP00,
  YEAR = "2000",
  PAGES = "Vol III: 214--217",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251354"
}

@inproceedings{bb256546,
  AUTHOR = "Faruquie, T.A. and Majumdar, A. and Rajput, N. and Subramaniam, L.V.",
  TITLE = "Large Vocabulary Audio-visual Speech Recognition Using Active Shape Models",
  BOOKTITLE = ICPR00,
  YEAR = "2000",
  PAGES = "Vol III: 106--109",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251355"
}

@inproceedings{bb256547,
  AUTHOR = "Yu, K. and Jiang, X. and Bunke, H.",
  TITLE = "Combining Acoustic and Visual Classifiers for the Recognition of Spoken Sentences",
  BOOKTITLE = ICPR00,
  YEAR = "2000",
  PAGES = "Vol II: 491--494",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251356"
}

@inproceedings{bb256548,
  AUTHOR = "Nam, J. and Alghoniemy, M. and Tewfik, A.H.",
  TITLE = "Audio-visual content-based violent scene characterization",
  BOOKTITLE = ICIP98,
  YEAR = "1998",
  PAGES = "I: 353--357",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251357"
}

@inproceedings{bb256549,
  AUTHOR = "Luettin, J. and Dupont, S.",
  TITLE = "Continuous Audio-Visual Speech Recognition",
  BOOKTITLE = ECCV98,
  YEAR = "1998",
  PAGES = "II: 657",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251358"
}

@inproceedings{bb256550,
  AUTHOR = "Yang, J. and Xiao, J. and Ritter, M.",
  TITLE = "Automatic Selection of Visemes for Image-based Visual Speech Synthesis",
  BOOKTITLE = ICME00,
  YEAR = "2000",
  PAGES = "TP8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251359"
}

@inproceedings{bb256551,
  AUTHOR = "Sharma, R. and Cai, J.Y. and Chakravarthy, S. and Poddar, I. and Sethi, Y.",
  TITLE = "Exploiting Speech/Gesture Co-occurrence for Improving Continuous Gesture Recognition in Weather Narration",
  BOOKTITLE = AFGR00,
  YEAR = "2000",
  PAGES = "422--427",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251360"
}

@inproceedings{bb256552,
  AUTHOR = "Yamamoto, E. and Nakamura, S. and Shikano, K.",
  TITLE = "Lip Movement Synthesis from Speech Based on Hidden Markov Models",
  BOOKTITLE = AFGR98,
  YEAR = "1998",
  PAGES = "154--159",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251361"
}

@inproceedings{bb256553,
  AUTHOR = "Roy, D. and Pentland, A.P.",
  TITLE = "Automatic spoken affect classification and analysis",
  BOOKTITLE = AFGR96,
  YEAR = "1996",
  PAGES = "363--367",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251362"
}

@inproceedings{bb256554,
  AUTHOR = "Petajan, E.D.",
  TITLE = "An Architecture for Automatic Lipreading to Enhance Speech Recognition",
  BOOKTITLE = CVPR85,
  YEAR = "1985",
  PAGES = "40--47",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251363"
}

% ---- Source page: people917avt1.html ----

@article{bb256555,
  AUTHOR = "Zotkin, D.N. and Duraiswami, R. and Davis, L.S.",
  TITLE = "Joint Audio-Visual Tracking Using Particle Filters",
  JOURNAL = JASP,
  VOLUME = "2002",
  YEAR = "2002",
  NUMBER = "11",
  MONTH = nov,
  PAGES = "1154",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251364"
}

@article{bb256556,
  AUTHOR = "Garg, A. and Pavlovic, V. and Rehg, J.M.",
  TITLE = "Boosted learning in dynamic Bayesian networks for multimodal speaker detection",
  JOURNAL = PIEEE,
  VOLUME = "91",
  YEAR = "2003",
  NUMBER = "9",
  MONTH = sep,
  PAGES = "1355--1369",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251365"
}

@inproceedings{bb256557,
  AUTHOR = "Garg, A. and Pavlovic, V. and Rehg, J.M.",
  TITLE = "Audio-visual speaker detection using dynamic Bayesian networks",
  BOOKTITLE = AFGR00,
  YEAR = "2000",
  PAGES = "384--390",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251366"
}

@inproceedings{bb256558,
  AUTHOR = "Pavlovic, V. and Garg, A. and Rehg, J.M. and Huang, T.S.",
  TITLE = "Multimodal Speaker Detection using Error Feedback Dynamic Bayesian Networks",
  BOOKTITLE = CVPR00,
  YEAR = "2000",
  PAGES = "II: 34--41",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251367"
}

@inproceedings{bb256559,
  AUTHOR = "Pavlovic, V. and Berry, G. and Huang, T.S.",
  TITLE = "Integration of Audio/Visual Information for Use in Human-Computer Intelligent Interaction",
  BOOKTITLE = ICIP97,
  YEAR = "1997",
  PAGES = "I: 121--124",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251368"
}

@inproceedings{bb256560,
  AUTHOR = "Choudhury, T. and Rehg, J.M. and Pavlovic, V. and Pentland, A.P.",
  TITLE = "Boosting and structure learning in dynamic Bayesian networks for audio-visual speaker detection",
  BOOKTITLE = ICPR02,
  YEAR = "2002",
  PAGES = "III: 789--794",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251369"
}

@inproceedings{bb256561,
  AUTHOR = "Pavlovic, V.",
  TITLE = "Multimodal tracking and classification of audio-visual features",
  BOOKTITLE = ICIP98,
  YEAR = "1998",
  PAGES = "I: 343--347",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251370"
}

@inproceedings{bb256562,
  AUTHOR = "Rehg, J.M. and Murphy, K.P. and Fieguth, P.W.",
  TITLE = "Vision-Based Speaker Detection Using Bayesian Networks",
  BOOKTITLE = CVPR99,
  YEAR = "1999",
  PAGES = "II: 110--116",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251371"
}

% NOTE(review): bb256563 and bb256564 carry the same TITLE and the same
% BIBSOURCE anchor (TT251372) but differ in author order, volume, year and
% pages. This looks like an earlier/later publication pair listed under one
% upstream record - confirm it is not an accidental duplicate before merging.
@article{bb256563,
  AUTHOR = "Talantzis, F. and Pnevmatikakis, A. and Constantinides, A.G.",
  TITLE = "Audio-Visual Active Speaker Tracking in Cluttered Indoors Environments",
  JOURNAL = SMC-B,
  VOLUME = "39",
  YEAR = "2009",
  NUMBER = "1",
  MONTH = feb,
  PAGES = "7--15",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251372"
}

@article{bb256564,
  AUTHOR = "Constantinides, A.G. and Pnevmatikakis, A. and Talantzis, F.",
  TITLE = "Audio-Visual Active Speaker Tracking in Cluttered Indoors Environments",
  JOURNAL = SMC-B,
  VOLUME = "38",
  YEAR = "2008",
  NUMBER = "3",
  MONTH = jun,
  PAGES = "799--807",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251372"
}

@article{bb256565,
  AUTHOR = "Qian, X. and Brutti, A. and Lanz, O. and Omologo, M. and Cavallaro, A.",
  TITLE = "Multi-Speaker Tracking From an Audio-Visual Sensing Device",
  JOURNAL = MultMed,
  VOLUME = "21",
  YEAR = "2019",
  NUMBER = "10",
  MONTH = oct,
  PAGES = "2576--2588",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251373"
}

@article{bb256566,
  AUTHOR = "Ban, Y.T. and Alameda Pineda, X. and Girin, L. and Horaud, R.",
  TITLE = "Variational Bayesian Inference for Audio-Visual Tracking of Multiple Speakers",
  JOURNAL = PAMI,
  VOLUME = "43",
  YEAR = "2021",
  NUMBER = "5",
  MONTH = may,
  PAGES = "1761--1776",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251374"
}

@inproceedings{bb256567,
  AUTHOR = "Ban, Y.T. and Girin, L. and Alameda Pineda, X. and Horaud, R.",
  TITLE = "Exploiting the Complementarity of Audio and Visual Data in Multi-speaker Tracking",
  BOOKTITLE = CVAVM17,
  YEAR = "2017",
  PAGES = "446--454",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251375"
}

@article{bb256568,
  AUTHOR = "Qian, X.Y. and Brutti, A. and Lanz, O. and Omologo, M. and Cavallaro, A.",
  TITLE = "Audio-Visual Tracking of Concurrent Speakers",
  JOURNAL = MultMed,
  VOLUME = "24",
  YEAR = "2022",
  PAGES = "942--954",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251376"
}

@article{bb256569,
  AUTHOR = "Hu, D. and Wei, Y. and Qian, R. and Lin, W.Y. and Song, R.H. and Wen, J.R.",
  TITLE = "Class-Aware Sounding Objects Localization via Audiovisual Correspondence",
  JOURNAL = PAMI,
  VOLUME = "44",
  YEAR = "2022",
  NUMBER = "12",
  MONTH = dec,
  PAGES = "9844--9859",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251377"
}

@article{bb256570,
  AUTHOR = "Wang, H. and Zha, Z.J. and Li, L. and Chen, X.J. and Luo, J.B.",
  TITLE = "Semantic and Relation Modulation for Audio-Visual Event Localization",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "6",
  MONTH = jun,
  PAGES = "7711--7725",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251378"
}

@article{bb256571,
  AUTHOR = "Garg, R. and Gao, R.H. and Grauman, K.",
  TITLE = "Visually-Guided Audio Spatialization in Video with Geometry-Aware Multi-task Learning",
  JOURNAL = IJCV,
  VOLUME = "131",
  YEAR = "2023",
  NUMBER = "10",
  MONTH = oct,
  PAGES = "2723--2737",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251379"
}

@article{bb256572,
  AUTHOR = "Wang, J.X. and Li, C.L. and Zheng, A. and Tang, J. and Luo, B.",
  TITLE = "Looking and Hearing Into Details: Dual-Enhanced Siamese Adversarial Network for Audio-Visual Matching",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "7505--7516",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251380"
}

@inproceedings{bb256573,
  AUTHOR = "Nugroho, M.A. and Woo, S. and Lee, S. and Kim, C.",
  TITLE = "Audio-Visual Glance Network for Efficient Video Recognition",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "10116--10125",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251381"
}

@inproceedings{bb256574,
  AUTHOR = "Liu, Y. and Tan, Y. and Lan, H.Y.",
  TITLE = "Self-Supervised Contrastive Learning for Audio-Visual Action Recognition",
  BOOKTITLE = ICIP23,
  YEAR = "2023",
  PAGES = "1000--1004",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251382"
}

@inproceedings{bb256575,
  AUTHOR = "Min, K. and Roy, S. and Tripathi, S. and Guha, T. and Majumdar, S.",
  TITLE = "Learning Long-Term Spatial-Temporal Graphs for Active Speaker Detection",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXXV:371--387",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251383"
}

@inproceedings{bb256576,
  AUTHOR = "Majumder, S. and Al Halah, Z. and Grauman, K.",
  TITLE = "Move2Hear: Active Audio-Visual Source Separation",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "275--285",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251384"
}

@inproceedings{bb256577,
  AUTHOR = "Majumder, S. and Grauman, K.",
  TITLE = "Active Audio-Visual Separation of Dynamic Sound Sources",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXIX:551--569",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251385"
}

@inproceedings{bb256578,
  AUTHOR = "Alcazar, J.L. and Heilbron, F.C. and Thabet, A.K. and Ghanem, B.",
  TITLE = "MAAS: Multi-modal Assignation for Active Speaker Detection",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "265--274",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251386"
}

@inproceedings{bb256579,
  AUTHOR = "Kopuklu, O. and Taseska, M. and Rigoll, G.",
  TITLE = "How to Design a Three-Stage Architecture for Audio-Visual Active Speaker Detection in the Wild",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "1173--1183",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251387"
}

@inproceedings{bb256580,
  AUTHOR = "Wu, Y. and Yang, Y.",
  TITLE = "Exploring Heterogeneous Clues for Weakly-Supervised Audio-Visual Video Parsing",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "1326--1335",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251388"
}

@inproceedings{bb256581,
  AUTHOR = "Liu, H. and Sun, Y.H. and Li, Y.D. and Yang, B.",
  TITLE = "3D Audio-Visual Speaker Tracking with A Novel Particle Filter",
  BOOKTITLE = ICPR21,
  YEAR = "2021",
  PAGES = "7343--7348",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251389"
}

@inproceedings{bb256582,
  AUTHOR = "Liu, H. and Li, Y.D. and Yang, B.",
  TITLE = "3D Audio-Visual Speaker Tracking with A Two-Layer Particle Filter",
  BOOKTITLE = ICIP19,
  YEAR = "2019",
  PAGES = "1955--1959",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251390"
}

@inproceedings{bb256583,
  AUTHOR = "He, G. and Liu, X. and Fan, F. and You, J.",
  TITLE = "Image2Audio: Facilitating Semi-supervised Audio Emotion Recognition with Facial Expression Image",
  BOOKTITLE = VL3W20,
  YEAR = "2020",
  PAGES = "3978--3983",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251391"
}

@inproceedings{bb256584,
  AUTHOR = "Le, N. and Heili, A. and Wu, D. and Odobez, J.M.",
  TITLE = "Temporally subsampled detection for accurate and efficient face tracking and diarization",
  BOOKTITLE = ICPR16,
  YEAR = "2016",
  PAGES = "1792--1797",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251392"
}

@inproceedings{bb256585,
  AUTHOR = "Saeed, A. and Al Hamadi, A. and Heuer, M.",
  TITLE = "Speaker Tracking Using Multi-modal Fusion Framework",
  BOOKTITLE = ICISP12,
  YEAR = "2012",
  PAGES = "539--546",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251393"
}

% NOTE(review): PAGES "xx-yy" in the next entry is a placeholder carried over
% from the export, not a real page range; it is kept verbatim until the
% actual pages can be looked up.
@inproceedings{bb256586,
  AUTHOR = "Katsarakis, N. and Talantzis, F. and Pnevmatikakis, A. and Polymenakos, L.",
  TITLE = "The AIT 3D Audio / Visual Person Tracker for CLEAR 2007",
  BOOKTITLE = MTPH07,
  YEAR = "2007",
  PAGES = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251394"
}

@inproceedings{bb256587,
  AUTHOR = "Megherbi, N. and Ambellouis, S. and Colot, O. and Cabestaing, F.",
  TITLE = "Data Association in Multi-Target Tracking Using Belief Theory: Handling Target Emergence and Disappearance Issue",
  BOOKTITLE = AVSBS05,
  YEAR = "2005",
  PAGES = "517--521",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251395"
}

@inproceedings{bb256588,
  AUTHOR = "Megherbi, N. and Ambellouis, S. and Colot, O. and Cabestaing, F.",
  TITLE = "Joint audio-video people tracking using belief theory",
  BOOKTITLE = AVSBS05,
  YEAR = "2005",
  PAGES = "135--140",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251396"
}

@inproceedings{bb256589,
  AUTHOR = "Li, X. and Sun, L. and Tao, L.M. and Xu, G.Y. and Jia, Y.",
  TITLE = "A Speaker Tracking Algorithm Based on Audio and Visual Information Fusion Using Particle Filter",
  BOOKTITLE = ICIAR04,
  YEAR = "2004",
  PAGES = "II: 572--580",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251397"
}

@inproceedings{bb256590,
  AUTHOR = "Lange, C. and Hermann, T. and Ritter, H.",
  TITLE = "Holistic Body Tracking for Gestural Interfaces",
  BOOKTITLE = GW03,
  YEAR = "2003",
  PAGES = "132--139",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251398"
}

@inproceedings{bb256591,
  AUTHOR = "Blake, A. and Gangnet, M. and Perez, P. and Vermaak, J.",
  TITLE = "Integrated tracking with vision and sound",
  BOOKTITLE = CIAP01,
  YEAR = "2001",
  PAGES = "354--357",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT251399"
}

% ---- Source page: people917.html ----

@article{bb256592,
  AUTHOR = "Mirhosseini, A.R. and Yan, H. and Lam, K.M.",
  TITLE = "Adaptive Deformable Model for Mouth Boundary Detection",
  JOURNAL = OptEng,
  VOLUME = "37",
  YEAR = "1998",
  NUMBER = "3",
  MONTH = mar,
  PAGES = "869--875",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT251400"
}

@inproceedings{bb256593,
  AUTHOR = "Mirhosseini, A.R. and Lam, K.M. and Yan, H.",
  TITLE = "An adaptive deformable template for mouth boundary modeling",
  BOOKTITLE = CIAP97,
  YEAR = "1997",
  PAGES = "I: 559--566",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT251401"
}

@inproceedings{bb256594,
  AUTHOR = "Mirhosseini, A.R. and Yan, H. and Lam, K.M. and Chen, C.",
  TITLE = "A Hierarchical and Adaptive Deformable Model for Mouth Boundary Detection",
  BOOKTITLE = ICIP97,
  YEAR = "1997",
  PAGES = "II: 756--759",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT251402"
}

@article{bb256595,
  AUTHOR = "Li, C.H. and Yuen, P.C.",
  TITLE = "Regularized color clustering in medical image database",
  JOURNAL = MedImg,
  VOLUME = "19",
  YEAR = "2000",
  NUMBER = "11",
  MONTH = nov,
  PAGES = "1150--1155",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT251403"
}

@inproceedings{bb256596,
  AUTHOR = "Li, C.H. and Yuen, P.C.",
  TITLE = "Normalized sampling for color clustering in medical diagnosis",
  BOOKTITLE = ICPR02,
  YEAR = "2002",
  PAGES = "III: 819--822",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT251404"
}

@article{bb256597,
  AUTHOR = "Sadeghi, M.T. and Kittler, J.V. and Messer, K.",
  TITLE = "Modelling and segmentation of lip area in face images",
  JOURNAL = VISP,
  VOLUME = "149",
  YEAR = "2002",
  NUMBER = "3",
  MONTH = jun,
  PAGES = "179--184",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT251405"
}

@inproceedings{bb256598,
  AUTHOR = "Sadeghi, M.T. and Kittler, J.V. and Messer, K.",
  TITLE = "Spatial clustering of pixels in the mouth area of face images",
  BOOKTITLE = CIAP01,
  YEAR = "2001",
  PAGES = "36--41",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT251406"
}

@inproceedings{bb256599,
  AUTHOR = "Sadeghi, M.T. and Kittler, J.V. and Messer, K.",
  TITLE = "Segmentation of Lip Pixels for Lip Tracker Initialisation",
  BOOKTITLE = ICIP01,
  YEAR = "2001",
  PAGES = "I: 50--53",
  BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT251407"
}