@inproceedings{bb289800,
        AUTHOR = "Sharma, R. and Cai, J. Y. and Chakravarthy, S. and Poddar, I. and Sethi, Y.",
        TITLE = "Exploiting Speech/Gesture Co-occurrence for Improving Continuous
Gesture Recognition in Weather Narration",
        BOOKTITLE = AFGR00,
        YEAR = "2000",
        PAGES = "422--427",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT284433"}

@inproceedings{bb289801,
        AUTHOR = "Yamamoto, E. and Nakamura, S. and Shikano, K.",
        TITLE = "Lip Movement Synthesis from Speech Based on Hidden {Markov} Models",
        BOOKTITLE = AFGR98,
        YEAR = "1998",
        PAGES = "154--159",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT284434"}

@inproceedings{bb289802,
        AUTHOR = "Roy, D. and Pentland, A. P.",
        TITLE = "Automatic spoken affect classification and analysis",
        BOOKTITLE = AFGR96,
        YEAR = "1996",
        PAGES = "363--367",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT284435"}

@inproceedings{bb289803,
        AUTHOR = "Petajan, E. D.",
        TITLE = "An Architecture for Automatic Lipreading to Enhance Speech Recognition",
        BOOKTITLE = CVPR85,
        YEAR = "1985",
        PAGES = "40--47",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT284436"}

@article{bb289804,
        AUTHOR = "Zotkin, D. N. and Duraiswami, R. and Davis, L. S.",
        TITLE = "Joint Audio-Visual Tracking Using Particle Filters",
        JOURNAL = JASP,
        VOLUME = "2002",
        YEAR = "2002",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "1154",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284437"}

@article{bb289805,
        AUTHOR = "Garg, A. and Pavlovic, V. and Rehg, J. M.",
        TITLE = "Boosted learning in dynamic {Bayesian} networks for multimodal speaker
detection",
        JOURNAL = PIEEE,
        VOLUME = "91",
        YEAR = "2003",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "1355--1369",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284438"}

@inproceedings{bb289806,
        AUTHOR = "Garg, A. and Pavlovic, V. and Rehg, J. M.",
        TITLE = "Audio-visual speaker detection using dynamic {Bayesian} networks",
        BOOKTITLE = AFGR00,
        YEAR = "2000",
        PAGES = "384--390",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284439"}

@inproceedings{bb289807,
        AUTHOR = "Pavlovic, V. and Garg, A. and Rehg, J. M. and Huang, T. S.",
        TITLE = "Multimodal Speaker Detection using Error Feedback Dynamic {Bayesian}
Networks",
        BOOKTITLE = CVPR00,
        VOLUME = "II",
        YEAR = "2000",
        PAGES = "34--41",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284440"}

@inproceedings{bb289808,
        AUTHOR = "Pavlovic, V. and Berry, G. and Huang, T. S.",
        TITLE = "Integration of Audio/Visual Information for Use in
Human-Computer Intelligent Interaction",
        BOOKTITLE = ICIP97,
        VOLUME = "I",
        YEAR = "1997",
        PAGES = "121--124",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284441"}

@inproceedings{bb289809,
        AUTHOR = "Choudhury, T. and Rehg, J. M. and Pavlovic, V. and Pentland, A. P.",
        TITLE = "Boosting and structure learning in dynamic {Bayesian} networks for
audio-visual speaker detection",
        BOOKTITLE = ICPR02,
        VOLUME = "III",
        YEAR = "2002",
        PAGES = "789--794",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284442"}

@inproceedings{bb289810,
        AUTHOR = "Pavlovic, V.",
        TITLE = "Multimodal tracking and classification of audio-visual features",
        BOOKTITLE = ICIP98,
        VOLUME = "I",
        YEAR = "1998",
        PAGES = "343--347",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284443"}

@inproceedings{bb289811,
        AUTHOR = "Rehg, J. M. and Murphy, K. P. and Fieguth, P. W.",
        TITLE = "Vision-Based Speaker Detection Using {Bayesian} Networks",
        BOOKTITLE = CVPR99,
        VOLUME = "II",
        YEAR = "1999",
        PAGES = "110--116",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284444"}

@article{bb289812,
        AUTHOR = "Vajaria, H. and Sankar, R. and Kasturi, R.",
        TITLE = "Exploring Co-Occurence Between Speech and Body Movement for
Audio-Guided Video Localization",
        JOURNAL = CirSysVideo,
        VOLUME = "18",
        YEAR = "2008",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "1608--1617",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284445"}

@inproceedings{bb289813,
        AUTHOR = "Vajaria, H. and Islam, T. and Sarkar, S. and Sankar, R. and Kasturi, R.",
        TITLE = "Audio Segmentation and Speaker Localization in Meeting Videos",
        BOOKTITLE = ICPR06,
        VOLUME = "II",
        YEAR = "2006",
        PAGES = "1150--1153",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284446"}

@article{bb289814,
        AUTHOR = "Talantzis, F. and Pnevmatikakis, A. and Constantinides, A. G.",
        TITLE = "Audio-Visual Active Speaker Tracking in Cluttered Indoors Environments",
        JOURNAL = SMC-B,
        VOLUME = "39",
        YEAR = "2009",
        NUMBER = "1",
        MONTH = feb,
        PAGES = "7--15",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284447"}

@article{bb289815,
        AUTHOR = "Constantinides, A. G. and Pnevmatikakis, A. and Talantzis, F.",
        TITLE = "Audio-Visual Active Speaker Tracking in Cluttered Indoors Environments",
        JOURNAL = SMC-B,
        VOLUME = "38",
        YEAR = "2008",
        NUMBER = "3",
        MONTH = jun,
        PAGES = "799--807",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284447"}

@article{bb289816,
        AUTHOR = "Lee, J. S. and de Simone, F. and Ebrahimi, T.",
        TITLE = "Efficient video coding based on audio-visual focus of attention",
        JOURNAL = JVCIR,
        VOLUME = "22",
        YEAR = "2011",
        NUMBER = "8",
        MONTH = nov,
        PAGES = "704--711",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284448"}

@article{bb289817,
        AUTHOR = "Blauth, D. A. and Minotto, V. P. and Jung, C. R. and Lee, B. and Kalker, T.",
        TITLE = "Voice activity detection and speaker localization using audiovisual
cues",
        JOURNAL = PRL,
        VOLUME = "33",
        YEAR = "2012",
        NUMBER = "4",
        MONTH = mar,
        PAGES = "373--380",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284449"}

@inproceedings{bb289818,
        AUTHOR = "Montazzolli, S. and Jung, C. R. and Gelb, D.",
        TITLE = "Audiovisual voice activity detection using off-the-shelf cameras",
        BOOKTITLE = ICIP15,
        YEAR = "2015",
        PAGES = "3886--3890",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284450"}

@article{bb289819,
        AUTHOR = "Minotto, V. P. and Jung, C. R. and Lee, B.",
        TITLE = "Simultaneous-Speaker Voice Activity Detection and Localization Using
Mid-Fusion of {SVM} and {HMMs}",
        JOURNAL = MultMed,
        VOLUME = "16",
        YEAR = "2014",
        NUMBER = "4",
        MONTH = jun,
        PAGES = "1032--1044",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284451"}

@article{bb289820,
        AUTHOR = "Qian, X. and Brutti, A. and Lanz, O. and Omologo, M. and Cavallaro, A.",
        TITLE = "Multi-Speaker Tracking From an Audio-Visual Sensing Device",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "2576--2588",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284452"}

@article{bb289821,
        AUTHOR = "Pu, J. and Panagakis, Y. and Pantic, M.",
        TITLE = "Active Speaker Detection and Localization in Videos Using Low-Rank
and Kernelized Sparsity",
        JOURNAL = SPLetters,
        VOLUME = "27",
        YEAR = "2020",
        PAGES = "865--869",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284453"}

@article{bb289822,
        AUTHOR = "Qian, X. Y. and Liu, Q. and Wang, J. D. and Li, H. Z.",
        TITLE = "Three-Dimensional Speaker Localization: Audio-Refined Visual Scaling
Factor Estimation",
        JOURNAL = SPLetters,
        VOLUME = "28",
        YEAR = "2021",
        PAGES = "1405--1409",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284454"}

@article{bb289823,
        AUTHOR = "Ban, Y. T. and Alameda Pineda, X. and Girin, L. and Horaud, R.",
        TITLE = "Variational {Bayesian} Inference for Audio-Visual Tracking of Multiple
Speakers",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "5",
        MONTH = may,
        PAGES = "1761--1776",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284455"}

@inproceedings{bb289824,
        AUTHOR = "Ban, Y. T. and Girin, L. and Alameda Pineda, X. and Horaud, R.",
        TITLE = "Exploiting the Complementarity of Audio and Visual Data in
Multi-speaker Tracking",
        BOOKTITLE = CVAVM17,
        YEAR = "2017",
        PAGES = "446--454",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284456"}

@article{bb289825,
        AUTHOR = "Qian, X. Y. and Brutti, A. and Lanz, O. and Omologo, M. and Cavallaro, A.",
        TITLE = "Audio-Visual Tracking of Concurrent Speakers",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "942--954",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284457"}

@article{bb289826,
        AUTHOR = "Hu, D. and Wei, Y. and Qian, R. and Lin, W. Y. and Song, R. H. and Wen, J. R.",
        TITLE = "Class-Aware Sounding Objects Localization via Audiovisual
Correspondence",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "9844--9859",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284458"}

@article{bb289827,
        AUTHOR = "Zheng, A. and Hu, M. and Jiang, B. and Huang, Y. and Yan, Y. and Luo, B.",
        TITLE = "Adversarial-Metric Learning for Audio-Visual Cross-Modal Matching",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "338--351",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284459"}

@article{bb289828,
        AUTHOR = "Wang, Y. and Qian, X. H. and Zhou, W.",
        TITLE = "Transformer-Prompted Network: Efficient Audio-Visual Segmentation via
Transformer and Prompt Learning",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "516--520",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284460"}

@article{bb289829,
        AUTHOR = "Wang, H. and Zha, Z. J. and Li, L. and Chen, X. J. and Luo, J. B.",
        TITLE = "Semantic and Relation Modulation for Audio-Visual Event Localization",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "7711--7725",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284461"}

@article{bb289830,
        AUTHOR = "Garg, R. and Gao, R. H. and Grauman, K.",
        TITLE = "Visually-Guided Audio Spatialization in Video with Geometry-Aware
Multi-task Learning",
        JOURNAL = IJCV,
        VOLUME = "131",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "2723--2737",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284462"}

@article{bb289831,
        AUTHOR = "Wang, J. X. and Li, C. L. and Zheng, A. and Tang, J. and Luo, B.",
        TITLE = "Looking and Hearing Into Details:
Dual-Enhanced {Siamese} Adversarial Network for Audio-Visual Matching",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "7505--7516",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284463"}

@article{bb289832,
        AUTHOR = "Liu, C. and Li, P. and Zhang, H. and Li, L. C. and Huang, Z. and Wang, D. D. and Yu, X.",
        TITLE = "{BAVS}: Bootstrapping Audio-Visual Segmentation by Integrating
Foundation Knowledge",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "10015--10028",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284464"}

@inproceedings{bb289833,
        AUTHOR = "Liu, C. and Li, P. and Yang, L. Y. and Wang, D. D. and Li, L. C. and Yu, X.",
        TITLE = "Robust Audio-Visual Segmentation via Audio-Guided Visual Convergent
Alignment",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "28922--28931",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284465"}

@inproceedings{bb289834,
        AUTHOR = "Liu, C. and Li, P. P. and Yu, Q. and Sheng, H. W. and Wang, D. D. and Li, L. C. and Yu, X.",
        TITLE = "Benchmarking Audio Visual Segmentation for Long-Untrimmed Videos",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "22712--22722",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284466"}

@article{bb289835,
        AUTHOR = "Traa, J. and Smaragdis, P.",
        TITLE = "A Wrapped {Kalman} Filter for Azimuthal Speaker Tracking",
        JOURNAL = SPLetters,
        VOLUME = "20",
        YEAR = "2013",
        NUMBER = "12",
        PAGES = "1257--1260",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284467"}

@article{bb289836,
        AUTHOR = "Li, Y. and Liu, H. and Yang, B.",
        TITLE = "{STNet}: Deep Audio-Visual Fusion Network for Robust Speaker Tracking",
        JOURNAL = MultMed,
        VOLUME = "27",
        YEAR = "2025",
        PAGES = "1835--1847",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284468"}

@article{bb289837,
        AUTHOR = "Shi, Z. F. and Wu, Q. B. and Meng, F. M. and Xu, L. F. and Li, H. L.",
        TITLE = "Cross-Modal Cognitive Consensus Guided Audio-Visual Segmentation",
        JOURNAL = MultMed,
        VOLUME = "27",
        YEAR = "2025",
        PAGES = "209--223",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284469"}

@article{bb289838,
        AUTHOR = "Senocak, A. and Ryu, H. and Kim, J. and Oh, T. H. and Pfister, H. and Chung, J. S.",
        TITLE = "Toward Interactive Sound Source Localization:
Better Align Sight and Sound!",
        JOURNAL = PAMI,
        VOLUME = "47",
        YEAR = "2025",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "7643--7659",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284470"}

@article{bb289839,
        AUTHOR = "Jiang, Z. Y. and Chen, X. and Wang, S. and Qian, X. Y. and Li, H. Z.",
        TITLE = "{TPEech}: Target Speaker Extraction and Noise Suppression With
Historical Dialogue Text Cues",
        JOURNAL = SPLetters,
        VOLUME = "33",
        YEAR = "2026",
        PAGES = "351--355",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284471"}

@article{bb289840,
        AUTHOR = "Yang, W. H. and Wei, J. G. and Lu, W. H. and Song, X. Y. and Yue, X.",
        TITLE = "Listening for 'You': Enhancing Speech Image Retrieval via Target
Speaker Extraction",
        JOURNAL = SPLetters,
        VOLUME = "33",
        YEAR = "2026",
        PAGES = "201--205",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284472"}

@inproceedings{bb289841,
        AUTHOR = "Um, S. J. and Kim, D. J. and Lee, S. and Kim, J. U.",
        TITLE = "Object-aware Sound Source Localization via Audio-Visual Scene
Understanding",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "8342--8351",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284473"}

@inproceedings{bb289842,
        AUTHOR = "Kim, I. H. and Song, Y. and Park, J. and Kim, W. H. and Kwak, S.",
        TITLE = "Improving Sound Source Localization with Joint Slot Attention on
Image and Audio",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "3121--3130",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284474"}

@inproceedings{bb289843,
        AUTHOR = "Liu, C. and Yang, L. Y. and Li, P. and Wang, D. D. and Li, L. and Yu, X.",
        TITLE = "Dynamic Derivation and Elimination: Audio Visual Segmentation with
Enhanced Audio Semantics",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "3131--3141",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284475"}

@inproceedings{bb289844,
        AUTHOR = "Ryu, H. and Kim, S. and Chung, J. S. and Senocak, A.",
        TITLE = "Seeing Speech and Sound: Distinguishing and Locating Audio Sources in
Visual Scenes",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "13540--13549",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284476"}

@inproceedings{bb289845,
        AUTHOR = "Wang, X. Z. and Cheng, F. and Bertasius, G.",
        TITLE = "{LoCoNet}: Long-Short Context Network for Active Speaker Detection",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "18462--18472",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284477"}

@inproceedings{bb289846,
        AUTHOR = "Huang, C. and Tian, Y. P. and Kumar, A. and Xu, C. L.",
        TITLE = "Egocentric Audio-Visual Object Localization",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "22910--22921",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284478"}

@inproceedings{bb289847,
        AUTHOR = "Nugroho, M. A. and Woo, S. and Lee, S. and Kim, C.",
        TITLE = "Audio-Visual Glance Network for Efficient Video Recognition",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "10116--10125",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284479"}

@inproceedings{bb289848,
        AUTHOR = "Liu, Y. and Tan, Y. and Lan, H. Y.",
        TITLE = "Self-Supervised Contrastive Learning for Audio-Visual Action
Recognition",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "1000--1004",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284480"}

@inproceedings{bb289849,
        AUTHOR = "Mo, S. T. and Morgado, P.",
        TITLE = "Localizing Visual Sounds the Easy Way",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXVII",
        YEAR = "2022",
        PAGES = "218--234",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284481"}

@inproceedings{bb289850,
        AUTHOR = "Xia, Y. and Zhao, Z.",
        TITLE = "Cross-modal Background Suppression for Audio-Visual Event
Localization",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19957--19966",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284482"}

@inproceedings{bb289851,
        AUTHOR = "Jiang, H. and Murdock, C. and Ithapu, V. K.",
        TITLE = "Egocentric Deep Multi-Channel Audio-Visual Active Speaker
Localization",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10534--10542",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284483"}

@inproceedings{bb289852,
        AUTHOR = "Min, K. and Roy, S. and Tripathi, S. and Guha, T. and Majumdar, S.",
        TITLE = "Learning Long-Term Spatial-Temporal Graphs for Active Speaker Detection",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXV",
        YEAR = "2022",
        PAGES = "371--387",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284484"}

@inproceedings{bb289853,
        AUTHOR = "Duan, B. and Tang, H. and Wang, W. and Zong, Z. L. and Yang, G. W. and Yan, Y.",
        TITLE = "Audio-Visual Event Localization via Recursive Fusion by Joint
Co-Attention",
        BOOKTITLE = WACV21,
        YEAR = "2021",
        PAGES = "4012--4021",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284485"}

@inproceedings{bb289854,
        AUTHOR = "Wu, Y. and Zhu, L. C. and Yan, Y. and Yang, Y.",
        TITLE = "Dual Attention Matching for Audio-Visual Event Localization",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "6291--6299",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284486"}

@inproceedings{bb289855,
        AUTHOR = "Majumder, S. and Al Halah, Z. and Grauman, K.",
        TITLE = "{Move2Hear}: Active Audio-Visual Source Separation",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "275--285",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284487"}

@inproceedings{bb289856,
        AUTHOR = "Majumder, S. and Grauman, K.",
        TITLE = "Active Audio-Visual Separation of Dynamic Sound Sources",
        BOOKTITLE = ECCV22,
        VOLUME = "XXIX",
        YEAR = "2022",
        PAGES = "551--569",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284488"}

@inproceedings{bb289857,
        AUTHOR = "Alcazar, J. L. and Heilbron, F. C. and Thabet, A. K. and Ghanem, B.",
        TITLE = "{MAAS}: Multi-modal Assignation for Active Speaker Detection",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "265--274",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284489"}

@inproceedings{bb289858,
        AUTHOR = "Kopuklu, O. and Taseska, M. and Rigoll, G.",
        TITLE = "How to Design a Three-Stage Architecture for Audio-Visual Active
Speaker Detection in the Wild",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1173--1183",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284490"}

@inproceedings{bb289859,
        AUTHOR = "Wu, Y. and Yang, Y.",
        TITLE = "Exploring Heterogeneous Clues for Weakly-Supervised Audio-Visual
Video Parsing",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "1326--1335",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284491"}

@inproceedings{bb289860,
        AUTHOR = "Liu, H. and Sun, Y. H. and Li, Y. D. and Yang, B.",
        TITLE = "{3D} Audio-Visual Speaker Tracking with A Novel Particle Filter",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "7343--7348",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284492"}

@inproceedings{bb289861,
        AUTHOR = "Liu, H. and Li, Y. D. and Yang, B.",
        TITLE = "{3D} Audio-Visual Speaker Tracking with A Two-Layer Particle Filter",
        BOOKTITLE = ICIP19,
        YEAR = "2019",
        PAGES = "1955--1959",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284493"}

@inproceedings{bb289862,
        AUTHOR = "He, G. and Liu, X. and Fan, F. and You, J.",
        TITLE = "{Image2Audio}: Facilitating Semi-supervised Audio Emotion Recognition
with Facial Expression Image",
        BOOKTITLE = VL3W20,
        YEAR = "2020",
        PAGES = "3978--3983",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284494"}

@inproceedings{bb289863,
        AUTHOR = "Le, N. and Heili, A. and Wu, D. and Odobez, J. M.",
        TITLE = "Temporally subsampled detection for accurate and efficient face
tracking and diarization",
        BOOKTITLE = ICPR16,
        YEAR = "2016",
        PAGES = "1792--1797",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284495"}

@inproceedings{bb289864,
        AUTHOR = "Saeed, A. and Al Hamadi, A. and Heuer, M.",
        TITLE = "Speaker Tracking Using Multi-modal Fusion Framework",
        BOOKTITLE = ICISP12,
        YEAR = "2012",
        PAGES = "539--546",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284496"}

% Page numbers are unknown for this record; the former xx-yy placeholder
% was dropped so that bibliography styles do not print it literally.
@inproceedings{bb289865,
        AUTHOR = "Kelly, D. and Pitie, F. and Kokaram, A. and Boland, F.",
        TITLE = "A Comparative Error Analysis of Audio-Visual Source Localization",
        BOOKTITLE = M2SFA208,
        YEAR = "2008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284497"}

% Page numbers are unknown for this record; the former xx-yy placeholder
% was dropped so that bibliography styles do not print it literally.
@inproceedings{bb289866,
        AUTHOR = "Katsarakis, N. and Talantzis, F. and Pnevmatikakis, A. and Polymenakos, L.",
        TITLE = "The {AIT} {3D} Audio / Visual Person Tracker for {CLEAR} 2007",
        BOOKTITLE = MTPH07,
        YEAR = "2007",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284498"}

@inproceedings{bb289867,
        AUTHOR = "Kushal, A. and Rahurkar, M. and Fei Fei, L. and Ponce, J. and Huang, T.",
        TITLE = "Audio-Visual Speaker Localization Using Graphical Models",
        BOOKTITLE = ICPR06,
        VOLUME = "I",
        YEAR = "2006",
        PAGES = "291--294",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284499"}

@inproceedings{bb289868,
        AUTHOR = "Tsuji, T. and Yamamoto, K. and Ishii, I.",
        TITLE = "Real-time Sound Source Localization Based on Audiovisual Frequency
Integration",
        BOOKTITLE = ICPR06,
        VOLUME = "IV",
        YEAR = "2006",
        PAGES = "322--325",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284500"}

@inproceedings{bb289869,
        AUTHOR = "Megherbi, N. and Ambellouis, S. and Colot, O. and Cabestaing, F.",
        TITLE = "Data Association in Multi-Target Tracking Using Belief Theory:
Handling Target Emergence and Disappearance Issue",
        BOOKTITLE = AVSBS05,
        YEAR = "2005",
        PAGES = "517--521",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284501"}

@inproceedings{bb289870,
        AUTHOR = "Megherbi, N. and Ambellouis, S. and Colot, O. and Cabestaing, F.",
        TITLE = "Joint audio-video people tracking using belief theory",
        BOOKTITLE = AVSBS05,
        YEAR = "2005",
        PAGES = "135--140",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284502"}

@inproceedings{bb289871,
        AUTHOR = "Li, X. and Sun, L. and Tao, L. M. and Xu, G. Y. and Jia, Y.",
        TITLE = "A Speaker Tracking Algorithm Based on Audio and Visual Information
Fusion Using Particle Filter",
        BOOKTITLE = ICIAR04,
        VOLUME = "II",
        YEAR = "2004",
        PAGES = "572--580",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284503"}

@inproceedings{bb289872,
        AUTHOR = "Lange, C. and Hermann, T. and Ritter, H.",
        TITLE = "Holistic Body Tracking for Gestural Interfaces",
        BOOKTITLE = GW03,
        YEAR = "2003",
        PAGES = "132--139",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284504"}

@inproceedings{bb289873,
        AUTHOR = "Blake, A. and Gangnet, M. and Perez, P. and Vermaak, J.",
        TITLE = "Integrated tracking with vision and sound",
        BOOKTITLE = CIAP01,
        YEAR = "2001",
        PAGES = "354--357",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917avt1.html#TT284505"}

@article{bb289874,
        AUTHOR = "Mirhosseini, A. R. and Yan, H. and Lam, K. M.",
        TITLE = "Adaptive Deformable Model for Mouth Boundary Detection",
        JOURNAL = OptEng,
        VOLUME = "37",
        YEAR = "1998",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "869--875",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284506"}

@inproceedings{bb289875,
        AUTHOR = "Mirhosseini, A. R. and Lam, K. M. and Yan, H.",
        TITLE = "An adaptive deformable template for mouth boundary modeling",
        BOOKTITLE = CIAP97,
        VOLUME = "I",
        YEAR = "1997",
        PAGES = "559--566",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284507"}

@inproceedings{bb289876,
        AUTHOR = "Mirhosseini, A. R. and Yan, H. and Lam, K. M. and Chen, C.",
        TITLE = "A Hierarchical and Adaptive Deformable Model for
Mouth Boundary Detection",
        BOOKTITLE = ICIP97,
        VOLUME = "II",
        YEAR = "1997",
        PAGES = "756--759",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284508"}

@article{bb289877,
        AUTHOR = "Li, C. H. and Yuen, P. C.",
        TITLE = "Regularized color clustering in medical image database",
        JOURNAL = MedImg,
        VOLUME = "19",
        YEAR = "2000",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "1150--1155",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284509"}

@inproceedings{bb289878,
        AUTHOR = "Li, C. H. and Yuen, P. C.",
        TITLE = "Normalized sampling for color clustering in medical diagnosis",
        BOOKTITLE = ICPR02,
        VOLUME = "III",
        YEAR = "2002",
        PAGES = "819--822",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284510"}

@article{bb289879,
        AUTHOR = "Sadeghi, M. T. and Kittler, J. V. and Messer, K.",
        TITLE = "Modelling and segmentation of lip area in face images",
        JOURNAL = VISP,
        VOLUME = "149",
        YEAR = "2002",
        NUMBER = "3",
        MONTH = jun,
        PAGES = "179--184",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284511"}

@inproceedings{bb289880,
        AUTHOR = "Sadeghi, M. T. and Kittler, J. V. and Messer, K.",
        TITLE = "Spatial clustering of pixels in the mouth area of face images",
        BOOKTITLE = CIAP01,
        YEAR = "2001",
        PAGES = "36--41",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284512"}

@inproceedings{bb289881,
        AUTHOR = "Sadeghi, M. T. and Kittler, J. V. and Messer, K.",
        TITLE = "Segmentation of Lip Pixels for Lip Tracker Initialisation",
        BOOKTITLE = ICIP01,
        VOLUME = "I",
        YEAR = "2001",
        PAGES = "50--53",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284513"}

% PAGES uses the BibTeX "317+" form: the paper starts on page 317 and the
% end page is not recorded.
@inproceedings{bb289882,
        AUTHOR = "Sadeghi, M. T. and Kittler, J. V. and Messer, K.",
        TITLE = "Real Time Segmentation of Lip Pixels for Lip Tracker Initialization",
        BOOKTITLE = CAIP01,
        YEAR = "2001",
        PAGES = "317+",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284514"}

@article{bb289883,
        AUTHOR = "Lucey, S. and Sridharan, S. and Chandran, V.",
        TITLE = "Adaptive mouth segmentation using chromatic features",
        JOURNAL = PRL,
        VOLUME = "23",
        YEAR = "2002",
        NUMBER = "11",
        MONTH = sep,
        PAGES = "1293--1302",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284515"}

@inproceedings{bb289884,
        AUTHOR = "Lucey, S. and Sridharan, S. and Chandran, V.",
        TITLE = "A Suitability Metric for Mouth Tracking Through Chromatic Segmentation",
        BOOKTITLE = ICIP01,
        VOLUME = "III",
        YEAR = "2001",
        PAGES = "258--261",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284516"}

@inproceedings{bb289885,
        AUTHOR = "Lucey, S. and Sridharan, S. and Chandran, V.",
        TITLE = "Initialised Eigenlip Estimator for Fast Lip Tracking Using Linear
Regression",
        BOOKTITLE = ICPR00,
        VOLUME = "III",
        YEAR = "2000",
        PAGES = "178--181",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284517"}

% Journal article. JOURNAL is the bare string macro PR; its definition is not
% visible in this part of the file.
% Initials are space-separated so each one is its own name token; MONTH uses
% the predefined macro so the style controls how it is printed.
@article{bb289886,
        AUTHOR = "Liew, A. W. C. and Leung, S. H. and Lau, W. H.",
        TITLE = "Lip contour extraction from color images using a deformable model",
        JOURNAL = PR,
        VOLUME = "35",
        YEAR = "2002",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "2949--2962",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284518"}

% Conference paper. BOOKTITLE is the bare string macro ICIP00; its definition
% is not visible in this part of the file.
% The proceedings volume is kept in VOLUME (no "Vol" prefix inside PAGES) so
% that the style supplies the label and PAGES holds only the range.
@inproceedings{bb289887,
        AUTHOR = "Liew, A. W. C. and Leung, S. H. and Lau, W. H.",
        TITLE = "Lip Contour Extraction Using a Deformable Model",
        BOOKTITLE = ICIP00,
        VOLUME = "2",
        YEAR = "2000",
        PAGES = "255--258",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284519"}

% Journal article. JOURNAL is the bare string macro IP; its definition is not
% visible in this part of the file.
% Initials are space-separated so each one is its own name token; MONTH uses
% the predefined macro so the style controls how it is printed.
@article{bb289888,
        AUTHOR = "Leung, S. H. and Wang, S. L. and Lau, W. H.",
        TITLE = "Lip Image Segmentation Using Fuzzy Clustering Incorporating an Elliptic
Shape Function",
        JOURNAL = IP,
        VOLUME = "13",
        YEAR = "2004",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "51--62",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284520"}

% Journal article. JOURNAL is the bare string macro PR; its definition is not
% visible in this part of the file.
% Initials are space-separated so each one is its own name token; MONTH uses
% the predefined macro so the style controls how it is printed.
@article{bb289889,
        AUTHOR = "Wang, S. L. and Lau, W. H. and Leung, S. H.",
        TITLE = "Automatic lip contour extraction from color images",
        JOURNAL = PR,
        VOLUME = "37",
        YEAR = "2004",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "2375--2387",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284521"}

% Patent record, stored as a misc entry so styles do not print it as a paper
% "In" a proceedings volume. HOWPUBLISHED is the bare string macro US_Patent;
% its definition is not visible in this part of the file.
% NOTE(review): no patent number is recorded. Entries bb289890 to bb289893 share
% this title and BIBSOURCE anchor, so they look alike in a printed bibliography;
% confirm the numbers against the source page.
@misc{bb289890,
        AUTHOR = "Mihara, I. and Yamauchi, Y. and Morishita, A. and Doi, M.",
        TITLE = "Image recognition method and apparatus",
        HOWPUBLISHED = US_Patent,
        YEAR = "2008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284522"}

% Patent record, stored as a misc entry so styles do not print it as a paper
% "In" a proceedings volume. HOWPUBLISHED is the bare string macro US_Patent;
% its definition is not visible in this part of the file.
% NOTE(review): no patent number is recorded. Entries bb289890 to bb289893 share
% this title and BIBSOURCE anchor, so they look alike in a printed bibliography;
% confirm the numbers against the source page.
@misc{bb289891,
        AUTHOR = "Doi, M. and Morishita, A. and Yamauchi, Y. and Mihara, I.",
        TITLE = "Image recognition method and apparatus",
        HOWPUBLISHED = US_Patent,
        YEAR = "2008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284522"}

% Patent record, stored as a misc entry so styles do not print it as a paper
% "In" a proceedings volume. HOWPUBLISHED is the bare string macro US_Patent;
% its definition is not visible in this part of the file.
% NOTE(review): no patent number is recorded. Entries bb289890 to bb289893 share
% this title and BIBSOURCE anchor, so they look alike in a printed bibliography;
% confirm the numbers against the source page.
@misc{bb289892,
        AUTHOR = "Mihara, I. and Doi, M.",
        TITLE = "Image recognition method and apparatus",
        HOWPUBLISHED = US_Patent,
        YEAR = "2003",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284522"}

% Patent record, stored as a misc entry so styles do not print it as a paper
% "In" a proceedings volume. HOWPUBLISHED is the bare string macro US_Patent;
% its definition is not visible in this part of the file.
% NOTE(review): no patent number is recorded. Entries bb289890 to bb289893 share
% this title and BIBSOURCE anchor, so they look alike in a printed bibliography;
% confirm the numbers against the source page.
@misc{bb289893,
        AUTHOR = "Doi, M. and Morishita, A. and Yamauchi, Y. and Mihara, I.",
        TITLE = "Image recognition method and apparatus",
        HOWPUBLISHED = US_Patent,
        YEAR = "2004",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284522"}

% Journal article. JOURNAL is the bare string macro CirSysVideo; its definition
% is not visible in this part of the file.
% Initials are space-separated so each one is its own name token; MONTH uses
% the predefined macro so the style controls how it is printed.
@article{bb289894,
        AUTHOR = "Eveno, N. and Caplier, A. and Coulon, P. Y.",
        TITLE = "Accurate and quasi-automatic lip tracking",
        JOURNAL = CirSysVideo,
        VOLUME = "14",
        YEAR = "2004",
        NUMBER = "5",
        MONTH = may,
        PAGES = "706--715",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284523"}

% Conference paper. BOOKTITLE is the bare string macro ICIP03; its definition
% is not visible in this part of the file.
% The proceedings volume is kept in VOLUME so that PAGES holds only the range.
% Initials are space-separated so each one is its own name token.
@inproceedings{bb289895,
        AUTHOR = "Eveno, N. and Caplier, A. and Coulon, P. Y.",
        TITLE = "Jumping snakes and parametric model for lip segmentation",
        BOOKTITLE = ICIP03,
        VOLUME = "2",
        YEAR = "2003",
        PAGES = "867--870",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284524"}

% Conference paper. BOOKTITLE is the bare string macro ICIP07; its definition
% is not visible in this part of the file.
% The acronym ROI is braced so styles that downcase titles keep its capitals.
% The proceedings volume is kept in VOLUME so that PAGES holds only the range.
@inproceedings{bb289896,
        AUTHOR = "Bouvier, C. and Coulon, P. Y. and Maldague, X.",
        TITLE = "Unsupervised Lips Segmentation Based on {ROI} Optimisation and Parametric
Model",
        BOOKTITLE = ICIP07,
        VOLUME = "4",
        YEAR = "2007",
        PAGES = "301--304",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284525"}

% Journal article. JOURNAL is the bare string macro IJCV; its definition is not
% visible in this part of the file.
% MONTH uses the predefined macro so the bibliography style controls how the
% month is printed.
@article{bb289897,
        AUTHOR = "Aharon, M. and Kimmel, R.",
        TITLE = "Representation Analysis and Synthesis of Lip Images Using
Dimensionality Reduction",
        JOURNAL = IJCV,
        VOLUME = "67",
        YEAR = "2006",
        NUMBER = "3",
        MONTH = may,
        PAGES = "297--312",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284526"}

% Journal article. JOURNAL is the bare string macro SMC-B; its definition is
% not visible in this part of the file.
% MONTH uses the predefined macro so the bibliography style controls how the
% month is printed.
@article{bb289898,
        AUTHOR = "Nguyen, D. and Halupka, D. and Aarabi, P. and Sheikholeslami, A.",
        TITLE = "Real-Time Face Detection and Lip Feature Extraction Using
Field-Programmable Gate Arrays",
        JOURNAL = SMC-B,
        VOLUME = "36",
        YEAR = "2006",
        NUMBER = "4",
        MONTH = aug,
        PAGES = "902--912",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284527"}

% Journal article. JOURNAL is the bare string macro IP; its definition is not
% visible in this part of the file.
% Initials are space-separated so each one is its own name token; MONTH uses
% the predefined macro so the style controls how it is printed.
@article{bb289899,
        AUTHOR = "Chang, W. Y. and Chen, C. S. and Jian, Y. D.",
        TITLE = "Visual Tracking in High-Dimensional State Space by Appearance-Guided
Particle Filtering",
        JOURNAL = IP,
        VOLUME = "17",
        YEAR = "2008",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "1154--1167",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people917.html#TT284528"}

Last update: Apr 6, 2026 at 11:28:57