% Bibliography entries bb367600 through bb367699, one field per line.
% Unquoted JOURNAL and BOOKTITLE values (SPLetters, PAMI, CVPR23, ...) are
% string macros; their definitions are not part of this section and are
% expected to be loaded before it.
% PAGES ranges use the "--" form. A leading roman numeral in PAGES
% (e.g. "II:80--92") presumably names the proceedings volume (TODO confirm).
% MONTH uses the standard three-letter month macros (may, jun, sep, dec).

@article{bb367600,
  AUTHOR = "Fan, R. and Shankar, N.B. and Alwan, A.",
  TITLE = "UniEnc-CASSNAT: An Encoder-Only Non-Autoregressive ASR for Speech SSL Models",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "711--715",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361765"
}

@article{bb367601,
  AUTHOR = "Xing, B. and Tsang, I.W.",
  TITLE = "Co-Guiding for Multi-Intent Spoken Language Understanding",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "5",
  MONTH = may,
  PAGES = "2965--2980",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361766"
}

@article{bb367602,
  AUTHOR = "Hwang, K. and Jung, I.H. and Lee, J.M.",
  TITLE = "An implementation of searchable video player",
  JOURNAL = IJCVR,
  VOLUME = "14",
  YEAR = "2024",
  NUMBER = "3",
  PAGES = "325--337",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361767"
}

@article{bb367603,
  AUTHOR = "Ma, Y.K. and Zhang, C. and Chen, Q. and Wang, W. and Ma, B.",
  TITLE = "Tuning Large Language Model for Speech Recognition With Mixed-Scale Re-Tokenization",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "1740--1744",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361768"
}

@article{bb367604,
  AUTHOR = "Mu, B.S. and Wan, X. and Zheng, N. and Zhou, H. and Xie, L.",
  TITLE = "MMGER: Multi-Modal and Multi-Granularity Generative Error Correction With LLM for Joint Accent and Speech Recognition",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "1940--1944",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361769"
}

@article{bb367605,
  AUTHOR = "Shi, Y. and Li, L. and Wang, D. and Han, J.Q.",
  TITLE = "Keyword Guided Target Speech Recognition",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "1945--1949",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361770"
}

@article{bb367606,
  AUTHOR = "Gao, X.X. and Li, Z.X. and Chen, Y.M. and Liu, C. and Li, H.Z.",
  TITLE = "Transferable Adversarial Attacks Against ASR",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "2200--2204",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361771"
}

@article{bb367607,
  AUTHOR = "Lu, H. and Cheng, G.F. and Yan, Y.H.",
  TITLE = "Conversational Short-Phrase Speaker Diarization via Self-Adjusting Speech Segmentation and Embedding Extraction",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "2340--2344",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361772"
}

@article{bb367608,
  AUTHOR = "Lee, C.W. and Lee, J.H. and Chang, J.H.",
  TITLE = "Language Model Personalization for Speech Recognition: A Clustered Federated Learning Approach With Adaptive Weight Average",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "2710--2714",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361773"
}

@article{bb367609,
  AUTHOR = "Xing, B. and Tsang, I.W.",
  TITLE = "HC2L: Hybrid and Cooperative Contrastive Learning for Cross-Lingual Spoken Language Understanding",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "12",
  MONTH = dec,
  PAGES = "8094--8105",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361774"
}

@article{bb367610,
  AUTHOR = "Chang, X. and Guo, P.C. and Fujita, Y. and Maekaku, T. and Watanabe, S.",
  TITLE = "MC-Whisper: Extending Speech Foundation Models to Multichannel Distant Speech Recognition",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "2850--2854",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361775"
}

@article{bb367611,
  AUTHOR = "Chen, Y.Q. and Niu, T. and Zhang, H. and Zhang, W. and Qu, D.",
  TITLE = "Meta-Prompt: Boosting Whisper's Performance in Low-Resource Speech Recognition",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "3039--3043",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361776"
}

@article{bb367612,
  AUTHOR = "Zheng, L. and Zhu, H. and Tian, S. and Zhao, Q.W. and Li, T.",
  TITLE = "Unsupervised Domain Adaptation on End-to-End Multi-Talker Overlapped Speech Recognition",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "3119--3123",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361777"
}

@article{bb367613,
  AUTHOR = "Lee, H. and Yoon, J.W. and Kim, S.S. and Kim, N.S.",
  TITLE = "Towards Maximum Likelihood Training for Transducer-Based Streaming Speech Recognition",
  JOURNAL = SPLetters,
  VOLUME = "32",
  YEAR = "2025",
  PAGES = "26--30",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361778"
}

@article{bb367614,
  AUTHOR = "Zhuang, X. and Qian, Y.K. and Wang, M.J.",
  TITLE = "Hypformer: A Fast Hypothesis-Driven Rescoring Speech Recognition Framework",
  JOURNAL = SPLetters,
  VOLUME = "32",
  YEAR = "2025",
  PAGES = "471--475",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361779"
}

@article{bb367615,
  AUTHOR = "Akman, A. and Sun, Q. and Schuller, B.W.",
  TITLE = "Improving Audio Explanations Using Audio Language Models",
  JOURNAL = SPLetters,
  VOLUME = "32",
  YEAR = "2025",
  PAGES = "741--745",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361780"
}

@article{bb367616,
  AUTHOR = "Liu, Y.P. and Yang, X. and Zhang, J.Y. and Xi, Y.L. and Qu, D.",
  TITLE = "TAML-Adapter: Enhancing Adapter Tuning Through Task-Agnostic Meta-Learning for Low-Resource Automatic Speech Recognition",
  JOURNAL = SPLetters,
  VOLUME = "32",
  YEAR = "2025",
  PAGES = "636--640",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361781"
}

@article{bb367617,
  AUTHOR = "Kumar, K.V. and Rao, R.R.",
  TITLE = "An approach for speaker diarisation using whale-anti coronavirus optimisation integrated deep fuzzy clustering",
  JOURNAL = IJCVR,
  VOLUME = "15",
  YEAR = "2025",
  NUMBER = "2",
  PAGES = "177--197",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361782"
}

@article{bb367618,
  AUTHOR = "Kim, T.Y. and Yang, J.F. and Park, E.",
  TITLE = "MSDLF-K: A Multimodal Feature Learning Approach for Sentiment Analysis in Korean Incorporating Text and Speech",
  JOURNAL = MultMed,
  VOLUME = "27",
  YEAR = "2025",
  PAGES = "1266--1276",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361783"
}

@article{bb367619,
  AUTHOR = "Zhang, W.J. and Xia, Z.H. and Ma, B. and Yan, D.",
  TITLE = "Paradoxical Role of Adversarial Attacks: Enabling Crosslinguistic Attacks and Information Hiding in Multilingual Speech Recognition",
  JOURNAL = SPLetters,
  VOLUME = "32",
  YEAR = "2025",
  PAGES = "1046--1050",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361784"
}

@article{bb367620,
  AUTHOR = "Jia, G. and He, D. and Zhou, X.",
  TITLE = "Low-Resource Speech Recognition of Radiotelephony Communications Based on Continuous Learning of In-Domain and Out-of-Domain Knowledge",
  JOURNAL = SPLetters,
  VOLUME = "32",
  YEAR = "2025",
  PAGES = "1136--1140",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361785"
}

@article{bb367621,
  AUTHOR = "Nga, C.H. and Vu, D.Q. and Le, P.T. and Luong, H.H. and Wang, J.C.",
  TITLE = "MLSS: Mandarin English Code-Switching Speech Recognition via Mutual Learning-Based Semi-Supervised Method",
  JOURNAL = SPLetters,
  VOLUME = "32",
  YEAR = "2025",
  PAGES = "1510--1514",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361786"
}

@article{bb367622,
  AUTHOR = "Peng, Z.H. and Chen, T.S. and Huang, S.P. and Hu, Y.Q.",
  TITLE = "Heterogeneous Correlation Aware Regularization for Sequential Confidence Calibration",
  JOURNAL = PAMI,
  VOLUME = "47",
  YEAR = "2025",
  NUMBER = "6",
  MONTH = jun,
  PAGES = "4597--4613",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361787"
}

@inproceedings{bb367623,
  AUTHOR = "Peng, Z.H. and Luo, Y. and Chen, T.S. and Xu, K. and Huang, S.P.",
  TITLE = "Perception and Semantic Aware Regularization for Sequential Confidence Calibration",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "10658--10668",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361788"
}

@article{bb367624,
  AUTHOR = "Lee, M.H. and Mo, J.H. and Kang, J.H. and Son, J.Y. and Chang, J.H.",
  TITLE = "Bayesian Language Model Adaptation for Personalized Speech Recognition",
  JOURNAL = SPLetters,
  VOLUME = "32",
  YEAR = "2025",
  PAGES = "1620--1624",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361789"
}

@article{bb367625,
  AUTHOR = "Ghane, M. and Safari, M.S.",
  TITLE = "End-to-End Target Speaker Speech Recognition Using Context-Aware Attention Mechanisms for Challenging Enrollment Scenario",
  JOURNAL = SPLetters,
  VOLUME = "32",
  YEAR = "2025",
  PAGES = "1940--1944",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361790"
}

@article{bb367626,
  AUTHOR = "Rouditchenko, A. and Thomas, S. and Kuehne, H. and Feris, R. and Glass, J.",
  TITLE = "mWhisper-Flamingo for Multilingual Audio-Visual Noise-Robust Speech Recognition",
  JOURNAL = SPLetters,
  VOLUME = "32",
  YEAR = "2025",
  PAGES = "2144--2148",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361791"
}

@article{bb367627,
  AUTHOR = "Ye, G. and Chen, Q.Q. and Kong, Z.Y. and Zhou, M.R. and Peng, Y.",
  TITLE = "Adaptive Multi-Granularity Information Exploration for EEG-Based Speech Recognition",
  JOURNAL = SPLetters,
  VOLUME = "32",
  YEAR = "2025",
  PAGES = "2987--2991",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361792"
}

@inproceedings{bb367628,
  AUTHOR = "Chen, K. and Gou, Y.H. and Huang, R. and Liu, Z. and Tan, D.X. and Xu, J. and Wang, C.W. and Zhu, Y. and Zeng, Y.H. and Yang, K. and Wang, D.D. and Xiang, K. and Li, H.Y. and Bai, H. and Han, J.H. and Li, X.H. and Jin, W. and Xie, N. and Zhang, Y. and Kwok, J.T. and Zhao, H.S. and Liang, X.D. and Yeung, D.Y. and Chen, X. and Li, Z.G. and Zhang, W. and Liu, Q. and Hong, L.Q. and Hou, L. and Xu, H.",
  TITLE = "EMOVA: Empowering Language Models to See, Hear and Speak with Vivid Emotions",
  BOOKTITLE = CVPR25,
  YEAR = "2025",
  PAGES = "5455--5466",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361793"
}

@inproceedings{bb367629,
  AUTHOR = "Wang, J.Y. and Liu, Z. and Wu, X.Y.",
  TITLE = "LOCO-MAD: Long-range Context-enhanced Model Towards Plot-centric Movie Audio Description",
  BOOKTITLE = ACCV24,
  YEAR = "2024",
  PAGES = "V: 95--112",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361794"
}

@article{bb367630,
  AUTHOR = "Lin, J. and Ge, M. and Wang, W. and Li, H.Z. and Feng, M.L.",
  TITLE = "Selective HuBERT: Self-Supervised Pre-Training for Target Speaker in Clean and Mixture Speech",
  JOURNAL = SPLetters,
  VOLUME = "31",
  YEAR = "2024",
  PAGES = "1014--1018",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361795"
}

@inproceedings{bb367631,
  AUTHOR = "Feng, Y. and Liu, Z.Y. and Luo, S. and Ling, Y. and Dong, S. and Wang, S.Y. and Ferry, B.",
  TITLE = "Noise-Free Audio Signal Processing in Noisy Environment: A Hardware and Algorithm Solution",
  BOOKTITLE = VAQuality24,
  YEAR = "2024",
  PAGES = "368--373",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361796"
}

@inproceedings{bb367632,
  AUTHOR = "Ng, H.W. and Guan, C.T.",
  TITLE = "Efficient Representation Learning for Inner Speech Domain Generalization",
  BOOKTITLE = CAIP23,
  YEAR = "2023",
  PAGES = "I:131--141",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361797"
}

@inproceedings{bb367633,
  AUTHOR = "Oneata, D. and Cucu, H.",
  TITLE = "Improving Multimodal Speech Recognition by Data Augmentation and Speech Representations",
  BOOKTITLE = MULA22,
  YEAR = "2022",
  PAGES = "4578--4587",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361798"
}

@inproceedings{bb367634,
  AUTHOR = "Tapia, L.S. and Gomez, A. and Esparza, M. and Jatla, V. and Pattichis, M. and Celedon Pattichis, S. and Lopez Leiva, C.",
  TITLE = "Bilingual Speech Recognition by Estimating Speaker Geometry from Video Data",
  BOOKTITLE = CAIP21,
  YEAR = "2021",
  PAGES = "I:79--89",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361799"
}

@inproceedings{bb367635,
  AUTHOR = "Qiao, F.C. and Peng, X.",
  TITLE = "Uncertainty-guided Model Generalization to Unseen Domains",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "6786--6796",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361800"
}

@inproceedings{bb367636,
  AUTHOR = "Ngantcha, P. and Amith, M. and Tao, C. and Roberts, K.",
  TITLE = "Patient-Provider Communication Training Models for Interactive Speech Devices",
  BOOKTITLE = DHM21,
  YEAR = "2021",
  PAGES = "I:250--268",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361801"
}

@inproceedings{bb367637,
  AUTHOR = "Wu, Y.C. and Liao, W.H.",
  TITLE = "Toward Text-independent Cross-lingual Speaker Recognition Using English-Mandarin-Taiwanese Dataset",
  BOOKTITLE = ICPR21,
  YEAR = "2021",
  PAGES = "8515--8522",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361802"
}

@inproceedings{bb367638,
  AUTHOR = "Chen, Y. and Ma, Y. and Ko, T. and Wang, J.P. and Li, Q.",
  TITLE = "MetaMix: Improved Meta-Learning with Interpolation-based Consistency Regularization",
  BOOKTITLE = ICPR21,
  YEAR = "2021",
  PAGES = "407--414",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361803"
}

@inproceedings{bb367639,
  AUTHOR = "Zhou, L.X. and Zhang, J.",
  TITLE = "From Bottom to Top: A Coordinated Feature Representation Method for Speech Recognition",
  BOOKTITLE = MMDLCA20,
  YEAR = "2020",
  PAGES = "396--403",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361804"
}

@inproceedings{bb367640,
  AUTHOR = "Zhao, J. and Parry, C.J. and dos Anjos, R. and Anslow, C. and Rhee, T.",
  TITLE = "Voice Interaction for Augmented Reality Navigation Interfaces with Natural Language Understanding",
  BOOKTITLE = IVCNZ20,
  YEAR = "2020",
  PAGES = "1--6",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361805"
}

@inproceedings{bb367641,
  AUTHOR = "Abakarim, F. and Abenaou, A.",
  TITLE = "Amazigh isolated word speech recognition system using the Adaptive Orthogonal Transform Method",
  BOOKTITLE = ISCV20,
  YEAR = "2020",
  PAGES = "1--6",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361806"
}

@inproceedings{bb367642,
  AUTHOR = "Perez, A.F. and Sanguineti, V. and Morerio, P. and Murino, V.",
  TITLE = "Audio-Visual Model Distillation Using Acoustic Images",
  BOOKTITLE = WACV20,
  YEAR = "2020",
  PAGES = "2843--2852",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361807"
}

@inproceedings{bb367643,
  AUTHOR = "Tapu, R. and Mocanu, B. and Zaharia, T.",
  TITLE = "Dynamic Subtitles: A Multimodal Video Accessibility Enhancement Dedicated to Deaf and Hearing Impaired Users",
  BOOKTITLE = ACVR19,
  YEAR = "2019",
  PAGES = "2558--2566",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361808"
}

@inproceedings{bb367644,
  AUTHOR = "Roberto, A. and Saggese, A. and Vento, M.",
  TITLE = "A Challenging Voice Dataset for Robotic Applications in Noisy Environments",
  BOOKTITLE = CAIP19,
  YEAR = "2019",
  PAGES = "II:354--364",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361809"
}

@inproceedings{bb367645,
  AUTHOR = "Naszadi, K. and Oualil, Y. and Klakow, D.",
  TITLE = "Image-Sensitive Language Modeling for Automatic Speech Recognition",
  BOOKTITLE = VL18,
  YEAR = "2018",
  PAGES = "IV:173--179",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361810"
}

% NOTE(review): the next three entries give BOOKTITLE as the literal string
% "MMMod19", while sibling entries use macros (e.g. MMMod15). Left as-is
% because the macro definitions are not visible here; confirm whether a
% MMMod19 macro exists and should be referenced instead.
% NOTE(review): bb367646 lists both "Gauvain, J." and "Gauvain, J.L.";
% possibly the same person entered twice. Verify against the publication.

@inproceedings{bb367646,
  AUTHOR = "Gauvain, J. and Lamel, L. and Le, V.B. and Despres, J. and Gauvain, J.L. and Messaoudi, A. and Vieru, B. and Ben Kheder, W.",
  TITLE = "Challenges in Audio Processing of Terrorist-Related Data",
  BOOKTITLE = "MMMod19",
  YEAR = "2019",
  PAGES = "II:80--92",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361811"
}

@inproceedings{bb367647,
  AUTHOR = "Jorrin, J. and Buera, L.",
  TITLE = "DANTE Speaker Recognition Module. An Efficient and Robust Automatic Speaker Searching Solution for Terrorism-Related Scenarios",
  BOOKTITLE = "MMMod19",
  YEAR = "2019",
  PAGES = "I:704--715",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361812"
}

@inproceedings{bb367648,
  AUTHOR = "Galanopoulos, D. and Mezaris, V.",
  TITLE = "Temporal Lecture Video Fragmentation Using Word Embeddings",
  BOOKTITLE = "MMMod19",
  YEAR = "2019",
  PAGES = "II:254--265",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361813"
}

@inproceedings{bb367649,
  AUTHOR = "Mukherjee, H. and Obaidullah, S.M. and Phadikar, S. and Roy, K.",
  TITLE = "A Dravidian Language Identification System",
  BOOKTITLE = ICPR18,
  YEAR = "2018",
  PAGES = "2654--2657",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361814"
}

@inproceedings{bb367650,
  AUTHOR = "Galiotou, E. and Karanikolas, N. and Ralli, A.",
  TITLE = "Preservation and Management of Greek Dialectal Data",
  BOOKTITLE = EuroMed18,
  YEAR = "2018",
  PAGES = "I:752--761",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361815"
}

@inproceedings{bb367651,
  AUTHOR = "Li, R. and Yu, J.",
  TITLE = "Multimodal 3D visible articulation system for syllable based Mandarin Chinese training",
  BOOKTITLE = VCIP17,
  YEAR = "2017",
  PAGES = "1--4",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361816"
}

@inproceedings{bb367652,
  AUTHOR = "Le, N. and Odobez, J.M.",
  TITLE = "Improving Speaker Turn Embedding by Crossmodal Transfer Learning from Face Embedding",
  BOOKTITLE = CVAVM17,
  YEAR = "2017",
  PAGES = "428--437",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361817"
}

@inproceedings{bb367653,
  AUTHOR = "Arandjelovic, R. and Zisserman, A.",
  TITLE = "Look, Listen and Learn",
  BOOKTITLE = ICCV17,
  YEAR = "2017",
  PAGES = "609--617",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361818"
}

@inproceedings{bb367654,
  AUTHOR = "Muniandy, T. and Alvar, T.A. and Boon, C.J.",
  TITLE = "Mandarin Language Learning System for Nasal Voice User",
  BOOKTITLE = IVIC17,
  YEAR = "2017",
  PAGES = "376--388",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361819"
}

@inproceedings{bb367655,
  AUTHOR = "Madhavi, M.C. and Patil, H.A. and Bhendawade, N.",
  TITLE = "Spoken Keyword Retrieval Using Source and System Features",
  BOOKTITLE = PReMI17,
  YEAR = "2017",
  PAGES = "333--341",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361820"
}

@inproceedings{bb367656,
  AUTHOR = "Kacprzak, S.",
  TITLE = "Spoken language clustering in the i-vectors space",
  BOOKTITLE = WSSIP17,
  YEAR = "2017",
  PAGES = "1--5",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361821"
}

@inproceedings{bb367657,
  AUTHOR = "Pironkov, G. and Dupont, S. and Dutoit, T.",
  TITLE = "Speaker-aware Multi-Task Learning for automatic speech recognition",
  BOOKTITLE = ICPR16,
  YEAR = "2016",
  PAGES = "2900--2905",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361822"
}

@inproceedings{bb367658,
  AUTHOR = "Zhao, Y. and Zhao, R. and Wang, X.Y. and Ji, Q.",
  TITLE = "Multilingual articulatory features augmentation learning",
  BOOKTITLE = ICPR16,
  YEAR = "2016",
  PAGES = "2895--2899",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361823"
}

@inproceedings{bb367659,
  AUTHOR = "Ogawa, T. and Mallidi, S.H. and Dupoux, E. and Cohen, J. and Feldman, N.H. and Hermansky, H.",
  TITLE = "A new efficient measure for accuracy prediction and its application to multistream-based unsupervised adaptation",
  BOOKTITLE = ICPR16,
  YEAR = "2016",
  PAGES = "2222--2227",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361824"
}

@inproceedings{bb367660,
  AUTHOR = "Mzah, Y. and Ahfir, M. and Jaidane, M.",
  TITLE = "Late pre-dereverberation for speech intelligibility enhancement in public address systems",
  BOOKTITLE = ISIVC16,
  YEAR = "2016",
  PAGES = "291--296",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361825"
}

@inproceedings{bb367661,
  AUTHOR = "Montalvo, A. and Calvo, J.R.",
  TITLE = "Discriminative Capacity and Phonetic Information of Bottleneck Features in Speech",
  BOOKTITLE = CIARP16,
  YEAR = "2016",
  PAGES = "134--141",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361826"
}

@inproceedings{bb367662,
  AUTHOR = "Ondas, S. and Juhar, J.",
  TITLE = "Towards human-machine dialog in Slovak",
  BOOKTITLE = WSSIP16,
  YEAR = "2016",
  PAGES = "1--4",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361827"
}

@inproceedings{bb367663,
  AUTHOR = "Calvo, M. and Hurtado, L.F. and Garcia, F. and Sanchis, E.",
  TITLE = "Combining Several ASR Outputs in a Graph-Based SLU System",
  BOOKTITLE = CIARP15,
  YEAR = "2015",
  PAGES = "551--558",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361828"
}

@inproceedings{bb367664,
  AUTHOR = "Rohrbach, A. and Rohrbach, M. and Schiele, B.",
  TITLE = "The Long-Short Story of Movie Description",
  BOOKTITLE = GCPR15,
  YEAR = "2015",
  PAGES = "209--221",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361829"
}

@inproceedings{bb367665,
  AUTHOR = "Rohrbach, A. and Rohrbach, M. and Tandon, N. and Schiele, B.",
  TITLE = "A dataset for Movie Description",
  BOOKTITLE = CVPR15,
  YEAR = "2015",
  PAGES = "3202--3212",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361830"
}

@inproceedings{bb367666,
  AUTHOR = "Zhao, H.Q. and Qin, Z.C. and Wang, Y. and Wang, Y.X.",
  TITLE = "A Bag-of-phonemes Model for Homeplace Classification of Mandarin Speakers",
  BOOKTITLE = IbPRIA15,
  YEAR = "2015",
  PAGES = "683--690",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361831"
}

@inproceedings{bb367667,
  AUTHOR = "Yakubu, M.A. and Maddage, N.C. and Atrey, P.K.",
  TITLE = "Audio Secret Management Scheme Using Shamir's Secret Sharing",
  BOOKTITLE = MMMod15,
  YEAR = "2015",
  PAGES = "I: 396--407",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361832"
}

@inproceedings{bb367668,
  AUTHOR = "Bello, C. and Ribas, D. and Calvo, J.R. and Ferrer, C.A.",
  TITLE = "From Speech Quality Measures to Speaker Recognition Performance",
  BOOKTITLE = CIARP14,
  YEAR = "2014",
  PAGES = "199--206",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361833"
}

@inproceedings{bb367669,
  AUTHOR = "Oropeza Rodriguez, J.L. and Suarez Guerra, S. and Jimenez Hernandez, M.",
  TITLE = "The Place Theory as an Alternative Solution in Automatic Speech Recognition Tasks",
  BOOKTITLE = CIARP14,
  YEAR = "2014",
  PAGES = "167--174",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361834"
}

@article{bb367670,
  AUTHOR = "Diez, M. and Varona, A. and Penagarikano, M. and Rodriguez Fuentes, L.J. and Bordel, G.",
  TITLE = "On the Projection of PLLRs for Unbounded Feature Distributions in Spoken Language Recognition",
  JOURNAL = SPLetters,
  VOLUME = "21",
  YEAR = "2014",
  NUMBER = "9",
  MONTH = sep,
  PAGES = "1073--1077",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361835"
}

@inproceedings{bb367671,
  AUTHOR = "Diez, M. and Varona, A. and Penagarikano, M. and Rodriguez Fuentes, L.J. and Bordel, G.",
  TITLE = "Optimizing PLLR Features for Spoken Language Recognition",
  BOOKTITLE = ICPR14,
  YEAR = "2014",
  PAGES = "779--784",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361836"
}

@inproceedings{bb367672,
  AUTHOR = "Missaoui, I. and Lachiri, Z.",
  TITLE = "Gabor Filterbank Features for Robust Speech Recognition",
  BOOKTITLE = ICISP14,
  YEAR = "2014",
  PAGES = "665--671",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361837"
}

@inproceedings{bb367673,
  AUTHOR = "Carletti, V. and Foggia, P. and Percannella, G. and Saggese, A. and Strisciuglio, N. and Vento, M.",
  TITLE = "Audio surveillance using a bag of aural words classifier",
  BOOKTITLE = AVSS13,
  YEAR = "2013",
  PAGES = "81--86",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361838"
}

@inproceedings{bb367674,
  AUTHOR = "Hurtado, L.F. and Calvo, M. and Gomez, J.A. and Garcia, F. and Sanchis, E.",
  TITLE = "A Phonetic-Based Approach to Query-by-Example Spoken Term Detection",
  BOOKTITLE = CIARP13,
  YEAR = "2013",
  PAGES = "I:504--511",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361839"
}

@inproceedings{bb367675,
  AUTHOR = "Chaloupka, J. and Nouza, J. and Kucharova, M.",
  TITLE = "Using Various Types of Multimedia Resources to Train System for Automatic Transcription of Czech Historical Oral Archives",
  BOOKTITLE = MM4CH13,
  YEAR = "2013",
  PAGES = "228--237",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361840"
}

@inproceedings{bb367676,
  AUTHOR = "Nouza, J. and Cerva, P. and Silovsky, J.",
  TITLE = "Dealing with Bilingualism in Automatic Transcription of Historical Archive of Czech Radio",
  BOOKTITLE = MM4CH13,
  YEAR = "2013",
  PAGES = "238--246",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361841"
}

@inproceedings{bb367677,
  AUTHOR = "Chan, K.Y. and Nordholm, S.E. and Yiu, C.K.F.",
  TITLE = "Multichannel filters for speech recognition using a particle swarm optimization",
  BOOKTITLE = ICARCV12,
  YEAR = "2012",
  PAGES = "937--942",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361842"
}

@inproceedings{bb367678,
  AUTHOR = "Zhao, Y. and Xu, X.N. and Yang, G.S.",
  TITLE = "Unsupervised Tibetan speech features Learning based on Dynamic Bayesian Networks",
  BOOKTITLE = ICPR12,
  YEAR = "2012",
  PAGES = "2319--2322",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361843"
}

@inproceedings{bb367679,
  AUTHOR = "Nour Eddine, L. and Abdelkader, A.",
  TITLE = "Reduced Universal Background Model for Speech Recognition and Identification System",
  BOOKTITLE = MCPR12,
  YEAR = "2012",
  PAGES = "303--312",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361844"
}

@inproceedings{bb367680,
  AUTHOR = "Amrous, A.I. and Debyeche, M.",
  TITLE = "Robust Arabic Multi-stream Speech Recognition System in Noisy Environment",
  BOOKTITLE = ICISP12,
  YEAR = "2012",
  PAGES = "571--578",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361845"
}

@inproceedings{bb367681,
  AUTHOR = "Touazi, A. and Debyeche, M.",
  TITLE = "New Encoding Algorithm for Distributed Speech Recognition Based on DTFS Transform",
  BOOKTITLE = ICISP12,
  YEAR = "2012",
  PAGES = "547--554",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361846"
}

@inproceedings{bb367682,
  AUTHOR = "Ghigi, F. and Tamarit, V. and Martinez Hinarejos, C.D. and Benedi, J.M.",
  TITLE = "Active Learning for Dialogue Act Labelling",
  BOOKTITLE = IbPRIA11,
  YEAR = "2011",
  PAGES = "652--659",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361847"
}

@inproceedings{bb367683,
  AUTHOR = "Meng, L. and Xiang, J. and Zhao, D. and Zhao, H.",
  TITLE = "A New Application of MEG and DTI on Word Recognition",
  BOOKTITLE = ICPR10,
  YEAR = "2010",
  PAGES = "2472--2475",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361848"
}

@inproceedings{bb367684,
  AUTHOR = "O'Gorman, L.",
  TITLE = "Latency in Speech Feature Analysis for Telepresence Event Coding",
  BOOKTITLE = ICPR10,
  YEAR = "2010",
  PAGES = "4464--4467",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361849"
}

@inproceedings{bb367685,
  AUTHOR = "Zhang, S.L. and Shi, Q. and Qin, Y.",
  TITLE = "Modeling Syllable-Based Pronunciation Variation for Accented Mandarin Speech Recognition",
  BOOKTITLE = ICPR10,
  YEAR = "2010",
  PAGES = "1606--1609",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361850"
}

@inproceedings{bb367686,
  AUTHOR = "Zhang, S.L. and Zhang, S.W. and Xu, B.",
  TITLE = "A Two-level Method for Unsupervised Speaker-based Audio Segmentation",
  BOOKTITLE = ICPR06,
  YEAR = "2006",
  PAGES = "IV: 298--301",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361851"
}

@inproceedings{bb367687,
  AUTHOR = "Krajewski, J. and Batliner, A. and Kessel, S.",
  TITLE = "Comparing Multiple Classifiers for Speech-Based Detection of Self-Confidence: A Pilot Study",
  BOOKTITLE = ICPR10,
  YEAR = "2010",
  PAGES = "3716--3719",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361852"
}

@inproceedings{bb367688,
  AUTHOR = "Nolazco Flores, J.A. and Aceves L., R.A. and Garcia Perera, L.P.",
  TITLE = "Speech Magnitude-Spectrum Information-Entropy (MSIE) for Automatic Speech Recognition in Noisy Environments",
  BOOKTITLE = ICPR10,
  YEAR = "2010",
  PAGES = "4364--4367",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361853"
}

@inproceedings{bb367689,
  AUTHOR = "Kelly, F. and Harte, N.",
  TITLE = "Auditory Features Revisited for Robust Speech Recognition",
  BOOKTITLE = ICPR10,
  YEAR = "2010",
  PAGES = "4456--4459",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361854"
}

@inproceedings{bb367690,
  AUTHOR = "Xie, Z.Q. and Miao, Z.J.",
  TITLE = "Tone Recognition of Isolated Mandarin Syllables",
  BOOKTITLE = ICISP10,
  YEAR = "2010",
  PAGES = "412--418",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361855"
}

@inproceedings{bb367691,
  AUTHOR = "Alotaibi, Y.A. and Alghamdi, M. and Alotaiby, F.",
  TITLE = "Speech Recognition System of Arabic Alphabet Based on a Telephony Arabic Corpus",
  BOOKTITLE = ICISP10,
  YEAR = "2010",
  PAGES = "122--129",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361856"
}

% The underscore in the title below is escaped so LaTeX does not read it as
% a math-mode subscript when the entry is typeset.
@inproceedings{bb367692,
  AUTHOR = "Lu, G. and Yu, H.Z. and Li, Y.H. and Zhang, R.S.",
  TITLE = "Study on SAMPA\_ST for Lhasa Tibetan and realization of automatic labelling system",
  BOOKTITLE = IASP10,
  YEAR = "2010",
  PAGES = "133--137",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361857"
}

@inproceedings{bb367693,
  AUTHOR = "Chen, X.Y. and Jin, H.M. and Yu, H.Z.",
  TITLE = "Acoustic research on long and short vowels in Tibetan Lhasa dialect",
  BOOKTITLE = IASP10,
  YEAR = "2010",
  PAGES = "561--564",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361858"
}

@inproceedings{bb367694,
  AUTHOR = "Sahu, V.P. and Mishra, H.K. and Sekhar, C.C.",
  TITLE = "Variational Bayes Adapted GMM Based Models for Audio Clip Classification",
  BOOKTITLE = PReMI09,
  YEAR = "2009",
  PAGES = "513--518",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361859"
}

@inproceedings{bb367695,
  AUTHOR = "Verteletskaya, E. and Sakhnov, K. and Simak, B.",
  TITLE = "Pitch Detection Algorithms and Voiced/Unvoiced Classification for Noisy Speech",
  BOOKTITLE = WSSIP09,
  YEAR = "2009",
  PAGES = "1--5",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361860"
}

@inproceedings{bb367696,
  AUTHOR = "Vlaj, D. and Kos, M. and Grasic, M. and Kacic, Z.",
  TITLE = "Influence of Hangover and Hangbefore Criteria on Automatic Speech Recognition",
  BOOKTITLE = WSSIP09,
  YEAR = "2009",
  PAGES = "1--4",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361861"
}

@inproceedings{bb367697,
  AUTHOR = "Hanzl, V. and Pollak, P.",
  TITLE = "Accuracy Analysis of Generalized Pronunciation Variant Selection in ASR Systems",
  BOOKTITLE = COST08,
  YEAR = "2008",
  PAGES = "399--408",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361862"
}

@inproceedings{bb367698,
  AUTHOR = "Camarena Ibarrola, A. and Chavez, E. and Tellez, E.S.",
  TITLE = "Robust Radio Broadcast Monitoring Using a Multi-Band Spectral Entropy Signature",
  BOOKTITLE = CIARP09,
  YEAR = "2009",
  PAGES = "587--594",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361863"
}

@inproceedings{bb367699,
  AUTHOR = "Mantilla Caeiros, A. and Miyatake, M.N. and Perez Meana, H.",
  TITLE = "Isolate Speech Recognition Based on Time-Frequency Analysis Methods",
  BOOKTITLE = CIARP09,
  YEAR = "2009",
  PAGES = "297--304",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1023.html#TT361864"
}