@article{bb373800,
  author    = {Shahnawazuddin, S. and Adiga, N. and Kathania, H. K. and Sai, B. T.},
  title     = {Creating speaker independent {ASR} system through prosody modification based data augmentation},
  journal   = PRL,
  volume    = {131},
  year      = {2020},
  pages     = {213--218},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367904},
}
@article{bb373801,
  author    = {Park, T. J. and Han, K. J. and Kumar, M. and Narayanan, S.},
  title     = {Auto-Tuning Spectral Clustering for Speaker Diarization Using Normalized Maximum Eigengap},
  journal   = SPLetters,
  volume    = {27},
  year      = {2020},
  pages     = {381--385},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367905},
}
@article{bb373802,
  author    = {Deb, S. and Dandapat, S. and Krajewski, J.},
  title     = {Analysis and Classification of Cold Speech Using Variational Mode Decomposition},
  journal   = AffCom,
  volume    = {11},
  year      = {2020},
  number    = {2},
  month     = apr,
  pages     = {296--307},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367906},
}
% "Montes y Gomez" is braced so BibTeX keeps it as one surname; unbraced, the
% lowercase "y" makes "Montes y" parse as a von-part with last name "Gomez".
@article{bb373803,
  author    = {Sanchez Junquera, J. and Villasenor Pineda, L. and {Montes y Gomez}, M. and Rosso, P. and Stamatatos, E.},
  title     = {Masking domain-specific information for cross-domain deception detection},
  journal   = PRL,
  volume    = {135},
  year      = {2020},
  pages     = {122--130},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367907},
}
@inproceedings{bb373804,
  author    = {Rill Garcia, R. and Villasenor Pineda, L. and Reyes Meza, V. and Escalante, H. J.},
  title     = {From Text to Speech: A Multimodal Cross-Domain Approach for Deception Detection},
  booktitle = MIPPSNA18,
  year      = {2018},
  pages     = {164--177},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367908},
}
@article{bb373805,
  author    = {Lim, H. and Kim, Y. and Kim, H.},
  title     = {Cross-Informed Domain Adversarial Training for Noise-Robust Wake-Up Word Detection},
  journal   = SPLetters,
  volume    = {27},
  year      = {2020},
  pages     = {1769--1773},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367909},
}
@article{bb373806,
  author    = {Zhao, L. and Zhang, A. and Liu, Y. and Fei, H.},
  title     = {Encoding multi-granularity structural information for joint {Chinese} word segmentation and {POS} tagging},
  journal   = PRL,
  volume    = {138},
  year      = {2020},
  pages     = {163--169},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367910},
}
@article{bb373807,
  author    = {Bang, J. and Han, S. and Lee, J. H.},
  title     = {Listening-oriented response generation by exploiting user responses},
  journal   = PRL,
  volume    = {140},
  year      = {2020},
  pages     = {230--237},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367911},
}
@article{bb373808,
  author    = {Zhou, J. T. Y. and Zhang, H. and Jin, D. and Peng, X.},
  title     = {Dual Adversarial Transfer for Sequence Labeling},
  journal   = PAMI,
  volume    = {43},
  year      = {2021},
  number    = {2},
  month     = feb,
  pages     = {434--446},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367912},
}
@article{bb373809,
  author    = {Chen, N. and Watanabe, S. and Villalba, J. and Zelasko, P. and Dehak, N.},
  title     = {Non-Autoregressive Transformer for Speech Recognition},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {121--125},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367913},
}
@article{bb373810,
  author    = {Haeb Umbach, R. and Heymann, J. and Drude, L. and Watanabe, S. and Delcroix, M. and Nakatani, T.},
  title     = {Far-Field Automatic Speech Recognition},
  journal   = PIEEE,
  volume    = {109},
  year      = {2021},
  number    = {2},
  month     = feb,
  pages     = {124--148},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367914},
}
@article{bb373811,
  author    = {Fritsch, J. and Magimai Doss, M.},
  title     = {Utterance Verification-Based Dysarthric Speech Intelligibility Assessment Using Phonetic Posterior Features},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {224--228},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367915},
}
@article{bb373812,
  author    = {Lu, L. and Kanda, N. and Li, J. Y. and Gong, Y. F.},
  title     = {Streaming End-to-End Multi-Talker Speech Recognition},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {803--807},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367916},
}
@article{bb373813,
  author    = {Yi, C. and Zhou, S. Y. and Xu, B.},
  title     = {Efficiently Fusing Pretrained Acoustic and Linguistic Encoders for Low-Resource Speech Recognition},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {788--792},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367917},
}
@article{bb373814,
  author    = {Xu, P. and Huang, Y. and Yuan, T. and Xiang, T. and Hospedales, T. M. and Song, Y. Z. and Wang, L.},
  title     = {On Learning Semantic Representations for Large-Scale Abstract Sketches},
  journal   = CirSysVideo,
  volume    = {31},
  year      = {2021},
  number    = {9},
  month     = sep,
  pages     = {3366--3379},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367918},
}
@article{bb373815,
  author    = {Kim, J. and Lee, Y.},
  title     = {Improving End-to-End Contextual Speech Recognition via a Word-Matching Algorithm With Backward Search},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {2087--2091},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367919},
}
@article{bb373816,
  author    = {Zhu, S. and Zhang, Y. and He, K. and Zhao, L.},
  title     = {Acoustic Word Embedding Based on Multi-Head Attention Quadruplet Network},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {184--188},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367920},
}
@article{bb373817,
  author    = {Tiwari, R. and Sharma, V. and Sahoo, R. C.},
  title     = {Isolated spoken word recognition using packed-{MFCC} on padded-voice signal for unscripted languages},
  journal   = IJCVR,
  volume    = {12},
  year      = {2022},
  number    = {2},
  pages     = {120--140},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367921},
}
@article{bb373818,
  author    = {Tian, Z. K. and Yi, J. Y. and Tao, J. H. and Zhang, S. and Wen, Z. Q.},
  title     = {Hybrid Autoregressive and Non-Autoregressive Transformer Models for Speech Recognition},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {762--766},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367922},
}
@article{bb373819,
  author    = {Xiao, F. Y. and Guan, J. and Lan, H. Y. and Zhu, Q. and Wang, W. W.},
  title     = {Local Information Assisted Attention-Free Decoder for Audio Captioning},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {1604--1608},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367923},
}
@article{bb373820,
  author    = {Perochon, S.},
  title     = {A Presentation and Short Discussion of {rVAD-fast}, a Fast Voice Activity Detector},
  journal   = IPOL,
  volume    = {12},
  year      = {2022},
  pages     = {404--419},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367924},
}
@article{bb373821,
  author    = {Huang, H. J. and Huang, P. J. and Zhu, Z. B. and Li, J. and Lin, P.},
  title     = {{CLID}: A Chunk-Level Intent Detection Framework for Multiple Intent Spoken Language Understanding},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {2123--2127},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367925},
}
@article{bb373822,
  author    = {Du, X. and Pun, C. M.},
  title     = {Robust Audio Patch Attacks Using Physical Sample Simulation and Adversarial Patch Noise Generation},
  journal   = MultMed,
  volume    = {24},
  year      = {2022},
  pages     = {4381--4393},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367926},
}
% pages holds a single number with no range - presumably an article number;
% TODO confirm.
@article{bb373823,
  author    = {Kim, H. and Park, J. and Lee, J. W.},
  title     = {Generating Transferable Adversarial Examples for Speech Classification},
  journal   = PR,
  volume    = {137},
  year      = {2023},
  pages     = {109286},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367927},
}
@article{bb373824,
  author    = {Wei, G. Y. and Duan, Z. K. and Li, S. and Yu, X. M. and Yang, G. G.},
  title     = {{LFEformer}: Local Feature Enhancement Using Sliding Window With Deformability for Automatic Speech Recognition},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {180--184},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367928},
}
@article{bb373825,
  author    = {Xiao, F. Y. and Guan, J. and Zhu, Q. and Wang, W. W.},
  title     = {Graph Attention for Automated Audio Captioning},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {413--417},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367929},
}
@article{bb373826,
  author    = {Chang, C. M. and Lee, C. C.},
  title     = {Learning Enhanced Acoustic Latent Representation for Small Scale Affective Corpus with Adversarial Cross Corpora Integration},
  journal   = AffCom,
  volume    = {14},
  year      = {2023},
  number    = {2},
  month     = apr,
  pages     = {1308--1321},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367930},
}
@article{bb373827,
  author    = {Qu, H. L. and Su, X. D. and Wang, Y. and Hao, X. and Gao, G. L.},
  title     = {Noise-Separated Adaptive Feature Distillation for Robust Speech Recognition},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {763--767},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367931},
}
@article{bb373828,
  author    = {Nga, C. H. and Vu, D. Q. and Luong, H. H. and Huang, C. L. and Wang, J. C.},
  title     = {Cyclic Transfer Learning for {Mandarin-English} Code-Switching Speech Recognition},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {1387--1391},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367932},
}
@article{bb373829,
  author    = {Dong, F. and Qian, Y. Y. and Wang, T. L. and Liu, P. and Cao, J. W.},
  title     = {A Transformer-Based End-to-End Automatic Speech Recognition Algorithm},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {1592--1596},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367933},
}
@article{bb373830,
  author    = {Fan, P. and Shan, C. H. and Sun, S. N. and Yang, Q. and Zhang, J. W.},
  title     = {Key Frame Mechanism for Efficient Conformer Based End-to-End Speech Recognition},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {1612--1616},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367934},
}
% The source carried the placeholder pages "xx-yy"; the real page range or
% article number is not recorded here, so the field is omitted instead of
% printing the placeholder. TODO: fill in once known.
@article{bb373831,
  author    = {Mahmoudi, H. and Camboim, S. and Brovelli, M. A.},
  title     = {Development of a Voice Virtual Assistant for the Geospatial Data Visualization Application on the Web},
  journal   = IJGI,
  volume    = {12},
  year      = {2023},
  number    = {11},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367935},
}
@article{bb373832,
  author    = {Vitolo, P. and Liguori, R. and di Benedetto, L. and Rubino, A. and Licciardo, G. D.},
  title     = {Automatic Audio Feature Extraction for Keyword Spotting},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {161--165},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367936},
}
@article{bb373833,
  author    = {Li, J. H. and Duan, Z. K. and Li, S. and Yu, X. M. and Yang, G. G.},
  title     = {{ESAformer}: Enhanced Self-Attention for Automatic Speech Recognition},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {471--475},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367937},
}
@article{bb373834,
  author    = {Nie, W. Z. and Bao, Y. and Zhao, Y. and Liu, A.},
  title     = {Long Dialogue Emotion Detection Based on Commonsense Knowledge Graph Guidance},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {514--528},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367938},
}
@article{bb373835,
  author    = {Sun, T. L. and Chen, H. N. and Hu, G. S. and He, L. H. and Zhao, C. R.},
  title     = {Explainability of Speech Recognition Transformers via Gradient-Based Attention Visualization},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {1395--1406},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367939},
}
@article{bb373836,
  author    = {Jacobs, C. and Kamper, H.},
  title     = {Leveraging Multilingual Transfer for Unsupervised Semantic Acoustic Word Embeddings},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {311--315},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367940},
}
% Two consecutive authors are both recorded as "Xu, B." - presumably two
% distinct people rather than an accidental duplicate; TODO confirm.
@article{bb373837,
  author    = {Wang, F. Y. and Xu, B. and Xu, B.},
  title     = {{SSCFormer}: Push the Limit of Chunk-Wise Conformer for Streaming {ASR} Using Sequentially Sampled Chunks and Chunked Causal Convolution},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {421--425},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367941},
}
@article{bb373838,
  author    = {Fan, R. and Shankar, N. B. and Alwan, A.},
  title     = {{UniEnc-CASSNAT}: An Encoder-Only Non-Autoregressive {ASR} for Speech {SSL} Models},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {711--715},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367942},
}
@article{bb373839,
  author    = {Xing, B. and Tsang, I. W.},
  title     = {Co-Guiding for Multi-Intent Spoken Language Understanding},
  journal   = PAMI,
  volume    = {46},
  year      = {2024},
  number    = {5},
  month     = may,
  pages     = {2965--2980},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367943},
}
@article{bb373840,
  author    = {Hwang, K. and Jung, I. H. and Lee, J. M.},
  title     = {An implementation of searchable video player},
  journal   = IJCVR,
  volume    = {14},
  year      = {2024},
  number    = {3},
  pages     = {325--337},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367944},
}
@article{bb373841,
  author    = {Ma, Y. K. and Zhang, C. and Chen, Q. and Wang, W. and Ma, B.},
  title     = {Tuning Large Language Model for Speech Recognition With Mixed-Scale Re-Tokenization},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {1740--1744},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367945},
}
@article{bb373842,
  author    = {Mu, B. S. and Wan, X. and Zheng, N. and Zhou, H. and Xie, L.},
  title     = {{MMGER}: Multi-Modal and Multi-Granularity Generative Error Correction With {LLM} for Joint Accent and Speech Recognition},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {1940--1944},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367946},
}
@article{bb373843,
  author    = {Shi, Y. and Li, L. and Wang, D. and Han, J. Q.},
  title     = {Keyword Guided Target Speech Recognition},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {1945--1949},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367947},
}
@article{bb373844,
  author    = {Gao, X. X. and Li, Z. X. and Chen, Y. M. and Liu, C. and Li, H. Z.},
  title     = {Transferable Adversarial Attacks Against {ASR}},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {2200--2204},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367948},
}
@article{bb373845,
  author    = {Lu, H. and Cheng, G. F. and Yan, Y. H.},
  title     = {Conversational Short-Phrase Speaker Diarization via Self-Adjusting Speech Segmentation and Embedding Extraction},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {2340--2344},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367949},
}
@article{bb373846,
  author    = {Lee, C. W. and Lee, J. H. and Chang, J. H.},
  title     = {Language Model Personalization for Speech Recognition: A Clustered Federated Learning Approach With Adaptive Weight Average},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {2710--2714},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367950},
}
@article{bb373847,
  author    = {Xing, B. and Tsang, I. W.},
  title     = {{HC2L}: Hybrid and Cooperative Contrastive Learning for Cross-Lingual Spoken Language Understanding},
  journal   = PAMI,
  volume    = {46},
  year      = {2024},
  number    = {12},
  month     = dec,
  pages     = {8094--8105},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367951},
}
@article{bb373848,
  author    = {Chang, X. and Guo, P. C. and Fujita, Y. and Maekaku, T. and Watanabe, S.},
  title     = {{MC-Whisper}: Extending Speech Foundation Models to Multichannel Distant Speech Recognition},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {2850--2854},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367952},
}
@article{bb373849,
  author    = {Chen, Y. Q. and Niu, T. and Zhang, H. and Zhang, W. and Qu, D.},
  title     = {Meta-Prompt: Boosting {Whisper's} Performance in Low-Resource Speech Recognition},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {3039--3043},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367953},
}
@article{bb373850,
  author    = {Zheng, L. and Zhu, H. and Tian, S. and Zhao, Q. W. and Li, T.},
  title     = {Unsupervised Domain Adaptation on End-to-End Multi-Talker Overlapped Speech Recognition},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {3119--3123},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367954},
}
@article{bb373851,
  author    = {Lee, H. and Yoon, J. W. and Kim, S. S. and Kim, N. S.},
  title     = {Towards Maximum Likelihood Training for Transducer-Based Streaming Speech Recognition},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {26--30},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367955},
}
@article{bb373852,
  author    = {Zhuang, X. and Qian, Y. K. and Wang, M. J.},
  title     = {Hypformer: A Fast Hypothesis-Driven Rescoring Speech Recognition Framework},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {471--475},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367956},
}
@article{bb373853,
  author    = {Akman, A. and Sun, Q. and Schuller, B. W.},
  title     = {Improving Audio Explanations Using Audio Language Models},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {741--745},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367957},
}
@article{bb373854,
  author    = {Liu, Y. P. and Yang, X. and Zhang, J. Y. and Xi, Y. L. and Qu, D.},
  title     = {{TAML-Adapter}: Enhancing Adapter Tuning Through Task-Agnostic Meta-Learning for Low-Resource Automatic Speech Recognition},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {636--640},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367958},
}
@article{bb373855,
  author    = {Kumar, K. V. and Rao, R. R.},
  title     = {An approach for speaker diarisation using whale-anti coronavirus optimisation integrated deep fuzzy clustering},
  journal   = IJCVR,
  volume    = {15},
  year      = {2025},
  number    = {2},
  pages     = {177--197},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367959},
}
@article{bb373856,
  author    = {Kim, T. Y. and Yang, J. F. and Park, E.},
  title     = {{MSDLF-K}: A Multimodal Feature Learning Approach for Sentiment Analysis in {Korean} Incorporating Text and Speech},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {1266--1276},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367960},
}
@article{bb373857,
  author    = {Zhang, W. J. and Xia, Z. H. and Ma, B. and Yan, D.},
  title     = {Paradoxical Role of Adversarial Attacks: Enabling Crosslinguistic Attacks and Information Hiding in Multilingual Speech Recognition},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {1046--1050},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367961},
}
@article{bb373858,
  author    = {Jia, G. and He, D. and Zhou, X.},
  title     = {Low-Resource Speech Recognition of Radiotelephony Communications Based on Continuous Learning of In-Domain and Out-of-Domain Knowledge},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {1136--1140},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367962},
}
@article{bb373859,
  author    = {Nga, C. H. and Vu, D. Q. and Le, P. T. and Luong, H. H. and Wang, J. C.},
  title     = {{MLSS}: {Mandarin} {English} Code-Switching Speech Recognition via Mutual Learning-Based Semi-Supervised Method},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {1510--1514},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367963},
}
@article{bb373860,
  author    = {Peng, Z. H. and Chen, T. S. and Huang, S. P. and Hu, Y. Q.},
  title     = {Heterogeneous Correlation Aware Regularization for Sequential Confidence Calibration},
  journal   = PAMI,
  volume    = {47},
  year      = {2025},
  number    = {6},
  month     = jun,
  pages     = {4597--4613},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367964},
}
@inproceedings{bb373861,
  author    = {Peng, Z. H. and Luo, Y. and Chen, T. S. and Xu, K. and Huang, S. P.},
  title     = {Perception and Semantic Aware Regularization for Sequential Confidence Calibration},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {10658--10668},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367965},
}
@article{bb373862,
  author    = {Lee, M. H. and Mo, J. H. and Kang, J. H. and Son, J. Y. and Chang, J. H.},
  title     = {{Bayesian} Language Model Adaptation for Personalized Speech Recognition},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {1620--1624},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367966},
}
@article{bb373863,
  author    = {Ghane, M. and Safari, M. S.},
  title     = {End-to-End Target Speaker Speech Recognition Using Context-Aware Attention Mechanisms for Challenging Enrollment Scenario},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {1940--1944},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367967},
}
@article{bb373864,
  author    = {Rouditchenko, A. and Thomas, S. and Kuehne, H. and Feris, R. and Glass, J.},
  title     = {{mWhisper-Flamingo} for Multilingual Audio-Visual Noise-Robust Speech Recognition},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {2144--2148},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367968},
}
@article{bb373865,
  author    = {Ye, G. and Chen, Q. Q. and Kong, Z. Y. and Zhou, M. R. and Peng, Y.},
  title     = {Adaptive Multi-Granularity Information Exploration for {EEG}-Based Speech Recognition},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {2987--2991},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367969},
}
@article{bb373866,
  author    = {Kutum, S. and Sinha, A. and Kathania, H. K. and Kadiri, S. R. and Govil, M. C.},
  title     = {Zero-shot {KWS} for children's speech using layer-wise features from {SSL} models},
  journal   = PRL,
  volume    = {197},
  year      = {2025},
  pages     = {304--311},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367970},
}
@article{bb373867,
  author    = {Sinha, A. and Kathania, H. K. and Kadiri, S. R. and Narayanan, S.},
  title     = {Can Layer-Wise {SSL} Features Improve Zero-Shot {ASR} Performance for Children's Speech?},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {3759--3763},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367971},
}
% pages holds a single number with no range - presumably an article number;
% TODO confirm.
@article{bb373868,
  author    = {Jayasinghe, H. M. and Wong, K. W. and Nugaliyadde, A.},
  title     = {A systematic review of interpretability and explainability for speech emotion features in automatic speech emotion recognition},
  journal   = PR,
  volume    = {171},
  year      = {2026},
  pages     = {112122},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367972},
}
@article{bb373869,
  author    = {Gu, Y. and Du, Z. H. and Shi, Y. and Han, J. Q. and He, Y. J.},
  title     = {Knowledge-Decoupled Functionally Invariant Path With Synthetic Personal Data for Personalized {ASR}},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {4024--4028},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367973},
}
@article{bb373870,
  author    = {Kim, J. Y. and Yacef, K.},
  title     = {Encoding Affective Cues in Multimodal Textual Transcriptions},
  journal   = AffCom,
  volume    = {16},
  year      = {2025},
  number    = {4},
  month     = oct,
  pages     = {3624--3632},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367974},
}
@article{bb373871,
  author    = {Yoon, S. and Kim, H. and Kim, K. and Lee, S.},
  title     = {Comparative Analysis of Automatic Speech Recognition Fine-Tuning Strategies for Speech From Cochlear Implant Users},
  journal   = SPLetters,
  volume    = {33},
  year      = {2026},
  pages     = {236--240},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367975},
}
@inproceedings{bb373872,
  author    = {Namomsa, G. B. and Gichamba, A. and Ebiyau, B. and Barros, J.},
  title     = {Eyes and Ears: Automated Annotation of Audio Data Using Computer Vision},
  booktitle = ICIP25,
  year      = {2025},
  pages     = {2748--2753},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367976},
}
@inproceedings{bb373873,
  author    = {Tang, J. and Sarokin, R. and Ignasheva, E. and Jensen, G. and Chen, L. and Lee, J. and Kulik, A. and Grundmann, M.},
  title     = {Scaling On-Device {GPU} Inference for Large Generative Models},
  booktitle = EDGE25,
  year      = {2025},
  pages     = {6345--6354},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367977},
}
@inproceedings{bb373874,
  author    = {Chen, K. and Gou, Y. H. and Huang, R. and Liu, Z. and Tan, D. X. and Xu, J. and Wang, C. W. and Zhu, Y. and Zeng, Y. H. and Yang, K. and Wang, D. D. and Xiang, K. and Li, H. Y. and Bai, H. and Han, J. H. and Li, X. H. and Jin, W. and Xie, N. and Zhang, Y. and Kwok, J. T. and Zhao, H. S. and Liang, X. D. and Yeung, D. Y. and Chen, X. and Li, Z. G. and Zhang, W. and Liu, Q. and Hong, L. Q. and Hou, L. and Xu, H.},
  title     = {{EMOVA}: Empowering Language Models to See, Hear and Speak with Vivid Emotions},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {5455--5466},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367978},
}
% The roman-numeral prefix in pages is kept verbatim - presumably the
% proceedings part/volume; TODO confirm.
@inproceedings{bb373875,
  author    = {Wang, J. Y. and Liu, Z. and Wu, X. Y.},
  title     = {{LOCO-MAD}: Long-range Context-enhanced Model Towards Plot-centric Movie Audio Description},
  booktitle = ACCV24,
  year      = {2024},
  pages     = {V: 95--112},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367979},
}
@article{bb373876,
  author    = {Lin, J. and Ge, M. and Wang, W. and Li, H. Z. and Feng, M. L.},
  title     = {Selective {HuBERT}: Self-Supervised Pre-Training for Target Speaker in Clean and Mixture Speech},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {1014--1018},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367980},
}
@inproceedings{bb373877,
  author    = {Feng, Y. and Liu, Z. Y. and Luo, S. and Ling, Y. and Dong, S. and Wang, S. Y. and Ferry, B.},
  title     = {Noise-Free Audio Signal Processing in Noisy Environment: A Hardware and Algorithm Solution},
  booktitle = VAQuality24,
  year      = {2024},
  pages     = {368--373},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367981},
}
% The roman-numeral prefix in pages is kept verbatim - presumably the
% proceedings part/volume; TODO confirm.
@inproceedings{bb373878,
  author    = {Ng, H. W. and Guan, C. T.},
  title     = {Efficient Representation Learning for Inner Speech Domain Generalization},
  booktitle = CAIP23,
  year      = {2023},
  pages     = {I:131--141},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367982},
}
@inproceedings{bb373879,
  author    = {Oneata, D. and Cucu, H.},
  title     = {Improving Multimodal Speech Recognition by Data Augmentation and Speech Representations},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4578--4587},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367983},
}
% The roman-numeral prefix in pages is kept verbatim - presumably the
% proceedings part/volume; TODO confirm.
@inproceedings{bb373880,
  author    = {Tapia, L. S. and Gomez, A. and Esparza, M. and Jatla, V. and Pattichis, M. and Celedon Pattichis, S. and Lopez Leiva, C.},
  title     = {Bilingual Speech Recognition by Estimating Speaker Geometry from Video Data},
  booktitle = CAIP21,
  year      = {2021},
  pages     = {I:79--89},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367984},
}
@inproceedings{bb373881,
  author    = {Qiao, F. C. and Peng, X.},
  title     = {Uncertainty-guided Model Generalization to Unseen Domains},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {6786--6796},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367985},
}
% The roman-numeral prefix in pages is kept verbatim - presumably the
% proceedings part/volume; TODO confirm.
@inproceedings{bb373882,
  author    = {Ngantcha, P. and Amith, M. and Tao, C. and Roberts, K.},
  title     = {Patient-Provider Communication Training Models for Interactive Speech Devices},
  booktitle = DHM21,
  year      = {2021},
  pages     = {I:250--268},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367986},
}
@inproceedings{bb373883,
  author    = {Wu, Y. C. and Liao, W. H.},
  title     = {Toward Text-independent Cross-lingual Speaker Recognition Using {English-Mandarin-Taiwanese} Dataset},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {8515--8522},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367987},
}
@inproceedings{bb373884,
  author    = {Chen, Y. B. and Ma, Y. and Ko, T. and Wang, J. P. and Li, Q.},
  title     = {{MetaMix}: Improved Meta-Learning with Interpolation-based Consistency Regularization},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {407--414},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367988},
}
@inproceedings{bb373885,
  author    = {Zhou, L. X. and Zhang, J.},
  title     = {From Bottom to Top: A Coordinated Feature Representation Method for Speech Recognition},
  booktitle = MMDLCA20,
  year      = {2020},
  pages     = {396--403},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367989},
}
@inproceedings{bb373886,
  author    = {Zhao, J. and Parry, C. J. and dos Anjos, R. and Anslow, C. and Rhee, T.},
  title     = {Voice Interaction for Augmented Reality Navigation Interfaces with Natural Language Understanding},
  booktitle = IVCNZ20,
  year      = {2020},
  pages     = {1--6},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367990},
}
% Surnames were all-caps in the source and are normalised to ordinary
% capitalisation; the title's trailing period is dropped because styles add
% their own terminal punctuation.
@inproceedings{bb373887,
  author    = {Abakarim, F. and Abenaou, A.},
  title     = {{Amazigh} isolated word speech recognition system using the Adaptive Orthogonal Transform Method},
  booktitle = ISCV20,
  year      = {2020},
  pages     = {1--6},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367991},
}
@inproceedings{bb373888,
  author    = {Perez, A. F. and Sanguineti, V. and Morerio, P. and Murino, V.},
  title     = {Audio-Visual Model Distillation Using Acoustic Images},
  booktitle = WACV20,
  year      = {2020},
  pages     = {2843--2852},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367992},
}
@inproceedings{bb373889,
  author    = {Tapu, R. and Mocanu, B. and Zaharia, T.},
  title     = {Dynamic Subtitles: A Multimodal Video Accessibility Enhancement Dedicated to Deaf and Hearing Impaired Users},
  booktitle = ACVR19,
  year      = {2019},
  pages     = {2558--2566},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367993},
}
% The roman-numeral prefix in pages is kept verbatim - presumably the
% proceedings part/volume; TODO confirm.
@inproceedings{bb373890,
  author    = {Roberto, A. and Saggese, A. and Vento, M.},
  title     = {A Challenging Voice Dataset for Robotic Applications in Noisy Environments},
  booktitle = CAIP19,
  year      = {2019},
  pages     = {II:354--364},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367994},
}
% The roman-numeral prefix in pages is kept verbatim - presumably the
% proceedings part/volume; TODO confirm.
@inproceedings{bb373891,
  author    = {Naszadi, K. and Oualil, Y. and Klakow, D.},
  title     = {Image-Sensitive Language Modeling for Automatic Speech Recognition},
  booktitle = VL18,
  year      = {2018},
  pages     = {IV:173--179},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367995},
}
% booktitle is a literal string here, whereas other inproceedings entries in
% this file use a bare macro (e.g. ICPR18). It is kept literal because no macro
% definition for MMMod19 is visible - TODO confirm and switch if one exists.
% The roman-numeral prefix in pages is presumably the proceedings part/volume.
@inproceedings{bb373892,
  author    = {Gauvain, J. and Lamel, L. and Le, V. B. and Despres, J. and Gauvain, J. L. and Messaoudi, A. and Vieru, B. and Ben Kheder, W.},
  title     = {Challenges in Audio Processing of Terrorist-Related Data},
  booktitle = {MMMod19},
  year      = {2019},
  pages     = {II:80--92},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367996},
}
% booktitle is a literal string here, whereas other inproceedings entries in
% this file use a bare macro (e.g. ICPR18). It is kept literal because no macro
% definition for MMMod19 is visible - TODO confirm and switch if one exists.
% The roman-numeral prefix in pages is presumably the proceedings part/volume.
@inproceedings{bb373893,
  author    = {Jorrin, J. and Buera, L.},
  title     = {{DANTE} Speaker Recognition Module. An Efficient and Robust Automatic Speaker Searching Solution for Terrorism-Related Scenarios},
  booktitle = {MMMod19},
  year      = {2019},
  pages     = {I:704--715},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367997},
}
% booktitle is a literal string here, whereas other inproceedings entries in
% this file use a bare macro (e.g. ICPR18). It is kept literal because no macro
% definition for MMMod19 is visible - TODO confirm and switch if one exists.
% The roman-numeral prefix in pages is presumably the proceedings part/volume.
@inproceedings{bb373894,
  author    = {Galanopoulos, D. and Mezaris, V.},
  title     = {Temporal Lecture Video Fragmentation Using Word Embeddings},
  booktitle = {MMMod19},
  year      = {2019},
  pages     = {II:254--265},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367998},
}
@inproceedings{bb373895,
  author    = {Mukherjee, H. and Obaidullah, S. M. and Phadikar, S. and Roy, K.},
  title     = {A {Dravidian} Language Identification System},
  booktitle = ICPR18,
  year      = {2018},
  pages     = {2654--2657},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT367999},
}
% The roman-numeral prefix in pages is kept verbatim - presumably the
% proceedings part/volume; TODO confirm.
@inproceedings{bb373896,
  author    = {Galiotou, E. and Karanikolas, N. and Ralli, A.},
  title     = {Preservation and Management of {Greek} Dialectal Data},
  booktitle = EuroMed18,
  year      = {2018},
  pages     = {I:752--761},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT368000},
}
@inproceedings{bb373897,
  author    = {Li, R. and Yu, J.},
  title     = {Multimodal {3D} visible articulation system for syllable based {Mandarin} {Chinese} training},
  booktitle = VCIP17,
  year      = {2017},
  pages     = {1--4},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT368001},
}
@inproceedings{bb373898,
  author    = {Le, N. and Odobez, J. M.},
  title     = {Improving Speaker Turn Embedding by Crossmodal Transfer Learning from Face Embedding},
  booktitle = CVAVM17,
  year      = {2017},
  pages     = {428--437},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT368002},
}
@inproceedings{bb373899,
  author    = {Arandjelovic, R. and Zisserman, A.},
  title     = {Look, Listen and Learn},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {609--617},
  bibsource = {http://www.visionbib.com/bibliography/other1023.html#TT368003},
}
Last update: Jan 8, 2026 at 12:52:16