% Speech analysis and speech synthesis references, one entry per block.
% BOOKTITLE and JOURNAL values written without quotes (for example ICPR12 or
% SPLetters) are string macros. Their definitions are not part of this
% section; presumably they are supplied by a companion file that is loaded
% first -- confirm before compiling this file on its own.
% Words wrapped in braces inside TITLE are protected from style recasing.

@inproceedings{bb335700,
  AUTHOR    = "Zhang, L. and Li, H.F. and Ma, L.",
  TITLE     = "An adaptive unsupervised clustering of pronunciation errors for automatic pronunciation error detection",
  BOOKTITLE = ICPR12,
  YEAR      = "2012",
  PAGES     = "1521--1525",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT329998",
}

@inproceedings{bb335701,
  AUTHOR    = "Rosales Perez, A. and Reyes Garcia, C.A. and Gonzalez, J.A. and Arch Tirado, E.",
  TITLE     = "Infant Cry Classification Using Genetic Selection of a Fuzzy Model",
  BOOKTITLE = CIARP12,
  YEAR      = "2012",
  PAGES     = "212--219",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT329999",
}

@inproceedings{bb335702,
  AUTHOR    = "Gonzalez, D.C. and Ling, L.L. and Violaro, F.",
  TITLE     = "Analysis of the Multifractal Nature of Speech Signals",
  BOOKTITLE = CIARP12,
  YEAR      = "2012",
  PAGES     = "740--748",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330000",
}

@inproceedings{bb335703,
  AUTHOR    = "Tanveer, S. and Muhammad, A. and Martinez Enriquez, A.M. and Escalada Imaz, G.",
  TITLE     = "Phonetic Unification of Multiple Accents for {Spanish} and {Arabic} Languages",
  BOOKTITLE = MCPR12,
  YEAR      = "2012",
  PAGES     = "323--333",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330001",
}

@inproceedings{bb335704,
  AUTHOR    = "Falek, L. and Teffahi, H. and Djeradi, A.",
  TITLE     = "Methodology for Acoustic Characterization of a Labial Constraint in Speech Production",
  BOOKTITLE = ICISP12,
  YEAR      = "2012",
  PAGES     = "131--141",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330002",
}

@inproceedings{bb335705,
  AUTHOR    = "Krum, D.M. and Suma, E.A. and Bolas, M.",
  TITLE     = "Spatial misregistration of virtual human audio: Implications of the precedence effect",
  BOOKTITLE = "3DUI12",
  YEAR      = "2012",
  PAGES     = "147--148",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330003",
}

@inproceedings{bb335706,
  AUTHOR    = "Yang, Y.J. and Zhang, H.H. and Guo, X.",
  TITLE     = "A pitch tracking method mixing {ACF} and {AMDF} algorithms based on correlations",
  BOOKTITLE = IASP11,
  YEAR      = "2011",
  PAGES     = "553--556",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330004",
}

@inproceedings{bb335707,
  AUTHOR    = "Guo, S. and Gao, L. and Yu, H.Z.",
  TITLE     = "Research on {Lhasa} {Tibetan} prosodic model of journalese based on respiratory signal",
  BOOKTITLE = IASP11,
  YEAR      = "2011",
  PAGES     = "26--30",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330005",
}

@inproceedings{bb335708,
  AUTHOR    = "Resmi, K. and Kumar, S. and Sardana, H.K. and Chhabra, R.",
  TITLE     = "Graphical Speech Training system for hearing impaired",
  BOOKTITLE = ICIIP11,
  YEAR      = "2011",
  PAGES     = "1--6",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330006",
}

@inproceedings{bb335709,
  AUTHOR    = "Gomez, J.A. and Calvo, M.",
  TITLE     = "Improvements on Automatic Speech Segmentation at the Phonetic Level",
  BOOKTITLE = CIARP11,
  YEAR      = "2011",
  PAGES     = "557--564",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330007",
}

@inproceedings{bb335710,
  AUTHOR    = "Le, P.N. and Epps, J. and Choi, E.H.C. and Ambikairajah, E.",
  TITLE     = "A Study of Voice Source and Vocal Tract Filter Based Features in Cognitive Load Classification",
  BOOKTITLE = ICPR10,
  YEAR      = "2010",
  PAGES     = "4516--4519",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330008",
}

@inproceedings{bb335711,
  AUTHOR    = "Stark, M. and Wohlmayr, M. and Pernkopf, F.",
  TITLE     = "Single Channel Speech Separation Using Source-Filter Representation",
  BOOKTITLE = ICPR10,
  YEAR      = "2010",
  PAGES     = "826--829",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330009",
}

@inproceedings{bb335712,
  AUTHOR    = "Stadelmann, T. and Wang, Y.H. and Smith, M. and Ewerth, R. and Freisleben, B.",
  TITLE     = "Rethinking Algorithm Design and Development in Speech Processing",
  BOOKTITLE = ICPR10,
  YEAR      = "2010",
  PAGES     = "4476--4479",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330010",
}

@inproceedings{bb335713,
  AUTHOR    = "Gonzalez Caravaca, G. and Toledano, D.T. and Puertas, M.",
  TITLE     = "Phone-Conditioned Suboptimal {Wiener} Filtering",
  BOOKTITLE = ICPR10,
  YEAR      = "2010",
  PAGES     = "4480--4483",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330011",
}

@inproceedings{bb335714,
  AUTHOR    = "Sepehr, H. and Nooralahiyan, A.Y. and Brennan, P.V.",
  TITLE     = "Improving Performance of a Noise Reduction Algorithm by Switching the Analysis Filter Bank",
  BOOKTITLE = ICISP10,
  YEAR      = "2010",
  PAGES     = "262--271",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330012",
}

@inproceedings{bb335715,
  AUTHOR    = "Kos, M. and Grasic, M. and Vlaj, D. and Kacic, Z.",
  TITLE     = "On-Line Speech/Music Segmentation for Broadcast News Domain",
  BOOKTITLE = WSSIP09,
  YEAR      = "2009",
  PAGES     = "1--4",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330013",
}

@inproceedings{bb335716,
  AUTHOR    = "Grasic, M. and Kos, M. and Vlaj, D. and Kacic, Z.",
  TITLE     = "The Influence of Speech/Non-Speech Segmentation on On-Line and Off-Line Speaker Segmentation Accuracy",
  BOOKTITLE = WSSIP09,
  YEAR      = "2009",
  PAGES     = "1--4",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330014",
}

@inproceedings{bb335717,
  AUTHOR    = "Zuta, V.",
  TITLE     = "Voice Pleasantness of Female Voices and the Assessment of Physical Characteristics",
  BOOKTITLE = COST08,
  YEAR      = "2008",
  PAGES     = "116--125",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330015",
}

@inproceedings{bb335718,
  AUTHOR    = "Pignotti, A. and Marcozzi, D. and Cifani, S. and Squartini, S. and Piazza, F.",
  TITLE     = "A Blind Source Separation Based Approach for Speech Enhancement in Noisy and Reverberant Environment",
  BOOKTITLE = COST08,
  YEAR      = "2008",
  PAGES     = "356--367",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330016",
}

@inproceedings{bb335719,
  AUTHOR    = "Stadelmann, T. and Heinzl, S. and Unterberger, M. and Freisleben, B.",
  TITLE     = "{WebVoice}: A Toolkit for Perceptual Insights into Speech Processing",
  BOOKTITLE = CISP09,
  YEAR      = "2009",
  PAGES     = "1--5",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330017",
}

@inproceedings{bb335720,
  AUTHOR    = "Tang, Y.B. and Huang, R. and Wu, Z.Y.",
  TITLE     = "A 2.4kbps Multiband Characteristic Waveform Interpolation Speech Coding Algorithm",
  BOOKTITLE = CISP09,
  YEAR      = "2009",
  PAGES     = "1--4",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330018",
}

@inproceedings{bb335721,
  AUTHOR    = "Zou, X. and Zhang, X.W.",
  TITLE     = "A 450bps Speech Coding Algorithm Based on Multi-Mode Matrix Quantization",
  BOOKTITLE = CISP09,
  YEAR      = "2009",
  PAGES     = "1--3",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330019",
}

@inproceedings{bb335722,
  AUTHOR    = "Kuhnapfel, T. and Tan, T. and Venkatesh, S. and Igel, B.",
  TITLE     = "Distributed Audio Network for Speech Enhancement in Challenging Noise Backgrounds",
  BOOKTITLE = AVSBS09,
  YEAR      = "2009",
  PAGES     = "308--313",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330020",
}

@inproceedings{bb335723,
  AUTHOR    = "Kuhnapfel, T. and Tan, T. and Venkatesh, S. and Nordholm, S.E. and Igel, B.",
  TITLE     = "Adaptive speech enhancement with varying noise backgrounds",
  BOOKTITLE = ICPR08,
  YEAR      = "2008",
  PAGES     = "1--4",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330021",
}

@inproceedings{bb335724,
  AUTHOR    = "Li, X.K. and Deng, Y.",
  TITLE     = "Combining speech energy and edge information for fast and efficient voice activity detection in noisy environments",
  BOOKTITLE = ICPR08,
  YEAR      = "2008",
  PAGES     = "1--4",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330022",
}

@inproceedings{bb335725,
  AUTHOR    = "Kukharchik, P. and Kheidorov, I. and Bovbel, E. and Ladeev, D.",
  TITLE     = "Speech Signal Processing Based on Wavelets and {SVM} for Vocal Tract Pathology Detection",
  BOOKTITLE = ICISP08,
  YEAR      = "2008",
  PAGES     = "192--199",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330023",
}

@inproceedings{bb335726,
  AUTHOR    = "Nagesha and Kumar, G.H.",
  TITLE     = "Signal Resampling Technique Combining Level Crossing and Auditory Features",
  BOOKTITLE = PReMI07,
  YEAR      = "2007",
  PAGES     = "447--454",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330024",
}

@inproceedings{bb335727,
  AUTHOR    = "Varallyay, G.",
  TITLE     = "{SSM}: A Novel Method to Recognize the Fundamental Frequency in Voice Signals",
  BOOKTITLE = CIARP07,
  YEAR      = "2007",
  PAGES     = "88--95",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330025",
}

@inproceedings{bb335728,
  AUTHOR    = "Ferrer, C.A. and Gonzalez, E. and Hernandez Diaz, M.E.",
  TITLE     = "Evaluation of Time and Frequency Domain-Based Methods for the Estimation of Harmonics-to-Noise-Ratios in Voice Signals",
  BOOKTITLE = CIARP06,
  YEAR      = "2006",
  PAGES     = "406--415",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330026",
}

@inproceedings{bb335729,
  AUTHOR    = "Li, W.H. and Liu, M. and Zhu, Z.G. and Huang, T.S.",
  TITLE     = "{LDV} Remote Voice Acquisition and Enhancement",
  BOOKTITLE = ICPR06,
  YEAR      = "2006",
  PAGES     = "IV: 262--265",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330027",
}

@inproceedings{bb335730,
  AUTHOR    = "Xue, W. and Du, S. and Fang, C.Z. and Ye, Y.X.",
  TITLE     = "Voice Activity Detection Using Wavelet-Based Multiresolution Spectrum and Support Vector Machines and Audio Mixing Algorithm",
  BOOKTITLE = CVHCI06,
  YEAR      = "2006",
  PAGES     = "78--88",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330028",
}

@inproceedings{bb335731,
  AUTHOR    = "Garcia Perera, L.P. and Nolazco Flores, J.A. and Mex Perera, C.",
  TITLE     = "Cryptographic-Speech-Key Generation Architecture Improvements",
  BOOKTITLE = IbPRIA05,
  YEAR      = "2005",
  PAGES     = "II:579",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330029",
}

@inproceedings{bb335732,
  AUTHOR    = "Welk, M. and Bergmeister, A. and Weickert, J.",
  TITLE     = "Denoising of Audio Data by Nonlinear Diffusion",
  BOOKTITLE = ScaleSpace05,
  YEAR      = "2005",
  PAGES     = "598--609",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330030",
}

@inproceedings{bb335733,
  AUTHOR    = "Cristani, M. and Bicego, M. and Murino, V.",
  TITLE     = "On-line adaptive background modelling for audio surveillance",
  BOOKTITLE = ICPR04,
  YEAR      = "2004",
  PAGES     = "II: 399--402",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330031",
}

@inproceedings{bb335734,
  AUTHOR    = "Lefevre, S. and Maillard, B. and Vincent, N.",
  TITLE     = "A two level classifier process for audio segmentation",
  BOOKTITLE = ICPR02,
  YEAR      = "2002",
  PAGES     = "III: 891--894",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330032",
}

@inproceedings{bb335735,
  AUTHOR    = "de Stefano, C. and Della Cioppa, A. and Marcelli, A.",
  TITLE     = "An investigation on {MPEG} audio segmentation by evolutionary algorithms",
  BOOKTITLE = ICDAR01,
  YEAR      = "2001",
  PAGES     = "952--956",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330033",
}

@inproceedings{bb335736,
  AUTHOR    = "Edmonds, E.A. and Pan, L.Y. and O'Brien, S.M.",
  TITLE     = "Automatic feature extraction from spectrograms for acoustic-phonetic analysis",
  BOOKTITLE = ICPR92,
  YEAR      = "1992",
  PAGES     = "II:701--704",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024sa1.html#TT330034",
}

@article{bb335737,
  AUTHOR    = "Yeh, C.Y. and Hwang, S.H.",
  TITLE     = "Efficient text analyser with prosody generator-driven approach for {Mandarin} text-to-speech",
  JOURNAL   = VISP,
  VOLUME    = "152",
  YEAR      = "2005",
  NUMBER    = "6",
  MONTH     = "December",
  PAGES     = "793--799",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330035",
}

@article{bb335738,
  AUTHOR    = "Chouireb, F. and Guerti, M.",
  TITLE     = "Towards a high quality {Arabic} speech synthesis system based on neural networks and residual excited vocal tract model",
  JOURNAL   = SIViP,
  VOLUME    = "2",
  YEAR      = "2008",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "73--87",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330036",
}

@article{bb335739,
  AUTHOR    = "Elfitri, I. and Gunel, B. and Kondoz, A.M.",
  TITLE     = "Multichannel Audio Coding Based on Analysis by Synthesis",
  JOURNAL   = PIEEE,
  VOLUME    = "99",
  YEAR      = "2011",
  NUMBER    = "4",
  MONTH     = "April",
  PAGES     = "657--670",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330037",
}

@article{bb335740,
  AUTHOR    = "Jung, C.S. and Joo, Y.S. and Kang, H.G.",
  TITLE     = "Waveform Interpolation-Based Speech Analysis/Synthesis for {HMM}-Based {TTS} Systems",
  JOURNAL   = SPLetters,
  VOLUME    = "19",
  YEAR      = "2012",
  NUMBER    = "12",
  MONTH     = "December",
  PAGES     = "809--812",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330038",
}

@article{bb335741,
  AUTHOR    = "Carmona, J.L. and Barker, J. and Gomez, A.M. and Ma, N.",
  TITLE     = "Speech Spectral Envelope Enhancement by {HMM}-Based Analysis/Resynthesis",
  JOURNAL   = SPLetters,
  VOLUME    = "20",
  YEAR      = "2013",
  NUMBER    = "6",
  MONTH     = "June",
  PAGES     = "563--566",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330039",
}

@article{bb335742,
  AUTHOR    = "Tokuda, K. and Nankaku, Y. and Toda, T. and Zen, H. and Yamagishi, J. and Oura, K.",
  TITLE     = "Speech Synthesis Based on Hidden {Markov} Models",
  JOURNAL   = PIEEE,
  VOLUME    = "101",
  YEAR      = "2013",
  NUMBER    = "5",
  MONTH     = "May",
  PAGES     = "1234--1252",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330040",
}

@article{bb335743,
  AUTHOR    = "Ling, Z. and Kang, S. and Zen, H. and Senior, A. and Schuster, M. and Qian, X. and Meng, H. and Deng, L.",
  TITLE     = "Deep Learning for Acoustic Modeling in Parametric Speech Generation: A systematic review of existing techniques and future trends",
  JOURNAL   = SPMag,
  VOLUME    = "32",
  YEAR      = "2015",
  NUMBER    = "3",
  MONTH     = "May",
  PAGES     = "35--52",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330041",
}

@article{bb335744,
  AUTHOR    = "Bordel, G. and Penagarikano, M. and Rodriguez Fuentes, L.J. and Alvarez, A. and Varona, A.",
  TITLE     = "Probabilistic Kernels for Improved Text-to-Speech Alignment in Long Audio Tracks",
  JOURNAL   = SPLetters,
  VOLUME    = "23",
  YEAR      = "2016",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "126--129",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330042",
}

@article{bb335745,
  AUTHOR    = "Ninh, D.K. and Yamashita, Y.",
  TITLE     = "F0 Parameterization of Glottalized Tones in {HMM}-Based Speech Synthesis for {Hanoi} {Vietnamese}",
  JOURNAL   = IEICE,
  VOLUME    = "E98-D",
  YEAR      = "2015",
  NUMBER    = "12",
  MONTH     = "December",
  PAGES     = "2280--2289",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330043",
}

@article{bb335746,
  AUTHOR    = "Erro, D.",
  TITLE     = "Two-Band Radial Postfiltering in Cepstral Domain with Application to Speech Synthesis",
  JOURNAL   = SPLetters,
  VOLUME    = "23",
  YEAR      = "2016",
  NUMBER    = "2",
  MONTH     = "February",
  PAGES     = "202--206",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330044",
}

@article{bb335747,
  AUTHOR    = "Hu, Y.J. and Ling, Z.H.",
  TITLE     = "{DBN}-based Spectral Feature Representation for Statistical Parametric Speech Synthesis",
  JOURNAL   = SPLetters,
  VOLUME    = "23",
  YEAR      = "2016",
  NUMBER    = "3",
  MONTH     = "March",
  PAGES     = "321--325",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330045",
}

@article{bb335748,
  AUTHOR    = "Tsiaras, V. and Maia, R. and Diakoloukas, V. and Stylianou, Y. and Digalakis, V.",
  TITLE     = "Global Variance in Speech Synthesis With Linear Dynamical Models",
  JOURNAL   = SPLetters,
  VOLUME    = "23",
  YEAR      = "2016",
  NUMBER    = "8",
  MONTH     = "August",
  PAGES     = "1057--1061",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330046",
}

@article{bb335749,
  AUTHOR    = "Wang, F.Z. and Nagano, H. and Kashino, K. and Igarashi, T.",
  TITLE     = "Visualizing Video Sounds With Sound Word Animation to Enrich User Experience",
  JOURNAL   = MultMed,
  VOLUME    = "19",
  YEAR      = "2017",
  NUMBER    = "2",
  MONTH     = "February",
  PAGES     = "418--429",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330047",
}

@article{bb335750,
  AUTHOR    = "Sharma, B. and Prasanna, S.R.M.",
  TITLE     = "Enhancement of Spectral Tilt in Synthesized Speech",
  JOURNAL   = SPLetters,
  VOLUME    = "24",
  YEAR      = "2017",
  NUMBER    = "4",
  MONTH     = "April",
  PAGES     = "382--386",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330048",
}

@article{bb335751,
  AUTHOR    = "Singh, R. and Jimenez, A. and Oland, A.",
  TITLE     = "Voice disguise by mimicry: deriving statistical articulometric evidence to evaluate claimed impersonation",
  JOURNAL   = IET-Bio,
  VOLUME    = "6",
  YEAR      = "2017",
  NUMBER    = "4",
  MONTH     = "July",
  PAGES     = "282--289",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330049",
}

@article{bb335752,
  AUTHOR    = "Lee, K.S.",
  TITLE     = "Restricted {Boltzmann} Machine-Based Voice Conversion for Nonparallel Corpus",
  JOURNAL   = SPLetters,
  VOLUME    = "24",
  YEAR      = "2017",
  NUMBER    = "8",
  MONTH     = "August",
  PAGES     = "1103--1107",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330050",
}

@article{bb335753,
  AUTHOR    = "Reddy, M.K. and Rao, K.S.",
  TITLE     = "Robust Pitch Extraction Method for the {HMM}-Based Speech Synthesis System",
  JOURNAL   = SPLetters,
  VOLUME    = "24",
  YEAR      = "2017",
  NUMBER    = "8",
  MONTH     = "August",
  PAGES     = "1133--1137",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330051",
}

@article{bb335754,
  AUTHOR    = "Liu, Z.C. and Ling, Z.H. and Dai, L.R.",
  TITLE     = "Statistical Parametric Speech Synthesis Using Generalized Distillation Framework",
  JOURNAL   = SPLetters,
  VOLUME    = "25",
  YEAR      = "2018",
  NUMBER    = "5",
  MONTH     = "May",
  PAGES     = "695--699",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330052",
}

@article{bb335755,
  AUTHOR    = "Drugman, T. and Huybrechts, G. and Klimkov, V. and Moinet, A.",
  TITLE     = "Traditional Machine Learning for Pitch Detection",
  JOURNAL   = SPLetters,
  VOLUME    = "25",
  YEAR      = "2018",
  NUMBER    = "11",
  MONTH     = "November",
  PAGES     = "1745--1749",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330053",
}

@article{bb335756,
  AUTHOR    = "Arik, S.O. and Jun, H. and Diamos, G.",
  TITLE     = "Fast Spectrogram Inversion Using Multi-Head Convolutional Neural Networks",
  JOURNAL   = SPLetters,
  VOLUME    = "26",
  YEAR      = "2019",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "94--98",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330054",
}

@article{bb335757,
  AUTHOR    = "Masuyama, Y. and Yatabe, K. and Oikawa, Y.",
  TITLE     = "{Griffin-Lim} Like Phase Recovery via Alternating Direction Method of Multipliers",
  JOURNAL   = SPLetters,
  VOLUME    = "26",
  YEAR      = "2019",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "184--188",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330055",
}

@article{bb335758,
  AUTHOR    = "Kwon, O. and Jang, I. and Ahn, C. and Kang, H.",
  TITLE     = "An Effective Style Token Weight Control Technique for End-to-End Emotional Speech Synthesis",
  JOURNAL   = SPLetters,
  VOLUME    = "26",
  YEAR      = "2019",
  NUMBER    = "9",
  MONTH     = "September",
  PAGES     = "1383--1387",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330056",
}

@article{bb335759,
  AUTHOR    = "Liu, Q. and Jackson, P.J.B. and Wang, W.",
  TITLE     = "A Speech Synthesis Approach for High Quality Speech Separation and Generation",
  JOURNAL   = SPLetters,
  VOLUME    = "26",
  YEAR      = "2019",
  NUMBER    = "12",
  MONTH     = "December",
  PAGES     = "1872--1876",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330057",
}

@article{bb335760,
  AUTHOR    = "Cotescu, M. and Drugman, T. and Huybrechts, G. and Lorenzo Trueba, J. and Moinet, A.",
  TITLE     = "Voice Conversion for Whispered Speech Synthesis",
  JOURNAL   = SPLetters,
  VOLUME    = "27",
  YEAR      = "2020",
  PAGES     = "186--190",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330058",
}

@article{bb335761,
  AUTHOR    = "Aylett, M.P. and Vinciarelli, A. and Wester, M.",
  TITLE     = "Speech Synthesis for the Generation of Artificial Personality",
  JOURNAL   = AffCom,
  VOLUME    = "11",
  YEAR      = "2020",
  NUMBER    = "2",
  MONTH     = "April",
  PAGES     = "361--372",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330059",
}

@article{bb335762,
  AUTHOR    = "Rao, M.V.A. and Ghosh, P.K.",
  TITLE     = "{SFNet}: A Computationally Efficient Source Filter Model Based Neural Speech Synthesis",
  JOURNAL   = SPLetters,
  VOLUME    = "27",
  YEAR      = "2020",
  PAGES     = "1170--1174",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330060",
}

@article{bb335763,
  AUTHOR    = "Zhou, Y. and Tian, X. and Li, H.",
  TITLE     = "Multi-Task {WaveRNN} With an Integrated Architecture for Cross-Lingual Voice Conversion",
  JOURNAL   = SPLetters,
  VOLUME    = "27",
  YEAR      = "2020",
  PAGES     = "1310--1314",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330061",
}

@article{bb335764,
  AUTHOR    = "Yang, J.C. and Lin, P. and He, Q.H.",
  TITLE     = "{Constant-Q} magnitude-phase coefficients extraction for synthetic speech detection",
  JOURNAL   = IET-Bio,
  VOLUME    = "9",
  YEAR      = "2020",
  NUMBER    = "5",
  MONTH     = "September",
  PAGES     = "216--221",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330062",
}

@article{bb335765,
  AUTHOR    = "Liu, R. and Sisman, B. and Bao, F. and Gao, G. and Li, H.",
  TITLE     = "Modeling Prosodic Phrasing With Multi-Task Learning in {Tacotron}-Based {TTS}",
  JOURNAL   = SPLetters,
  VOLUME    = "27",
  YEAR      = "2020",
  PAGES     = "1470--1474",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330063",
}

@article{bb335766,
  AUTHOR    = "Qi, J. and Du, J. and Siniscalchi, S.M. and Ma, X. and Lee, C.",
  TITLE     = "On Mean Absolute Error for Deep Neural Network Based Vector-to-Vector Regression",
  JOURNAL   = SPLetters,
  VOLUME    = "27",
  YEAR      = "2020",
  PAGES     = "1485--1489",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330064",
}

@article{bb335767,
  AUTHOR    = "Yang, S. and Wang, Y. and Xie, L.",
  TITLE     = "Adversarial Feature Learning and Unsupervised Clustering Based Speech Synthesis for Found Data With Acoustic and Textual Noise",
  JOURNAL   = SPLetters,
  VOLUME    = "27",
  YEAR      = "2020",
  PAGES     = "1730--1734",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330065",
}

@article{bb335768,
  AUTHOR    = "Lee, J.Y. and Cheon, S.J. and Choi, B.J. and Kim, N.S.",
  TITLE     = "Memory Attention: Robust Alignment Using Gating Mechanism for End-to-End Speech Synthesis",
  JOURNAL   = SPLetters,
  VOLUME    = "27",
  YEAR      = "2020",
  PAGES     = "2004--2008",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330066",
}

@article{bb335769,
  AUTHOR    = "Zhang, Y. and Jiang, F. and Duan, Z.Y.",
  TITLE     = "One-Class Learning Towards Synthetic Voice Spoofing Detection",
  JOURNAL   = SPLetters,
  VOLUME    = "28",
  YEAR      = "2021",
  PAGES     = "937--941",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330067",
}

@article{bb335770,
  AUTHOR    = "Saeki, T. and Takamichi, S. and Saruwatari, H.",
  TITLE     = "Incremental Text-to-Speech Synthesis Using Pseudo Lookahead With Large Pretrained Language Model",
  JOURNAL   = SPLetters,
  VOLUME    = "28",
  YEAR      = "2021",
  PAGES     = "857--861",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330068",
}

@article{bb335771,
  AUTHOR    = "Comanducci, L. and Bestagini, P. and Tagliasacchi, M. and Sarti, A. and Tubaro, S.",
  TITLE     = "Reconstructing Speech From {CNN} Embeddings",
  JOURNAL   = SPLetters,
  VOLUME    = "28",
  YEAR      = "2021",
  PAGES     = "952--956",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330069",
}

@article{bb335772,
  AUTHOR    = "Hua, G. and Teoh, A.B.J. and Zhang, H.J.",
  TITLE     = "Towards End-to-End Synthetic Speech Detection",
  JOURNAL   = SPLetters,
  VOLUME    = "28",
  YEAR      = "2021",
  PAGES     = "1265--1269",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330070",
}

@article{bb335773,
  AUTHOR    = "Cheon, S.J. and Choi, B.J. and Kim, M. and Lee, H. and Kim, N.S.",
  TITLE     = "A Controllable Multi-Lingual Multi-Speaker Multi-Style Text-to-Speech Synthesis With Multivariate Information Minimization",
  JOURNAL   = SPLetters,
  VOLUME    = "29",
  YEAR      = "2022",
  PAGES     = "55--59",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330071",
}

@article{bb335774,
  AUTHOR    = "Bilbao, S.",
  TITLE     = "{3D} Interpolation in Wave-Based Acoustic Simulation",
  JOURNAL   = SPLetters,
  VOLUME    = "29",
  YEAR      = "2022",
  PAGES     = "384--388",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330072",
}

@article{bb335775,
  AUTHOR    = "Saleem, N. and Gao, J. and Irfan, M. and Verdu, E. and Fuente, J.P.",
  TITLE     = "{E2E-V2SResNet}: Deep residual convolutional neural networks for end-to-end video driven speech synthesis",
  JOURNAL   = IVC,
  VOLUME    = "119",
  YEAR      = "2022",
  PAGES     = "104389",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330073",
}

@article{bb335776,
  AUTHOR    = "Sun, X. and Li, J.Y. and Tao, J.H.",
  TITLE     = "Emotional Conversation Generation Orientated Syntactically Constrained Bidirectional-Asynchronous Framework",
  JOURNAL   = AffCom,
  VOLUME    = "13",
  YEAR      = "2022",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "187--198",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330074",
}

@article{bb335777,
  AUTHOR    = "Liu, S.G. and Li, S. and Cheng, H.",
  TITLE     = "Towards an End-to-End Visual-to-Raw-Audio Generation With {GAN}",
  JOURNAL   = CirSysVideo,
  VOLUME    = "32",
  YEAR      = "2022",
  NUMBER    = "3",
  MONTH     = "March",
  PAGES     = "1299--1312",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330075",
}

@article{bb335778,
  AUTHOR    = "Li, C.T. and Yang, F. and Yang, J.",
  TITLE     = "The Role of Long-Term Dependency in Synthetic Speech Detection",
  JOURNAL   = SPLetters,
  VOLUME    = "29",
  YEAR      = "2022",
  PAGES     = "1142--1146",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330076",
}

@article{bb335779,
  AUTHOR    = "Cui, S.S. and Huang, B.Y. and Huang, J.W. and Kang, X.G.",
  TITLE     = "Synthetic Speech Detection Based on Local Autoregression and Variance Statistics",
  JOURNAL   = SPLetters,
  VOLUME    = "29",
  YEAR      = "2022",
  PAGES     = "1462--1466",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330077",
}

@article{bb335780,
  AUTHOR    = "Lei, Y. and Yang, S. and Zhu, X.F. and Xie, L. and Su, D.",
  TITLE     = "Cross-Speaker Emotion Transfer Through Information Perturbation in Emotional Speech Synthesis",
  JOURNAL   = SPLetters,
  VOLUME    = "29",
  YEAR      = "2022",
  PAGES     = "1948--1952",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330078",
}

@article{bb335781,
  AUTHOR    = "Choi, B.J. and Jeong, M. and Lee, J.Y. and Kim, N.S.",
  TITLE     = "{SNAC}: Speaker-Normalized Affine Coupling Layer in Flow-Based Architecture for Zero-Shot Multi-Speaker Text-to-Speech",
  JOURNAL   = SPLetters,
  VOLUME    = "29",
  YEAR      = "2022",
  PAGES     = "2502--2506",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330079",
}

@article{bb335782,
  AUTHOR    = "Choi, B.J. and Jeong, M. and Kim, M. and Kim, N.S.",
  TITLE     = "Variable-Length Speaker Conditioning in Flow-Based Text-to-Speech",
  JOURNAL   = SPLetters,
  VOLUME    = "31",
  YEAR      = "2024",
  PAGES     = "899--903",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330080",
}

@article{bb335783,
  AUTHOR    = "Chen, L.C. and Chen, P.H. and Tsai, R.T.H. and Tsao, Y.",
  TITLE     = "{EPG2S}: Speech Generation and Speech Enhancement Based on Electropalatography and Audio Signals Using Multimodal Learning",
  JOURNAL   = SPLetters,
  VOLUME    = "29",
  YEAR      = "2022",
  PAGES     = "2582--2586",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330081",
}

@article{bb335784,
  AUTHOR    = "Zhou, K. and Sisman, B. and Rana, R. and Schuller, B.W. and Li, H.Z.",
  TITLE     = "Emotion Intensity and its Control for Emotional Voice Conversion",
  JOURNAL   = AffCom,
  VOLUME    = "14",
  YEAR      = "2023",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "31--48",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330082",
}

@article{bb335785,
  AUTHOR    = "Huang, B. and Cui, S. and Huang, J.W. and Kang, X.",
  TITLE     = "Discriminative Frequency Information Learning for End-to-End Speech Anti-Spoofing",
  JOURNAL   = SPLetters,
  VOLUME    = "30",
  YEAR      = "2023",
  PAGES     = "185--189",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330083",
}

@article{bb335786,
  AUTHOR    = "Zhao, W. and Wang, Z. and Xu, L.",
  TITLE     = "Mandarin Text-to-Speech Front-End With Lightweight Distilled Convolution Network",
  JOURNAL   = SPLetters,
  VOLUME    = "30",
  YEAR      = "2023",
  PAGES     = "249--253",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330084",
}

@article{bb335787,
  AUTHOR    = "Ma, K. and Feng, Y. and Chen, B. and Zhao, G.Y.",
  TITLE     = "End-to-End Dual-Branch Network Towards Synthetic Speech Detection",
  JOURNAL   = SPLetters,
  VOLUME    = "30",
  YEAR      = "2023",
  PAGES     = "359--363",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330085",
}

@article{bb335788,
  AUTHOR    = "Mira, R. and Vougioukas, K. and Ma, P.C. and Petridis, S. and Schuller, B.W. and Pantic, M.",
  TITLE     = "End-to-End Video-to-Speech Synthesis Using Generative Adversarial Networks",
  JOURNAL   = Cyber,
  VOLUME    = "53",
  YEAR      = "2023",
  NUMBER    = "6",
  MONTH     = "June",
  PAGES     = "3454--3466",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330086",
}

@article{bb335789,
  AUTHOR    = "Yoon, H. and Kim, C. and Um, S. and Yoon, H.W. and Kang, H.G.",
  TITLE     = "{SC-CNN}: Effective Speaker Conditioning Method for Zero-Shot Multi-Speaker Text-to-Speech Systems",
  JOURNAL   = SPLetters,
  VOLUME    = "30",
  YEAR      = "2023",
  PAGES     = "593--597",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330087",
}

@inproceedings{bb335790,
  AUTHOR    = "Gu, Y.W. and Zhao, X.F. and Yi, X.W. and Xiao, J.C.",
  TITLE     = "Voice Conversion Using Learnable Similarity-guided Masked Autoencoder",
  BOOKTITLE = IWDW22,
  YEAR      = "2022",
  PAGES     = "53--67",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330088",
}

@article{bb335791,
  AUTHOR    = "Zhang, M.Y. and Zhou, X. and Wu, Z.Z. and Li, H.Z.",
  TITLE     = "Towards Zero-Shot Multi-Speaker Multi-Accent Text-to-Speech Synthesis",
  JOURNAL   = SPLetters,
  VOLUME    = "30",
  YEAR      = "2023",
  PAGES     = "947--951",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330089",
}

@article{bb335792,
  AUTHOR    = "Ly, E. and Villegas, J.",
  TITLE     = "Cartesian Genetic Programming Parameterization in the Context of Audio Synthesis",
  JOURNAL   = SPLetters,
  VOLUME    = "30",
  YEAR      = "2023",
  PAGES     = "1077--1081",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330090",
}

@article{bb335793,
  AUTHOR    = "Mingote, V. and Gimeno, P. and Vicente, L. and Khurana, S. and Laurent, A. and Duret, J.",
  TITLE     = "Direct Text to Speech Translation System Using Acoustic Units",
  JOURNAL   = SPLetters,
  VOLUME    = "30",
  YEAR      = "2023",
  PAGES     = "1262--1266",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330091",
}

@article{bb335794,
  AUTHOR    = "Wang, Z.C. and Chen, Y.Z. and Xie, L. and Tian, Q. and Wang, Y.P.",
  TITLE     = "{LM-VC}: Zero-Shot Voice Conversion via Speech Generation Based on Language Models",
  JOURNAL   = SPLetters,
  VOLUME    = "30",
  YEAR      = "2023",
  PAGES     = "1157--1161",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330092",
}

@article{bb335795,
  AUTHOR    = "van Niekerk, B. and Carbonneau, M.A. and Kamper, H.",
  TITLE     = "Rhythm Modeling for Voice Conversion",
  JOURNAL   = SPLetters,
  VOLUME    = "30",
  YEAR      = "2023",
  PAGES     = "1297--1301",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330093",
}

@article{bb335796,
  AUTHOR    = "Zhou, K. and Sisman, B. and Rana, R. and Schuller, B.W. and Li, H.Z.",
  TITLE     = "Speech Synthesis With Mixed Emotions",
  JOURNAL   = AffCom,
  VOLUME    = "14",
  YEAR      = "2023",
  NUMBER    = "4",
  MONTH     = "October",
  PAGES     = "3120--3134",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330094",
}

@article{bb335797,
  AUTHOR    = "Liu, Y. and Wei, L.F. and Qian, X.Y. and Zhang, T.H. and Chen, S.L. and Yin, X.C.",
  TITLE     = "{M3TTS}: Multi-modal text-to-speech of multi-scale style control for dubbing",
  JOURNAL   = PRL,
  VOLUME    = "179",
  YEAR      = "2024",
  PAGES     = "158--164",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330095",
}

@article{bb335798,
  AUTHOR    = "Jeong, M. and Kim, M. and Lee, J.Y. and Kim, N.S.",
  TITLE     = "Efficient Parallel Audio Generation Using Group Masked Language Modeling",
  JOURNAL   = SPLetters,
  VOLUME    = "31",
  YEAR      = "2024",
  PAGES     = "979--983",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330096",
}

@article{bb335799,
  AUTHOR    = "Yi, J.Y. and Wang, C.L. and Tao, J.H. and Zhang, C.Y. and Fan, C.H. and Tian, Z.K. and Ma, H.X. and Fu, R.",
  TITLE     = "{SceneFake}: An initial dataset and benchmarks for scene fake audio detection",
  JOURNAL   = PR,
  VOLUME    = "152",
  YEAR      = "2024",
  PAGES     = "110468",
  BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT330097",
}