@article{bb374500,
  author    = {Parisae, V. and Bhavanam, S.N.},
  title     = {Stacked U-Net with Time-Frequency Attention and Deep Connection Net for
Single Channel Speech Enhancement},
  journal   = IJIG,
  volume    = {26},
  year      = {2026},
  number    = {1},
  month     = {January},
  pages     = {2550067},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368604}
}

@article{bb374501,
  author    = {Wang, H. and Wang, C.L. and Wang, X.T. and Yu, L. and Jiang, Y.M.},
  title     = {MBTU-SE: A Speech Enhancement Network Integrates Enhanced Taylor
Multi-Branch Linear Transformer With U-Net Architecture},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {4309-4313},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368605}
}

@article{bb374502,
  author    = {Pan, Y. and Yang, Y.G. and Yao, J. and Ma, L. and Zhao, J.J.},
  title     = {Zero-Shot Voice Conversion via Content-Aware Timbre Ensemble and
Conditional Flow Matching},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {4199-4203},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368606}
}

@inproceedings{bb374503,
  author    = {Wang, Q. and Song, X. and He, Y.H. and Han, J.Z. and Ding, C.H. and Gao, X.Y. and Gong, Y.H.},
  title     = {Boosting Domain Incremental Learning: Selecting the Optimal
Parameters is All You Need},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {4839-4849},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368607}
}

@inproceedings{bb374504,
  author    = {Li, X.S. and Tan, Z.H. and Xia, Z.C. and Wu, D. and Zhang, B.},
  title     = {Single-Channel Speech Separation Focusing on Attention DE},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {3204-3209},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368608}
}

% Page range end corrected from 369 to 669 (the source range 663-369 ran backwards); confirm against the ICPR 2022 proceedings.
@inproceedings{bb374505,
  author    = {Xu, X.M. and Hao, J.J.},
  title     = {U-Former: Improving Monaural Speech Enhancement with Multi-head Self
and Cross Attention},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {663-669},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368609}
}

@inproceedings{bb374506,
  author    = {Li, D.S. and Zhao, L.X. and Xiao, J. and Liu, J.Q. and Guan, D.Z. and Wang, Q.R.},
  title     = {Adaptive Speech Intelligibility Enhancement for Far-and-Near-end Noise
Environments Based on Self-attention StarGAN},
  booktitle = MMMod22,
  year      = {2022},
  pages     = {II:205-217},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368610}
}

@inproceedings{bb374507,
  author    = {Xiao, J. and Liu, J.Q. and Li, D.S. and Zhao, L.X. and Wang, Q.R.},
  title     = {Speech Intelligibility Enhancement By Non-Parallel Speech Style
Conversion Using CWT and iMetricGAN Based CycleGAN},
  booktitle = MMMod22,
  year      = {2022},
  pages     = {I:544-556},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368611}
}

@inproceedings{bb374508,
  author    = {Hegde, S.B. and Prajwal, K.R. and Mukhopadhyay, R. and Namboodiri, V. and Jawahar, C.V.},
  title     = {Visual Speech Enhancement Without A Real Visual Stream},
  booktitle = WACV21,
  year      = {2021},
  pages     = {1925-1934},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368612}
}

@inproceedings{bb374509,
  author    = {Sun, Z.B. and Wang, Y.N. and Cao, L.},
  title     = {An Attention Based Speaker-independent Audio-visual Deep Learning Model
for Speech Enhancement},
  booktitle = MMMod20,
  year      = {2020},
  pages     = {II:722-728},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368613}
}

@inproceedings{bb374510,
  author    = {Wang, Y.},
  title     = {Research Progress in Speech Enhancement Technology},
  booktitle = CVIDL20,
  year      = {2020},
  pages     = {222-226},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368614}
}

@inproceedings{bb374511,
  author    = {Dendani, B. and Bahi, H. and Sari, T.},
  title     = {Speech Enhancement Based on Deep Autoencoder for Remote Arabic Speech
Recognition},
  booktitle = ICISP20,
  year      = {2020},
  pages     = {221-229},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368615}
}

@inproceedings{bb374512,
  author    = {Coto Jimenez, M.},
  title     = {Experimental Study on Transfer Learning in Denoising Autoencoders for
Speech Enhancement},
  booktitle = MCPR20,
  year      = {2020},
  pages     = {307-317},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368616}
}

@inproceedings{bb374513,
  author    = {Zhang, R. and Hu, R.M. and Li, G. and Wang, X.C.},
  title     = {Spectral Tilt Estimation for Speech Intelligibility Enhancement Using
RNN Based on All-Pole Model},
  booktitle = {MMMod19},
  year      = {2019},
  pages     = {II:144-156},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368617}
}

@inproceedings{bb374514,
  author    = {Samui, S. and Chakrabarti, I. and Ghosh, S.K.},
  title     = {Improving the Performance of Deep Learning Based Speech Enhancement
System Using Fuzzy Restricted Boltzmann Machine},
  booktitle = PReMI17,
  year      = {2017},
  pages     = {534-542},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368618}
}

@inproceedings{bb374515,
  author    = {Pignotti, A. and Marcozzi, D. and Cifani, S. and Squartini, S. and Piazza, F.},
  title     = {A Blind Source Separation Based Approach for Speech Enhancement in
Noisy and Reverberant Environment},
  booktitle = COST08,
  year      = {2008},
  pages     = {356-367},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368619}
}

@inproceedings{bb374516,
  author    = {Kuhnapfel, T. and Tan, T. and Venkatesh, S. and Igel, B.},
  title     = {Distributed Audio Network for Speech Enhancement in Challenging Noise
Backgrounds},
  booktitle = AVSBS09,
  year      = {2009},
  pages     = {308-313},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368620}
}

@inproceedings{bb374517,
  author    = {Kuhnapfel, T. and Tan, T. and Venkatesh, S. and Nordholm, S.E. and Igel, B.},
  title     = {Adaptive speech enhancement with varying noise backgrounds},
  booktitle = ICPR08,
  year      = {2008},
  pages     = {1-4},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368621}
}

% Volume prefix in pages normalised to the no-space form used by the other multi-volume entries (e.g. II:205-217).
@inproceedings{bb374518,
  author    = {Li, W.H. and Liu, M. and Zhu, Z.G. and Huang, T.S.},
  title     = {LDV Remote Voice Acquisition and Enhancement},
  booktitle = ICPR06,
  year      = {2006},
  pages     = {IV:262-265},
  bibsource = {http://www.visionbib.com/bibliography/other1024spen2.html#TT368622}
}

@article{bb374519,
  author    = {Yeh, C.Y. and Hwang, S.H.},
  title     = {Efficient text analyser with prosody generator-driven approach for
Mandarin text-to-speech},
  journal   = VISP,
  volume    = {152},
  year      = {2005},
  number    = {6},
  month     = {December},
  pages     = {793-799},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368623}
}

@article{bb374520,
  author    = {Chouireb, F. and Guerti, M.},
  title     = {Towards a high quality Arabic speech synthesis system based on neural
networks and residual excited vocal tract model},
  journal   = SIViP,
  volume    = {2},
  year      = {2008},
  number    = {1},
  month     = {January},
  pages     = {73-87},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368624}
}

@article{bb374521,
  author    = {Elfitri, I. and Gunel, B. and Kondoz, A.M.},
  title     = {Multichannel Audio Coding Based on Analysis by Synthesis},
  journal   = PIEEE,
  volume    = {99},
  year      = {2011},
  number    = {4},
  month     = {April},
  pages     = {657-670},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368625}
}

@article{bb374522,
  author    = {Jung, C.S. and Joo, Y.S. and Kang, H.G.},
  title     = {Waveform Interpolation-Based Speech Analysis/Synthesis for HMM-Based
TTS Systems},
  journal   = SPLetters,
  volume    = {19},
  year      = {2012},
  number    = {12},
  month     = {December},
  pages     = {809-812},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368626}
}

% Month added to match issue number 6; sibling entries that carry a number also carry a month. Confirm against the publisher record.
@article{bb374523,
  author    = {Carmona, J.L. and Barker, J. and Gomez, A.M. and Ma, N.},
  title     = {Speech Spectral Envelope Enhancement by HMM-Based Analysis/Resynthesis},
  journal   = SPLetters,
  volume    = {20},
  year      = {2013},
  number    = {6},
  month     = {June},
  pages     = {563-566},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368627}
}

% Volume corrected from 100 to 101: the same journal's vol. 99 is dated 2011 in this file, so a 2013 issue is vol. 101. Confirm against the publisher record.
@article{bb374524,
  author    = {Tokuda, K. and Nankaku, Y. and Toda, T. and Zen, H. and Yamagishi, J. and Oura, K.},
  title     = {Speech Synthesis Based on Hidden Markov Models},
  journal   = PIEEE,
  volume    = {101},
  year      = {2013},
  number    = {5},
  month     = {May},
  pages     = {1234-1252},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368628}
}

@article{bb374525,
  author    = {Ling, Z. and Kang, S. and Zen, H. and Senior, A. and Schuster, M. and Qian, X. and Meng, H. and Deng, L.},
  title     = {Deep Learning for Acoustic Modeling in Parametric Speech Generation:
A systematic review of existing techniques and future trends},
  journal   = SPMag,
  volume    = {32},
  year      = {2015},
  number    = {3},
  month     = {May},
  pages     = {35-52},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368629}
}

@article{bb374526,
  author    = {Bordel, G. and Penagarikano, M. and Rodriguez Fuentes, L.J. and Alvarez, A. and Varona, A.},
  title     = {Probabilistic Kernels for Improved Text-to-Speech Alignment in Long
Audio Tracks},
  journal   = SPLetters,
  volume    = {23},
  year      = {2016},
  number    = {1},
  month     = {January},
  pages     = {126-129},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368630}
}

@article{bb374527,
  author    = {Ninh, D.K. and Yamashita, Y.},
  title     = {F0 Parameterization of Glottalized Tones in HMM-Based Speech Synthesis
for Hanoi Vietnamese},
  journal   = IEICE,
  volume    = {E98-D},
  year      = {2015},
  number    = {12},
  month     = {December},
  pages     = {2280-2289},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368631}
}

@article{bb374528,
  author    = {Erro, D.},
  title     = {Two-Band Radial Postfiltering in Cepstral Domain with Application to
Speech Synthesis},
  journal   = SPLetters,
  volume    = {23},
  year      = {2016},
  number    = {2},
  month     = {February},
  pages     = {202-206},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368632}
}

@article{bb374529,
  author    = {Hu, Y.J. and Ling, Z.H.},
  title     = {DBN-based Spectral Feature Representation for Statistical Parametric
Speech Synthesis},
  journal   = SPLetters,
  volume    = {23},
  year      = {2016},
  number    = {3},
  month     = {March},
  pages     = {321-325},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368633}
}

@article{bb374530,
  author    = {Tsiaras, V. and Maia, R. and Diakoloukas, V. and Stylianou, Y. and Digalakis, V.},
  title     = {Global Variance in Speech Synthesis With Linear Dynamical Models},
  journal   = SPLetters,
  volume    = {23},
  year      = {2016},
  number    = {8},
  month     = {August},
  pages     = {1057-1061},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368634}
}

@article{bb374531,
  author    = {Wang, F.Z. and Nagano, H. and Kashino, K. and Igarashi, T.},
  title     = {Visualizing Video Sounds With Sound Word Animation to Enrich User
Experience},
  journal   = MultMed,
  volume    = {19},
  year      = {2017},
  number    = {2},
  month     = {February},
  pages     = {418-429},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368635}
}

@article{bb374532,
  author    = {Sharma, B. and Prasanna, S.R.M.},
  title     = {Enhancement of Spectral Tilt in Synthesized Speech},
  journal   = SPLetters,
  volume    = {24},
  year      = {2017},
  number    = {4},
  month     = {April},
  pages     = {382-386},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368636}
}

@article{bb374533,
  author    = {Singh, R. and Jimenez, A. and Oland, A.},
  title     = {Voice disguise by mimicry: deriving statistical articulometric evidence
to evaluate claimed impersonation},
  journal   = IET-Bio,
  volume    = {6},
  year      = {2017},
  number    = {4},
  month     = {July},
  pages     = {282-289},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368637}
}

@article{bb374534,
  author    = {Lee, K.S.},
  title     = {Restricted Boltzmann Machine-Based Voice Conversion for Nonparallel
Corpus},
  journal   = SPLetters,
  volume    = {24},
  year      = {2017},
  number    = {8},
  month     = {August},
  pages     = {1103-1107},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368638}
}

@article{bb374535,
  author    = {Reddy, M.K. and Rao, K.S.},
  title     = {Robust Pitch Extraction Method for the HMM-Based Speech Synthesis
System},
  journal   = SPLetters,
  volume    = {24},
  year      = {2017},
  number    = {8},
  month     = {August},
  pages     = {1133-1137},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368639}
}

@article{bb374536,
  author    = {Liu, Z.C. and Ling, Z.H. and Dai, L.R.},
  title     = {Statistical Parametric Speech Synthesis Using Generalized
Distillation Framework},
  journal   = SPLetters,
  volume    = {25},
  year      = {2018},
  number    = {5},
  month     = {May},
  pages     = {695-699},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368640}
}

@article{bb374537,
  author    = {Drugman, T. and Huybrechts, G. and Klimkov, V. and Moinet, A.},
  title     = {Traditional Machine Learning for Pitch Detection},
  journal   = SPLetters,
  volume    = {25},
  year      = {2018},
  number    = {11},
  month     = {November},
  pages     = {1745-1749},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368641}
}

@article{bb374538,
  author    = {Arik, S.O. and Jun, H. and Diamos, G.},
  title     = {Fast Spectrogram Inversion Using Multi-Head Convolutional Neural
Networks},
  journal   = SPLetters,
  volume    = {26},
  year      = {2019},
  number    = {1},
  month     = {January},
  pages     = {94-98},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368642}
}

@article{bb374539,
  author    = {Masuyama, Y. and Yatabe, K. and Oikawa, Y.},
  title     = {Griffin-Lim Like Phase Recovery via Alternating Direction Method of
Multipliers},
  journal   = SPLetters,
  volume    = {26},
  year      = {2019},
  number    = {1},
  month     = {January},
  pages     = {184-188},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368643}
}

@article{bb374540,
  author    = {Kwon, O. and Jang, I. and Ahn, C. and Kang, H.},
  title     = {An Effective Style Token Weight Control Technique for End-to-End
Emotional Speech Synthesis},
  journal   = SPLetters,
  volume    = {26},
  year      = {2019},
  number    = {9},
  month     = {September},
  pages     = {1383-1387},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368644}
}

@article{bb374541,
  author    = {Liu, Q. and Jackson, P.J.B. and Wang, W.},
  title     = {A Speech Synthesis Approach for High Quality Speech Separation and
Generation},
  journal   = SPLetters,
  volume    = {26},
  year      = {2019},
  number    = {12},
  month     = {December},
  pages     = {1872-1876},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368645}
}

@article{bb374542,
  author    = {Cotescu, M. and Drugman, T. and Huybrechts, G. and Lorenzo Trueba, J. and Moinet, A.},
  title     = {Voice Conversion for Whispered Speech Synthesis},
  journal   = SPLetters,
  volume    = {27},
  year      = {2020},
  pages     = {186-190},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368646}
}

@article{bb374543,
  author    = {Aylett, M.P. and Vinciarelli, A. and Wester, M.},
  title     = {Speech Synthesis for the Generation of Artificial Personality},
  journal   = AffCom,
  volume    = {11},
  year      = {2020},
  number    = {2},
  month     = {April},
  pages     = {361-372},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368647}
}

@article{bb374544,
  author    = {Rao, M.V.A. and Ghosh, P.K.},
  title     = {SFNet: A Computationally Efficient Source Filter Model Based Neural
Speech Synthesis},
  journal   = SPLetters,
  volume    = {27},
  year      = {2020},
  pages     = {1170-1174},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368648}
}

@article{bb374545,
  author    = {Zhou, Y. and Tian, X. and Li, H.},
  title     = {Multi-Task WaveRNN With an Integrated Architecture for Cross-Lingual
Voice Conversion},
  journal   = SPLetters,
  volume    = {27},
  year      = {2020},
  pages     = {1310-1314},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368649}
}

@article{bb374546,
  author    = {Yang, J.C. and Lin, P. and He, Q.H.},
  title     = {Constant-Q magnitude-phase coefficients extraction for synthetic speech
detection},
  journal   = IET-Bio,
  volume    = {9},
  year      = {2020},
  number    = {5},
  month     = {September},
  pages     = {216-221},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368650}
}

@article{bb374547,
  author    = {Liu, R. and Sisman, B. and Bao, F. and Gao, G. and Li, H.},
  title     = {Modeling Prosodic Phrasing With Multi-Task Learning in Tacotron-Based
TTS},
  journal   = SPLetters,
  volume    = {27},
  year      = {2020},
  pages     = {1470-1474},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368651}
}

@article{bb374548,
  author    = {Qi, J. and Du, J. and Siniscalchi, S.M. and Ma, X. and Lee, C.},
  title     = {On Mean Absolute Error for Deep Neural Network Based Vector-to-Vector
Regression},
  journal   = SPLetters,
  volume    = {27},
  year      = {2020},
  pages     = {1485-1489},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368652}
}

@article{bb374549,
  author    = {Yang, S. and Wang, Y. and Xie, L.},
  title     = {Adversarial Feature Learning and Unsupervised Clustering Based Speech
Synthesis for Found Data With Acoustic and Textual Noise},
  journal   = SPLetters,
  volume    = {27},
  year      = {2020},
  pages     = {1730-1734},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368653}
}

@article{bb374550,
  author    = {Lee, J.Y. and Cheon, S.J. and Choi, B.J. and Kim, N.S.},
  title     = {Memory Attention: Robust Alignment Using Gating Mechanism for
End-to-End Speech Synthesis},
  journal   = SPLetters,
  volume    = {27},
  year      = {2020},
  pages     = {2004-2008},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368654}
}

@article{bb374551,
  author    = {Zhang, Y. and Jiang, F. and Duan, Z.Y.},
  title     = {One-Class Learning Towards Synthetic Voice Spoofing Detection},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {937-941},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368655}
}

@article{bb374552,
  author    = {Saeki, T. and Takamichi, S. and Saruwatari, H.},
  title     = {Incremental Text-to-Speech Synthesis Using Pseudo Lookahead With
Large Pretrained Language Model},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {857-861},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368656}
}

@article{bb374553,
  author    = {Comanducci, L. and Bestagini, P. and Tagliasacchi, M. and Sarti, A. and Tubaro, S.},
  title     = {Reconstructing Speech From CNN Embeddings},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {952-956},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368657}
}

@article{bb374554,
  author    = {Hua, G. and Teoh, A.B.J. and Zhang, H.J.},
  title     = {Towards End-to-End Synthetic Speech Detection},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {1265-1269},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368658}
}

@article{bb374555,
  author    = {Cheon, S.J. and Choi, B.J. and Kim, M. and Lee, H. and Kim, N.S.},
  title     = {A Controllable Multi-Lingual Multi-Speaker Multi-Style Text-to-Speech
Synthesis With Multivariate Information Minimization},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {55-59},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368659}
}

@article{bb374556,
  author    = {Bilbao, S.},
  title     = {3D Interpolation in Wave-Based Acoustic Simulation},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {384-388},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368660}
}

@article{bb374557,
  author    = {Saleem, N. and Gao, J. and Irfan, M. and Verdu, E. and Fuente, J.P.},
  title     = {E2E-V2SResNet: Deep residual convolutional neural networks for
end-to-end video driven speech synthesis},
  journal   = IVC,
  volume    = {119},
  year      = {2022},
  pages     = {104389},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368661}
}

@article{bb374558,
  author    = {Sun, X. and Li, J.Y. and Tao, J.H.},
  title     = {Emotional Conversation Generation Orientated Syntactically
Constrained Bidirectional-Asynchronous Framework},
  journal   = AffCom,
  volume    = {13},
  year      = {2022},
  number    = {1},
  month     = {January},
  pages     = {187-198},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368662}
}

@article{bb374559,
  author    = {Liu, S.G. and Li, S. and Cheng, H.},
  title     = {Towards an End-to-End Visual-to-Raw-Audio Generation With GAN},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {3},
  month     = {March},
  pages     = {1299-1312},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368663}
}

@article{bb374560,
  author    = {Li, C.T. and Yang, F. and Yang, J.},
  title     = {The Role of Long-Term Dependency in Synthetic Speech Detection},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {1142-1146},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368664}
}

@article{bb374561,
  author    = {Cui, S.S. and Huang, B.Y. and Huang, J.W. and Kang, X.G.},
  title     = {Synthetic Speech Detection Based on Local Autoregression and Variance
Statistics},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {1462-1466},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368665}
}

@article{bb374562,
  author    = {Lei, Y. and Yang, S. and Zhu, X.F. and Xie, L. and Su, D.},
  title     = {Cross-Speaker Emotion Transfer Through Information Perturbation in
Emotional Speech Synthesis},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {1948-1952},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368666}
}

@article{bb374563,
  author    = {Choi, B.J. and Jeong, M. and Lee, J.Y. and Kim, N.S.},
  title     = {SNAC: Speaker-Normalized Affine Coupling Layer in Flow-Based
Architecture for Zero-Shot Multi-Speaker Text-to-Speech},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {2502-2506},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368667}
}

@article{bb374564,
  author    = {Choi, B.J. and Jeong, M. and Kim, M. and Kim, N.S.},
  title     = {Variable-Length Speaker Conditioning in Flow-Based Text-to-Speech},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {899-903},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368668}
}

@article{bb374565,
  author    = {Chen, L.C. and Chen, P.H. and Tsai, R.T.H. and Tsao, Y.},
  title     = {EPG2S: Speech Generation and Speech Enhancement Based on
Electropalatography and Audio Signals Using Multimodal Learning},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {2582-2586},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368669}
}

@article{bb374566,
  author    = {Zhou, K. and Sisman, B. and Rana, R. and Schuller, B.W. and Li, H.Z.},
  title     = {Emotion Intensity and its Control for Emotional Voice Conversion},
  journal   = AffCom,
  volume    = {14},
  year      = {2023},
  number    = {1},
  month     = {January},
  pages     = {31-48},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368670}
}

@article{bb374567,
  author    = {Huang, B. and Cui, S. and Huang, J.W. and Kang, X.},
  title     = {Discriminative Frequency Information Learning for End-to-End Speech
Anti-Spoofing},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {185-189},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368671}
}

@article{bb374568,
  author    = {Zhao, W. and Wang, Z. and Xu, L.},
  title     = {Mandarin Text-to-Speech Front-End With Lightweight Distilled
Convolution Network},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {249-253},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368672}
}

@article{bb374569,
  author    = {Ma, K.J. and Feng, Y.F. and Chen, B.J. and Zhao, G.Y.},
  title     = {End-to-End Dual-Branch Network Towards Synthetic Speech Detection},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {359-363},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368673}
}

@article{bb374570,
  author    = {Mira, R. and Vougioukas, K. and Ma, P.C. and Petridis, S. and Schuller, B.W. and Pantic, M.},
  title     = {End-to-End Video-to-Speech Synthesis Using Generative Adversarial
Networks},
  journal   = Cyber,
  volume    = {53},
  year      = {2023},
  number    = {6},
  month     = {June},
  pages     = {3454-3466},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368674}
}

@article{bb374571,
  author    = {Yoon, H.C. and Kim, C. and Um, S. and Yoon, H.W. and Kang, H.G.},
  title     = {SC-CNN: Effective Speaker Conditioning Method for Zero-Shot
Multi-Speaker Text-to-Speech Systems},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {593-597},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368675}
}

@inproceedings{bb374572,
  author    = {Gu, Y.W. and Zhao, X.F. and Yi, X.W. and Xiao, J.C.},
  title     = {Voice Conversion Using Learnable Similarity-guided Masked Autoencoder},
  booktitle = IWDW22,
  year      = {2022},
  pages     = {53-67},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368676}
}

@article{bb374573,
  author    = {Zhang, M.Y. and Zhou, X.H. and Wu, Z.Z. and Li, H.Z.},
  title     = {Towards Zero-Shot Multi-Speaker Multi-Accent Text-to-Speech Synthesis},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {947-951},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368677}
}

@article{bb374574,
  author    = {Ly, E. and Villegas, J.},
  title     = {Cartesian Genetic Programming Parameterization in the Context of
Audio Synthesis},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {1077-1081},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368678}
}

@article{bb374575,
  author    = {Mingote, V. and Gimeno, P. and Vicente, L. and Khurana, S. and Laurent, A. and Duret, J.},
  title     = {Direct Text to Speech Translation System Using Acoustic Units},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {1262-1266},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368679}
}

@article{bb374576,
  author    = {Wang, Z.C. and Chen, Y.Z. and Xie, L. and Tian, Q. and Wang, Y.P.},
  title     = {LM-VC: Zero-Shot Voice Conversion via Speech Generation Based on
Language Models},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {1157-1161},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368680}
}

@article{bb374577,
  author    = {van Niekerk, B. and Carbonneau, M.A. and Kamper, H.},
  title     = {Rhythm Modeling for Voice Conversion},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {1297-1301},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368681}
}

@article{bb374578,
  author    = {Zhou, K. and Sisman, B. and Rana, R. and Schuller, B.W. and Li, H.Z.},
  title     = {Speech Synthesis With Mixed Emotions},
  journal   = AffCom,
  volume    = {14},
  year      = {2023},
  number    = {4},
  month     = {October},
  pages     = {3120-3134},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368682}
}

@article{bb374579,
  author    = {Liu, Y. and Wei, L.F. and Qian, X.Y. and Zhang, T.H. and Chen, S.L. and Yin, X.C.},
  title     = {M3TTS: Multi-modal text-to-speech of multi-scale style control for
dubbing},
  journal   = PRL,
  volume    = {179},
  year      = {2024},
  pages     = {158-164},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368683}
}

@article{bb374580,
  author    = {Jeong, M. and Kim, M. and Lee, J.Y. and Kim, N.S.},
  title     = {Efficient Parallel Audio Generation Using Group Masked Language
Modeling},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {979-983},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368684}
}

% Braces keep the camel-case name SceneFake intact under styles that sentence-case titles.
@article{bb374581,
        AUTHOR = "Yi, J.Y. and Wang, C.L. and Tao, J.H. and Zhang, C.Y. and Fan, C.H. and Tian, Z.K. and Ma, H.X. and Fu, R.",
        TITLE = "{SceneFake}:
An initial dataset and benchmarks for scene fake audio detection",
        JOURNAL = PR,
        VOLUME = "152",
        YEAR = "2024",
        PAGES = "110468",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT368685"}

% Braces keep the camel-case name NaturalSpeech intact under styles that sentence-case titles.
@article{bb374582,
        AUTHOR = "Tan, X. and Chen, J.W. and Liu, H. and Cong, J. and Zhang, C. and Liu, Y.Q. and Wang, X. and Leng, Y. and Yi, Y.H. and He, L. and Zhao, S. and Qin, T. and Soong, F. and Liu, T.Y.",
        TITLE = "{NaturalSpeech}:
End-to-End Text-to-Speech Synthesis With Human-Level Quality",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "6",
        MONTH = "June",
        PAGES = "4234-4245",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT368686"}

@article{bb374583,
  author    = {Zhou, J. and Li, Y. and Fan, C.H. and Tao, L. and Kwan, H.K.},
  title     = {Multi-Level Information Aggregation Based Graph Attention Networks Towards Fake Speech Detection},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {1580-1584},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368687},
}

% Braces keep the mixed-case name NeuralVC intact under styles that sentence-case titles.
@article{bb374584,
        AUTHOR = "Cao, D.Y. and Zhang, Z.Y. and Zhang, J.Y.",
        TITLE = "{NeuralVC}: Any-to-Any Voice Conversion Using Neural Networks Decoder
for Real-Time Voice Conversion",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2070-2074",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT368688"}

% Braces keep the acronyms GAN and FARGAN upper-case under styles that sentence-case titles.
% NOTE(review): third author's surname restored with its umlaut; confirm against the publisher record.
@article{bb374585,
        AUTHOR = {Valin, J.M. and Mustafa, A. and B{\"u}the, J.},
        TITLE = "Very Low Complexity Speech Synthesis Using Framewise Autoregressive
{GAN} ({FARGAN}) With Pitch Prediction",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2115-2119",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT368689"}

@article{bb374586,
  author    = {Xue, J. and Fan, C.H. and Yi, J.Y. and Zhou, J. and Lv, Z.},
  title     = {Dynamic Ensemble Teacher-Student Distillation Framework for Light-Weight Fake Audio Detection},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {2305-2309},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368690},
}

% Braces keep the model names HiFi-GANw and HiFi-GAN intact under styles that sentence-case titles.
@article{bb374587,
        AUTHOR = "Cheng, X.Y. and Wang, Y.F. and Liu, C. and Hu, D.H. and Su, Z.",
        TITLE = "{HiFi-GANw}: Watermarked Speech Synthesis via Fine-Tuning of {HiFi-GAN}",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2440-2444",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT368691"}

@article{bb374588,
  author    = {Zhang, Y.M. and Du, R. and Tan, Z.H. and Wang, W.W. and Ma, Z.Y.},
  title     = {Generating Accurate and Diverse Audio Captions Through Variational Autoencoder Framework},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {2520-2524},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368692},
}

@article{bb374589,
  author    = {Huang, W.C. and Wu, Y.C. and Toda, T.},
  title     = {Multi-Speaker Text-to-Speech Training With Speaker Anonymized Data},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {2995-2999},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368693},
}

@article{bb374590,
  author    = {Lee, J. and Shin, Y. and Chang, J.H.},
  title     = {Differentiable Duration Refinement Using Internal Division for Non-Autoregressive Text-to-Speech},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {3154-3158},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368694},
}

@article{bb374591,
  author    = {Xu, X. and Ma, Z.Y. and Wu, M.Y. and Yu, K.},
  title     = {Towards Weakly Supervised Text-to-Audio Grounding},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {11126-11138},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368695},
}

% Braces keep the mixed-case name SegINR intact under styles that sentence-case titles.
@article{bb374592,
        AUTHOR = "Kim, M. and Jeong, M. and Lee, J.Y. and Kim, N.S.",
        TITLE = "{SegINR}: Segment-Wise Implicit Neural Representation for Sequence
Alignment in Neural Text-to-Speech",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "646-650",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT368696"}

@article{bb374593,
  author    = {Zheng, J.J. and Zhou, J. and Zheng, W.M. and Tao, L. and Kwan, H.K.},
  title     = {Controllable Multi-Speaker Emotional Speech Synthesis With an Emotion Representation of High Generalization Capability},
  journal   = AffCom,
  volume    = {16},
  year      = {2025},
  number    = {1},
  month     = {January},
  pages     = {68-82},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368697},
}

% Braces keep the camel-case name UnitDiff intact under styles that sentence-case titles.
@article{bb374594,
        AUTHOR = "Chen, K. and Huang, Z.H. and He, L. and Yan, Y.H.",
        TITLE = "{UnitDiff}: A Unit-Diffusion Model for Code-Switching Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "1051-1055",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT368698"}

@article{bb374595,
  author    = {Chang, Y. and Ko, Y.J.},
  title     = {Soft engagement with pseudo initiatives for multi-party dialogue generation},
  journal   = PRL,
  volume    = {191},
  year      = {2025},
  pages     = {103-109},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368699},
}

% Braces keep the mixed-case name ASSMark intact under styles that sentence-case titles.
@article{bb374596,
        AUTHOR = "He, Y.L. and Wang, H.X. and Qiu, Y.Q. and Cao, H.",
        TITLE = "{ASSMark}: Dual Defense Against Speech Synthesis Attack via Adversarial
Robust Watermarking",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "1870-1874",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT368700"}

@article{bb374597,
  author    = {Wang, R. and Chen, L.P. and Lee, K.A. and Ling, Z.H.},
  title     = {Asynchronous Voice Anonymization by Learning From Speaker-Adversarial Speech},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {1905-1909},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368701},
}

@article{bb374598,
  author    = {Feng, Y. and Zhang, X.B. and Feng, F.Y. and Zhang, G.L. and Xu, L.T.},
  title     = {Robust and Imperceptible Watermarking Framework for Generative Audio Models},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {3196-3200},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368702},
}

@article{bb374599,
  author    = {Lee, J. and Song, N.S. and Chang, J.H.},
  title     = {Vector Field Decomposition-Based Flow Matching for Zero-Shot Cross-Lingual Text-to-Speech},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {3560-3564},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT368703},
}

Last update: Jan 8, 2026 at 12:52:16