@article{bb368300,
        AUTHOR = "Hua, G. and Teoh, A. B. J. and Zhang, H. J.",
        TITLE = "Towards End-to-End Synthetic Speech Detection",
        JOURNAL = SPLetters,
        VOLUME = "28",
        YEAR = "2021",
        PAGES = "1265--1269",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362465"}

@article{bb368301,
        AUTHOR = "Cheon, S. J. and Choi, B. J. and Kim, M. and Lee, H. and Kim, N. S.",
        TITLE = "A Controllable Multi-Lingual Multi-Speaker Multi-Style Text-to-Speech
Synthesis With Multivariate Information Minimization",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "55--59",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362466"}

@article{bb368302,
        AUTHOR = "Bilbao, S.",
        TITLE = "3D Interpolation in Wave-Based Acoustic Simulation",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "384--388",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362467"}

@article{bb368303,
        AUTHOR = "Saleem, N. and Gao, J. and Irfan, M. and Verdu, E. and Fuente, J. P.",
        TITLE = "E2E-V2SResNet: Deep residual convolutional neural networks for
end-to-end video driven speech synthesis",
        JOURNAL = IVC,
        VOLUME = "119",
        YEAR = "2022",
        PAGES = "104389",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362468"}

@article{bb368304,
        AUTHOR = "Sun, X. and Li, J. Y. and Tao, J. H.",
        TITLE = "Emotional Conversation Generation Orientated Syntactically
Constrained Bidirectional-Asynchronous Framework",
        JOURNAL = AffCom,
        VOLUME = "13",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "187--198",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362469"}

@article{bb368305,
        AUTHOR = "Liu, S. G. and Li, S. and Cheng, H.",
        TITLE = "Towards an End-to-End Visual-to-Raw-Audio Generation With GAN",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "1299--1312",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362470"}

@article{bb368306,
        AUTHOR = "Li, C. T. and Yang, F. and Yang, J.",
        TITLE = "The Role of Long-Term Dependency in Synthetic Speech Detection",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "1142--1146",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362471"}

@article{bb368307,
        AUTHOR = "Cui, S. S. and Huang, B. Y. and Huang, J. W. and Kang, X. G.",
        TITLE = "Synthetic Speech Detection Based on Local Autoregression and Variance
Statistics",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "1462--1466",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362472"}

@article{bb368308,
        AUTHOR = "Lei, Y. and Yang, S. and Zhu, X. F. and Xie, L. and Su, D.",
        TITLE = "Cross-Speaker Emotion Transfer Through Information Perturbation in
Emotional Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "1948--1952",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362473"}

@article{bb368309,
        AUTHOR = "Choi, B. J. and Jeong, M. and Lee, J. Y. and Kim, N. S.",
        TITLE = "SNAC: Speaker-Normalized Affine Coupling Layer in Flow-Based
Architecture for Zero-Shot Multi-Speaker Text-to-Speech",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "2502--2506",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362474"}

@article{bb368310,
        AUTHOR = "Choi, B. J. and Jeong, M. and Kim, M. and Kim, N. S.",
        TITLE = "Variable-Length Speaker Conditioning in Flow-Based Text-to-Speech",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "899--903",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362475"}

@article{bb368311,
        AUTHOR = "Chen, L. C. and Chen, P. H. and Tsai, R. T. H. and Tsao, Y.",
        TITLE = "EPG2S: Speech Generation and Speech Enhancement Based on
Electropalatography and Audio Signals Using Multimodal Learning",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "2582--2586",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362476"}

@article{bb368312,
        AUTHOR = "Zhou, K. and Sisman, B. and Rana, R. and Schuller, B. W. and Li, H. Z.",
        TITLE = "Emotion Intensity and its Control for Emotional Voice Conversion",
        JOURNAL = AffCom,
        VOLUME = "14",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "31--48",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362477"}

@article{bb368313,
        AUTHOR = "Huang, B. and Cui, S. and Huang, J. W. and Kang, X.",
        TITLE = "Discriminative Frequency Information Learning for End-to-End Speech
Anti-Spoofing",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "185--189",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362478"}

@article{bb368314,
        AUTHOR = "Zhao, W. and Wang, Z. and Xu, L.",
        TITLE = "Mandarin Text-to-Speech Front-End With Lightweight Distilled
Convolution Network",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "249--253",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362479"}

@article{bb368315,
        AUTHOR = "Ma, K. J. and Feng, Y. F. and Chen, B. J. and Zhao, G. Y.",
        TITLE = "End-to-End Dual-Branch Network Towards Synthetic Speech Detection",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "359--363",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362480"}

@article{bb368316,
        AUTHOR = "Mira, R. and Vougioukas, K. and Ma, P. C. and Petridis, S. and Schuller, B. W. and Pantic, M.",
        TITLE = "End-to-End Video-to-Speech Synthesis Using Generative Adversarial
Networks",
        JOURNAL = Cyber,
        VOLUME = "53",
        YEAR = "2023",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "3454--3466",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362481"}

@article{bb368317,
        AUTHOR = "Yoon, H. C. and Kim, C. and Um, S. and Yoon, H. W. and Kang, H. G.",
        TITLE = "SC-CNN: Effective Speaker Conditioning Method for Zero-Shot
Multi-Speaker Text-to-Speech Systems",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "593--597",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362482"}

@inproceedings{bb368318,
        AUTHOR = "Gu, Y. W. and Zhao, X. F. and Yi, X. W. and Xiao, J. C.",
        TITLE = "Voice Conversion Using Learnable Similarity-guided Masked Autoencoder",
        BOOKTITLE = IWDW22,
        YEAR = "2022",
        PAGES = "53--67",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362483"}

@article{bb368319,
        AUTHOR = "Zhang, M. Y. and Zhou, X. H. and Wu, Z. Z. and Li, H. Z.",
        TITLE = "Towards Zero-Shot Multi-Speaker Multi-Accent Text-to-Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "947--951",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362484"}

@article{bb368320,
        AUTHOR = "Ly, E. and Villegas, J.",
        TITLE = "Cartesian Genetic Programming Parameterization in the Context of
Audio Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "1077--1081",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362485"}

@article{bb368321,
        AUTHOR = "Mingote, V. and Gimeno, P. and Vicente, L. and Khurana, S. and Laurent, A. and Duret, J.",
        TITLE = "Direct Text to Speech Translation System Using Acoustic Units",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "1262--1266",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362486"}

@article{bb368322,
        AUTHOR = "Wang, Z. C. and Chen, Y. Z. and Xie, L. and Tian, Q. and Wang, Y. P.",
        TITLE = "LM-VC: Zero-Shot Voice Conversion via Speech Generation Based on
Language Models",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "1157--1161",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362487"}

@article{bb368323,
        AUTHOR = "van Niekerk, B. and Carbonneau, M. A. and Kamper, H.",
        TITLE = "Rhythm Modeling for Voice Conversion",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "1297--1301",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362488"}

@article{bb368324,
        AUTHOR = "Zhou, K. and Sisman, B. and Rana, R. and Schuller, B. W. and Li, H. Z.",
        TITLE = "Speech Synthesis With Mixed Emotions",
        JOURNAL = AffCom,
        VOLUME = "14",
        YEAR = "2023",
        NUMBER = "4",
        MONTH = oct,
        PAGES = "3120--3134",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362489"}

@article{bb368325,
        AUTHOR = "Liu, Y. and Wei, L. F. and Qian, X. Y. and Zhang, T. H. and Chen, S. L. and Yin, X. C.",
        TITLE = "M3TTS: Multi-modal text-to-speech of multi-scale style control for
dubbing",
        JOURNAL = PRL,
        VOLUME = "179",
        YEAR = "2024",
        PAGES = "158--164",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362490"}

@article{bb368326,
        AUTHOR = "Jeong, M. and Kim, M. and Lee, J. Y. and Kim, N. S.",
        TITLE = "Efficient Parallel Audio Generation Using Group Masked Language
Modeling",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "979--983",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362491"}

@article{bb368327,
        AUTHOR = "Yi, J. Y. and Wang, C. L. and Tao, J. H. and Zhang, C. Y. and Fan, C. H. and Tian, Z. K. and Ma, H. X. and Fu, R.",
        TITLE = "SceneFake:
An initial dataset and benchmarks for scene fake audio detection",
        JOURNAL = PR,
        VOLUME = "152",
        YEAR = "2024",
        PAGES = "110468",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362492"}

@article{bb368328,
        AUTHOR = "Tan, X. and Chen, J. W. and Liu, H. and Cong, J. and Zhang, C. and Liu, Y. Q. and Wang, X. and Leng, Y. and Yi, Y. H. and He, L. and Zhao, S. and Qin, T. and Soong, F. and Liu, T. Y.",
        TITLE = "NaturalSpeech:
End-to-End Text-to-Speech Synthesis With Human-Level Quality",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "4234--4245",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362493"}

@article{bb368329,
        AUTHOR = "Zhou, J. and Li, Y. and Fan, C. H. and Tao, L. and Kwan, H. K.",
        TITLE = "Multi-Level Information Aggregation Based Graph Attention Networks
Towards Fake Speech Detection",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "1580--1584",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362494"}

@article{bb368330,
        AUTHOR = "Cao, D. Y. and Zhang, Z. Y. and Zhang, J. Y.",
        TITLE = "NeuralVC: Any-to-Any Voice Conversion Using Neural Networks Decoder
for Real-Time Voice Conversion",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2070--2074",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362495"}

@article{bb368331,
        AUTHOR = "Valin, J. M. and Mustafa, A. and Buthe, J.",
        TITLE = "Very Low Complexity Speech Synthesis Using Framewise Autoregressive
GAN (FARGAN) With Pitch Prediction",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2115--2119",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362496"}

@article{bb368332,
        AUTHOR = "Xue, J. and Fan, C. H. and Yi, J. Y. and Zhou, J. and Lv, Z.",
        TITLE = "Dynamic Ensemble Teacher-Student Distillation Framework for
Light-Weight Fake Audio Detection",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2305--2309",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362497"}

@article{bb368333,
        AUTHOR = "Cheng, X. Y. and Wang, Y. F. and Liu, C. and Hu, D. H. and Su, Z.",
        TITLE = "HiFi-GANw: Watermarked Speech Synthesis via Fine-Tuning of HiFi-GAN",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2440--2444",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362498"}

@article{bb368334,
        AUTHOR = "Zhang, Y. M. and Du, R. and Tan, Z. H. and Wang, W. W. and Ma, Z. Y.",
        TITLE = "Generating Accurate and Diverse Audio Captions Through Variational
Autoencoder Framework",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2520--2524",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362499"}

@article{bb368335,
        AUTHOR = "Huang, W. C. and Wu, Y. C. and Toda, T.",
        TITLE = "Multi-Speaker Text-to-Speech Training With Speaker Anonymized Data",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2995--2999",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362500"}

@article{bb368336,
        AUTHOR = "Lee, J. and Shin, Y. and Chang, J. H.",
        TITLE = "Differentiable Duration Refinement Using Internal Division for
Non-Autoregressive Text-to-Speech",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "3154--3158",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362501"}

@article{bb368337,
        AUTHOR = "Xu, X. and Ma, Z. Y. and Wu, M. Y. and Yu, K.",
        TITLE = "Towards Weakly Supervised Text-to-Audio Grounding",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "11126--11138",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362502"}

@article{bb368338,
        AUTHOR = "Kim, M. and Jeong, M. and Lee, J. Y. and Kim, N. S.",
        TITLE = "SegINR: Segment-Wise Implicit Neural Representation for Sequence
Alignment in Neural Text-to-Speech",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "646--650",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362503"}

@article{bb368339,
        AUTHOR = "Zheng, J. J. and Zhou, J. and Zheng, W. M. and Tao, L. and Kwan, H. K.",
        TITLE = "Controllable Multi-Speaker Emotional Speech Synthesis With an Emotion
Representation of High Generalization Capability",
        JOURNAL = AffCom,
        VOLUME = "16",
        YEAR = "2025",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "68--82",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362504"}

@article{bb368340,
        AUTHOR = "Chen, K. and Huang, Z. H. and He, L. and Yan, Y. H.",
        TITLE = "UnitDiff: A Unit-Diffusion Model for Code-Switching Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "1051--1055",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362505"}

@article{bb368341,
        AUTHOR = "Chang, Y. and Ko, Y. J.",
        TITLE = "Soft engagement with pseudo initiatives for multi-party dialogue
generation",
        JOURNAL = PRL,
        VOLUME = "191",
        YEAR = "2025",
        PAGES = "103--109",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362506"}

@article{bb368342,
        AUTHOR = "He, Y. L. and Wang, H. X. and Qiu, Y. Q. and Cao, H.",
        TITLE = "ASSMark: Dual Defense Against Speech Synthesis Attack via Adversarial
Robust Watermarking",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "1870--1874",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362507"}

@article{bb368343,
        AUTHOR = "Wang, R. and Chen, L. P. and Lee, K. A. and Ling, Z. H.",
        TITLE = "Asynchronous Voice Anonymization by Learning From Speaker-Adversarial
Speech",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "1905--1909",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362508"}

@article{bb368344,
        AUTHOR = "Feng, Y. and Zhang, X. B. and Feng, F. Y. and Zhang, G. L. and Xu, L. T.",
        TITLE = "Robust and Imperceptible Watermarking Framework for Generative Audio
Models",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "3196--3200",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362509"}

@article{bb368345,
        AUTHOR = "Lee, J. and Song, N. S. and Chang, J. H.",
        TITLE = "Vector Field Decomposition-Based Flow Matching for Zero-Shot
Cross-Lingual Text-to-Speech",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "3560--3564",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362510"}

@article{bb368346,
        AUTHOR = "Wang, H. and Yang, Y. F. and Liu, S. and Li, J. and Meng, L. and Liu, Y. Q. and Zhou, J. M. and Sun, H. Q. and Lu, Y. and Qin, Y.",
        TITLE = "StreamMel: Real-Time Zero-Shot Text-to-Speech Via Interleaved
Continuous Autoregressive Modeling",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "3530--3534",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362511"}

@inproceedings{bb368347,
        AUTHOR = "Kushwaha, S. S. and Tian, Y. P.",
        TITLE = "VinTAGe: Joint Video and Text Conditioning for Holistic Audio
Generation",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "13529--13539",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362512"}

@inproceedings{bb368348,
        AUTHOR = "Kim, J. H. and Choi, J. and Kim, J. H. and Jung, C. and Chung, J. S.",
        TITLE = "From Faces to Voices: Learning Hierarchical Representations for
High-quality Video-to-Speech",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "15874--15884",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362513"}

@inproceedings{bb368349,
        AUTHOR = "Cong, G. X. and Pan, J. and Li, L. and Qi, Y. K. and Peng, Y. X. and van den Hengel, A. J. and Yang, J. and Huang, Q. M.",
        TITLE = "EmoDubber: Towards High Quality and Emotion Controllable Movie
Dubbing",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "15863--15873",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362514"}

@inproceedings{bb368350,
        AUTHOR = "Zhang, Z. D. and Li, L. and Yan, C. G. and Liu, C. S. and van den Hengel, A. J. and Qi, Y. K.",
        TITLE = "Prosody-Enhanced Acoustic Pre-training and Acoustic-Disentangled
Prosody Adapting for Movie Dubbing",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "172--182",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362515"}

@inproceedings{bb368351,
        AUTHOR = "Rai, A. and Sridhar, S.",
        TITLE = "EgoSonics: Generating Synchronized Audio for Silent Egocentric Videos",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "4935--4946",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362516"}

@inproceedings{bb368352,
        AUTHOR = "Yadav, A. K. S. and Bhagtani, K. and Salvi, D. and Bestagini, P. and Delp, E. J.",
        TITLE = "FairSSD: Understanding Bias in Synthetic Speech Detectors",
        BOOKTITLE = WMF24,
        YEAR = "2024",
        PAGES = "4418--4428",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362517"}

@inproceedings{bb368353,
        AUTHOR = "Cuccovillo, L. and Gerhardt, M. and Aichroth, P.",
        TITLE = "Audio Transformer for Synthetic Speech Detection via Multi-Formant
Analysis",
        BOOKTITLE = WMF24,
        YEAR = "2024",
        PAGES = "4409--4417",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362518"}

@inproceedings{bb368354,
        AUTHOR = "Cong, G. X. and Li, L. and Qi, Y. K. and Zha, Z. J. and Wu, Q. and Wang, W. Y. and Jiang, B. and Yang, M. H. and Huang, Q. M.",
        TITLE = "Learning to Dub Movies via Hierarchical Prosody Models",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "14687--14697",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362519"}

@inproceedings{bb368355,
        AUTHOR = "Hsu, W. N. and Remez, T. and Shi, B. and Donley, J. and Adi, Y.",
        TITLE = "ReVISE: Self-Supervised Speech Resynthesis with Visual Input for
Universal and Generalized Speech Regeneration",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "18796--18806",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362520"}

@inproceedings{bb368356,
        AUTHOR = "Sun, C. Z. and Jia, S. and Hou, S. W. and Lyu, S. W.",
        TITLE = "AI-Synthesized Voice Detection Using Neural Vocoder Artifacts",
        BOOKTITLE = WMF23,
        YEAR = "2023",
        PAGES = "904--912",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362521"}

@inproceedings{bb368357,
        AUTHOR = "Noufi, C. and May, L. and Berger, J.",
        TITLE = "The Role of Vocal Persona in Natural and Synthesized Speech",
        BOOKTITLE = FG23,
        YEAR = "2023",
        PAGES = "1--4",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362522"}

@inproceedings{bb368358,
        AUTHOR = "Hwang, I. S. and Lee, S. H. and Lee, S. W.",
        TITLE = "StyleVC: Non-Parallel Voice Conversion with Adversarial Style
Generalization",
        BOOKTITLE = ICPR22,
        YEAR = "2022",
        PAGES = "23--30",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362523"}

@inproceedings{bb368359,
        AUTHOR = "Wang, W. B. and Song, Y. and Jha, S.",
        TITLE = "Autolv: Automatic Lecture Video Generator",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "1086--1090",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362524"}

@inproceedings{bb368360,
        AUTHOR = "Borzi, S. and Giudice, O. and Stanco, F. and Allegra, D.",
        TITLE = "Is synthetic voice detection research going into the right direction?",
        BOOKTITLE = WMF22,
        YEAR = "2022",
        PAGES = "71--80",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362525"}

@inproceedings{bb368361,
        AUTHOR = "Hassid, M. and Ramanovich, M. T. and Shillingford, B. and Wang, M. and Jia, Y. and Remez, T.",
        TITLE = "More than Words: In-the-Wild Visually-Driven Prosody for
Text-to-Speech",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10577--10587",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362526"}

@inproceedings{bb368362,
        AUTHOR = "Kwak, I. Y. and Kwag, S. and Lee, J. and Huh, J. H. and Lee, C. H. and Jeon, Y. B. and Hwang, J. H. and Yoon, J. W.",
        TITLE = "ResMax: Detecting Voice Spoofing Attacks with Residual Network and
Max Feature Map",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "4837--4844",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362527"}

@inproceedings{bb368363,
        AUTHOR = "Wang, D. H. and Wang, R. and Dong, L. and Yan, D. and Ren, Y. M.",
        TITLE = "Efficient Generation of Speech Adversarial Examples with Generative
Model",
        BOOKTITLE = IWDW20,
        YEAR = "2020",
        PAGES = "251--264",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362528"}

@inproceedings{bb368364,
        AUTHOR = "Zhou, H. and Liu, Z. and Xu, X. and Luo, P. and Wang, X.",
        TITLE = "Vision-Infused Deep Audio Inpainting",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "283--292",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362529"}

@inproceedings{bb368365,
        AUTHOR = "Bailer, W. and Wijnants, M. and Lievens, H. and Claes, S.",
        TITLE = "Multimedia Analytics Challenges and Opportunities for Creating
Interactive Radio Content",
        BOOKTITLE = MMMod20,
        YEAR = "2020",
        PAGES = "II:375--387",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362530"}

@inproceedings{bb368366,
        AUTHOR = "Huang, T. and Wang, H. X. and Chen, Y. and He, P. S.",
        TITLE = "GRU-SVM Model for Synthetic Speech Detection",
        BOOKTITLE = IWDW19,
        YEAR = "2019",
        PAGES = "115--125",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362531"}

@inproceedings{bb368367,
        AUTHOR = "Wong, A. and Xu, A. and Dudek, G.",
        TITLE = "Investigating Trust Factors in Human-Robot Shared Control:
Implicit Gender Bias Around Robot Voice",
        BOOKTITLE = CRV19,
        YEAR = "2019",
        PAGES = "195--200",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362532"}

@inproceedings{bb368368,
        AUTHOR = "Xiao, L. and Wang, Z.",
        TITLE = "Dense Convolutional Recurrent Neural Network for Generalized Speech
Animation",
        BOOKTITLE = ICPR18,
        YEAR = "2018",
        PAGES = "633--638",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362533"}

@inproceedings{bb368369,
        AUTHOR = "Shah, N. J. and Patil, H. A.",
        TITLE = "Analysis of Features and Metrics for Alignment in Text-Dependent Voice
Conversion",
        BOOKTITLE = PReMI17,
        YEAR = "2017",
        PAGES = "299--307",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362534"}

@inproceedings{bb368370,
        AUTHOR = "Rybarova, R. and Drozd, I. and Rozinaj, G.",
        TITLE = "GUI for interactive speech synthesis",
        BOOKTITLE = WSSIP16,
        YEAR = "2016",
        PAGES = "1--4",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362535"}

@inproceedings{bb368371,
        AUTHOR = "Coto Jimenez, M. and Goddard Close, J.",
        TITLE = "LSTM Deep Neural Networks Postfiltering for Improving the Quality of
Synthetic Voices",
        BOOKTITLE = MCPR16,
        YEAR = "2016",
        PAGES = "280--289",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362536"}

@inproceedings{bb368372,
        AUTHOR = "Vasek, M. and Rozinaj, G. and Rybarova, R.",
        TITLE = "Letter-To-Sound conversion for speech synthesizer",
        BOOKTITLE = WSSIP16,
        YEAR = "2016",
        PAGES = "1--4",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362537"}

@inproceedings{bb368373,
        AUTHOR = "Rybarova, R. and del Corral, G. and Rozinaj, G.",
        TITLE = "Diphone {Spanish} text-to-speech synthesizer",
        BOOKTITLE = WSSIP15,
        YEAR = "2015",
        PAGES = "121--124",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362538"}

@inproceedings{bb368374,
        AUTHOR = "Verma, R. and Sarkar, P. and Rao, K. S.",
        TITLE = "Conversion of neutral speech to storytelling style speech",
        BOOKTITLE = ICAPR15,
        YEAR = "2015",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362539"}

@inproceedings{bb368375,
        AUTHOR = "Narendra, N. P. and Rao, K. S.",
        TITLE = "Optimal residual frame based source modeling for HMM-based speech
synthesis",
        BOOKTITLE = ICAPR15,
        YEAR = "2015",
        PAGES = "1--5",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362540"}

@inproceedings{bb368376,
        AUTHOR = "Wang, Y. and Tao, J. H. and Yang, M. H. and Li, Y.",
        TITLE = "Extended Decision Tree with or Relationship for HMM-Based Speech
Synthesis",
        BOOKTITLE = ACPR13,
        YEAR = "2013",
        PAGES = "225--229",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362541"}

@inproceedings{bb368377,
        AUTHOR = "Gao, L. and Yu, H. Z. and Zhang, J. H. and Fang, H. P.",
        TITLE = "Research on HMM\_based speech synthesis for Lhasa dialect",
        BOOKTITLE = IASP11,
        YEAR = "2011",
        PAGES = "429--433",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362542"}

@inproceedings{bb368378,
        AUTHOR = "Chakraborty, R. and Garain, U.",
        TITLE = "Role of Synthetically Generated Samples on Speech Recognition in a
Resource-Scarce Language",
        BOOKTITLE = ICPR10,
        YEAR = "2010",
        PAGES = "1618--1621",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362543"}

@inproceedings{bb368379,
        AUTHOR = "Rao, K. S. and Maity, S. and Taru, A. and Koolagudi, S. G.",
        TITLE = "Unit Selection Using Linguistic, Prosodic and Spectral Distance for
Developing Text-to-Speech System in Hindi",
        BOOKTITLE = PReMI09,
        YEAR = "2009",
        PAGES = "531--536",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362544"}

@inproceedings{bb368380,
        AUTHOR = "Bahrampour, A. and Barkhoda, W. and Azami, B. Z.",
        TITLE = "Implementation of Three Text to Speech Systems for Kurdish Language",
        BOOKTITLE = CIARP09,
        YEAR = "2009",
        PAGES = "321--328",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362545"}

@inproceedings{bb368381,
        AUTHOR = "Shirbahadurkar, S. D. and Bormane, D. S.",
        TITLE = "Marathi Language Speech Synthesizer Using Concatenative Synthesis
Strategy (Spoken in Maharashtra, India)",
        BOOKTITLE = ICMV09,
        YEAR = "2009",
        PAGES = "181--185",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362546"}

@inproceedings{bb368382,
        AUTHOR = "Tuckova, J. and Holub, J. and Dubeda, T.",
        TITLE = "Technical and Phonetic Aspects of Speech Quality Assessment:
The Case of Prosody Synthesis",
        BOOKTITLE = COST08,
        YEAR = "2008",
        PAGES = "126--132",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362547"}

@inproceedings{bb368383,
        AUTHOR = "Bauer, D. and Kannampuzha, J. and Kroger, B. J.",
        TITLE = "Articulatory Speech Re-synthesis:
Profiting from Natural Acoustic Speech Data",
        BOOKTITLE = COST08,
        YEAR = "2008",
        PAGES = "344--355",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362548"}

@inproceedings{bb368384,
        AUTHOR = "Gu, H. Y. and Cai, C. L. and Cai, S. F.",
        TITLE = "An HNM-Based Speaker-Nonspecific Timbre Transformation Scheme for
Speech Synthesis",
        BOOKTITLE = CISP09,
        YEAR = "2009",
        PAGES = "1--5",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT362549"}

@article{bb368385,
  author    = {Lung, S.Y. and Chen, C.C.T.},
  title     = {A new approach for text-independent speaker recognition},
  journal   = PR,
  volume    = {33},
  number    = {8},
  pages     = {1401-1403},
  month     = {August},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362550},
}

% Title note: braces keep the acronym SVD upper-case when a style sentence-cases titles.
@article{bb368386,
        AUTHOR = "Lung, S.Y.",
        TITLE = "Multi-resolution form of {SVD} for text-independent speaker recognition",
        JOURNAL = PR,
        VOLUME = "35",
        YEAR = "2002",
        NUMBER = "7",
        MONTH = "July",
        PAGES = "1637-1639",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024.html#TT362551"}

@article{bb368387,
  author    = {Lung, S.Y.},
  title     = {Further reduced form of wavelet feature for text independent speaker recognition},
  journal   = PR,
  volume    = {37},
  number    = {7},
  pages     = {1565-1566},
  month     = {July},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362552},
}

@article{bb368388,
  author    = {Lung, S.Y.},
  title     = {Feature extracted from wavelet eigenfunction estimation for text-independent speaker recognition},
  journal   = PR,
  volume    = {37},
  number    = {7},
  pages     = {1543-1544},
  month     = {July},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362553},
}

@article{bb368389,
  author    = {Lung, S.Y.},
  title     = {Wavelet feature domain adaptive noise reduction using learning algorithm for text-independent speaker recognition},
  journal   = PR,
  volume    = {40},
  number    = {9},
  pages     = {2603-2606},
  month     = {September},
  year      = {2007},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362554},
}

@article{bb368390,
  author    = {Lung, S.Y.},
  title     = {Efficient text independent speaker recognition with wavelet feature selection based multilayered neural network using supervised learning algorithm},
  journal   = PR,
  volume    = {40},
  number    = {12},
  pages     = {3616-3620},
  month     = {December},
  year      = {2007},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362555},
}

% Title note: braces keep the eponym Gaussian capitalized when a style sentence-cases titles.
@article{bb368391,
        AUTHOR = "Lung, S.Y.",
        TITLE = "Distributed genetic algorithm for {Gaussian} mixture model based speaker
identification",
        JOURNAL = PR,
        VOLUME = "36",
        YEAR = "2003",
        NUMBER = "10",
        MONTH = "October",
        PAGES = "2479-2481",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024.html#TT362556"}

@article{bb368392,
  author    = {Lung, S.Y.},
  title     = {Adaptive fuzzy wavelet algorithm for text-independent speaker recognition},
  journal   = PR,
  volume    = {37},
  number    = {10},
  pages     = {2095-2096},
  month     = {October},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362557},
}

@article{bb368393,
  author    = {Lung, S.Y.},
  title     = {Wavelet feature selection based neural networks with application to the text independent speaker identification},
  journal   = PR,
  volume    = {39},
  number    = {8},
  pages     = {1518-1521},
  month     = {August},
  year      = {2006},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362558},
}

% Title note: braces keep the proper name Riesz capitalized when a style sentence-cases titles.
@article{bb368394,
        AUTHOR = "Lung, S.Y.",
        TITLE = "Feature extracted from wavelet decomposition using biorthogonal {Riesz}
basis for text-independent speaker recognition",
        JOURNAL = PR,
        VOLUME = "41",
        YEAR = "2008",
        NUMBER = "10",
        MONTH = "October",
        PAGES = "3068-3070",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024.html#TT362559"}

@article{bb368395,
  author    = {Chen, K. and Wu, T.Y. and Zhang, H.J.},
  title     = {On the use of nearest feature line for speaker identification},
  journal   = PRL,
  volume    = {23},
  number    = {14},
  pages     = {1735-1746},
  month     = {December},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362560},
}

@article{bb368396,
  author    = {Ramachandran, R.P. and Farrell, K.R. and Ramachandran, R. and Mammone, R.J.},
  title     = {Speaker recognition: general classifier approaches and data fusion methods},
  journal   = PR,
  volume    = {35},
  number    = {12},
  pages     = {2801-2821},
  month     = {December},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362561},
}

@article{bb368397,
  author    = {Chen, K.},
  title     = {Towards better making a decision in speaker verification},
  journal   = PR,
  volume    = {36},
  number    = {2},
  pages     = {329-346},
  month     = {February},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362562},
}

@article{bb368398,
  author    = {Rodriguez Linares, L. and Garcia Mateo, C. and Alba Castro, J.L.},
  title     = {On combining classifiers for speaker authentication},
  journal   = PR,
  volume    = {36},
  number    = {2},
  pages     = {347-359},
  month     = {February},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362563},
}

@article{bb368399,
  author    = {Damper, R.I. and Higgins, J.E.},
  title     = {Improving speaker identification in noise by subband processing and decision fusion},
  journal   = PRL,
  volume    = {24},
  number    = {13},
  pages     = {2167-2173},
  month     = {September},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/other1024.html#TT362564},
}

Last update: Oct 6, 2025 at 14:07:43