% Tamura, Kagoshima, Akamine (2010): conference paper in the INTERSPEECH 2010
% proceedings, pp. 2406--2409. Exported from a publication database.
% Cleanup notes:
%   - the exporter's "series" field was a verbatim copy of "booktitle" and has
%     been dropped so styles do not print the proceedings title twice;
%   - the database copyright boilerplate lives in the non-standard "copyright"
%     field (ignored by standard styles) instead of "note", which is printed;
%   - the acronym in "booktitle" is brace-protected against recasing.
@inproceedings{a7006c1f2b1944a289d96a455e8b7b33,
  author    = "Tamura, Masatsune and Kagoshima, Takehiko and Akamine, Masami",
  title     = "Sub-band basis spectrum model for pitch-synchronous log-spectrum and phase based on approximation of sparse coding",
  booktitle = "Proceedings of the 11th Annual Conference of the International Speech Communication Association, {INTERSPEECH} 2010",
  publisher = "International Speech Communication Association",
  pages     = "2406--2409",
  year      = "2010",
  language  = "English",
  keywords  = "Sparse coding, Spectrum parameter, Speech synthesis, Sub-band basis spectrum model, Voice adaptation",
  abstract  = "In this paper, we propose a sub-band basis spectrum model which is a new spectrum representation model based on a linear combination of sub-band basis vectors. We apply sparse coding to the pitch-synchronously analyzed log-spectra. Based on the approximation of the resulting basis, we obtain subband basis vectors with 1-cycle sinusoidal shapes that have mel-scale for lower frequencies and equally spaced scale for higher frequencies. Parameters of the sub-band basis spectrum model representing the log spectrum and the phase spectrum are calculated by fitting the basis to the spectrum. Since the parameters represent the shape of a spectrum, it can be easily used for voice adaptation, interpolation and conversion. Experimental results show that the analysis synthesis speech based on the proposed model is close to original speech and that there is no significant difference between the synthetic speech using analysis-synthesis database and those using original database for unit-fusion based TTS[1].",
  copyright = "Copyright 2020 Elsevier B.V., All rights reserved.",
}