You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

110 lines
3.1 KiB
Go

package tts
import (
"context"
texttospeech "cloud.google.com/go/texttospeech/apiv1"
"gitlab.com/cinnamon/voiceagent/icserror"
texttospeechpb "google.golang.org/genproto/googleapis/cloud/texttospeech/v1"
)
// TTS is the exported entry point for speech synthesis. It forwards the SSML
// document and the requested sample rate to tts unchanged and returns whatever
// tts returns: the audio bytes on success, or a nil slice together with an
// *icserror.IcsError on failure.
//
// The request construction and the service call live entirely in tts; this
// function adds no behavior of its own.
func TTS(ssml string, SampleRate int32) ([]byte, *icserror.IcsError) {
	audio, icsErr := tts(ssml, SampleRate)
	return audio, icsErr
}
// tts sends text to the Cloud Text-to-Speech client as SSML input and returns
// the AudioContent bytes of the response.
//
// The request asks for a "ko-KR" voice with the FEMALE SSML gender and for
// LINEAR16-encoded audio at SampleRate hertz.
//
// Returns:
//   - the response's AudioContent and a nil error on success;
//   - nil and icserror.ICSERRTTSFail when creating the client or the
//     SynthesizeSpeech call fails (the underlying error is passed to SetError
//     on that value first);
//   - nil and icserror.ICSERRTTSFailEmpty when the response carries no audio.
//
// NOTE(review): SetError is invoked on the package-level ICSERRTTSFail value,
// so concurrent callers presumably share that state — confirm against the
// icserror package.
func tts(text string, SampleRate int32) ([]byte, *icserror.IcsError) {
	bg := context.Background()

	// A fresh client is created for every call and closed when tts returns.
	ttsClient, dialErr := texttospeech.NewClient(bg)
	if dialErr != nil {
		icserror.ICSERRTTSFail.SetError(dialErr)
		return nil, icserror.ICSERRTTSFail
	}
	defer ttsClient.Close()

	// The caller's string is submitted through the SSML input variant.
	input := &texttospeechpb.SynthesisInput{
		InputSource: &texttospeechpb.SynthesisInput_Ssml{Ssml: text},
	}
	// Voice selection: Korean language code, female SSML gender.
	voice := &texttospeechpb.VoiceSelectionParams{
		LanguageCode: "ko-KR",
		SsmlGender:   texttospeechpb.SsmlVoiceGender_FEMALE,
	}
	// Output format: LINEAR16 at the sample rate requested by the caller.
	audioCfg := &texttospeechpb.AudioConfig{
		AudioEncoding:   texttospeechpb.AudioEncoding_LINEAR16,
		SampleRateHertz: SampleRate,
	}
	request := &texttospeechpb.SynthesizeSpeechRequest{
		Input:       input,
		Voice:       voice,
		AudioConfig: audioCfg,
	}

	reply, synthErr := ttsClient.SynthesizeSpeech(bg, request)
	if synthErr != nil {
		icserror.ICSERRTTSFail.SetError(synthErr)
		return nil, icserror.ICSERRTTSFail
	}

	// An empty payload is reported as its own error rather than as success.
	if audio := reply.AudioContent; len(audio) > 0 {
		return audio, nil
	}
	return nil, icserror.ICSERRTTSFailEmpty
}