You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
110 lines
3.1 KiB
Go
110 lines
3.1 KiB
Go
3 years ago
|
package tts
|
||
|
|
||
|
import (
|
||
|
"context"
|
||
|
|
||
|
texttospeech "cloud.google.com/go/texttospeech/apiv1"
|
||
|
"gitlab.com/cinnamon/voiceagent/icserror"
|
||
|
texttospeechpb "google.golang.org/genproto/googleapis/cloud/texttospeech/v1"
|
||
|
)
|
||
|
|
||
|
|
||
|
func TTS(ssml string, SampleRate int32) ([]byte, *icserror.IcsError) {
|
||
|
return tts(ssml, SampleRate)
|
||
|
/*
|
||
|
// Instantiates a client.
|
||
|
ctx := context.Background()
|
||
|
|
||
|
client, err := texttospeech.NewClient(ctx)
|
||
|
if err != nil {
|
||
|
icserror.ICSERRTTSFail.SetError(err)
|
||
|
return nil, icserror.ICSERRTTSFail
|
||
|
}
|
||
|
defer client.Close()
|
||
|
|
||
|
// Perform the text-to-speech request on the text input with the selected
|
||
|
// voice parameters and audio file type.
|
||
|
req := texttospeechpb.SynthesizeSpeechRequest{
|
||
|
// Set the text input to be synthesized.
|
||
|
Input: &texttospeechpb.SynthesisInput{
|
||
|
InputSource: &texttospeechpb.SynthesisInput_Ssml{Ssml: ssml},
|
||
|
},
|
||
|
// Build the voice request, select the language code ("en-US") and the SSML
|
||
|
// voice gender ("neutral").
|
||
|
Voice: &texttospeechpb.VoiceSelectionParams{
|
||
|
LanguageCode: "ko-KR",
|
||
|
SsmlGender: texttospeechpb.SsmlVoiceGender_FEMALE,
|
||
|
},
|
||
|
// Select the type of audio file you want returned.
|
||
|
AudioConfig: &texttospeechpb.AudioConfig{
|
||
|
AudioEncoding: texttospeechpb.AudioEncoding_LINEAR16,
|
||
|
SampleRateHertz: SampleRate,
|
||
|
},
|
||
|
}
|
||
|
|
||
|
resp, err := client.SynthesizeSpeech(ctx, &req)
|
||
|
if err != nil {
|
||
|
icserror.ICSERRTTSFail.SetError(err)
|
||
|
return nil, icserror.ICSERRTTSFail
|
||
|
}
|
||
|
if len(resp.AudioContent) <= 0 {
|
||
|
return nil, icserror.ICSERRTTSFailEmpty
|
||
|
}
|
||
|
|
||
|
return resp.AudioContent, nil
|
||
|
*/
|
||
|
}
|
||
|
|
||
|
func tts(text string, SampleRate int32) ([]byte, *icserror.IcsError) {
|
||
|
// Instantiates a client.
|
||
|
ctx := context.Background()
|
||
|
|
||
|
client, err := texttospeech.NewClient(ctx)
|
||
|
if err != nil {
|
||
|
icserror.ICSERRTTSFail.SetError(err)
|
||
|
return nil, icserror.ICSERRTTSFail
|
||
|
}
|
||
|
defer client.Close()
|
||
|
|
||
|
// Perform the text-to-speech request on the text input with the selected
|
||
|
// voice parameters and audio file type.
|
||
|
req := texttospeechpb.SynthesizeSpeechRequest{
|
||
|
// Set the text input to be synthesized.
|
||
|
Input: &texttospeechpb.SynthesisInput{
|
||
|
InputSource: &texttospeechpb.SynthesisInput_Ssml{Ssml: text},
|
||
|
},
|
||
|
// Build the voice request, select the language code ("en-US") and the SSML
|
||
|
// voice gender ("neutral").
|
||
|
Voice: &texttospeechpb.VoiceSelectionParams{
|
||
|
LanguageCode: "ko-KR",
|
||
|
SsmlGender: texttospeechpb.SsmlVoiceGender_FEMALE,
|
||
|
},
|
||
|
// Select the type of audio file you want returned.
|
||
|
AudioConfig: &texttospeechpb.AudioConfig{
|
||
|
AudioEncoding: texttospeechpb.AudioEncoding_LINEAR16,
|
||
|
SampleRateHertz: SampleRate,
|
||
|
},
|
||
|
}
|
||
|
|
||
|
resp, err := client.SynthesizeSpeech(ctx, &req)
|
||
|
if err != nil {
|
||
|
icserror.ICSERRTTSFail.SetError(err)
|
||
|
return nil, icserror.ICSERRTTSFail
|
||
|
}
|
||
|
if len(resp.AudioContent) <= 0 {
|
||
|
return nil, icserror.ICSERRTTSFailEmpty
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
// The resp's AudioContent is binary.
|
||
|
filename := "output.mp3"
|
||
|
err = ioutil.WriteFile(filename, resp.AudioContent, 0644)
|
||
|
if err != nil {
|
||
|
log.Fatal(err)
|
||
|
}
|
||
|
fmt.Printf("Audio content written to file: %v\n", filename)
|
||
|
*/
|
||
|
|
||
|
return resp.AudioContent, nil
|
||
|
}
|