// voicebot/tts/tts.go

package tts

import (
	"context"

	texttospeech "cloud.google.com/go/texttospeech/apiv1"
	"gitlab.com/cinnamon/voiceagent/icserror"
	texttospeechpb "google.golang.org/genproto/googleapis/cloud/texttospeech/v1"
)


// TTS synthesizes the given SSML document into audio and returns the raw
// audio bytes.
//
// It is the exported entry point of this package and simply delegates to the
// unexported tts helper, passing SampleRate through as the requested sample
// rate in hertz. The returned error is nil on success; otherwise it is
// whatever *icserror.IcsError the helper reports.
func TTS(ssml string, SampleRate int32) ([]byte, *icserror.IcsError) {
	return tts(ssml, SampleRate)
}

// tts sends text, interpreted as SSML, to the Google Cloud Text-to-Speech
// service and returns the synthesized audio bytes.
//
// The request asks for a Korean ("ko-KR") female voice and LINEAR16-encoded
// audio at SampleRate hertz. A client is created with a background context on
// every call and closed when the function returns.
//
// When creating the client or performing the synthesis fails, the underlying
// error is attached to icserror.ICSERRTTSFail via SetError and that value is
// returned. When the service answers without any audio content,
// icserror.ICSERRTTSFailEmpty is returned.
func tts(text string, SampleRate int32) ([]byte, *icserror.IcsError) {
	ctx := context.Background()

	// NOTE(review): a fresh client is built for every request; the client
	// library documentation suggests reusing clients — consider sharing one
	// if this function is called frequently.
	client, err := texttospeech.NewClient(ctx)
	if err != nil {
		// NOTE(review): SetError is invoked on the package-level
		// ICSERRTTSFail value; presumably this mutates shared state — confirm
		// it is safe when tts is called from multiple goroutines.
		icserror.ICSERRTTSFail.SetError(err)
		return nil, icserror.ICSERRTTSFail
	}
	// The Close error is not inspected; the audio has already been received
	// (or the call has already failed) by the time it runs.
	defer client.Close()

	req := texttospeechpb.SynthesizeSpeechRequest{
		// The input is submitted as SSML, not as plain text.
		Input: &texttospeechpb.SynthesisInput{
			InputSource: &texttospeechpb.SynthesisInput_Ssml{Ssml: text},
		},
		// Voice selection: Korean language, female SSML voice gender.
		Voice: &texttospeechpb.VoiceSelectionParams{
			LanguageCode: "ko-KR",
			SsmlGender:   texttospeechpb.SsmlVoiceGender_FEMALE,
		},
		// Output format: LINEAR16 at the caller-supplied sample rate.
		AudioConfig: &texttospeechpb.AudioConfig{
			AudioEncoding:   texttospeechpb.AudioEncoding_LINEAR16,
			SampleRateHertz: SampleRate,
		},
	}

	resp, err := client.SynthesizeSpeech(ctx, &req)
	if err != nil {
		icserror.ICSERRTTSFail.SetError(err)
		return nil, icserror.ICSERRTTSFail
	}

	// The generated getter tolerates a nil response, so an unexpected
	// (nil, nil) result is reported as "empty" instead of panicking.
	audio := resp.GetAudioContent()
	if len(audio) == 0 {
		return nil, icserror.ICSERRTTSFailEmpty
	}

	return audio, nil
}