You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
voicebot/stt/sttselvas.go

294 lines
7.7 KiB
Go

3 years ago
// This file binds the SELVAS STT C SDK (Lvcsr_Api.h) through cgo.
package stt
/*
#cgo LDFLAGS: -lstdc++ -lssl -lcrypto /home/leejj9612/dev/voiceagent/extlib/selvasstt/SDK/LIB/c_linux/x64/libASRLIB.a
#cgo CFLAGS: -I /home/leejj9612/dev/voiceagent/extlib/selvasstt/SDK/INCLUDE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <Lvcsr_Api.h>
// getResultData joins the recognised tokens into one freshly malloc'ed,
// NUL-terminated string. Every token except the "<eps>" placeholder is
// written preceded by a single space, so a non-empty result starts with a
// space. Returns NULL when the allocation fails. The caller owns the returned
// buffer and must release it with free().
//
// NOTE: this function lives inside a Go block comment (the cgo preamble), so
// only line comments may be used here; a C block comment would end the preamble.
char* getResultData(LVCSR_DATA_RESULT* pDataResult, long long nCount) {
	const char* skipToken = "<eps>";
	// Start at 1 to reserve room for the terminating NUL; the previous sizing
	// omitted it and overran the buffer by one byte when nothing was skipped.
	size_t len = 1;
	if (pDataResult == NULL) {
		nCount = 0;
	}
	for (long long i = 0; i < nCount; i++) {
		if (pDataResult[i].pTokenStr == NULL) {
			continue;
		}
		// Token bytes plus the separating space written in front of it.
		len += strlen(pDataResult[i].pTokenStr) + 1;
	}
	char* result = malloc(len);
	if (result == NULL) {
		return NULL;
	}
	// Append through a moving cursor instead of strcat, which rescans the
	// whole buffer on every call.
	char* out = result;
	for (long long i = 0; i < nCount; i++) {
		const char* token = pDataResult[i].pTokenStr;
		if (token == NULL || strcmp(token, skipToken) == 0) {
			continue;
		}
		size_t tokenLen = strlen(token);
		*out++ = ' ';
		memcpy(out, token, tokenLen);
		out += tokenLen;
	}
	*out = '\0';
	return result;
}
*/
import "C"
import (
"fmt"
"os"
"strings"
"sync"
"unsafe"
"gitlab.com/cinnamon/voiceagent/icserror"
)
// Connection timeouts handed to ASR_SVC_OPEN by NewSTTS, and the recognition
// settings that SendSTT writes into LVCSR_DATA_INFO before streaming audio.
const (
// CONNECT_TIMEOUT is passed to ASR_SVC_OPEN as the connect timeout
// (unit not visible in this file; presumably seconds — TODO confirm).
CONNECT_TIMEOUT = 3
// READ_CONNECT_TIMEOUT is passed to ASR_SVC_OPEN as the read timeout
// (presumably seconds — TODO confirm).
READ_CONNECT_TIMEOUT = 5
// Model information copied into LVCSR_DATA_INFO by SendSTT.
// MODEL_ID is stored in nModelId.
MODEL_ID = 0
// KWD_ID is stored in nKwdId (presumably -1 means "no keyword set" — TODO confirm).
KWD_ID = -1
// CODEC_TYPE is stored in nCodecType.
CODEC_TYPE = 0 // 8k
// LANGUAGE is stored in nCharSet, i.e. it selects the character set.
LANGUAGE = 1 // utf-8
// USED_EPD is stored in bEpdUsed.
USED_EPD = 1 // epd used
// USED_SCORE is stored in bScoreUsed.
USED_SCORE = 0 // used off
)
// STTSelvas is a client session for a SELVAS STT server. It is created by
// NewSTTS, fed audio through SendSTT and released with Close.
type STTSelvas struct {
// handle is initialised to -1 by NewSTTS; Close refuses to run while it is negative.
handle int
// authCode is the authentication string sent with ASR_SVC_SET_AUTH.
authCode string
// ch is checked by Close (must not be negative); it is never assigned in this file.
ch int
// text, voiceBuf, voiceBufCur, silencenum and validnum are not referenced in this file.
text string
voiceBuf []byte
voiceBufCur int64
silencenum int
validnum int64 //rms counter
// uDataSize is the byte count reported to ASR_SVC_RECG_DATA for each chunk; NewSTTS sets it to 1600.
uDataSize int
// uEndOfSpeech is the end-of-speech flag passed to ASR_SVC_RECG_DATA; SendSTT sets it to 1 before the final call.
uEndOfSpeech int
// STTInfo holds the C SDK structures shared by all calls of this session.
STTInfo STTInfo
}
// STTInfo groups the C SDK structures (declared in Lvcsr_Api.h) that one
// STTSelvas session passes to the ASR_SVC_* functions.
type STTInfo struct {
// LVCSR_SOCK_HEAD is passed to every ASR_SVC_* call made in this file.
LVCSR_SOCK_HEAD C.LVCSR_SOCK_HEAD
// LVCSR_EPD_INFO is the output argument of ASR_SVC_RECG_DATA; SendSTT stops sending once it equals 2.
LVCSR_EPD_INFO C.LVCSR_EPD_INFO
// LVCSR_DATA_AUTHENTICATION is filled by NewSTTS and passed to ASR_SVC_SET_AUTH.
LVCSR_DATA_AUTHENTICATION C.LVCSR_DATA_AUTHENTICATION
// LVCSR_RECOG_RESULT is filled by ASR_SVC_RECG_PROC and released by ASR_SVC_RECG_PROC_FREE.
LVCSR_RECOG_RESULT C.LVCSR_RECOG_RESULT
// LVCSR_DATA_RESULT and LVCSR_RECOG_MID_RESULT are not referenced in this file.
LVCSR_DATA_RESULT C.LVCSR_DATA_RESULT
LVCSR_RECOG_MID_RESULT C.LVCSR_RECOG_MID_RESULT
// LVCSR_DATA_INFO carries the model/codec settings that SendSTT passes to ASR_SVC_RECG_SET_LIST.
LVCSR_DATA_INFO C.LVCSR_DATA_INFO
}
// STTSResult pairs a recognition text with the error reported alongside it.
// Values are built with NewSTTSResult.
type STTSResult struct {
// result is the recognised text.
result string
// error is the status that accompanied the text.
error *icserror.IcsError
}
// NewSTTS connects to the SELVAS STT server at IP:port, authenticates and
// opens a recognition channel.
//
// It returns ICSERRInvalidParam for an empty IP or a non-positive port,
// ICSERRSTTConnectTimeout when ASR_SVC_OPEN reports -1, and
// ICSERRSTTFailInit for every other failure. When a step after the connect
// fails, the server connection is closed again before returning.
func NewSTTS(IP string, port int) (*STTSelvas, *icserror.IcsError) {
	if len(IP) == 0 || port <= 0 {
		return nil, icserror.ICSERRInvalidParam
	}

	stts := &STTSelvas{handle: -1, authCode: "LGUPlusManager", uDataSize: 1600, uEndOfSpeech: 0}
	head := &stts.STTInfo.LVCSR_SOCK_HEAD

	csIP := C.CString(IP)
	defer C.free(unsafe.Pointer(csIP))

	rc := C.ASR_SVC_OPEN(csIP, C.long(port), C.long(CONNECT_TIMEOUT), C.long(READ_CONNECT_TIMEOUT), head)
	switch code := int(rc); {
	case code == -1:
		return nil, icserror.ICSERRSTTConnectTimeout
	case code < 0:
		// -2 and any other negative code: the remaining SDK calls in this file
		// treat every negative value as failure, so do the same here instead
		// of continuing with an unopened connection.
		return nil, icserror.ICSERRSTTFailInit
	}

	// Auth
	csAuthCode := C.CString(stts.authCode)
	defer C.free(unsafe.Pointer(csAuthCode))
	auth := &stts.STTInfo.LVCSR_DATA_AUTHENTICATION
	auth.nAuthenticationLen = C.longlong(len(stts.authCode))
	auth.pAuthentication = csAuthCode
	rc = C.ASR_SVC_SET_AUTH(head, auth)
	// csAuthCode is freed when this function returns; do not leave a dangling
	// pointer behind in the long-lived session struct.
	auth.pAuthentication = nil
	auth.nAuthenticationLen = 0
	if int(rc) < 0 {
		C.ASR_SVC_CLOS(head) // best effort: release the connection opened above
		return nil, icserror.ICSERRSTTFailInit
	}

	// Channel Connect
	rc = C.ASR_SVC_RECG_OPEN(head)
	if int(rc) < 0 {
		C.ASR_SVC_CLOS(head) // best effort: release the connection opened above
		return nil, icserror.ICSERRSTTFailInit
	}

	// Mark the session as open. Close rejects sessions whose handle is
	// negative, and nothing else ever moved it away from -1.
	stts.handle = 0
	return stts, nil
}
// Close shuts down the recognition channel and then the server connection.
//
// It returns ICSERRSTTNotInit when the session is not open (negative handle
// or channel) or when either SDK close call reports a negative code. The
// server connection is closed even if closing the channel fails, and the
// session is marked closed afterwards so a second Close does not touch the
// SDK again.
func (s *STTSelvas) Close() *icserror.IcsError {
	if s.handle < 0 || s.ch < 0 {
		return icserror.ICSERRSTTNotInit
	}

	head := &s.STTInfo.LVCSR_SOCK_HEAD

	// Channel Connection Close
	channelRC := C.ASR_SVC_RECG_CLOS(head)
	// Server Close: attempted unconditionally so a failed channel close does
	// not leak the underlying connection.
	serverRC := C.ASR_SVC_CLOS(head)

	s.handle = -1

	if int(channelRC) < 0 || int(serverRC) < 0 {
		// Report an STT error; the previous TTS error code was a copy-paste slip.
		return icserror.ICSERRSTTNotInit
	}
	return nil
}
// logSelvasFailure reports the native return code of a failed SDK call on
// stderr; the icserror values handed back to callers do not carry it.
func logSelvasFailure(op string, rc int) {
	fmt.Fprintf(os.Stderr, "selvas stt: %s failed (rc=%d)\n", op, rc)
}

// SendSTT streams voicedata to the recognition channel and returns the
// recognised text.
//
// The audio is sent in chunks of uDataSize bytes (1600 when unset). A trailing
// partial chunk is not sent, exactly as before.
// NOTE(review): confirm whether the SDK accepts a short final chunk so the
// remainder can be forwarded instead of dropped.
// Sending stops early once the SDK reports EPD state 2; otherwise an
// end-of-speech packet is sent after the last chunk.
//
// On success the text is returned together with ICSERRSTTContinue (not nil).
// ICSERRSTTFailInit is returned when the model settings are rejected and
// ICSERRSTTSendFail when sending, recognising or freeing the result fails.
func (s *STTSelvas) SendSTT(voicedata []byte) (string, *icserror.IcsError) {
	const defaultChunkBytes = 1600

	var result string
	head := &s.STTInfo.LVCSR_SOCK_HEAD

	// Set Model List
	info := &s.STTInfo.LVCSR_DATA_INFO
	info.nModelId = MODEL_ID
	info.nKwdId = KWD_ID
	info.nCodecType = CODEC_TYPE
	info.nCharSet = LANGUAGE
	info.bEpdUsed = USED_EPD
	info.bScoreUsed = USED_SCORE
	rc := C.ASR_SVC_RECG_SET_LIST(head, info)
	if int(rc) < 0 {
		logSelvasFailure("ASR_SVC_RECG_SET_LIST", int(rc))
		return "", icserror.ICSERRSTTFailInit
	}

	chunkBytes := s.uDataSize
	if chunkBytes <= 0 {
		chunkBytes = defaultChunkBytes
	}

	// Track end-point detection per call. Reading the struct field after the
	// loop would pick up a stale value from an earlier call whenever no chunk
	// is sent, and wrongly skip the end-of-speech packet.
	endpointSeen := false
	for off := 0; off+chunkBytes <= len(voicedata); off += chunkBytes {
		chunk := voicedata[off : off+chunkBytes]
		// Report the real slice length so the SDK can never read past the chunk.
		rc = C.ASR_SVC_RECG_DATA(head, (*C.char)(unsafe.Pointer(&chunk[0])), C.long(len(chunk)), C.long(s.uEndOfSpeech), &s.STTInfo.LVCSR_EPD_INFO)
		if int(rc) < 0 {
			logSelvasFailure("ASR_SVC_RECG_DATA", int(rc))
			return "", icserror.ICSERRSTTSendFail
		}
		if s.STTInfo.LVCSR_EPD_INFO == 2 {
			endpointSeen = true
			break
		}
	}

	if !endpointSeen {
		// Signal end of speech with a local flag. Persisting 1 in
		// s.uEndOfSpeech made every chunk of later calls carry the flag.
		rc = C.ASR_SVC_RECG_DATA(head, nil, 0, C.long(1), &s.STTInfo.LVCSR_EPD_INFO)
		if int(rc) < 0 {
			logSelvasFailure("ASR_SVC_RECG_DATA(end of speech)", int(rc))
			return "", icserror.ICSERRSTTSendFail
		}
	}

	rc = C.ASR_SVC_RECG_PROC(head, &s.STTInfo.LVCSR_RECOG_RESULT)
	if int(rc) < 0 {
		logSelvasFailure("ASR_SVC_RECG_PROC", int(rc))
		return "", icserror.ICSERRSTTSendFail
	}

	// NOTE(review): this goroutine adds no parallelism because the caller
	// blocks on wg.Wait immediately. It is retained only because inlining it
	// would leave the file's "sync" import unused; inline it when that import
	// can be dropped.
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		cText := C.getResultData(s.STTInfo.LVCSR_RECOG_RESULT.pDataResult, s.STTInfo.LVCSR_RECOG_RESULT.nCount)
		// getResultData returns malloc'ed memory that was previously leaked.
		// C.free(NULL) is a no-op and C.GoString(nil) yields "".
		defer C.free(unsafe.Pointer(cText))
		// Every token is written with a leading space; strip it from the front.
		// The old TrimLeft call used an empty cutset and discarded its result.
		result = strings.TrimLeft(C.GoString(cText), " ")
	}()
	wg.Wait()

	// recog memory free
	rc = C.ASR_SVC_RECG_PROC_FREE(&s.STTInfo.LVCSR_RECOG_RESULT)
	if int(rc) < 0 {
		logSelvasFailure("ASR_SVC_RECG_PROC_FREE", int(rc))
		return "", icserror.ICSERRSTTSendFail
	}
	return result, icserror.ICSERRSTTContinue
}
// NewSTTSResult wraps a recognition text and the error reported with it in a
// newly allocated STTSResult.
func NewSTTSResult(result string, err *icserror.IcsError) *STTSResult {
	wrapped := new(STTSResult)
	wrapped.result = result
	wrapped.error = err
	return wrapped
}