|
|
|
// Client bindings for the SELVAS STT (speech-to-text) SDK.
|
|
|
|
package stt
|
|
|
|
|
|
|
|
/*
|
|
|
|
#cgo LDFLAGS: -lstdc++ -lssl -lcrypto /home/leejj9612/dev/voiceagent/extlib/selvasstt/SDK/LIB/c_linux/x64/libASRLIB.a
|
|
|
|
#cgo CFLAGS: -I /home/leejj9612/dev/voiceagent/extlib/selvasstt/SDK/INCLUDE
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <Lvcsr_Api.h>
|
|
|
|
|
|
|
|
// getResultData joins the recognized tokens pDataResult[0] .. pDataResult[nCount-1]
// into one newly allocated, NUL-terminated string.
//
// Every token other than the "<eps>" placeholder is appended preceded by a
// single space, so a non-empty result starts with a space. Entries whose
// pTokenStr is NULL are skipped. A NULL pDataResult or nCount <= 0 yields an
// empty string.
//
// Returns NULL when the allocation fails. Otherwise the caller owns the
// returned buffer and must release it with free().
//
// Only line comments are used here: this function lives inside the Go block
// comment that forms the cgo preamble.
char* getResultData(LVCSR_DATA_RESULT* pDataResult, long long nCount) {
    static const char skip1[] = "<eps>";
    size_t len = 1; // the terminating NUL
    long long i;

    if (pDataResult == NULL) {
        nCount = 0;
    }

    // First pass: size the buffer for exactly the tokens that will be copied.
    for (i = 0; i < nCount; i++) {
        const char* tok = pDataResult[i].pTokenStr;
        if (tok == NULL || strcmp(tok, skip1) == 0) {
            continue;
        }
        len += strlen(tok) + 1; // one separating space + the token
    }

    char* result = malloc(len);
    if (result == NULL) {
        return NULL;
    }

    // Second pass: append through a moving cursor so each token is copied once
    // instead of re-scanning the buffer the way strcat does.
    char* out = result;
    for (i = 0; i < nCount; i++) {
        const char* tok = pDataResult[i].pTokenStr;
        if (tok == NULL || strcmp(tok, skip1) == 0) {
            continue;
        }
        size_t n = strlen(tok);
        *out++ = ' ';
        memcpy(out, tok, n);
        out += n;
    }
    *out = '\0';

    return result;
}
|
|
|
|
|
|
|
|
*/
|
|
|
|
import "C"
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"os"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"unsafe"
|
|
|
|
|
|
|
|
"gitlab.com/cinnamon/voiceagent/icserror"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Timeouts handed to ASR_SVC_OPEN when connecting to the STT server.
// NOTE(review): the unit is presumably seconds — confirm against the SDK.
const (
	CONNECT_TIMEOUT      = 3
	READ_CONNECT_TIMEOUT = 5
)

// Model info: recognition settings copied into LVCSR_DATA_INFO before
// streaming audio.
const (
	MODEL_ID   = 0
	KWD_ID     = -1
	CODEC_TYPE = 0 // 8k
	LANGUAGE   = 1 // utf-8
	USED_EPD   = 1 // epd used
	USED_SCORE = 0 // used off
)
|
|
|
|
|
|
|
|
type STTSelvas struct {
|
|
|
|
handle int
|
|
|
|
authCode string
|
|
|
|
ch int
|
|
|
|
text string
|
|
|
|
voiceBuf []byte
|
|
|
|
voiceBufCur int64
|
|
|
|
silencenum int
|
|
|
|
validnum int64 //rms counter
|
|
|
|
uDataSize int
|
|
|
|
uEndOfSpeech int
|
|
|
|
STTInfo STTInfo
|
|
|
|
}
|
|
|
|
|
|
|
|
type STTInfo struct {
|
|
|
|
LVCSR_SOCK_HEAD C.LVCSR_SOCK_HEAD
|
|
|
|
LVCSR_EPD_INFO C.LVCSR_EPD_INFO
|
|
|
|
LVCSR_DATA_AUTHENTICATION C.LVCSR_DATA_AUTHENTICATION
|
|
|
|
LVCSR_RECOG_RESULT C.LVCSR_RECOG_RESULT
|
|
|
|
LVCSR_DATA_RESULT C.LVCSR_DATA_RESULT
|
|
|
|
LVCSR_RECOG_MID_RESULT C.LVCSR_RECOG_MID_RESULT
|
|
|
|
LVCSR_DATA_INFO C.LVCSR_DATA_INFO
|
|
|
|
}
|
|
|
|
|
|
|
|
type STTSResult struct {
|
|
|
|
result string
|
|
|
|
error *icserror.IcsError
|
|
|
|
}
|
|
|
|
|
|
|
|
// connect SELVAS STT Server
|
|
|
|
func NewSTTS(IP string, port int) (*STTSelvas, *icserror.IcsError) {
|
|
|
|
if len(IP) <= 0 || port <= 0 {
|
|
|
|
return nil, icserror.ICSERRInvalidParam
|
|
|
|
}
|
|
|
|
// os.Exit(9)
|
|
|
|
|
|
|
|
stts := STTSelvas{handle: -1, authCode: "LGUPlusManager", uDataSize: 1600, uEndOfSpeech: 0}
|
|
|
|
|
|
|
|
csIP := C.CString(IP)
|
|
|
|
csPort := C.long(port)
|
|
|
|
csConTimeout := C.long(CONNECT_TIMEOUT)
|
|
|
|
csReadTimeout := C.long(READ_CONNECT_TIMEOUT)
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
C.free(unsafe.Pointer(csIP))
|
|
|
|
}()
|
|
|
|
|
|
|
|
rc := C.ASR_SVC_OPEN(csIP, csPort, csConTimeout, csReadTimeout, &stts.STTInfo.LVCSR_SOCK_HEAD) // add LVCSR_SOCK_HEAD
|
|
|
|
if int(rc) == -1 {
|
|
|
|
return nil, icserror.ICSERRSTTConnectTimeout
|
|
|
|
} else if int(rc) == -2 {
|
|
|
|
return nil, icserror.ICSERRSTTFailInit
|
|
|
|
}
|
|
|
|
|
|
|
|
// Auth
|
|
|
|
csAuthCode := C.CString(stts.authCode)
|
|
|
|
csAuthLen := C.longlong(len(stts.authCode))
|
|
|
|
stts.STTInfo.LVCSR_DATA_AUTHENTICATION.nAuthenticationLen = csAuthLen
|
|
|
|
stts.STTInfo.LVCSR_DATA_AUTHENTICATION.pAuthentication = csAuthCode
|
|
|
|
defer C.free(unsafe.Pointer(csAuthCode))
|
|
|
|
|
|
|
|
rc = C.ASR_SVC_SET_AUTH(&stts.STTInfo.LVCSR_SOCK_HEAD, &stts.STTInfo.LVCSR_DATA_AUTHENTICATION)
|
|
|
|
if int(rc) < 0 {
|
|
|
|
return nil, icserror.ICSERRSTTFailInit
|
|
|
|
}
|
|
|
|
|
|
|
|
// Channel Connect
|
|
|
|
rc = C.ASR_SVC_RECG_OPEN(&stts.STTInfo.LVCSR_SOCK_HEAD) // add LVCSR_SOCK_HEAD
|
|
|
|
if int(rc) < 0 {
|
|
|
|
return nil, icserror.ICSERRSTTFailInit
|
|
|
|
}
|
|
|
|
|
|
|
|
return &stts, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *STTSelvas) Close() *icserror.IcsError {
|
|
|
|
if s.handle < 0 || s.ch < 0 {
|
|
|
|
return icserror.ICSERRSTTNotInit
|
|
|
|
}
|
|
|
|
|
|
|
|
// Channel Connection Close
|
|
|
|
rc := C.ASR_SVC_RECG_CLOS(&s.STTInfo.LVCSR_SOCK_HEAD) // add LVCSR_SOCK_HEAD
|
|
|
|
if int(rc) < 0 {
|
|
|
|
return icserror.ICSERRTTSNotInit
|
|
|
|
}
|
|
|
|
|
|
|
|
// Server Close
|
|
|
|
rc = C.ASR_SVC_CLOS(&s.STTInfo.LVCSR_SOCK_HEAD) // aadd LVCSR_SOCK_HEAD
|
|
|
|
if int(rc) < 0 {
|
|
|
|
return icserror.ICSERRTTSNotInit
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *STTSelvas) SendSTT(voicedata []byte) (string, *icserror.IcsError) {
|
|
|
|
var result string
|
|
|
|
var sendCount int
|
|
|
|
|
|
|
|
// file open
|
|
|
|
// buff := make([]byte, 1600)
|
|
|
|
|
|
|
|
file, err := os.Open("./9001-RX-1648533187911907029.pcm")
|
|
|
|
if err != nil {
|
|
|
|
fmt.Println(err)
|
|
|
|
}
|
|
|
|
defer file.Close()
|
|
|
|
|
|
|
|
// Search model list
|
|
|
|
/*
|
|
|
|
rc := C.ASR_SVC_RECG_LIST_VIEW(&s.STTInfo.LVCSR_SOCK_HEAD, &s.STTInfo.LVCSR_DATA_MODEL)
|
|
|
|
if int(rc) < 0 {
|
|
|
|
return icserror.ICSERRSTTFailInit
|
|
|
|
}
|
|
|
|
fmt.Printf("Model List : %d \n", s.STTInfo.LVCSR_DATA_MODEL.nModelCnt)
|
|
|
|
fmt.Println("Model List : ", s.STTInfo.LVCSR_DATA_MODEL.pModelInfo)
|
|
|
|
fmt.Println("Model List : ", s.STTInfo.LVCSR_DATA_MODEL.pModelInfo.nModelID)
|
|
|
|
fmt.Println("Model List : ", *s.STTInfo.LVCSR_DATA_MODEL.pModelInfo.pModelName)
|
|
|
|
fmt.Println("Model List : ", s.STTInfo.LVCSR_DATA_MODEL.pModelInfo.nModelType)
|
|
|
|
fmt.Println("Model List : ", s.STTInfo.LVCSR_DATA_MODEL.pModelInfo.nSamplingRate)
|
|
|
|
// for i := 0; i < s.STTInfo.LVCSR_DATA_MODEL.nModelCnt; i++ {
|
|
|
|
// fmt.Println("Model List : ", s.STTInfo.LVCSR_DATA_MODEL.pModelInfo[i])
|
|
|
|
// }
|
|
|
|
|
|
|
|
rc = C.ASR_SVC_RECG_LIST_VIEW_FREE(&s.STTInfo.LVCSR_DATA_MODEL)
|
|
|
|
if int(rc) < 0 {
|
|
|
|
return icserror.ICSERRSTTFailInit
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
// Set Model List
|
|
|
|
s.STTInfo.LVCSR_DATA_INFO.nModelId = MODEL_ID
|
|
|
|
s.STTInfo.LVCSR_DATA_INFO.nKwdId = KWD_ID
|
|
|
|
s.STTInfo.LVCSR_DATA_INFO.nCodecType = CODEC_TYPE
|
|
|
|
s.STTInfo.LVCSR_DATA_INFO.nCharSet = LANGUAGE
|
|
|
|
s.STTInfo.LVCSR_DATA_INFO.bEpdUsed = USED_EPD
|
|
|
|
s.STTInfo.LVCSR_DATA_INFO.bScoreUsed = USED_SCORE
|
|
|
|
|
|
|
|
rc := C.ASR_SVC_RECG_SET_LIST(&s.STTInfo.LVCSR_SOCK_HEAD, &s.STTInfo.LVCSR_DATA_INFO)
|
|
|
|
if int(rc) < 0 {
|
|
|
|
return "", icserror.ICSERRSTTFailInit
|
|
|
|
}
|
|
|
|
|
|
|
|
// file split
|
|
|
|
sendCount = len(voicedata) / 1600
|
|
|
|
|
|
|
|
// send voice(voicedata)
|
|
|
|
for j := 0; j < sendCount; j++ {
|
|
|
|
buff1 := voicedata[1600*j : 1600*(j+1)]
|
|
|
|
csUDataSize := C.long(s.uDataSize)
|
|
|
|
csUEndSpeech := C.long(s.uEndOfSpeech)
|
|
|
|
csBuff := (*C.char)(unsafe.Pointer(&buff1[0]))
|
|
|
|
rc := C.ASR_SVC_RECG_DATA(&s.STTInfo.LVCSR_SOCK_HEAD, csBuff, csUDataSize, csUEndSpeech, &s.STTInfo.LVCSR_EPD_INFO)
|
|
|
|
if int(rc) < 0 {
|
|
|
|
return "", icserror.ICSERRSTTSendFail
|
|
|
|
}
|
|
|
|
|
|
|
|
if s.STTInfo.LVCSR_EPD_INFO == 2 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
// time.Sleep(time.Millisecond * 100) // for real time
|
|
|
|
}
|
|
|
|
|
|
|
|
// Send voice
|
|
|
|
// for {
|
|
|
|
// bytesRead, err := file.Read(buff)
|
|
|
|
// if err != nil {
|
|
|
|
// if err == io.EOF {
|
|
|
|
// s.uEndOfSpeech = 1
|
|
|
|
// csUEndSpeech := C.long(s.uEndOfSpeech)
|
|
|
|
// rc = C.ASR_SVC_RECG_DATA(&s.STTInfo.LVCSR_SOCK_HEAD, nil, 0, csUEndSpeech, &s.STTInfo.LVCSR_EPD_INFO)
|
|
|
|
// if int(rc) < 0 {
|
|
|
|
// return "", icserror.ICSERRSTTSendFail
|
|
|
|
// }
|
|
|
|
// break
|
|
|
|
// }
|
|
|
|
// break
|
|
|
|
// }
|
|
|
|
// if bytesRead <= 0 {
|
|
|
|
// s.uDataSize = 0
|
|
|
|
// }
|
|
|
|
// csUDataSize := C.long(s.uDataSize)
|
|
|
|
// csUEndSpeech := C.long(s.uEndOfSpeech)
|
|
|
|
// csBuff := (*C.char)(unsafe.Pointer(&buff[0]))
|
|
|
|
|
|
|
|
// rc := C.ASR_SVC_RECG_DATA(&s.STTInfo.LVCSR_SOCK_HEAD, csBuff, csUDataSize, csUEndSpeech, &s.STTInfo.LVCSR_EPD_INFO)
|
|
|
|
// if int(rc) < 0 {
|
|
|
|
// return "", icserror.ICSERRSTTSendFail
|
|
|
|
// }
|
|
|
|
|
|
|
|
// if s.STTInfo.LVCSR_EPD_INFO == 2 {
|
|
|
|
// break
|
|
|
|
// }
|
|
|
|
|
|
|
|
// time.Sleep(time.Millisecond * 100) // for real time
|
|
|
|
// }
|
|
|
|
|
|
|
|
if s.STTInfo.LVCSR_EPD_INFO != 2 {
|
|
|
|
s.uEndOfSpeech = 1
|
|
|
|
csUEndSpeech := C.long(s.uEndOfSpeech)
|
|
|
|
|
|
|
|
rc = C.ASR_SVC_RECG_DATA(&s.STTInfo.LVCSR_SOCK_HEAD, nil, 0, csUEndSpeech, &s.STTInfo.LVCSR_EPD_INFO)
|
|
|
|
if int(rc) < 0 {
|
|
|
|
return "", icserror.ICSERRSTTSendFail
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = C.ASR_SVC_RECG_PROC(&s.STTInfo.LVCSR_SOCK_HEAD, &s.STTInfo.LVCSR_RECOG_RESULT)
|
|
|
|
if int(rc) < 0 {
|
|
|
|
return "", icserror.ICSERRSTTSendFail
|
|
|
|
}
|
|
|
|
|
|
|
|
wg := sync.WaitGroup{}
|
|
|
|
wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
result = C.GoString(C.getResultData(s.STTInfo.LVCSR_RECOG_RESULT.pDataResult, s.STTInfo.LVCSR_RECOG_RESULT.nCount))
|
|
|
|
// result = C.GoString(C.getResultData1(s.STTInfo.LVCSR_RECOG_RESULT.pDataResult, s.STTInfo.LVCSR_RECOG_RESULT.nCount))
|
|
|
|
strings.TrimLeft(result, "")
|
|
|
|
wg.Done()
|
|
|
|
}()
|
|
|
|
wg.Wait()
|
|
|
|
|
|
|
|
// recog memory free
|
|
|
|
rc = C.ASR_SVC_RECG_PROC_FREE(&s.STTInfo.LVCSR_RECOG_RESULT)
|
|
|
|
if int(rc) < 0 {
|
|
|
|
return "", icserror.ICSERRSTTSendFail
|
|
|
|
}
|
|
|
|
|
|
|
|
return result, icserror.ICSERRSTTContinue
|
|
|
|
}
|
|
|
|
|
|
|
|
// result return
|
|
|
|
func NewSTTSResult(result string, err *icserror.IcsError) *STTSResult {
|
|
|
|
return &STTSResult{result, err}
|
|
|
|
}
|