//for STT SELVAS STT package stt /* #cgo LDFLAGS: -lstdc++ -ldl ./extlib/selvasstt/SDK/LIB/c_linux/x64/libASRLIB.a ./extlib/selvasstt/SDK/OpenSSL/Linux/Linux_x64/libssl.a ./extlib/selvasstt/SDK/OpenSSL/Linux/Linux_x64/libcrypto.a #cgo CFLAGS: -I ../extlib/selvasstt/SDK/INCLUDE #include #include #include #include char* getResultData(LVCSR_DATA_RESULT* pDataResult, long long nCount) { char* skip1 = ""; char* space = " "; int len = 0; long long i; for (i = 0; i < nCount; i++) { len += strlen(pDataResult[i].pTokenStr) + 1; } char* result = malloc(sizeof(char) * len); strcpy(result, ""); for (i = 0; i < nCount; i++) { if (strcmp(pDataResult[i].pTokenStr,skip1) == 0) { } else { strcat(result, space); strcat(result, pDataResult[i].pTokenStr); } } return result; } */ import "C" import ( "fmt" "os" "strings" "sync" "unsafe" "gitlab.com/cinnamon/voiceagent/icserror" ) const ( CONNECT_TIMEOUT = 3 READ_CONNECT_TIMEOUT = 5 // model Info MODEL_ID = 0 KWD_ID = -1 CODEC_TYPE = 0 // 8k LANGUAGE = 1 // utf-8 USED_EPD = 1 // epd used USED_SCORE = 0 // used off ) type STTSelvas struct { handle int authCode string ch int text string voiceBuf []byte voiceBufCur int64 silencenum int validnum int64 //rms counter uDataSize int uEndOfSpeech int STTInfo STTInfo } type STTInfo struct { LVCSR_SOCK_HEAD C.LVCSR_SOCK_HEAD LVCSR_EPD_INFO C.LVCSR_EPD_INFO LVCSR_DATA_AUTHENTICATION C.LVCSR_DATA_AUTHENTICATION LVCSR_RECOG_RESULT C.LVCSR_RECOG_RESULT LVCSR_DATA_RESULT C.LVCSR_DATA_RESULT LVCSR_RECOG_MID_RESULT C.LVCSR_RECOG_MID_RESULT LVCSR_DATA_INFO C.LVCSR_DATA_INFO } type STTSResult struct { result string error *icserror.IcsError } // connect SELVAS STT Server func NewSTTS(IP string, port int) (*STTSelvas, *icserror.IcsError) { if len(IP) <= 0 || port <= 0 { return nil, icserror.ICSERRInvalidParam } // os.Exit(9) stts := STTSelvas{handle: -1, authCode: "LGUPlusManager", uDataSize: 1600, uEndOfSpeech: 0} csIP := C.CString(IP) csPort := C.long(port) csConTimeout := C.long(CONNECT_TIMEOUT) csReadTimeout := C.long(READ_CONNECT_TIMEOUT) defer func() { C.free(unsafe.Pointer(csIP)) }() rc := C.ASR_SVC_OPEN(csIP, csPort, csConTimeout, csReadTimeout, &stts.STTInfo.LVCSR_SOCK_HEAD) // add LVCSR_SOCK_HEAD if int(rc) == -1 { return nil, icserror.ICSERRSTTConnectTimeout } else if int(rc) == -2 { return nil, icserror.ICSERRSTTFailInit } // Auth csAuthCode := C.CString(stts.authCode) csAuthLen := C.longlong(len(stts.authCode)) stts.STTInfo.LVCSR_DATA_AUTHENTICATION.nAuthenticationLen = csAuthLen stts.STTInfo.LVCSR_DATA_AUTHENTICATION.pAuthentication = csAuthCode defer C.free(unsafe.Pointer(csAuthCode)) rc = C.ASR_SVC_SET_AUTH(&stts.STTInfo.LVCSR_SOCK_HEAD, &stts.STTInfo.LVCSR_DATA_AUTHENTICATION) if int(rc) < 0 { return nil, icserror.ICSERRSTTFailInit } // Channel Connect rc = C.ASR_SVC_RECG_OPEN(&stts.STTInfo.LVCSR_SOCK_HEAD) // add LVCSR_SOCK_HEAD if int(rc) < 0 { return nil, icserror.ICSERRSTTFailInit } return &stts, nil } func (s *STTSelvas) Close() *icserror.IcsError { if s.handle < 0 || s.ch < 0 { return icserror.ICSERRSTTNotInit } // Channel Connection Close rc := C.ASR_SVC_RECG_CLOS(&s.STTInfo.LVCSR_SOCK_HEAD) // add LVCSR_SOCK_HEAD if int(rc) < 0 { return icserror.ICSERRTTSNotInit } // Server Close rc = C.ASR_SVC_CLOS(&s.STTInfo.LVCSR_SOCK_HEAD) // aadd LVCSR_SOCK_HEAD if int(rc) < 0 { return icserror.ICSERRTTSNotInit } return nil } func (s *STTSelvas) SendSTT(voicedata []byte) (string, *icserror.IcsError) { var result string var sendCount int // file open // buff := make([]byte, 1600) file, err := os.Open("./9001-RX-1648533187911907029.pcm") if err != nil { fmt.Println(err) } defer file.Close() // Search model list /* rc := C.ASR_SVC_RECG_LIST_VIEW(&s.STTInfo.LVCSR_SOCK_HEAD, &s.STTInfo.LVCSR_DATA_MODEL) if int(rc) < 0 { return icserror.ICSERRSTTFailInit } fmt.Printf("Model List : %d \n", s.STTInfo.LVCSR_DATA_MODEL.nModelCnt) fmt.Println("Model List : ", s.STTInfo.LVCSR_DATA_MODEL.pModelInfo) fmt.Println("Model List : ", s.STTInfo.LVCSR_DATA_MODEL.pModelInfo.nModelID) fmt.Println("Model List : ", *s.STTInfo.LVCSR_DATA_MODEL.pModelInfo.pModelName) fmt.Println("Model List : ", s.STTInfo.LVCSR_DATA_MODEL.pModelInfo.nModelType) fmt.Println("Model List : ", s.STTInfo.LVCSR_DATA_MODEL.pModelInfo.nSamplingRate) // for i := 0; i < s.STTInfo.LVCSR_DATA_MODEL.nModelCnt; i++ { // fmt.Println("Model List : ", s.STTInfo.LVCSR_DATA_MODEL.pModelInfo[i]) // } rc = C.ASR_SVC_RECG_LIST_VIEW_FREE(&s.STTInfo.LVCSR_DATA_MODEL) if int(rc) < 0 { return icserror.ICSERRSTTFailInit } */ // Set Model List s.STTInfo.LVCSR_DATA_INFO.nModelId = MODEL_ID s.STTInfo.LVCSR_DATA_INFO.nKwdId = KWD_ID s.STTInfo.LVCSR_DATA_INFO.nCodecType = CODEC_TYPE s.STTInfo.LVCSR_DATA_INFO.nCharSet = LANGUAGE s.STTInfo.LVCSR_DATA_INFO.bEpdUsed = USED_EPD s.STTInfo.LVCSR_DATA_INFO.bScoreUsed = USED_SCORE rc := C.ASR_SVC_RECG_SET_LIST(&s.STTInfo.LVCSR_SOCK_HEAD, &s.STTInfo.LVCSR_DATA_INFO) if int(rc) < 0 { return "", icserror.ICSERRSTTFailInit } // file split sendCount = len(voicedata) / 1600 // send voice(voicedata) for j := 0; j < sendCount; j++ { buff1 := voicedata[1600*j : 1600*(j+1)] csUDataSize := C.long(s.uDataSize) csUEndSpeech := C.long(s.uEndOfSpeech) csBuff := (*C.char)(unsafe.Pointer(&buff1[0])) rc := C.ASR_SVC_RECG_DATA(&s.STTInfo.LVCSR_SOCK_HEAD, csBuff, csUDataSize, csUEndSpeech, &s.STTInfo.LVCSR_EPD_INFO) if int(rc) < 0 { return "", icserror.ICSERRSTTSendFail } if s.STTInfo.LVCSR_EPD_INFO == 2 { break } // time.Sleep(time.Millisecond * 100) // for real time } // Send voice // for { // bytesRead, err := file.Read(buff) // if err != nil { // if err == io.EOF { // s.uEndOfSpeech = 1 // csUEndSpeech := C.long(s.uEndOfSpeech) // rc = C.ASR_SVC_RECG_DATA(&s.STTInfo.LVCSR_SOCK_HEAD, nil, 0, csUEndSpeech, &s.STTInfo.LVCSR_EPD_INFO) // if int(rc) < 0 { // return "", icserror.ICSERRSTTSendFail // } // break // } // break // } // if bytesRead <= 0 { // s.uDataSize = 0 // } // csUDataSize := C.long(s.uDataSize) // csUEndSpeech := C.long(s.uEndOfSpeech) // csBuff := (*C.char)(unsafe.Pointer(&buff[0])) // rc := C.ASR_SVC_RECG_DATA(&s.STTInfo.LVCSR_SOCK_HEAD, csBuff, csUDataSize, csUEndSpeech, &s.STTInfo.LVCSR_EPD_INFO) // if int(rc) < 0 { // return "", icserror.ICSERRSTTSendFail // } // if s.STTInfo.LVCSR_EPD_INFO == 2 { // break // } // time.Sleep(time.Millisecond * 100) // for real time // } if s.STTInfo.LVCSR_EPD_INFO != 2 { s.uEndOfSpeech = 1 csUEndSpeech := C.long(s.uEndOfSpeech) rc = C.ASR_SVC_RECG_DATA(&s.STTInfo.LVCSR_SOCK_HEAD, nil, 0, csUEndSpeech, &s.STTInfo.LVCSR_EPD_INFO) if int(rc) < 0 { return "", icserror.ICSERRSTTSendFail } } rc = C.ASR_SVC_RECG_PROC(&s.STTInfo.LVCSR_SOCK_HEAD, &s.STTInfo.LVCSR_RECOG_RESULT) if int(rc) < 0 { return "", icserror.ICSERRSTTSendFail } wg := sync.WaitGroup{} wg.Add(1) go func() { result = C.GoString(C.getResultData(s.STTInfo.LVCSR_RECOG_RESULT.pDataResult, s.STTInfo.LVCSR_RECOG_RESULT.nCount)) // result = C.GoString(C.getResultData1(s.STTInfo.LVCSR_RECOG_RESULT.pDataResult, s.STTInfo.LVCSR_RECOG_RESULT.nCount)) strings.TrimLeft(result, "") wg.Done() }() wg.Wait() // recog memory free rc = C.ASR_SVC_RECG_PROC_FREE(&s.STTInfo.LVCSR_RECOG_RESULT) if int(rc) < 0 { return "", icserror.ICSERRSTTSendFail } return result, icserror.ICSERRSTTContinue } // result return func NewSTTSResult(result string, err *icserror.IcsError) *STTSResult { return &STTSResult{result, err} }