# demo-for-aicooking-ep2 / partii-client-process-wav-file.py
# Santipab's picture
# Upload 5 files
# 5763f06 verified
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import configparser
import time
import grpc
import PartiiService_pb2_grpc
import PartiiService_pb2
import wave
import numpy as np
import sys
# Default settings. When this file is executed as a script, the __main__ block
# overwrites most of them from config.ini and the command line.
_SERVER_IP = "127.0.0.1"
_SERVER_PORT = 27021
_AUDIO_INPUT = ""  # path handed to wave.open() by readwave()
_AUDIO_SAMPLE_RATE = 16000
# run() sends the rate under this name; define it here too so the module does
# not raise NameError when it is imported instead of executed.
_AUDIO_SAMPLING_RATE = _AUDIO_SAMPLE_RATE
_AUDIO_CODEC = "LINEAR16"
_PACKAGE_SIZE = 1024  # passed to readframes() for every chunk in readwave()
_CHUNK_LEN_SECONE = 0.2
# Kept as a string: every verbosity check compares against "0", and the value
# loaded from config.ini is a string as well. An int 0 here would compare
# unequal to "0" and silently enable verbose output.
_VERBOSE = "0"
_API_KEY = ""
_CLIENT_ID = "web-R75PbQuF"
_VAD_THRESHOLD = 0.5
_NUM_CHANNELS = 1
_DECODE_CHANEL = 0
_MODEL_KEY = "default"
_PROTOCOL = "partii"
_ENABLE_TEXTNORM = "true"
_ENABLE_PARTIAL = "true"
_ENABLE_VAD = "true"
_ENABLE_ENDPOINT = "true"
_NUMBER_TARGET = "english"
_VIEW = "sentent"  # run() recognises "sentent", "word" and "phone"
def convMilliFormat(millis):
    """Format a time offset as ``HH:MM:SS,mmm``.

    Despite the parameter name, the value is multiplied by 1000 before it is
    split into fields, so it is effectively interpreted as seconds.

    Args:
        millis: Offset as a number or numeric string (converted with
            ``float``).

    Returns:
        A zero-padded ``"HH:MM:SS,mmm"`` string. The hour field wraps at 24.
    """
    # Round to whole milliseconds before splitting. Truncating the float
    # product loses a millisecond for inputs such as 1.001, because
    # 1.001 * 1000 evaluates to 1000.9999999999999.
    total_ms = int(round(float(millis) * 1000))
    total_sec, mil = divmod(total_ms, 1000)
    total_min, sec = divmod(total_sec, 60)
    total_hour, minute = divmod(total_min, 60)
    hour = total_hour % 24
    return "{:02d}:{:02d}:{:02d},{:03d}".format(hour, minute, sec, mil)
def readwave(fname):
    """Stream a WAV file as a sequence of ``AudioData`` messages.

    The file is read ``int(_PACKAGE_SIZE)`` frames at a time. When verbose
    output is enabled, the WAV header information is printed first.

    Args:
        fname: Path (or file object) accepted by ``wave.open``.

    Yields:
        ``PartiiService_pb2.AudioData`` messages of type ``SPEECH``, each
        carrying the bytes returned by one ``readframes`` call.
    """
    # Using the reader as a context manager closes the file even when the
    # consumer stops iterating before the generator is exhausted.
    with wave.open(fname, 'rb') as wav:
        # str() so that an int 0 and the string "0" both mean "quiet".
        if str(_VERBOSE) != "0":
            print( "Number of channels",wav.getnchannels())
            print ( "Sample width",wav.getsampwidth())
            print ( "Frame rate.",wav.getframerate())
            print ( "Number of frames",wav.getnframes())
            print ( "parameters:",wav.getparams())
        # NOTE(review): the __main__ block computes _PACKAGE_SIZE as
        # seconds * rate * 2, which looks like a byte count, while
        # readframes() takes a frame count -- confirm the intended chunk length.
        frames_per_chunk = int(_PACKAGE_SIZE)
        for _ in range(0, wav.getnframes(), frames_per_chunk):
            chunk = wav.readframes(frames_per_chunk)
            yield PartiiService_pb2.AudioData(
                ByteChunk=chunk,
                Bytelen=len(chunk),
                Datatype=PartiiService_pb2.AudioDataType.SPEECH,
            )
def GetAvailable():
    """Query the server's ``GetAvailable`` RPC.

    Connects to ``_SERVER_IP:_SERVER_PORT`` over an insecure channel and sends
    an ``EmptyMsg`` together with the API key, client id, model key and
    protocol as call metadata.

    Returns:
        The ``Available`` field of the server's response. ``run()`` treats a
        value greater than zero as "ready to transcribe".
    """
    metadata = (
        ('apikey', str(_API_KEY)),
        ('client-id', str(_CLIENT_ID)),
        ('modelkey', str(_MODEL_KEY)),
        ('protocol', str(_PROTOCOL)),
    )
    # Open the channel in a with-block so it is closed after the call instead
    # of being left open on every availability check.
    with grpc.insecure_channel('{}:{}'.format(_SERVER_IP, _SERVER_PORT)) as channel:
        stub = PartiiService_pb2_grpc.TranscriptionStub(channel)
        response = stub.GetAvailable(PartiiService_pb2.EmptyMsg(), metadata=metadata)
        return response.Available
def _transcribe_metadata():
    """Build the metadata tuple sent with the ``LiveTranscribe`` call."""
    return (
        ('apikey', str(_API_KEY)),
        ('sampling-rate', str(_AUDIO_SAMPLING_RATE)),
        ('client-id', str(_CLIENT_ID)),
        ('vad-threshold', str(_VAD_THRESHOLD)),
        ('num-channels', str(_NUM_CHANNELS)),
        ('decode-channels', str(_DECODE_CHANEL)),
        ('modelkey', str(_MODEL_KEY)),
        ('audio-codec', str(_AUDIO_CODEC)),
        ('protocol', str(_PROTOCOL)),
        ('enable-textnorm', str(_ENABLE_TEXTNORM)),
        ('enable-partial', str(_ENABLE_PARTIAL)),
        ('enable-vad', str(_ENABLE_VAD)),
        ('enable-endpoint', str(_ENABLE_ENDPOINT)),
        ('number-target', str(_NUMBER_TARGET)),
    )


def _print_result(response):
    """Print a RESULT response at the level of detail selected by ``_VIEW``."""
    if _VIEW == "sentent":
        print("sentenceNumber %s " % (response.sentenceNumber))
        print("transcript %s " % (response.transcript))
        print("confidence %s " % (response.confidence))
        print("startTime %s " % (response.startTime))
        print("endTime %s " % (response.endTime))
    if _VIEW == "word":
        for w in response.words:
            print("\twordNumber %s " % (w.wordNumber))
            print("\tword %s " % (w.word))
            print("\tconfidence %s " % (w.confidence))
            print("\tstartTime %s " % (w.startTime))
            print("\tendTime %s " % (w.endTime))
    if _VIEW == "phone":
        for w in response.words:
            for p in w.phones:
                print("\t\tphoneNumber %s " % (p.phoneNumber))
                print("\t\tphone %s " % (p.phone))
                print("\t\tconfidence %s " % (p.confidence))
                print("\t\tstartTime %s " % (p.startTime))
                print("\t\tendTime %s " % (p.endTime))


def _transcribe_once():
    """Stream ``_AUDIO_INPUT`` to the server once and print the responses.

    Returns:
        True when the server sent a FINISHED response. False when it reported
        a Failed status, when reading the response stream raised, or when the
        stream ended without a FINISHED response.
    """
    # str() so that an int 0 and the string "0" both mean "quiet".
    verbose = str(_VERBOSE) != "0"
    target = '{}:{}'.format(_SERVER_IP, _SERVER_PORT)
    if verbose:
        print("Start : ", _AUDIO_INPUT)
        print(target)
    with grpc.insecure_channel(target) as channel:
        stub = PartiiService_pb2_grpc.TranscriptionStub(channel)
        metadata = _transcribe_metadata()
        if verbose:
            print(metadata)
        responses = stub.LiveTranscribe(readwave(_AUDIO_INPUT), metadata=metadata)
        try:
            for response in responses:
                if response.Status == PartiiService_pb2.StatusCode.Failed:
                    print("ERROR [%s] " % (response.transcript))
                    # Stop at the first failure so one attempt costs exactly
                    # one retry.
                    return False
                if response.Status != PartiiService_pb2.StatusCode.Ok:
                    continue
                kind = response.sentenceType
                if kind == PartiiService_pb2.ResultType.PARTIAL:
                    if verbose:
                        print("sentenceNumber %s " % (response.sentenceNumber))
                        print("transcript %s " % (response.transcript))
                        print("confidence %s " % (response.confidence))
                        print("startTime %s, %s " % (response.startTime, convMilliFormat(response.startTime)))
                        print("endTime %s, %s " % (response.endTime, convMilliFormat(response.endTime)))
                elif kind == PartiiService_pb2.ResultType.RESULT:
                    _print_result(response)
                elif kind == PartiiService_pb2.ResultType.FINISHED:
                    if verbose:
                        print("Last respond from server [%s] " % (response.transcript))
                        print("Stop : ", _AUDIO_INPUT)
                    return True
        except Exception as err:
            # Exception rather than a bare except, so Ctrl-C and SystemExit
            # still propagate; the cause is printed instead of hidden.
            print("An exception occurred: %r" % (err,))
            return False
    # The stream ended without FINISHED: report it as unsuccessful so the
    # caller's retry counter goes down and the retry loop terminates.
    return False


def run():
    """Transcribe ``_AUDIO_INPUT``, waiting and retrying on failure.

    An attempt is made only when ``GetAvailable()`` reports a value greater
    than zero. If the server is unavailable or the attempt does not finish,
    the function prints a notice, sleeps ``timeout`` seconds and uses up one
    retry. Errors raised by ``GetAvailable()`` itself are not caught.
    """
    retries = 1
    timeout = 3
    while retries > 0:
        if GetAvailable() > 0 and _transcribe_once():
            return
        print('Waiting for %s seconds' % (timeout))
        time.sleep(timeout)
        retries -= 1
if __name__ == '__main__':
    # Command line: <wav-file> <api-key>. Every other setting is read from the
    # DEFAULT section of config.ini in the current directory.
    if len(sys.argv) < 3:
        sys.exit("Usage: {} <wav-file> <api-key>".format(sys.argv[0]))
    start_time = time.time()
    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it did read, so an empty result means there is no config.
    if not config.read('config.ini'):
        sys.exit("Cannot read config.ini")
    defaults = config['DEFAULT']
    _SERVER_IP = defaults['_SERVER_IP']
    _SERVER_PORT = int(defaults['_SERVER_PORT'])
    _AUDIO_INPUT = sys.argv[1]
    _AUDIO_SAMPLING_RATE = int(defaults['_AUDIO_SAMPLING_RATE'])
    # Keep the older name in step with the configured rate.
    _AUDIO_SAMPLE_RATE = _AUDIO_SAMPLING_RATE
    _AUDIO_CODEC = defaults['_AUDIO_CODEC']
    _CHUNK_LEN_SECONE = defaults['_CHUNK_LEN_SECOND']
    _VERBOSE = defaults['_VERBOSE']
    # Derive the chunk size from the configured sampling rate rather than the
    # hard-coded 16000 default.
    _PACKAGE_SIZE = float(_CHUNK_LEN_SECONE) * float(_AUDIO_SAMPLING_RATE) * 2
    _API_KEY = sys.argv[2]
    _CLIENT_ID = defaults['_CLIENT_ID']
    _VAD_THRESHOLD = defaults['_VAD_THRESHOLD']
    _NUM_CHANNELS = defaults['_NUM_CHANNELS']
    _DECODE_CHANEL = defaults['_DECODE_CHANEL']
    _MODEL_KEY = defaults['_MODEL_KEY']
    _PROTOCOL = defaults['_PROTOCOL']
    _ENABLE_TEXTNORM = defaults['_ENABLE_TEXTNORM']
    _ENABLE_PARTIAL = defaults['_ENABLE_PARTIAL']
    _ENABLE_VAD = defaults['_ENABLE_VAD']
    _ENABLE_ENDPOINT = defaults['_ENABLE_ENDPOINT']
    _NUMBER_TARGET = defaults['_NUMBER_TARGET']
    _VIEW = defaults['_VIEW']
    run()
    end_time = time.time()
    print("Runtime of the program is ", (end_time - start_time))