Spaces:

Santipab
/

demo-for-aicooking-ep2

Sleeping

App Files Files Community

demo-for-aicooking-ep2 / partii-client-process-wav-file.py

Santipab

Upload 5 files

5763f06 verified 8 months ago

raw

history blame contribute delete

9.12 kB

	#!/usr/bin/python3
	# -*- coding: utf-8 -*-


	import configparser
	import time
	import grpc
	import PartiiService_pb2_grpc
	import PartiiService_pb2
	import wave
	import numpy as np
	import sys

	# Module-level defaults. When the file is run as a script, the __main__
	# block overwrites these from config.ini and the command line.
	_SERVER_IP = "127.0.0.1"
	_SERVER_PORT = 27021
	_AUDIO_INPUT = ""
	_AUDIO_SAMPLE_RATE = 16000
	# run() sends this name as the 'sampling-rate' metadata; give it a default
	# so the name exists even when the module is imported instead of executed.
	_AUDIO_SAMPLING_RATE = _AUDIO_SAMPLE_RATE
	_AUDIO_CODEC = "LINEAR16"
	_PACKAGE_SIZE = 1024
	_CHUNK_LEN_SECONE = 0.2
	# Kept as a string: every check in this file is `_VERBOSE != "0"`, so an
	# integer 0 here would be treated as "verbose on".
	_VERBOSE = "0"

	_API_KEY = ""
	_CLIENT_ID = "web-R75PbQuF"

	_VAD_THRESHOLD = 0.5
	_NUM_CHANNELS = 1
	_DECODE_CHANEL = 0
	_MODEL_KEY = "default"
	_PROTOCOL = "partii"
	_ENABLE_TEXTNORM = "true"
	_ENABLE_PARTIAL = "true"
	_ENABLE_VAD = "true"
	_ENABLE_ENDPOINT = "true"
	_NUMBER_TARGET = "english"
	# Output detail compared in run(): "sentent", "word" or "phone".
	_VIEW = "sentent"

	def convMilliFormat(millis):
	    """Format a time offset as ``HH:MM:SS,mmm``.

	    Despite the parameter name, the value is multiplied by 1000 before it is
	    split into fields, so it is treated as a number of seconds. Callers in
	    this file pass ``response.startTime`` and ``response.endTime``.

	    Args:
	        millis: Any value accepted by ``float()`` (number or numeric string).

	    Returns:
	        A zero-padded ``HH:MM:SS,mmm`` string. Hours wrap modulo 24.
	    """
	    # Round to whole milliseconds before splitting: truncating the float
	    # product loses a millisecond for inputs such as 1.001, because
	    # 1.001 * 1000 evaluates to 1000.9999999999999.
	    total_ms = int(round(float(millis) * 1000))
	    total_sec, mil = divmod(total_ms, 1000)
	    total_min, sec = divmod(total_sec, 60)
	    total_hour, minute = divmod(total_min, 60)
	    hour = total_hour % 24
	    return "{:02d}:{:02d}:{:02d},{:03d}".format(hour, minute, sec, mil)


	def readwave(fname):
	    """Yield the contents of a WAV file as ``AudioData`` messages.

	    The file is read in chunks of ``int(_PACKAGE_SIZE)`` frames. Each chunk
	    is wrapped in a ``PartiiService_pb2.AudioData`` tagged as ``SPEECH``.
	    When ``_VERBOSE`` is not ``"0"`` the WAV header fields are printed first.

	    Args:
	        fname: Path (or file object) accepted by ``wave.open``.

	    Yields:
	        ``PartiiService_pb2.AudioData`` messages, one per chunk read.
	    """
	    # NOTE(review): readframes() counts frames, while the __main__ block
	    # computes _PACKAGE_SIZE as seconds * rate * 2, which looks like a byte
	    # count. Confirm the intended chunk duration.
	    frames_per_chunk = int(_PACKAGE_SIZE)

	    # The context manager closes the file even when the consumer stops
	    # iterating early or an error interrupts the stream.
	    with wave.open(fname, 'r') as obj:
	        if _VERBOSE != "0":
	            print("Number of channels", obj.getnchannels())
	            print("Sample width", obj.getsampwidth())
	            print("Frame rate.", obj.getframerate())
	            print("Number of frames", obj.getnframes())
	            print("parameters:", obj.getparams())

	        for _ in range(0, obj.getnframes(), frames_per_chunk):
	            waveData = obj.readframes(frames_per_chunk)
	            yield PartiiService_pb2.AudioData(
	                ByteChunk=waveData,
	                Bytelen=len(waveData),
	                Datatype=PartiiService_pb2.AudioDataType.SPEECH,
	            )


	def GetAvailable():
	    """Query the server's ``GetAvailable`` RPC and return its ``Available`` field.

	    Opens an insecure gRPC channel to ``_SERVER_IP:_SERVER_PORT`` and sends
	    an ``EmptyMsg`` with the API key, client id, model key and protocol as
	    call metadata. ``run()`` starts a transcription only when the returned
	    value is greater than zero.

	    Returns:
	        The ``Available`` field of the server response.
	    """
	    metadata = (
	        ('apikey', str(_API_KEY)),
	        ('client-id', str(_CLIENT_ID)),
	        ('modelkey', str(_MODEL_KEY)),
	        ('protocol', str(_PROTOCOL)),
	    )
	    # Close the channel once the call completes instead of leaking it on
	    # every availability check.
	    with grpc.insecure_channel('{}:{}'.format(_SERVER_IP, _SERVER_PORT)) as channel:
	        stub = PartiiService_pb2_grpc.TranscriptionStub(channel)
	        response = stub.GetAvailable(PartiiService_pb2.EmptyMsg(), metadata=metadata)
	        return response.Available

	def _wait_before_retry(timeout):
	    """Print the wait notice and sleep for ``timeout`` seconds."""
	    print('Waiting for %s seconds'%(timeout))
	    time.sleep(timeout)


	def _print_result(response):
	    """Print a RESULT response at the detail level selected by ``_VIEW``."""
	    if _VIEW == "sentent":
	        print("sentenceNumber %s " %(response.sentenceNumber))
	        print("transcript %s " %(response.transcript))
	        print("confidence %s " %(response.confidence))
	        print("startTime %s " %(response.startTime))
	        print("endTime %s " %(response.endTime))

	    if _VIEW == "word":
	        for w in response.words:
	            print("\twordNumber %s " %(w.wordNumber))
	            print("\tword %s " %(w.word))
	            print("\tconfidence %s " %(w.confidence))
	            print("\tstartTime %s " %(w.startTime))
	            print("\tendTime %s " %(w.endTime))

	    if _VIEW == "phone":
	        for w in response.words:
	            for p in w.phones:
	                print("\t\tphoneNumber %s " %(p.phoneNumber))
	                print("\t\tphone %s " %(p.phone))
	                print("\t\tconfidence %s " %(p.confidence))
	                print("\t\tstartTime %s " %(p.startTime))
	                print("\t\tendTime %s " %(p.endTime))


	def run():
	    """Stream ``_AUDIO_INPUT`` to the server and print the transcription.

	    While retries remain, the function:
	      1. calls ``GetAvailable()``; if it is not positive, waits and uses up
	         a retry;
	      2. otherwise opens a channel, calls ``LiveTranscribe`` with the chunks
	         from ``readwave(_AUDIO_INPUT)`` and the module-level settings as
	         metadata, and prints each response (PARTIAL only when verbose,
	         RESULT according to ``_VIEW``);
	      3. stops when a FINISHED response arrives.

	    A ``Failed`` status, an exception while reading the stream, or a stream
	    that ends without FINISHED each wait ``timeout`` seconds and use up one
	    retry. Reads module-level settings, including ``_AUDIO_SAMPLING_RATE``.
	    """
	    retries = 1
	    timeout = 3

	    while retries > 0:
	        if GetAvailable() <= 0:
	            # No capacity reported by the server: back off and try again.
	            _wait_before_retry(timeout)
	            retries -= 1
	            continue

	        if _VERBOSE != "0":
	            print("Start : ",_AUDIO_INPUT )
	            print('{}:{}'.format(_SERVER_IP, _SERVER_PORT))

	        with grpc.insecure_channel('{}:{}'.format(_SERVER_IP, _SERVER_PORT)) as channel:
	            stub = PartiiService_pb2_grpc.TranscriptionStub(channel)

	            metadata = (
	                ('apikey', str(_API_KEY)),
	                ('sampling-rate', str(_AUDIO_SAMPLING_RATE)),
	                ('client-id', str(_CLIENT_ID)),
	                ('vad-threshold', str(_VAD_THRESHOLD)),
	                ('num-channels', str(_NUM_CHANNELS)),
	                ('decode-channels', str(_DECODE_CHANEL)),
	                ('modelkey', str(_MODEL_KEY)),
	                ('audio-codec', str(_AUDIO_CODEC)),
	                ('protocol', str(_PROTOCOL)),
	                ('enable-textnorm', str(_ENABLE_TEXTNORM)),
	                ('enable-partial', str(_ENABLE_PARTIAL)),
	                ('enable-vad', str(_ENABLE_VAD)),
	                ('enable-endpoint', str(_ENABLE_ENDPOINT)),
	                ('number-target', str(_NUMBER_TARGET)),
	            )

	            if _VERBOSE != "0":
	                print(metadata)

	            rawbyte_list = readwave(_AUDIO_INPUT)
	            responses = stub.LiveTranscribe(rawbyte_list, metadata=metadata)

	            # Remember the budget so we can tell afterwards whether this
	            # attempt finished or already used up a retry.
	            retries_at_start = retries

	            try:
	                for response in responses:
	                    if response.Status == PartiiService_pb2.StatusCode.Ok:
	                        if response.sentenceType == PartiiService_pb2.ResultType.PARTIAL:
	                            if _VERBOSE != "0":
	                                print("sentenceNumber %s " %(response.sentenceNumber))
	                                print("transcript %s " %(response.transcript))
	                                print("confidence %s " %(response.confidence))
	                                print("startTime %s, %s " %(response.startTime, convMilliFormat(response.startTime)))
	                                print("endTime %s, %s " %(response.endTime, convMilliFormat(response.endTime)))

	                        elif response.sentenceType == PartiiService_pb2.ResultType.RESULT:
	                            _print_result(response)

	                        elif response.sentenceType == PartiiService_pb2.ResultType.FINISHED:
	                            if _VERBOSE != "0":
	                                print("Last respond from server [%s] " %(response.transcript))
	                                print("Stop : ", _AUDIO_INPUT)

	                            retries = 0
	                            break

	                    elif response.Status == PartiiService_pb2.StatusCode.Failed:
	                        print("ERROR [%s] " %(response.transcript))
	                        _wait_before_retry(timeout)
	                        retries -= 1

	            except Exception as exc:
	                # Exception (not a bare except) so Ctrl-C and SystemExit
	                # still propagate; report what actually went wrong.
	                print("An exception occurred: %r" % (exc,))
	                _wait_before_retry(timeout)
	                retries -= 1

	            if retries == retries_at_start:
	                # The stream ended without FINISHED and without a reported
	                # failure. Use up a retry so the loop cannot spin forever.
	                _wait_before_retry(timeout)
	                retries -= 1

	if __name__ == '__main__':
	    # Script entry point.
	    # Usage: partii-client-process-wav-file.py <audio.wav> <api-key>
	    # All other settings come from the [DEFAULT] section of config.ini in
	    # the current working directory.
	    if len(sys.argv) < 3:
	        sys.exit("Usage: {} <audio.wav> <api-key>".format(sys.argv[0]))

	    start_time = time.time()
	    config = configparser.ConfigParser()
	    # ConfigParser.read() skips missing files silently and returns the list
	    # of files it parsed; fail early with a clear message instead of a
	    # KeyError on the first lookup.
	    if not config.read('config.ini'):
	        sys.exit("Cannot read config.ini")

	    _SERVER_IP = config['DEFAULT']['_SERVER_IP']
	    _SERVER_PORT = int(config['DEFAULT']['_SERVER_PORT'])
	    _AUDIO_INPUT = sys.argv[1]
	    _AUDIO_SAMPLING_RATE = int(config['DEFAULT']['_AUDIO_SAMPLING_RATE'])
	    _AUDIO_CODEC = config['DEFAULT']['_AUDIO_CODEC']
	    _CHUNK_LEN_SECONE = config['DEFAULT']['_CHUNK_LEN_SECOND']
	    _VERBOSE = config['DEFAULT']['_VERBOSE']

	    # Use the configured sampling rate, not the hard-coded
	    # _AUDIO_SAMPLE_RATE default, so the chunk size follows config.ini.
	    # The factor 2 is presumably bytes per 16-bit sample - TODO confirm.
	    _PACKAGE_SIZE = float(_CHUNK_LEN_SECONE) * float(_AUDIO_SAMPLING_RATE) * 2
	    _API_KEY = sys.argv[2]
	    _CLIENT_ID = config['DEFAULT']['_CLIENT_ID']

	    _VAD_THRESHOLD = config['DEFAULT']['_VAD_THRESHOLD']
	    _NUM_CHANNELS = config['DEFAULT']['_NUM_CHANNELS']
	    _DECODE_CHANEL = config['DEFAULT']['_DECODE_CHANEL']
	    _MODEL_KEY = config['DEFAULT']['_MODEL_KEY']
	    _PROTOCOL = config['DEFAULT']['_PROTOCOL']
	    _ENABLE_TEXTNORM = config['DEFAULT']['_ENABLE_TEXTNORM']
	    _ENABLE_PARTIAL = config['DEFAULT']['_ENABLE_PARTIAL']
	    _ENABLE_VAD = config['DEFAULT']['_ENABLE_VAD']
	    _ENABLE_ENDPOINT = config['DEFAULT']['_ENABLE_ENDPOINT']
	    _NUMBER_TARGET = config['DEFAULT']['_NUMBER_TARGET']
	    _VIEW = config['DEFAULT']['_VIEW']

	    run()

	    end_time = time.time()
	    print("Runtime of the program is ", (end_time - start_time))