Spaces:
Runtime error
Runtime error
import numpy as np | |
import subprocess | |
import soundfile as sf | |
from speech_recognition import AudioFile, Recognizer | |
# Japanese greeting phrases (hiragana). The literals had been corrupted into
# Greek-letter mojibake (UTF-8 bytes decoded with a Greek single-byte codec);
# they are restored here to the text those bytes encode.
# NOTE(review): presumably matched against speech-recognition output
# elsewhere in the file — the consumer is not visible from this section.
greeting_list = [
    "いらっしゃいませ",
    "いらっしゃい",
    "いらっしゃいませー",
    "こんにちは",
    "おはようございます",
    "おはよう",
    "おはよー",
    "おはー",
]
def ffmpeg_read(bpayload: bytes, sampling_rate: int, output_path: str = "temp.wav") -> str:
    """Decode an audio payload with ffmpeg and save it as a 16-bit PCM WAV file.

    The payload is piped to ffmpeg on stdin and converted to a single channel
    at ``sampling_rate`` as raw little-endian float32 samples. The decoded
    samples are then written to ``output_path`` with soundfile.

    Args:
        bpayload: Raw bytes of the audio file to decode.
        sampling_rate: Target sample rate passed to ffmpeg (``-ar``) and used
            for the written WAV file.
        output_path: Destination of the WAV file. Defaults to ``"temp.wav"``
            in the current working directory; callers that may run
            concurrently should pass distinct paths, since the default is
            overwritten on every call.

    Returns:
        The path of the written WAV file (``output_path``).

    Raises:
        ValueError: If the ffmpeg executable cannot be found, or if ffmpeg
            produced no audio samples for the given payload.
    """
    ffmpeg_command = [
        "ffmpeg",
        "-i",
        "pipe:0",  # read the input from stdin
        "-ac",
        "1",  # downmix to one channel
        "-ar",
        f"{sampling_rate}",
        "-f",
        "f32le",  # raw little-endian float32 samples, no container
        "-hide_banner",
        "-loglevel",
        "quiet",
        "pipe:1",  # write the decoded samples to stdout
    ]

    try:
        ffmpeg_process = subprocess.Popen(
            ffmpeg_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE
        )
    except FileNotFoundError as err:
        raise ValueError(
            "ffmpeg was not found but is required to load audio files from filename"
        ) from err

    # communicate() feeds the payload, waits for ffmpeg to exit and closes the pipes.
    out_bytes, _ = ffmpeg_process.communicate(bpayload)
    audio = np.frombuffer(out_bytes, np.float32)

    # ffmpeg runs with "-loglevel quiet", so a decode failure only shows up as
    # empty output. Fail loudly instead of writing an empty WAV file.
    if audio.shape[0] == 0:
        raise ValueError(
            "ffmpeg produced no audio data; the payload may be malformed or in an unsupported format"
        )

    sf.write(output_path, audio, sampling_rate, subtype="PCM_16")
    return output_path
def stt(audio: object, language='ja') -> str:
    """Transcribe a recorded utterance to text.

    Args:
        audio: the user's recording, in whatever form ``AudioFile`` accepts
        language (str): language code handed to the recognizer
    Returns:
        str: the transcript returned by ``Recognizer.recognize_google``
    """
    recognizer = Recognizer()

    # Pull the whole recording into memory while the audio source is open.
    with AudioFile(audio) as wav_source:
        recording = recognizer.record(wav_source)

    # Hand the in-memory recording to Google's speech-to-text API.
    return recognizer.recognize_google(recording, language=language)