Spaces:
Runtime error
Runtime error
File size: 1,981 Bytes
8148b06 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import numpy as np
import subprocess
import soundfile as sf
from speech_recognition import AudioFile, Recognizer
# Greeting phrases kept as a module-level list; no function visible in this
# file references it.
# NOTE(review): the literals below look mis-encoded (mojibake) -- presumably
# Japanese text that was decoded with the wrong charset. Confirm against the
# original file before relying on these values.
greeting_list = ["γγγ£γγγγΎγ",
"γγγ£γγγ",
"γγγ£γγγγΎγγΌ",
"γγγ«γ‘γ―",
"γγ―γγγγγγΎγ",
"γγ―γγ",
"γγ―γγΌ",
"γγ―γΌ",
]
def ffmpeg_read(bpayload: bytes, sampling_rate: int) -> str:
    """
    Decode an audio payload with ffmpeg and save it as a 16-bit PCM WAV file.

    The payload is piped to ffmpeg's stdin, converted to mono float32 samples
    at ``sampling_rate``, and the samples are then written to ``temp.wav``
    (a relative path, so it lands in the current working directory). The same
    file name is used on every call, so each call overwrites the previous one.

    Args:
        bpayload: Encoded audio bytes in any container/codec ffmpeg can read.
        sampling_rate: Target sample rate in Hz for the decoded audio.

    Returns:
        The path of the written WAV file (always ``'temp.wav'``).

    Raises:
        ValueError: If the ffmpeg executable cannot be found, or if ffmpeg
            produced no audio data for the given payload.
    """
    ffmpeg_command = [
        "ffmpeg",
        "-i", "pipe:0",              # read the encoded audio from stdin
        "-ac", "1",                  # downmix to a single channel
        "-ar", str(sampling_rate),   # resample to the requested rate
        "-f", "f32le",               # emit raw little-endian float32 samples
        "-hide_banner",
        "-loglevel", "quiet",
        "pipe:1",                    # write the decoded samples to stdout
    ]

    try:
        ffmpeg_process = subprocess.Popen(
            ffmpeg_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE
        )
    except FileNotFoundError as err:
        # Keep the original cause attached so the traceback shows what failed.
        raise ValueError(
            "ffmpeg was not found but is required to load audio files from filename"
        ) from err

    out_bytes, _ = ffmpeg_process.communicate(bpayload)

    # ffmpeg runs with "-loglevel quiet", so a decode failure is only visible
    # as empty output. Fail here instead of writing a zero-length WAV file.
    if not out_bytes:
        raise ValueError(
            "ffmpeg produced no audio data; the payload may be malformed "
            "or in an unsupported format"
        )

    audio = np.frombuffer(out_bytes, np.float32)
    sf.write('temp.wav', audio, sampling_rate, subtype='PCM_16')
    return 'temp.wav'
def stt(audio: object, language='ja') -> str:
    """Transcribe recorded speech into text.

    Args:
        audio: the recording to transcribe; it is passed straight to
            ``AudioFile``
        language (str): language code forwarded to the recognizer

    Returns:
        text (str): the transcription returned by ``recognize_google``
    """
    recognizer = Recognizer()
    with AudioFile(audio) as wav_source:
        # Load the whole recording into memory, then send it to Google's
        # speech-to-text API for transcription.
        recording = recognizer.record(wav_source)
        return recognizer.recognize_google(recording, language=language)
|