vumichien committed on
Commit
8148b06
•
1 Parent(s): 46d5f68

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +67 -0
utils.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import subprocess
3
+ import soundfile as sf
4
+ from speech_recognition import AudioFile, Recognizer
5
+
6
# Japanese greeting phrases, including casual and elongated spellings.
# The literals must be real UTF-8 Japanese text: mis-decoded (mojibake)
# versions can never compare equal to actual Japanese strings.
greeting_list = [
    "いらっしゃいませ",
    "いらっしゃい",
    "いらっしゃいませー",
    "こんにちは",
    "おはようございます",
    "おはよう",
    "おはよー",
    "おはー",
]
15
+
16
+
17
def ffmpeg_read(bpayload: bytes, sampling_rate: int, output_path: str = 'temp.wav') -> str:
    """
    Decode an in-memory audio payload through ffmpeg and save it as a WAV file.

    The payload is piped to ffmpeg's stdin, converted to a single channel at
    ``sampling_rate`` and read back as raw little-endian float32 samples.
    Those samples are then written to ``output_path`` as 16-bit PCM.

    Args:
        bpayload: encoded audio bytes to feed to ffmpeg.
        sampling_rate: target sampling rate passed to ffmpeg (``-ar``) and
            used as the sample rate of the written WAV file.
        output_path: destination of the WAV file. Defaults to ``'temp.wav'``
            in the current working directory; pass a unique path when several
            calls may run at the same time so they do not overwrite each other.

    Returns:
        The path of the written WAV file (``output_path``).

    Raises:
        ValueError: if the ffmpeg executable cannot be found, or if ffmpeg
            produced no audio samples for the given payload.
    """
    ffmpeg_command = [
        "ffmpeg",
        "-i", "pipe:0",
        "-ac", "1",
        "-ar", f"{sampling_rate}",
        "-f", "f32le",
        "-hide_banner",
        "-loglevel", "quiet",
        "pipe:1",
    ]

    try:
        ffmpeg_process = subprocess.Popen(ffmpeg_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    except FileNotFoundError as err:
        raise ValueError("ffmpeg was not found but is required to load audio files from filename") from err

    # communicate() feeds the payload, drains stdout and waits for ffmpeg to exit.
    out_bytes, _ = ffmpeg_process.communicate(bpayload)
    audio = np.frombuffer(out_bytes, np.float32)

    # With "-loglevel quiet" a decoding failure is only visible as empty output;
    # fail loudly instead of writing an empty WAV file.
    if audio.shape[0] == 0:
        raise ValueError("Malformed soundfile")

    sf.write(output_path, audio, sampling_rate, subtype='PCM_16')
    return output_path
49
+
50
+
51
def stt(audio: object, language='ja') -> str:
    """Transcribe recorded speech into text.

    Args:
        audio: the recording to transcribe, passed straight to ``AudioFile``
        language (str): language code forwarded to the recognizer
    Returns:
        text (str): the transcription returned by ``recognize_google``
    """
    recognizer = Recognizer()
    # Read the whole recording into memory before asking for a transcription.
    with AudioFile(audio) as wav_source:
        recording = recognizer.record(wav_source)
    # Delegate the actual recognition to Google's speech-to-text API.
    return recognizer.recognize_google(recording, language=language)