Baghdad99 committed on
Commit
83e3ccb
·
1 Parent(s): 8fe6fd5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import gradio as gr
2
  import requests
 
 
 
3
 
4
  # Define the Hugging Face Inference API URLs and headers
5
  ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
@@ -14,8 +17,14 @@ def query(api_url, payload):
14
 
15
  # Define the function to translate speech
16
  def translate_speech(audio):
 
 
 
 
 
 
17
  # Use the ASR pipeline to transcribe the audio
18
- with open(audio.name, "rb") as f:
19
  data = f.read()
20
  response = requests.post(ASR_API_URL, headers=headers, data=data)
21
  output = response.json()
@@ -34,7 +43,13 @@ def translate_speech(audio):
34
  response = requests.post(TTS_API_URL, headers=headers, json={"inputs": translated_text})
35
  audio_bytes = response.content
36
 
37
- return audio_bytes
 
 
 
 
 
 
38
 
39
  # Define the Gradio interface
40
  iface = gr.Interface(
 
1
  import gradio as gr
2
  import requests
3
+ import soundfile as sf
4
+ import numpy as np
5
+ import tempfile
6
 
7
  # Define the Hugging Face Inference API URLs and headers
8
  ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
 
17
 
18
  # Define the function to translate speech
19
  def translate_speech(audio):
20
+ # audio is a tuple (np.ndarray, int), we need to save it as a file
21
+ audio_data, sample_rate = audio
22
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
23
+ sf.write(f, audio_data, sample_rate)
24
+ audio_file = f.name
25
+
26
  # Use the ASR pipeline to transcribe the audio
27
+ with open(audio_file, "rb") as f:
28
  data = f.read()
29
  response = requests.post(ASR_API_URL, headers=headers, data=data)
30
  output = response.json()
 
43
  response = requests.post(TTS_API_URL, headers=headers, json={"inputs": translated_text})
44
  audio_bytes = response.content
45
 
46
+ # Convert the audio bytes to numpy array
47
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
48
+ f.write(audio_bytes)
49
+ audio_file = f.name
50
+ audio_data, _ = sf.read(audio_file)
51
+
52
+ return audio_data
53
 
54
  # Define the Gradio interface
55
  iface = gr.Interface(