Spaces:

Antoniskaraolis
/

AI_Application

Sleeping

Antoniskaraolis committed on Nov 28, 2023

Commit

c227f48

1 Parent(s): 3ecb0fd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,9 +5,14 @@ import librosa
 # Initialize the model
 asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small")
-def transcribe(audio_file):
     # Load the audio file with librosa
-    data, samplerate = librosa.load(audio_file.name, sr=None)
     # Pass the audio data to the model for transcription
     transcription = asr_model(data, sampling_rate=samplerate)
     return transcription["text"]
@@ -15,8 +20,8 @@ def transcribe(audio_file):
 # Create the Gradio interface
 iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.inputs.Audio(source="microphone", type="file"),
     outputs="text"
 )
-iface.launch()

 # Initialize the model
 asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small")
def transcribe(audio_data):
    """Transcribe audio received from the Gradio ``Audio`` input into text.

    Args:
        audio_data: The value Gradio hands to the callback. Supported forms:
            ``None`` (nothing was recorded), a file path, a file-like object
            exposing ``.name`` (legacy ``type="file"``), or a
            ``(sample_rate, samples)`` tuple (``type="numpy"``).

    Returns:
        The transcribed text, or an empty string when no audio was supplied.
    """
    # Gradio submits None when the user presses "Submit" without recording.
    if audio_data is None:
        return ""

    # Whisper only understands the rate its feature extractor was built for,
    # so every input is resampled to that rate with librosa before inference.
    target_sr = asr_model.feature_extractor.sampling_rate

    if isinstance(audio_data, tuple) and hasattr(audio_data[1], "dtype"):
        # Gradio's numpy format: (sample_rate, samples).
        import numpy as np

        source_sr, samples = audio_data
        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.integer):
            # Integer PCM -> float32 in roughly [-1, 1], as librosa expects.
            samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
        else:
            samples = samples.astype(np.float32)
        if samples.ndim > 1:
            # Assumes (samples, channels) layout per the Gradio docs -- down-mix to mono.
            samples = samples.mean(axis=1)
        data = librosa.resample(samples, orig_sr=source_sr, target_sr=target_sr)
    else:
        import os

        if isinstance(audio_data, tuple):
            # Kept from the previous implementation: use the second element
            # when it is not an array (e.g. a path or file object).
            audio_data = audio_data[1]
        if not isinstance(audio_data, (str, os.PathLike)) and hasattr(audio_data, "name"):
            # Temp-file wrapper from legacy type="file": load via its path.
            audio_data = audio_data.name
        # Load the audio file with librosa, resampling to the model's rate.
        data, _ = librosa.load(audio_data, sr=target_sr)

    # The pipeline takes raw audio plus its rate as a dict; it does not accept
    # a ``sampling_rate`` keyword argument.
    transcription = asr_model({"raw": data, "sampling_rate": target_sr})
    return transcription["text"]
 # Create the Gradio interface
 iface = gr.Interface(
     fn=transcribe,
+    inputs=gr.Audio(source="microphone", type="file", label="Record or Upload Audio"),
     outputs="text"
 )
+iface.launch()