Spaces:

barbaroo
/

ASR_Faroese

Running

App Files Files Community

barbaroo committed on Aug 20, 2023

Commit

502159a

1 Parent(s): 4020721

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -15

app.py CHANGED Viewed

@@ -1,47 +1,49 @@
 import gradio as gr
 import time
-from transformers import pipeline
 import torch
 # Check if GPU is available
 use_gpu = torch.cuda.is_available()
 # Configure the pipeline to use the GPU if available
 if use_gpu:
     p = pipeline("automatic-speech-recognition",
-             model="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h", device=0)
 else:
     p = pipeline("automatic-speech-recognition",
-             model="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h")
 def transcribe(audio, state="", uploaded_audio=None):
     if uploaded_audio is not None:
         audio = uploaded_audio
     if not audio:
         return state, state  # Return a meaningful message
     try:
-        time.sleep(3)
-        text = p(audio)["text"]
-        state += text + "\n"
         return state, state
     except Exception as e:
         return "An error occurred during transcription.", state  # Handle other exceptions
 gr.Interface(
     fn=transcribe,
     inputs=[
-        gr.inputs.Audio(source="microphone", type="filepath"),
         'state',
-        gr.inputs.Audio(label="Upload Audio File", type="filepath", source="upload")
     ],
     outputs=[
         "textbox",
         "state"
     ],
-    live=True).launch()

 import gradio as gr
 import time
 import torch
+from transformers import pipeline
+import numpy as np
 # Ask PyTorch whether a CUDA device is visible to this process
 use_gpu = torch.cuda.is_available()
 # Build the speech-recognition pipeline once at import time: device 0 is
 # requested when CUDA is available, otherwise no device argument is passed
 # and the library picks its default.
 p = (
     pipeline("automatic-speech-recognition",
              model="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h", device=0)
     if use_gpu
     else pipeline("automatic-speech-recognition",
                   model="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h")
 )
 # Chunk length consumed by transcribe() when it splits the audio; tune as needed
 chunk_size = 30
 def transcribe(audio, state="", uploaded_audio=None):
     """Transcribe microphone or uploaded audio and append it to the transcript.

     Args:
         audio: Microphone input. With ``type="numpy"`` Gradio delivers a
             ``(sample_rate, samples)`` tuple; a file-path string is also
             accepted and handed to the pipeline unchanged.
         state: Transcript text accumulated by earlier calls.
         uploaded_audio: Optional uploaded clip in the same format as
             ``audio``. When it is not ``None`` it replaces ``audio``.

     Returns:
         A ``(textbox_text, new_state)`` pair. On success both elements are
         the updated transcript; on failure the first element is a fixed
         error message and the second is the state built up so far.
     """
     if uploaded_audio is not None:
         audio = uploaded_audio
     if not audio:
         return state, state  # Nothing recorded or uploaded yet
     try:
         state += "Transcribing...\n"
         if isinstance(audio, str):
             # A file path: the pipeline decodes the file itself.
             segments = [audio]
         else:
             # Slicing the tuple itself (as before) never touched the samples,
             # so unpack it and chunk the waveform instead.
             sample_rate, samples = audio
             samples = np.asarray(samples)
             if samples.ndim > 1:
                 # Multi-channel audio arrives as (samples, channels); the
                 # pipeline expects a single channel, so downmix to mono.
                 samples = samples.mean(axis=1)
             samples = samples.astype(np.float32)
             # Integer PCM has to be scaled to floats; peak-normalise while
             # guarding against an empty or all-zero (silent) recording.
             peak = float(np.max(np.abs(samples))) if samples.size else 0.0
             if peak > 0:
                 samples = samples / peak
             # chunk_size is interpreted as seconds of audio per chunk.
             step = max(1, int(chunk_size * sample_rate))
             segments = [
                 {"sampling_rate": sample_rate, "raw": samples[start:start + step]}
                 for start in range(0, len(samples), step)
             ]
         for segment in segments:
             text = p(segment)["text"]
             state += text + "\n"
             # NOTE(review): fixed one-second pause kept from the original
             # ("simulate processing time"); confirm it is still wanted.
             time.sleep(1)
         return state, state
     except Exception:
         # UI boundary: keep the app responsive, but record the failure so it
         # is not silently swallowed.
         import logging
         logging.getLogger(__name__).exception("Transcription failed")
         return "An error occurred during transcription.", state
 # Wire transcribe() into a live Gradio interface: a microphone input, the
 # session state, and an optional uploaded file go in; the transcript text
 # and the updated state come out.
 demo = gr.Interface(
     fn=transcribe,
     inputs=[
         gr.inputs.Audio(source="microphone", type="numpy"),
         'state',
         gr.inputs.Audio(label="Upload Audio File", type="numpy", source="upload"),
     ],
     outputs=["textbox", "state"],
     live=True,
 )
 # Start serving the interface
 demo.launch()