Spaces:

DurreSudoku
/

Whisper_Swedish

Sleeping

DurreSudoku committed on Mar 2

Commit

d5e5cc3

verified ·

1 Parent(s): 1ff72eb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import logging
 all_images = os.listdir("assets")
 current_image = None
-pipe = pipeline(task="automatic-speech-recognition", model="DurreSudoku/whisper-small-sv", processor="openai/whisper-small")  # change to "your-username/the-name-you-picked"
 def test_func():
     random_int = random.randint(1, 100)
@@ -55,12 +55,15 @@ def transcribe(audio_input):
     transcribed_audio = transcribed_audio.replace("?", "")
     transcribed_audio = transcribed_audio.lower()
-    text_list = transcribed_audio.split(" ")
     correct_answer = current_image.split(".png")[0]
-    # Check for a perfect match.
-    if correct_answer in text_list:
         return f"Correct! The answer is {correct_answer}."
     # Check for partial match, in case the model mistakes a letter or two.
@@ -86,7 +89,7 @@ with gr.Blocks(title="Interactive Language Learning") as demo:
     """)
     with gr.Row():
         with gr.Column():
-            audio = gr.Audio(sources="microphone", type="numpy", label="Record your answer here")
         with gr.Column():
             image = gr.Image(value=open_image(),type="pil", interactive=False)
     with gr.Row():

 all_images = os.listdir("assets")
 current_image = None
+pipe = pipeline(task="automatic-speech-recognition", model="DurreSudoku/whisper-small-sv")  # change to "your-username/the-name-you-picked"
 def test_func():
     random_int = random.randint(1, 100)
     transcribed_audio = transcribed_audio.replace("?", "")
     transcribed_audio = transcribed_audio.lower()
     correct_answer = current_image.split(".png")[0]
+    text_list = transcribed_audio.split(" ")
+    ratio = SequenceMatcher(None, transcribed_audio, correct_answer).ratio()
+    if ratio >= 0.75:
+        return f"Correct! The answer is {correct_answer}."
+    elif correct_answer in text_list:
         return f"Correct! The answer is {correct_answer}."
     # Check for partial match, in case the model mistakes a letter or two.
     """)
     with gr.Row():
         with gr.Column():
+            audio = gr.Audio(sources="microphone", type="filepath", label="Record your answer here")
         with gr.Column():
             image = gr.Image(value=open_image(),type="pil", interactive=False)
     with gr.Row():