Spaces:

DurreSudoku
/

Whisper_Swedish

Sleeping

App Files Files

DurreSudoku committed on Dec 10, 2023

Commit

d555d0f

1 Parent(s): c55ef1c

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -8

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import random
 from transformers import pipeline
 from difflib import SequenceMatcher
 pipe = pipeline(model="DurreSudoku/whisper-small-sv")  # change to "your-username/the-name-you-picked"
 def test_func():
@@ -18,9 +19,13 @@ def empty_string():
 def open_image():
     # Open a random image
     image_dir = os.listdir("assets")
     img_name = random.choice(image_dir)
     img = Image.open(os.path.join(r"assets", img_name))
     # print(img.filename)
     return img
@@ -28,16 +33,14 @@ def open_image():
-def transcribe(audio, img):
     # Transcribe the audio and split the string into a list of words
     transcribed_audio = pipe(audio)["text"]
     transcribed_audio.replace(",", "").replace(".", "").replace("!", "")
     text_list = transcribed_audio.split(" ")
-    # Extract the correct answer from the image filename
-    img_name = img.filename
-    correct_answer = img_name.split("\\")[1].split(".png")[0]
     # Check for a perfect match.
     if correct_answer in text_list:
@@ -48,7 +51,7 @@ def transcribe(audio, img):
         match_ratio =  SequenceMatcher(None, text, correct_answer).ratio()
         if match_ratio > 0.8:
-            return f"Partially correct. The answer is {correct_answer}."
     # If no match is found.
     return f"Incorrect. The correct answer is {correct_answer}"
@@ -69,14 +72,14 @@ with gr.Blocks(title="Interactive Language Learning") as demo:
         with gr.Column():
             audio = gr.Audio(sources="microphone", type="filepath", label="Record your answer here")
         with gr.Column():
-            image = gr.Image(value=open_image(),type="pil")
     with gr.Row():
         answer_box = gr.Text(placeholder="Answer appears here", interactive=False)
     with gr.Row():
         with gr.Column():
             process_input = gr.Button("Submit Answer")
-            process_input.click(fn=transcribe, inputs=[audio, image], outputs=answer_box)
-            # process_input.click(fn=test_func, inputs=[audio, image], outputs=answer_box)
         with gr.Column():
             refresh = gr.Button("New Image")
             refresh.click(fn=open_image, inputs=None, outputs=image)

 from transformers import pipeline
 from difflib import SequenceMatcher
+current_image = None
 pipe = pipeline(model="DurreSudoku/whisper-small-sv")  # change to "your-username/the-name-you-picked"
 def test_func():
 def open_image():
+    global current_image
     # Open a random image
     image_dir = os.listdir("assets")
     img_name = random.choice(image_dir)
+    current_image = img_name
     img = Image.open(os.path.join(r"assets", img_name))
     # print(img.filename)
     return img
+def transcribe(audio):
     # Transcribe the audio and split the string into a list of words
     transcribed_audio = pipe(audio)["text"]
     transcribed_audio.replace(",", "").replace(".", "").replace("!", "")
     text_list = transcribed_audio.split(" ")
+    correct_answer = current_image.split(".png")[0]
     # Check for a perfect match.
     if correct_answer in text_list:
         match_ratio =  SequenceMatcher(None, text, correct_answer).ratio()
         if match_ratio > 0.8:
+            return f"Partially correct. The answer is {correct_answer}, I heard {text}."
     # If no match is found.
     return f"Incorrect. The correct answer is {correct_answer}"
         with gr.Column():
             audio = gr.Audio(sources="microphone", type="filepath", label="Record your answer here")
         with gr.Column():
+            image = gr.Image(value=open_image(),type="pil", interactive=False)
     with gr.Row():
         answer_box = gr.Text(placeholder="Answer appears here", interactive=False)
     with gr.Row():
         with gr.Column():
             process_input = gr.Button("Submit Answer")
+            process_input.click(fn=transcribe, inputs=audio, outputs=answer_box)
+            # process_input.click(fn=test_func, inputs=None, outputs=answer_box)
         with gr.Column():
             refresh = gr.Button("New Image")
             refresh.click(fn=open_image, inputs=None, outputs=image)