Spaces:

DurreSudoku
/

Whisper_Swedish

Sleeping

App Files Files

DurreSudoku committed on Dec 10, 2023

Commit

0f9125a

1 Parent(s): 2fd8044

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -13

app.py CHANGED Viewed

@@ -1,18 +1,85 @@
-from transformers import pipeline
 import gradio as gr
-pipe = pipeline(model="DurreSudoku/whisper-small-hi")  # change to "your-username/the-name-you-picked"
-def transcribe(audio):
-    text = pipe(audio)["text"]
-    return text
-iface = gr.Interface(
-    fn=transcribe,
-    inputs=gr.Audio(sources="microphone", type="filepath"),
-    outputs="text",
-    title="Whisper Small Swedish",
-    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
-)
-iface.launch()

 import gradio as gr
+from PIL import Image
+import os
+import random
+from transformers import pipeline
+from difflib import SequenceMatcher
+# Speech-to-text pipeline, created once at import time and reused by transcribe().
+pipe = pipeline(model="DurreSudoku/whisper-small-sv")
+def test_func():
+    random_int = random.randint(1, 100)
+    string = "Test successful" + str(random_int)
+    return string
+def empty_string():
+    """Return an empty string; wired to the "New Image" button to clear the answer box."""
+    return ""
+def open_image():
+    # Open a random image
+    image_dir = os.listdir("assets")
+    img_name = random.choice(image_dir)
+    img = Image.open(os.path.join(r"assets", img_name))
+    # print(img.filename)
+    return img
+def transcribe(audio, img):
+    # Transcribe the audio and split the string into a list of words
+    transcribed_audio = pipe(audio)["text"]
+    transcribed_audio.replace(",", "").replace(".", "").replace("!", "")
+    text_list = transcribed_audio.split(" ")
+    # Extract the correct answer from the image filename
+    img_name = img.filename
+    correct_answer = img_name.split("\\")[1].split(".png")[0]
+    # Check for a perfect match.
+    if correct_answer in text_list:
+        return f"Correct! The answer is {correct_answer}."
+    # Check for partial match, in case the model mistakes a letter or two.
+    for text in text_list:
+        match_ratio =  SequenceMatcher(None, text, correct_answer).ratio()
+        if match_ratio > 0.8:
+            return f"Partially correct. The answer is {correct_answer}."
+    # If no match is found.
+    return f"Incorrect. The correct answer is {correct_answer}"
+# Build the UI top to bottom: intro text, recorder and image side by side,
+# the answer box, and finally the two action buttons.
+with gr.Blocks(title="Interactive Language Learning") as demo:
+    with gr.Row():
+        gr.Markdown(
+    """
+    # Interactive Language Learning Prototype
+    Hello!
+    This is a prototype app that is meant to help you learn some basic Swedish words. Observe the image,
+    record a one word answer and press the "Submit Answer" button! For a new image, press the "New Image" button.
+    """)
+    with gr.Row():
+        with gr.Column():
+            # type="filepath" means transcribe() receives the recording as a file path.
+            # NOTE(review): the device keyword is `source` in Gradio 3.x but
+            # `sources` in 4.x -- confirm against the Gradio version this app runs on.
+            audio = gr.Audio(source="microphone", type="filepath", label="Record your answer here")
+        with gr.Column():
+            # open_image() is called once here, while the layout is built,
+            # to choose the initial image.
+            image = gr.Image(value=open_image(),type="pil")
+    with gr.Row():
+        # Read-only box that shows the feedback returned by transcribe().
+        answer_box = gr.Text(placeholder="Answer appears here", interactive=False)
+    with gr.Row():
+        with gr.Column():
+            process_input = gr.Button("Submit Answer")
+            # Grade the recording against the currently shown image.
+            process_input.click(fn=transcribe, inputs=[audio, image], outputs=answer_box)
+            # process_input.click(fn=test_func, inputs=[audio, image], outputs=answer_box)
+        with gr.Column():
+            refresh = gr.Button("New Image")
+            # Two handlers on the same button: one swaps in a new random
+            # image, the other clears the previous feedback.
+            refresh.click(fn=open_image, inputs=None, outputs=image)
+            refresh.click(fn=empty_string, inputs=None, outputs=answer_box)
+demo.launch(debug=True)