# Hugging Face Space file view -- Space status: Sleeping; file size: 3,290 bytes.
import gradio as gr
from PIL import Image
import os
import random
from transformers import pipeline
from difflib import SequenceMatcher
# File name of the image currently on screen; assigned by open_image().
current_image = None

# File names in "assets" that have not been shown yet in the current round.
all_images = os.listdir("assets")

# Speech-recognition pipeline used by transcribe().
# To use another model, change this to "your-username/the-name-you-picked".
pipe = pipeline(model="DurreSudoku/whisper-small-sv")
def test_func():
    """Return "Test successful" immediately followed by a random integer in 1..100."""
    return f"Test successful{random.randint(1, 100)}"
def empty_string() -> str:
    """Return an empty string."""
    return ""
def open_image():
    """Open a random image from "assets" that has not been shown this round.

    The chosen file name is removed from the module-level ``all_images`` pool
    and stored in ``current_image``. When the pool is exhausted it is refilled
    from the directory listing, starting a new round.

    Returns:
        The image opened with ``PIL.Image.open``.

    Raises:
        IndexError: If the "assets" directory contains no files.
    """
    global all_images
    global current_image

    # Pool exhausted: start a new round with a fresh directory listing.
    if not all_images:
        all_images = os.listdir("assets")
        if not all_images:
            # Same exception type random.choice() would raise, but with a
            # message that points at the actual problem.
            raise IndexError('No images found in the "assets" directory')

    # Right after a refill the image just shown is back in the pool; skip it
    # when there is an alternative so "New Image" never repeats the picture.
    candidates = [name for name in all_images if name != current_image]
    img_name = random.choice(candidates or all_images)

    all_images.remove(img_name)
    current_image = img_name
    return Image.open(os.path.join("assets", img_name))
def transcribe(audio):
    """Transcribe a recording and compare it with the current image's name.

    The expected answer is the file name of ``current_image`` without its
    extension. Matching is case-insensitive and ignores the punctuation
    characters ``, . ! ?``.

    Args:
        audio: The recording handed to the speech-recognition pipeline
            ``pipe`` (the UI supplies a file path).

    Returns:
        A feedback message: an exact-match confirmation, a near-match note
        (similarity ratio of at least 0.8 with one transcribed word), the
        correct answer together with what was heard, or an error message if
        no image is loaded or the transcription fails.
    """
    if current_image is None:
        return 'No image is loaded yet. Press the "New Image" button first.'

    # Keep the try body to the single call that can fail. Catching Exception
    # rather than using a bare except lets KeyboardInterrupt/SystemExit
    # propagate.
    try:
        transcript = pipe(audio)["text"]
    except Exception:
        return "Encountered an error. Are you sure that you recorded audio before submitting?"

    # The answer is the image file name without its extension; compare in
    # lowercase because the transcript is lowercased as well.
    correct_answer = os.path.splitext(current_image)[0]
    target = correct_answer.lower()

    # Drop sentence punctuation in one pass, then normalise case and
    # surrounding whitespace so the tokens compare cleanly.
    cleaned = transcript.translate(str.maketrans("", "", ",.!?")).lower().strip()
    words = cleaned.split()

    # Check for a perfect match.
    if target in words:
        return f"Correct! The answer is {correct_answer}."

    # Check for a partial match, in case the model mistakes a letter or two.
    for word in words:
        if SequenceMatcher(None, word, target).ratio() >= 0.8:
            return f"The answer is {correct_answer}. I heard {word}."

    # No match was found.
    return f"The correct answer is {correct_answer}. I heard {cleaned}."
# Build the user interface: instructions, recorder + image, answer box, and
# the two action buttons.
# NOTE(review): the Row/Column nesting below is reconstructed from statement
# order -- confirm it matches the intended layout.
with gr.Blocks(title="Interactive Language Learning") as demo:
    # Introductory instructions.
    with gr.Row():
        gr.Markdown(
            """
# Interactive Language Learning Prototype
Hello!
This is a prototype app that is meant to help you learn some basic Swedish words. Observe the image,
record a one word answer and press the "Submit Answer" button! For a new image, press the "New Image" button.
""")

    # Microphone recorder next to the image to be named.
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(sources="microphone", type="filepath", label="Record your answer here")
        with gr.Column():
            # open_image() runs once here to provide the initial picture.
            image = gr.Image(value=open_image(), type="pil", interactive=False)

    # Read-only feedback produced by transcribe().
    with gr.Row():
        answer_box = gr.Text(label="Answer appears here", interactive=False)

    with gr.Row():
        with gr.Column():
            process_input = gr.Button("Submit Answer")
            process_input.click(fn=transcribe, inputs=audio, outputs=answer_box)
            # Alternative wiring that does not call the speech model:
            # process_input.click(fn=test_func, inputs=None, outputs=answer_box)
        with gr.Column():
            refresh = gr.Button("New Image")
            # A new image also clears the previous answer.
            refresh.click(fn=open_image, inputs=None, outputs=image)
            refresh.click(fn=empty_string, inputs=None, outputs=answer_box)

demo.launch(debug=True)