Spaces:
Sleeping
Sleeping
Commit
·
d555d0f
1
Parent(s):
c55ef1c
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import random
|
|
5 |
from transformers import pipeline
|
6 |
from difflib import SequenceMatcher
|
7 |
|
|
|
8 |
pipe = pipeline(model="DurreSudoku/whisper-small-sv") # change to "your-username/the-name-you-picked"
|
9 |
|
10 |
def test_func():
|
@@ -18,9 +19,13 @@ def empty_string():
|
|
18 |
|
19 |
|
20 |
def open_image():
|
|
|
21 |
# Open a random image
|
22 |
image_dir = os.listdir("assets")
|
23 |
img_name = random.choice(image_dir)
|
|
|
|
|
|
|
24 |
img = Image.open(os.path.join(r"assets", img_name))
|
25 |
# print(img.filename)
|
26 |
return img
|
@@ -28,16 +33,14 @@ def open_image():
|
|
28 |
|
29 |
|
30 |
|
31 |
-
def transcribe(audio
|
32 |
# Transcribe the audio and split the string into a list of words
|
33 |
transcribed_audio = pipe(audio)["text"]
|
34 |
transcribed_audio.replace(",", "").replace(".", "").replace("!", "")
|
35 |
|
36 |
text_list = transcribed_audio.split(" ")
|
37 |
|
38 |
-
|
39 |
-
img_name = img.filename
|
40 |
-
correct_answer = img_name.split("\\")[1].split(".png")[0]
|
41 |
|
42 |
# Check for a perfect match.
|
43 |
if correct_answer in text_list:
|
@@ -48,7 +51,7 @@ def transcribe(audio, img):
|
|
48 |
match_ratio = SequenceMatcher(None, text, correct_answer).ratio()
|
49 |
|
50 |
if match_ratio > 0.8:
|
51 |
-
return f"Partially correct. The answer is {correct_answer}."
|
52 |
# If no match is found.
|
53 |
return f"Incorrect. The correct answer is {correct_answer}"
|
54 |
|
@@ -69,14 +72,14 @@ with gr.Blocks(title="Interactive Language Learning") as demo:
|
|
69 |
with gr.Column():
|
70 |
audio = gr.Audio(sources="microphone", type="filepath", label="Record your answer here")
|
71 |
with gr.Column():
|
72 |
-
image = gr.Image(value=open_image(),type="pil")
|
73 |
with gr.Row():
|
74 |
answer_box = gr.Text(placeholder="Answer appears here", interactive=False)
|
75 |
with gr.Row():
|
76 |
with gr.Column():
|
77 |
process_input = gr.Button("Submit Answer")
|
78 |
-
process_input.click(fn=transcribe, inputs=
|
79 |
-
# process_input.click(fn=test_func, inputs=
|
80 |
with gr.Column():
|
81 |
refresh = gr.Button("New Image")
|
82 |
refresh.click(fn=open_image, inputs=None, outputs=image)
|
|
|
5 |
from transformers import pipeline
|
6 |
from difflib import SequenceMatcher
|
7 |
|
8 |
+
current_image = None
|
9 |
pipe = pipeline(model="DurreSudoku/whisper-small-sv") # change to "your-username/the-name-you-picked"
|
10 |
|
11 |
def test_func():
|
|
|
19 |
|
20 |
|
21 |
def open_image():
|
22 |
+
global current_image
|
23 |
# Open a random image
|
24 |
image_dir = os.listdir("assets")
|
25 |
img_name = random.choice(image_dir)
|
26 |
+
|
27 |
+
current_image = img_name
|
28 |
+
|
29 |
img = Image.open(os.path.join(r"assets", img_name))
|
30 |
# print(img.filename)
|
31 |
return img
|
|
|
33 |
|
34 |
|
35 |
|
36 |
+
def transcribe(audio):
|
37 |
# Transcribe the audio and split the string into a list of words
|
38 |
transcribed_audio = pipe(audio)["text"]
|
39 |
transcribed_audio.replace(",", "").replace(".", "").replace("!", "")
|
40 |
|
41 |
text_list = transcribed_audio.split(" ")
|
42 |
|
43 |
+
correct_answer = current_image.split(".png")[0]
|
|
|
|
|
44 |
|
45 |
# Check for a perfect match.
|
46 |
if correct_answer in text_list:
|
|
|
51 |
match_ratio = SequenceMatcher(None, text, correct_answer).ratio()
|
52 |
|
53 |
if match_ratio > 0.8:
|
54 |
+
return f"Partially correct. The answer is {correct_answer}, I heard {text}."
|
55 |
# If no match is found.
|
56 |
return f"Incorrect. The correct answer is {correct_answer}"
|
57 |
|
|
|
72 |
with gr.Column():
|
73 |
audio = gr.Audio(sources="microphone", type="filepath", label="Record your answer here")
|
74 |
with gr.Column():
|
75 |
+
image = gr.Image(value=open_image(),type="pil", interactive=False)
|
76 |
with gr.Row():
|
77 |
answer_box = gr.Text(placeholder="Answer appears here", interactive=False)
|
78 |
with gr.Row():
|
79 |
with gr.Column():
|
80 |
process_input = gr.Button("Submit Answer")
|
81 |
+
process_input.click(fn=transcribe, inputs=audio, outputs=answer_box)
|
82 |
+
# process_input.click(fn=test_func, inputs=None, outputs=answer_box)
|
83 |
with gr.Column():
|
84 |
refresh = gr.Button("New Image")
|
85 |
refresh.click(fn=open_image, inputs=None, outputs=image)
|