DurreSudoku committed on
Commit
0f9125a
·
1 Parent(s): 2fd8044

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -13
app.py CHANGED
@@ -1,18 +1,85 @@
1
- from transformers import pipeline
2
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- pipe = pipeline(model="DurreSudoku/whisper-small-hi") # change to "your-username/the-name-you-picked"
5
 
6
- def transcribe(audio):
7
- text = pipe(audio)["text"]
8
- return text
9
 
10
- iface = gr.Interface(
11
- fn=transcribe,
12
- inputs=gr.Audio(sources="microphone", type="filepath"),
13
- outputs="text",
14
- title="Whisper Small Swedish",
15
- description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
16
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- iface.launch()
 
 
1
  import gradio as gr
2
+ from PIL import Image
3
+ import os
4
+ import random
5
+ from transformers import pipeline
6
+ from difflib import SequenceMatcher
7
+
8
# Hugging Face pipeline built from the "DurreSudoku/whisper-small-sv" model.
# transcribe() calls it with the recorded audio and reads the "text" field of the result.
pipe = pipeline(model="DurreSudoku/whisper-small-sv")
9
+
10
def test_func():
    """Return "Test successful" immediately followed by a random integer in [1, 100].

    Takes no arguments; the random suffix makes consecutive results
    distinguishable from one another.
    """
    return f"Test successful{random.randint(1, 100)}"
14
+
15
+
16
def empty_string():
    """Return an empty string (wired to the answer box to clear its text)."""
    return str()
18
+
19
+
20
def open_image():
    """Open and return one randomly chosen file from the ``assets`` directory.

    The file is opened via its ``assets/<name>`` path, so the returned
    image keeps that path available for later inspection.
    """
    chosen_name = random.choice(os.listdir("assets"))
    return Image.open(os.path.join("assets", chosen_name))
27
+
28
+
29
+
30
+
31
def transcribe(audio, img):
    """Transcribe a spoken answer and grade it against the displayed image.

    The expected word is the image's file name with directory and extension
    removed (e.g. ``assets/hund.png`` -> ``hund``).

    Args:
        audio: Recording passed unchanged to the module-level ``pipe``.
        img: Image object exposing a ``filename`` attribute.

    Returns:
        A feedback string: "Correct! ..." when a transcribed word equals the
        expected word, "Partially correct. ..." when some word is more than
        80% similar to it, otherwise "Incorrect. ...". A short explanatory
        message is returned instead when there is no recording or the image
        carries no file name.
    """
    if audio is None:
        return "No recording found. Please record an answer first."

    # NOTE(review): this relies on the image object still carrying the path it
    # was opened from -- confirm the UI framework preserves ``filename`` when
    # it hands the image back to this callback.
    img_name = str(getattr(img, "filename", "") or "")
    if not img_name:
        return "Could not determine the correct answer for this image."

    # Accept both "/" and "\\" separators so the lookup works on any OS, and
    # drop whatever extension the file has rather than only ".png".
    base_name = os.path.basename(img_name.replace("\\", "/"))
    correct_answer = os.path.splitext(base_name)[0]
    target = correct_answer.casefold()

    transcribed_audio = pipe(audio)["text"]
    # Strings are immutable: the stripped result must be kept, not discarded.
    cleaned = transcribed_audio.translate(str.maketrans("", "", ",.!?"))
    # split() without arguments ignores leading/trailing/repeated whitespace;
    # casefold() makes the comparison independent of capitalisation.
    words = [word.casefold() for word in cleaned.split()]

    # Check for a perfect match.
    if target in words:
        return f"Correct! The answer is {correct_answer}."

    # Check for a partial match, in case the model mistakes a letter or two.
    if any(SequenceMatcher(None, word, target).ratio() > 0.8 for word in words):
        return f"Partially correct. The answer is {correct_answer}."

    # If no match is found.
    return f"Incorrect. The correct answer is {correct_answer}"
54
 
 
55
 
 
 
 
56
 
57
# Build the UI: an intro text, a microphone recorder next to an image,
# a read-only answer box, and two buttons.
with gr.Blocks(title="Interactive Language Learning") as demo:
    with gr.Row():
        gr.Markdown(
            """
            # Interactive Language Learning Prototype

            Hello!

            This is a prototype app that is meant to help you learn some basic Swedish words. Observe the image,
            record a one word answer and press the "Submit Answer" button! For a new image, press the "New Image" button.
            """)
    with gr.Row():
        with gr.Column():
            # The recording is delivered to callbacks as a file path (type="filepath").
            audio = gr.Audio(source="microphone", type="filepath", label="Record your answer here")
        with gr.Column():
            # open_image() is called once here, while the UI is being built,
            # to pick the image shown initially.
            image = gr.Image(value=open_image(),type="pil")
    with gr.Row():
        # Not editable by the user; only the callbacks below write to it.
        answer_box = gr.Text(placeholder="Answer appears here", interactive=False)
    with gr.Row():
        with gr.Column():
            process_input = gr.Button("Submit Answer")
            # Grade the recording against the current image and show the verdict.
            process_input.click(fn=transcribe, inputs=[audio, image], outputs=answer_box)
            # process_input.click(fn=test_func, inputs=[audio, image], outputs=answer_box)
        with gr.Column():
            refresh = gr.Button("New Image")
            # Two handlers on the same click: swap in another random image
            # and clear the previous verdict.
            refresh.click(fn=open_image, inputs=None, outputs=image)
            refresh.click(fn=empty_string, inputs=None, outputs=answer_box)
demo.launch(debug=True)
85