Commit 90301be · 1 Parent(s): 326a994
j-tobias committed: small updates
README.md
CHANGED
@@ -1,6 +1,6 @@
 ---
-title: VoiceBot
-emoji:
+title: VoiceBot Game
+emoji: 🕹️
 colorFrom: yellow
 colorTo: purple
 sdk: gradio
app.py
CHANGED
@@ -3,20 +3,47 @@ import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import numpy as np
 import librosa
+import random
 import json
 import os


 from huggingface_hub import InferenceClient

-hf_token = os.getenv("HF_Token")
-
-
-
-
-
-
-
+# hf_token = os.getenv("HF_Token")
+
+def get_token():
+    with open("credentials.json","r") as f:
+        credentials = json.load(f)
+    return credentials['token']
+
+hf_token = get_token()
+
+
+words_to_guess = [
+    "elephant",
+    "rainbow",
+    "mountain",
+    "ocean",
+    "butterfly",
+    "guitar",
+    "volcano",
+    "chocolate",
+    "kangaroo",
+    "spaceship",
+    "whisper",
+    "pyramid",
+    "sunflower",
+    "unicorn",
+    "jungle",
+    "diamond",
+    "castle",
+    "galaxy",
+    "wizard",
+    "tornado"
+]
+
+RANDOM_WORD = random.choice(words_to_guess)

 client = InferenceClient(
     "meta-llama/Meta-Llama-3-8B-Instruct",
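Review note on this hunk, not part of the commit: reading the token from a committed credentials.json makes the app crash when the file is absent, and risks leaking the token if the file is ever pushed to a public Space. A hedged sketch that keeps the JSON file as the primary source but falls back to the HF_Token environment variable the old line read (the fallback behaviour is an assumption, not something the commit does):

```python
# Sketch only: prefer credentials.json, otherwise fall back to the HF_Token
# environment variable (e.g. when the token is configured as a Space secret).
import json
import os

def get_token() -> str | None:
    try:
        with open("credentials.json", "r") as f:
            return json.load(f)["token"]
    except (FileNotFoundError, KeyError, json.JSONDecodeError):
        return os.getenv("HF_Token")

hf_token = get_token()
```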
@@ -27,6 +54,9 @@ processor = WhisperProcessor.from_pretrained("openai/whisper-tiny.en")


 def chat(audio, chat:list, asr_model:str):
+
+    status = ""
+
     if asr_model == "openai/whisper-large-v2":
         transcription = transcribe_whisper_large_v2(audio)
     elif asr_model == "openai/whisper-tiny.en":
@@ -34,6 +64,9 @@ def chat(audio, chat:list, asr_model:str):
     else:
         raise ValueError(f"No Model found with the given choice: {asr_model}")

+    if RANDOM_WORD in transcription:
+        status = "# YOU WON !! ππ"
+
     chat.append({'role':'user','content':transcription})
     response = client.chat_completion(
         messages=chat,
@@ -41,7 +74,9 @@ def chat(audio, chat:list, asr_model:str):
         stream=False,
     ).choices[0].message.content
     chat.append({'role':'assistant','content':response})
-    return chat
+    if RANDOM_WORD in response:
+        status = "# YOU LOST !! ββ"
+    return chat, status

 def transcribe_whisper_large_v2(audio):
     sr, audio = audio
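The win/lose checks added above are plain substring tests on the raw strings: they are case-sensitive, and "ocean" would also match "oceanic". Since Whisper output is capitalised and punctuated, a small hypothetical helper (not part of the commit) that does a case-insensitive whole-word match may be more forgiving:

```python
import re

# Hypothetical helper: case-insensitive, whole-word match for the guessing game.
def contains_word(text: str, word: str) -> bool:
    return re.search(rf"\b{re.escape(word)}\b", text, flags=re.IGNORECASE) is not None

# Usage inside chat(), mirroring the checks in the diff:
#   if contains_word(transcription, RANDOM_WORD): status = "# YOU WON !!"
#   if contains_word(response, RANDOM_WORD):      status = "# YOU LOST !!"
```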
@@ -68,6 +103,9 @@ def transcribe_whisper_tiny_en(audio):
     return transcription


+
+
+
 def load_model(asr_model_choice:str):
     global processor
     global model
@@ -87,28 +125,30 @@ def load_model(asr_model_choice:str):

 with gr.Blocks() as app:

-    gr.Markdown("# VoiceBot")
+    gr.Markdown("# VoiceBot Game 🕹️")
     gr.Markdown("Welcome to VoiceBot π, here is how it works")
     gr.Markdown("This Bot can only be interacted with through your voice. Press record and say something, after stopping the recoding your audio will be processed directly. You have the option to choose between different models. The model you choose influences the Bot's perfomance to understand what you have said. A better perfomance also comes with longer waiting time. π")
+    gr.Markdown("The Game works as follows: The Bot get's an initial word, you have to guess it. You can ask questions. If the bot says the word before you, You Lose! If you say the word first you Win!")
     gr.Markdown("Have fun playing arround π")
     gr.Markdown("If you have any wishes for models or a general idea, feel free to let me know π")

     chatbot = gr.Chatbot(
         value=[{
             'role':'System',
-            'content':
+            'content':f"The User tries to guess a word. The User asks you questions about the word and you answer those questions. Try to help the user to find the word by giving very short descriptions. THE WORD TO GUESS IS: {RANDOM_WORD}"
         }],
         bubble_full_width=False,
         type="messages"
     )

-    with gr.Row():

-
-
-
-
-
+    audio_input = gr.Audio(
+        sources=['microphone'],
+        interactive=True,
+        scale=8
+    )
+
+    status = gr.Markdown()

     with gr.Accordion(label="Settings", open=False):

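Review note, not part of the commit: the seeded history uses 'role':'System' (capitalised), while gr.Chatbot(type="messages") and the OpenAI-style schema used by InferenceClient.chat_completion are documented around lowercase roles ('system', 'user', 'assistant'), so the capitalised role may be rejected or ignored; the user-facing Markdown strings also carry small typos ("recoding", "perfomance", "arround", "get's") worth fixing in a follow-up. A hedged sketch of one way to keep the system prompt out of the visible history and prepend it only when calling the model (SYSTEM_PROMPT and ask_llm are hypothetical names, not the app's code):

```python
from huggingface_hub import InferenceClient

# Stand-ins for the globals defined in app.py, included only to keep the sketch self-contained.
client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token="hf_...")  # placeholder token
RANDOM_WORD = "ocean"

SYSTEM_PROMPT = (
    "The user tries to guess a word. Answer their questions with very short hints. "
    f"THE WORD TO GUESS IS: {RANDOM_WORD}"
)

def ask_llm(history: list) -> str:
    # history holds only 'user'/'assistant' turns (what the messages-type Chatbot displays);
    # the lowercase system message is prepended just before the model call.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}] + history
    return client.chat_completion(
        messages=messages,
        max_tokens=256,  # assumed value; the original call's other arguments are outside this hunk
        stream=False,
    ).choices[0].message.content
```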
@@ -120,6 +160,6 @@ with gr.Blocks() as app:
     asr_model_choice.change(load_model, asr_model_choice)

     # Event listener for when the audio recording stops
-    audio_input.stop_recording(fn=chat, inputs=[audio_input, chatbot, asr_model_choice], outputs=chatbot)
+    audio_input.stop_recording(fn=chat, inputs=[audio_input, chatbot, asr_model_choice], outputs=[chatbot, status])

 app.launch()
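For readers less familiar with Gradio's event wiring: the two values chat now returns map, in order, onto outputs=[chatbot, status], so the transcript and the win/lose banner update together when recording stops. A minimal, self-contained sketch of that wiring (the chat body here is a stub standing in for the real ASR and LLM calls, not the app's actual logic):

```python
import gradio as gr

def chat(audio, history, asr_model):
    # In the real app: transcribe `audio` with `asr_model`, query the LLM, check the word.
    history = history + [
        {"role": "user", "content": f"(transcribed with {asr_model})"},
        {"role": "assistant", "content": "stub reply"},
    ]
    status = ""  # becomes the "# YOU WON !!" / "# YOU LOST !!" banner in the real app
    return history, status

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    audio_input = gr.Audio(sources=["microphone"], interactive=True)
    status = gr.Markdown()
    asr_model_choice = gr.Dropdown(
        ["openai/whisper-tiny.en", "openai/whisper-large-v2"],
        value="openai/whisper-tiny.en",
        label="ASR model",
    )
    # stop_recording fires when the user ends the recording; the callback's two return
    # values fill the two components listed in `outputs`, in order.
    audio_input.stop_recording(
        fn=chat,
        inputs=[audio_input, chatbot, asr_model_choice],
        outputs=[chatbot, status],
    )

if __name__ == "__main__":
    demo.launch()
```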