j-tobias committed on
Commit 90301be · 1 Parent(s): 326a994

small updates

Files changed (2)
  1. README.md +2 -2
  2. app.py +58 -18
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-title: VoiceBot
-emoji: 🦀
+title: VoiceBot Game
+emoji: 🕹️
 colorFrom: yellow
 colorTo: purple
 sdk: gradio
app.py CHANGED
@@ -3,20 +3,47 @@ import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import numpy as np
 import librosa
+import random
 import json
 import os


 from huggingface_hub import InferenceClient

-hf_token = os.getenv("HF_Token")
-
-# def get_token():
-#     with open("credentials.json","r") as f:
-#         credentials = json.load(f)
-#     return credentials['token']
-
-# hf_token = get_token()
+# hf_token = os.getenv("HF_Token")
+
+def get_token():
+    with open("credentials.json","r") as f:
+        credentials = json.load(f)
+    return credentials['token']
+
+hf_token = get_token()
+
+
+words_to_guess = [
+    "elephant",
+    "rainbow",
+    "mountain",
+    "ocean",
+    "butterfly",
+    "guitar",
+    "volcano",
+    "chocolate",
+    "kangaroo",
+    "spaceship",
+    "whisper",
+    "pyramid",
+    "sunflower",
+    "unicorn",
+    "jungle",
+    "diamond",
+    "castle",
+    "galaxy",
+    "wizard",
+    "tornado"
+]
+
+RANDOM_WORD = random.choice(words_to_guess)

 client = InferenceClient(
     "meta-llama/Meta-Llama-3-8B-Instruct",
@@ -27,6 +54,9 @@ processor = WhisperProcessor.from_pretrained("openai/whisper-tiny.en")


 def chat(audio, chat:list, asr_model:str):
+
+    status = ""
+
     if asr_model == "openai/whisper-large-v2":
         transcription = transcribe_whisper_large_v2(audio)
     elif asr_model == "openai/whisper-tiny.en":
@@ -34,6 +64,9 @@ def chat(audio, chat:list, asr_model:str):
     else:
         raise ValueError(f"No Model found with the given choice: {asr_model}")

+    if RANDOM_WORD in transcription:
+        status = "# YOU WON !! 🎉🎊"
+
     chat.append({'role':'user','content':transcription})
     response = client.chat_completion(
         messages=chat,
@@ -41,7 +74,9 @@
         stream=False,
     ).choices[0].message.content
     chat.append({'role':'assistant','content':response})
-    return chat
+    if RANDOM_WORD in response:
+        status = "# YOU LOST !! ❌❌"
+    return chat, status

 def transcribe_whisper_large_v2(audio):
     sr, audio = audio
@@ -68,6 +103,9 @@ def transcribe_whisper_tiny_en(audio):
     return transcription


+
+
+
 def load_model(asr_model_choice:str):
     global processor
     global model
@@ -87,28 +125,30 @@ def load_model(asr_model_choice:str):

 with gr.Blocks() as app:

-    gr.Markdown("# VoiceBot")
+    gr.Markdown("# VoiceBot Game 🕹️")
     gr.Markdown("Welcome to VoiceBot 👋, here is how it works")
     gr.Markdown("This Bot can only be interacted with through your voice. Press record and say something, after stopping the recoding your audio will be processed directly. You have the option to choose between different models. The model you choose influences the Bot's perfomance to understand what you have said. A better perfomance also comes with longer waiting time. 😕")
+    gr.Markdown("The Game works as follows: The Bot get's an initial word, you have to guess it. You can ask questions. If the bot says the word before you, You Lose! If you say the word first you Win!")
     gr.Markdown("Have fun playing arround 🎉")
     gr.Markdown("If you have any wishes for models or a general idea, feel free to let me know 🙌")

     chatbot = gr.Chatbot(
         value=[{
             'role':'System',
-            'content':'You are a funny assitant for an Audio based Chatbot. You are having a great time with the user, just playing arround.'
+            'content':f"The User tries to guess a word. The User asks you questions about the word and you answer those questions. Try to help the user to find the word by giving very short descriptions. THE WORD TO GUESS IS: {RANDOM_WORD}"
         }],
         bubble_full_width=False,
         type="messages"
     )

-    with gr.Row():

-        audio_input = gr.Audio(
-            sources=['microphone'],
-            interactive=True,
-            scale=8
-        )
+    audio_input = gr.Audio(
+        sources=['microphone'],
+        interactive=True,
+        scale=8
+    )
+
+    status = gr.Markdown()

     with gr.Accordion(label="Settings", open=False):

@@ -120,6 +160,6 @@ with gr.Blocks() as app:
     asr_model_choice.change(load_model, asr_model_choice)

     # Event listener for when the audio recording stops
-    audio_input.stop_recording(fn=chat, inputs=[audio_input, chatbot, asr_model_choice], outputs=chatbot)
+    audio_input.stop_recording(fn=chat, inputs=[audio_input, chatbot, asr_model_choice], outputs=[chatbot, status])

 app.launch()
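
For readers skimming the diff: the game mechanic this commit adds to chat() is a plain substring test against the module-level RANDOM_WORD, and the resulting banner is routed to the new status Markdown component via outputs=[chatbot, status]. Below is a minimal sketch of that check in isolation; the helper guess_status and its explicit secret parameter are illustrative additions, not part of the commit, which runs the checks inline.

import random

# Same shape as the committed words_to_guess / RANDOM_WORD globals.
words_to_guess = ["elephant", "rainbow", "mountain", "ocean"]
RANDOM_WORD = random.choice(words_to_guess)

def guess_status(secret: str, transcription: str, response: str) -> str:
    # Mirrors the checks added in chat(): the user wins if the secret word
    # appears in their transcribed speech, and loses if the bot's reply
    # contains it; the loss check runs second, so it overrides a win.
    # (Hypothetical helper; the committed code uses RANDOM_WORD directly.)
    status = ""
    if secret in transcription:
        status = "# YOU WON !! 🎉🎊"
    if secret in response:
        status = "# YOU LOST !! ❌❌"
    return status

# Deterministic example: the bot reveals the word, so the loss banner wins.
print(guess_status("elephant", "is it an elephant?", "Yes, an elephant!"))
# -> # YOU LOST !! ❌❌

Note that because these are plain substring tests, matching is case-sensitive and also triggers on longer words containing the secret (e.g. "oceans" would count for "ocean").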