gospacedev committed
Commit ed73c38 · verified · 1 Parent(s): ee3af02

Update app.py

Files changed (1)
  1. app.py +17 -23
app.py CHANGED
@@ -6,17 +6,13 @@ from gtts import gTTS
 from transformers import pipeline
 from huggingface_hub import InferenceClient
 
-
 ASR_MODEL_NAME = "openai/whisper-small"
 LLM_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
 
-
-system_prompt = """"<s>[INST] You are Friday, a helpful and conversational AI assistant and You respond with one to two sentences. [/INST] Hello there! I'm friday how can I help you?</s>"""
+system_prompt = """"<s>[INST] You are Friday, a helpful and conversational AI assistant, and you respond with one to two sentences. [/INST] Hello there! I'm Friday, how can I help you?</s>"""
 
 instruct_history = system_prompt + """"""
 
-formatted_history = """"""
-
 client = InferenceClient(LLM_MODEL_NAME)
 
 device = 0 if torch.cuda.is_available() else "cpu"
@@ -27,7 +23,6 @@ pipe = pipeline(
     device=device,
 )
 
-
 def generate(instruct_history, temperature=0.1, max_new_tokens=128, top_p=0.95, repetition_penalty=1.0):
     temperature = float(temperature)
     if temperature < 1e-2:
@@ -48,49 +43,48 @@ def generate(instruct_history, temperature=0.1, max_new_tokens=128, top_p=0.95,
 
     return output
 
-
 @spaces.GPU(duration=60)
-def transcribe(audio, instruct_history=instruct_history, formatted_history=formatted_history):
+def transcribe(audio, instruct_history=instruct_history):
     sr, y = audio
     y = y.astype(np.float32)
     y /= np.max(np.abs(y))
 
+    # Transcribe user audio
     transcribed_user_audio = pipe({"sampling_rate": sr, "raw": y})["text"]
 
-    formatted_history += f"""Human: {transcribed_user_audio}\n\n"""
-
+    # Append user input to history
+    formatted_history = f"""Human: {transcribed_user_audio}\n\n"""
     instruct_history += f"""<s>[INST] {transcribed_user_audio} [/INST] """
 
+    # Generate LLM response
     llm_response = generate(instruct_history)
 
+    # Append AI response to history
     instruct_history += f""" {llm_response}</s>"""
-
     formatted_history += f"""Friday: {llm_response}\n\n"""
 
+    # Convert AI response to audio
     audio_response = gTTS(llm_response)
     audio_response.save("response.mp3")
 
-    print(instruct_history)
-
-    return "response.mp3", formatted_history
+    # Display the full conversation history
+    full_history = formatted_history
 
+    return "response.mp3", full_history
 
 with gr.Blocks() as demo:
-    gr.HTML("<center><h1>Friday: AI Virtual Assistant<h1><center>")
+    gr.HTML("<center><h1>Friday: AI Virtual Assistant</h1><center>")
 
     with gr.Row():
-        audio_input = gr.Audio(label="Human", sources="microphone")
-        output_audio = gr.Audio(label="Friday", type="filepath",
-                                interactive=False,
-                                autoplay=True,
-                                elem_classes="audio")
+        audio_input = gr.Audio(label="Human", source="microphone")
+        output_audio = gr.Audio(label="Friday", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
 
     transcribe_btn = gr.Button("Transcribe")
 
-    transcription_box = gr.Textbox(label="Transcription")
+    # Textbox to display the full conversation history
+    transcription_box = gr.Textbox(label="Transcription", lines=10, placeholder="Conversation History...")
 
-    transcribe_btn.click(fn=transcribe, inputs=[audio_input],
-                         outputs=[output_audio, transcription_box])
+    transcribe_btn.click(fn=transcribe, inputs=[audio_input], outputs=[output_audio, transcription_box])
 
 if __name__ == "__main__":
     demo.queue()
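
Note: the hunk context above shows only the head of generate(); its body is not part of this diff. For reference, a minimal sketch of how such a helper is commonly built on huggingface_hub's InferenceClient.text_generation — an assumption, not the commit's actual code:

# Sketch only: assumes generate() wraps client.text_generation, which the
# diff does not show; parameter names mirror the signature in the hunk header.
def generate(instruct_history, temperature=0.1, max_new_tokens=128, top_p=0.95, repetition_penalty=1.0):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2  # text_generation requires a strictly positive temperature

    # Sample a completion for the accumulated <s>[INST] ... [/INST] history.
    output = client.text_generation(
        instruct_history,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
    )
    return output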
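
Note: after this commit, formatted_history is rebuilt inside transcribe() on each call, so the returned history covers only the latest exchange. A hypothetical smoke test for the new signature, feeding a synthetic clip in the (sample_rate, int16 array) form that gr.Audio delivers:

import numpy as np

# Hypothetical check: a 1-second 440 Hz tone standing in for microphone input.
sr = 16000
t = np.linspace(0, 1, sr, endpoint=False)
fake_clip = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)

audio_path, history = transcribe((sr, fake_clip))
print(audio_path)  # "response.mp3"
print(history)     # "Human: ...\n\nFriday: ...\n\n" — latest turn only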