sagar007 committed on
Commit
d880060
·
verified ·
1 Parent(s): ae87ff0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -85,7 +85,11 @@ def stream_text_chat(message, history, system_prompt, temperature=0.8, max_new_t
85
  buffer = ""
86
  for new_text in streamer:
87
  buffer += new_text
88
- yield history + [[message, buffer]]
 
 
 
 
89
 
90
  @spaces.GPU
91
  def process_vision_query(image, text_input):
@@ -112,7 +116,7 @@ def generate_speech(prompt, description):
112
  generation = tts_model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
113
  audio_arr = generation.cpu().numpy().squeeze()
114
 
115
- output_path = "output_audio.wav"
116
  sf.write(output_path, audio_arr, tts_model.config.sampling_rate)
117
 
118
  return output_path
@@ -189,9 +193,12 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Base().set(
189
 
190
  submit_btn = gr.Button("Submit", variant="primary")
191
  clear_btn = gr.Button("Clear Chat", variant="secondary")
 
192
 
193
- submit_btn.click(stream_text_chat, [msg, chatbot, system_prompt, temperature, max_new_tokens, top_p, top_k], [chatbot])
194
- clear_btn.click(lambda: None, None, chatbot, queue=False)
 
 
195
 
196
  with gr.Tab("Vision Model (Phi-3.5-vision)"):
197
  with gr.Row():
 
85
  buffer = ""
86
  for new_text in streamer:
87
  buffer += new_text
88
+ yield history + [[message, buffer]], None # Yield None for audio initially
89
+
90
+ # Generate speech for the final response
91
+ audio_path = generate_speech(buffer, "A clear and concise voice reads out the response.")
92
+ yield history + [[message, buffer]], audio_path
93
 
94
  @spaces.GPU
95
  def process_vision_query(image, text_input):
 
116
  generation = tts_model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
117
  audio_arr = generation.cpu().numpy().squeeze()
118
 
119
+ output_path = f"output_audio_{hash(prompt)}.wav"
120
  sf.write(output_path, audio_arr, tts_model.config.sampling_rate)
121
 
122
  return output_path
 
193
 
194
  submit_btn = gr.Button("Submit", variant="primary")
195
  clear_btn = gr.Button("Clear Chat", variant="secondary")
196
+ audio_output = gr.Audio(label="AI Response Audio")
197
 
198
+ submit_btn.click(stream_text_chat,
199
+ inputs=[msg, chatbot, system_prompt, temperature, max_new_tokens, top_p, top_k],
200
+ outputs=[chatbot, audio_output])
201
+ clear_btn.click(lambda: (None, None), None, [chatbot, audio_output], queue=False)
202
 
203
  with gr.Tab("Vision Model (Phi-3.5-vision)"):
204
  with gr.Row():