Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -28,7 +28,69 @@ whisper_model = whisper.load_model("base", device='cuda')
|
|
28 |
# Initialize an empty chat history
|
29 |
chat_history = []
|
30 |
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
# Define the Gradio interface
|
34 |
with gr.Blocks() as demo:
|
|
|
28 |
# Initialize an empty chat history
|
29 |
chat_history = []
|
30 |
|
31 |
+
async def text_to_speech_stream(text):
    """Synthesize *text* with edge_tts and return the path of a temp MP3 file.

    The file is created with delete=False so Gradio can serve it by path;
    the caller (or the OS temp cleaner) is responsible for removing it.

    Args:
        text: The text to speak.

    Returns:
        Filesystem path of the generated ``.mp3`` file.
    """
    tts = edge_tts.Communicate(text, "en-US-AvaMultilingualNeural")

    # Collect the audio chunks from the streaming synthesis; non-audio
    # events (word boundaries etc.) are ignored.
    pieces = []
    async for event in tts.stream():
        if event["type"] == "audio":
            pieces.append(event["data"])

    # Persist the assembled audio to a named temporary file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out:
        out.write(b"".join(pieces))
        return out.name
44 |
+
|
45 |
+
def whisper_speech_to_text(audio):
    """Transcribe an audio file to text using the global Whisper model.

    Args:
        audio: Path (or input accepted by ``whisper_model.transcribe``) of
            the recording to transcribe.

    Returns:
        The transcript string, or ``None`` when transcription fails
        (the error is printed, not raised).
    """
    try:
        transcription = whisper_model.transcribe(audio)
        return transcription['text']
    except Exception as e:
        print(f"Whisper Error: {e}")
        return None
    finally:
        # Release cached GPU memory after each transcription pass so the
        # model's scratch allocations don't accumulate between requests.
        torch.cuda.empty_cache()
57 |
+
|
58 |
+
async def chat_with_ai(message, history):
    """Send *message* to the chat model and synthesize the spoken reply.

    Appends the user turn to the module-level ``chat_history``, requests a
    completion, records the assistant turn, and converts it to audio.

    Args:
        message: User utterance (plain text).
        history: Gradio-supplied history; unused — conversation state lives
            in the shared ``chat_history`` instead.

    Returns:
        ``(response_text, audio_path)`` on success; ``(str(error), None)``
        on failure.
    """
    global chat_history

    # Remember where this turn started so a failure can be rolled back.
    turn_start = len(chat_history)
    chat_history.append({"role": "user", "content": message})

    try:
        # Send chat completion request with a fixed system prompt.
        response = client.chat_completion(
            messages=[{"role": "system", "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries."}] + chat_history,
            max_tokens=800,
            temperature=0.7
        )

        response_text = response.choices[0].message['content']

        # Add assistant's response to chat history
        chat_history.append({"role": "assistant", "content": response_text})

        # Generate speech for the response
        audio_path = await text_to_speech_stream(response_text)

        return response_text, audio_path
    except Exception as e:
        # BUGFIX: roll back everything appended for this turn. Previously a
        # failed request left the user message (and, if TTS failed, a
        # half-recorded assistant reply) in the shared history, permanently
        # desyncing it from what the assistant actually answered.
        del chat_history[turn_start:]
        print(f"Error: {e}")
        return str(e), None
84 |
+
|
85 |
+
def transcribe_and_chat(audio):
    """Full voice pipeline: transcribe *audio*, then chat and speak the reply.

    Args:
        audio: Recording to transcribe (as accepted by
            ``whisper_speech_to_text``).

    Returns:
        ``(response_text, audio_path)``; a fixed apology and ``None`` when
        the speech could not be transcribed.
    """
    transcript = whisper_speech_to_text(audio)
    if transcript is None:
        return "Sorry, I couldn't understand the audio.", None

    # chat_with_ai is a coroutine; drive it to completion on a fresh
    # event loop and forward its (text, audio_path) result unchanged.
    return asyncio.run(chat_with_ai(transcript, []))
94 |
|
95 |
# Define the Gradio interface
|
96 |
with gr.Blocks() as demo:
|