Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -28,7 +28,69 @@ whisper_model = whisper.load_model("base", device='cuda')
|
|
28 |
# Initialize an empty chat history
|
29 |
chat_history = []
|
30 |
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
# Define the Gradio interface
|
34 |
with gr.Blocks() as demo:
|
|
|
28 |
# Initialize an empty chat history
|
29 |
chat_history = []
|
30 |
|
31 |
+
async def text_to_speech_stream(text):
    """Synthesize *text* with edge_tts and return the path of a temp MP3 file.

    The file is created with delete=False so Gradio can serve it by path;
    the caller (or the OS temp cleaner) is responsible for removing it.

    Args:
        text: The text to speak.

    Returns:
        Filesystem path of the generated ``.mp3`` file.
    """
    tts = edge_tts.Communicate(text, "en-US-AvaMultilingualNeural")

    # Collect the audio chunks from the streaming synthesis; non-audio
    # events (word boundaries etc.) are ignored.
    pieces = []
    async for event in tts.stream():
        if event["type"] == "audio":
            pieces.append(event["data"])

    # Persist the assembled audio to a named temporary file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out:
        out.write(b"".join(pieces))
        return out.name
44 |
+
|
45 |
+
def whisper_speech_to_text(audio):
    """Transcribe an audio file to text using the global Whisper model.

    Args:
        audio: Path (or input accepted by ``whisper_model.transcribe``) of
            the recording to transcribe.

    Returns:
        The transcript string, or ``None`` when transcription fails
        (the error is printed, not raised).
    """
    try:
        transcription = whisper_model.transcribe(audio)
        return transcription['text']
    except Exception as e:
        print(f"Whisper Error: {e}")
        return None
    finally:
        # Release cached GPU memory after each transcription pass so the
        # model's scratch allocations don't accumulate between requests.
        torch.cuda.empty_cache()
57 |
+
|
58 |
+
async def chat_with_ai(message, history):
    """Send *message* to the chat model and synthesize the spoken reply.

    Appends the user turn to the module-level ``chat_history``, requests a
    completion, records the assistant turn, and converts it to audio.

    Args:
        message: User utterance (plain text).
        history: Gradio-supplied history; unused — conversation state lives
            in the shared ``chat_history`` instead.

    Returns:
        ``(response_text, audio_path)`` on success; ``(str(error), None)``
        on failure.
    """
    global chat_history

    # Remember where this turn started so a failure can be rolled back.
    turn_start = len(chat_history)
    chat_history.append({"role": "user", "content": message})

    try:
        # Send chat completion request with a fixed system prompt.
        response = client.chat_completion(
            messages=[{"role": "system", "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries."}] + chat_history,
            max_tokens=800,
            temperature=0.7
        )

        response_text = response.choices[0].message['content']

        # Add assistant's response to chat history
        chat_history.append({"role": "assistant", "content": response_text})

        # Generate speech for the response
        audio_path = await text_to_speech_stream(response_text)

        return response_text, audio_path
    except Exception as e:
        # BUGFIX: roll back everything appended for this turn. Previously a
        # failed request left the user message (and, if TTS failed, a
        # half-recorded assistant reply) in the shared history, permanently
        # desyncing it from what the assistant actually answered.
        del chat_history[turn_start:]
        print(f"Error: {e}")
        return str(e), None
84 |
+
|
85 |
+
def transcribe_and_chat(audio):
    """Full voice pipeline: transcribe *audio*, then chat and speak the reply.

    Args:
        audio: Recording to transcribe (as accepted by
            ``whisper_speech_to_text``).

    Returns:
        ``(response_text, audio_path)``; a fixed apology and ``None`` when
        the speech could not be transcribed.
    """
    transcript = whisper_speech_to_text(audio)
    if transcript is None:
        return "Sorry, I couldn't understand the audio.", None

    # chat_with_ai is a coroutine; drive it to completion on a fresh
    # event loop and forward its (text, audio_path) result unchanged.
    return asyncio.run(chat_with_ai(transcript, []))
94 |
|
95 |
# Define the Gradio interface
|
96 |
with gr.Blocks() as demo:
|