Update app.py

app.py CHANGED
@@ -8,8 +8,6 @@ import tempfile
 import logging
 import io
 from pydub import AudioSegment
-import json
-from datetime import datetime
 
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -29,21 +27,9 @@ headers = {"Authorization": f"Bearer {hf_token}"}
 # Initialize an empty chat history
 chat_history = []
 
-
-AVAILABLE_VOICES = [
-    "en-US-BrianMultilingualNeural",
-    "en-US-JennyMultilingualNeural",
-    "en-GB-RyanMultilingualNeural",
-    "en-AU-NatashaNeural",
-    "en-IN-PrabhatNeural"
-]
-
-# New feature: Conversation log
-conversation_log = []
-
-async def text_to_speech_stream(text, voice, voice_volume=1.0):
+async def text_to_speech_stream(text, voice_volume=1.0):
     """Convert text to speech using edge_tts and return the audio file path."""
-    communicate = edge_tts.Communicate(text, voice)
+    communicate = edge_tts.Communicate(text, "en-US-BrianMultilingualNeural")
     audio_data = b""
 
     async for chunk in communicate.stream():
@@ -84,14 +70,14 @@ def whisper_speech_to_text(audio_path):
         logging.error(f"Unexpected error in whisper_speech_to_text: {e}")
         return ""
 
-async def chat_with_ai(message, system_prompt):
+async def chat_with_ai(message):
     global chat_history
 
     chat_history.append({"role": "user", "content": message})
 
     try:
         response = chat_client.chat_completion(
-            messages=[{"role": "system", "content": system_prompt}] + chat_history,
+            messages=[{"role": "system", "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries."}] + chat_history,
             max_tokens=800,
             temperature=0.7
         )
@@ -99,19 +85,14 @@ async def chat_with_ai(message, system_prompt):
         response_text = response.choices[0].message['content']
         chat_history.append({"role": "assistant", "content": response_text})
 
-
-        conversation_log.append({
-            "timestamp": datetime.now().isoformat(),
-            "user": message,
-            "assistant": response_text
-        })
+        audio_path = await text_to_speech_stream(response_text)
 
-        return response_text
+        return response_text, audio_path
     except Exception as e:
         logging.error(f"Error in chat_with_ai: {e}")
-        return str(e)
+        return str(e), None
 
-def transcribe_and_chat(audio, system_prompt, selected_voice, voice_volume):
+def transcribe_and_chat(audio):
     if audio is None:
         return "Sorry, no audio was provided. Please try recording again.", None
 
@@ -119,15 +100,14 @@ def transcribe_and_chat(audio, system_prompt, selected_voice, voice_volume):
     if not text:
         return "Sorry, I couldn't understand the audio or there was an error in transcription. Please try again.", None
 
-    response = asyncio.run(chat_with_ai(text, system_prompt))
-    audio_path = asyncio.run(text_to_speech_stream(response, selected_voice, voice_volume))
+    response, audio_path = asyncio.run(chat_with_ai(text))
     return response, audio_path
 
 def create_demo():
     with gr.Blocks() as demo:
         gr.Markdown(
             """
-            # 🗣️
+            # 🗣️ AI Voice Assistant
             Welcome to your personal voice assistant! Simply record your voice, and I will respond with both text and speech. The assistant will automatically start listening after playing its response. Powered by advanced AI models.
             """
         )
@@ -137,17 +117,6 @@ def create_demo():
             audio_input = gr.Audio(type="filepath", label="🎤 Record your voice", elem_id="audio-input")
             clear_button = gr.Button("Clear", variant="secondary", elem_id="clear-button")
             voice_volume = gr.Slider(minimum=0, maximum=2, value=1, step=0.1, label="Voice Volume", elem_id="voice-volume")
-
-            # New feature: Voice selection
-            voice_dropdown = gr.Dropdown(choices=AVAILABLE_VOICES, value=AVAILABLE_VOICES[0], label="Select Voice", elem_id="voice-dropdown")
-
-            # New feature: System prompt input
-            system_prompt = gr.Textbox(
-                label="System Prompt",
-                placeholder="Enter a system prompt to guide the AI's behavior...",
-                value="You are a helpful voice assistant. Provide concise and clear responses to user queries.",
-                elem_id="system-prompt"
-            )
 
         with gr.Column(scale=1):
             chat_output = gr.Textbox(label="💬 AI Response", elem_id="chat-output", lines=5, interactive=False)
@@ -156,29 +125,20 @@ def create_demo():
         # Add some spacing and a divider
         gr.Markdown("---")
 
-        # New feature: Export conversation log
-        export_button = gr.Button("Export Conversation Log", elem_id="export-button")
-
         # Processing the audio input
-        def process_audio(audio, system_prompt, selected_voice, volume):
+        def process_audio(audio, volume):
             logging.info(f"Received audio: {audio}")
             if audio is None:
                 return "No audio detected. Please try recording again.", None
-            response, audio_path = transcribe_and_chat(audio, system_prompt, selected_voice, volume)
-            logging.info(f"Response: {response}, Audio path: {audio_path}")
-            return response, audio_path
+            response, audio_path = transcribe_and_chat(audio)
+            # Adjust volume for the response audio
+            adjusted_audio_path = asyncio.run(text_to_speech_stream(response, volume))
+            logging.info(f"Response: {response}, Audio path: {adjusted_audio_path}")
+            return response, adjusted_audio_path
 
-        audio_input.change(process_audio, inputs=[audio_input, system_prompt, voice_dropdown, voice_volume], outputs=[chat_output, audio_output])
+        audio_input.change(process_audio, inputs=[audio_input, voice_volume], outputs=[chat_output, audio_output])
         clear_button.click(lambda: (None, None), None, [chat_output, audio_output])
 
-        # New feature: Export conversation log function
-        def export_log():
-            with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file:
-                json.dump(conversation_log, temp_file, indent=2)
-                return temp_file.name
-
-        export_button.click(export_log, inputs=None, outputs=gr.File(label="Download Conversation Log"))
-
         # JavaScript to handle autoplay, automatic submission, and auto-listen
         demo.load(None, js="""
         function() {
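
Note: the hunks above cut off the body of text_to_speech_stream right after the streaming loop begins. A minimal sketch of what the full simplified function plausibly looks like after this change; the pydub gain conversion and the temp-file output are assumptions, not shown in the diff:

import io
import math
import tempfile

import edge_tts
from pydub import AudioSegment

async def text_to_speech_stream(text, voice_volume=1.0):
    """Convert text to speech using edge_tts and return the audio file path."""
    communicate = edge_tts.Communicate(text, "en-US-BrianMultilingualNeural")
    audio_data = b""

    # Collect the MP3 chunks streamed back by edge-tts.
    async for chunk in communicate.stream():
        if chunk["type"] == "audio":
            audio_data += chunk["data"]

    # Assumption: the volume slider (0-2, linear) is applied with pydub,
    # whose gain operator works in dB, hence the 20*log10 conversion.
    audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
    if voice_volume != 1.0:
        audio = audio + 20 * math.log10(max(voice_volume, 0.01))

    # Assumption: the "audio file path" the docstring promises is a temp MP3.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        audio.export(tmp.name, format="mp3")
        return tmp.name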
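Likewise, whisper_speech_to_text appears only in hunk headers and error-handling context. A sketch following the usual Hugging Face Inference API pattern; the model URL and the JSON response shape are assumptions, and headers mirrors the module-level auth dict visible in the hunk context:

import logging
import requests

# Assumed ASR model endpoint; the actual URL is not shown in this diff.
WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"

# Mirrors the context line in the diff: {"Authorization": f"Bearer {hf_token}"}
headers = {"Authorization": "Bearer <hf_token>"}

def whisper_speech_to_text(audio_path):
    """Transcribe an audio file via the Hugging Face Inference API."""
    try:
        with open(audio_path, "rb") as f:
            data = f.read()
        response = requests.post(WHISPER_API_URL, headers=headers, data=data)
        response.raise_for_status()
        # Assumption: the ASR endpoint returns JSON shaped like {"text": "..."}.
        return response.json().get("text", "")
    except Exception as e:
        logging.error(f"Unexpected error in whisper_speech_to_text: {e}")
        return ""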