Update app.py
app.py CHANGED
```diff
@@ -25,7 +25,7 @@ headers = {"Authorization": f"Bearer {hf_token}"}
 # Initialize an empty chat history
 chat_history = []
 
-async def text_to_speech_stream(text):
+async def text_to_speech_stream(text, voice_volume=1.0):
     """Convert text to speech using edge_tts and return the audio file path."""
     communicate = edge_tts.Communicate(text, "en-US-AvaMultilingualNeural")
     audio_data = b""
@@ -34,68 +34,16 @@ async def text_to_speech_stream(text):
         if chunk["type"] == "audio":
             audio_data += chunk["data"]
 
+    # Adjust volume
+    from pydub import AudioSegment
+    audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
+    adjusted_audio = audio + (20 * voice_volume - 20)  # Adjust volume (0.0 to 2.0)
+
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
-        temp_file.write(audio_data)
+        adjusted_audio.export(temp_file.name, format="mp3")
         return temp_file.name
 
-def whisper_speech_to_text(audio_path):
-    """Convert speech to text using Hugging Face Whisper API."""
-    if audio_path is None:
-        logging.error("Error: No audio file provided")
-        return ""
-
-    if not os.path.exists(audio_path):
-        logging.error(f"Error: Audio file not found at {audio_path}")
-        return ""
-
-    try:
-        with open(audio_path, "rb") as audio_file:
-            data = audio_file.read()
-        response = requests.post(WHISPER_API_URL, headers=headers, data=data)
-        response.raise_for_status()  # Raise an exception for bad status codes
-        result = response.json()
-        transcribed_text = result.get("text", "")
-        logging.info(f"Transcribed text: {transcribed_text}")
-        return transcribed_text
-    except requests.exceptions.RequestException as e:
-        logging.error(f"Error during API request: {e}")
-        return ""
-    except Exception as e:
-        logging.error(f"Unexpected error in whisper_speech_to_text: {e}")
-        return ""
-
-async def chat_with_ai(message):
-    global chat_history
-
-    chat_history.append({"role": "user", "content": message})
-
-    try:
-        response = chat_client.chat_completion(
-            messages=[{"role": "system", "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries."}] + chat_history,
-            max_tokens=800,
-            temperature=0.7
-        )
-
-        response_text = response.choices[0].message['content']
-        chat_history.append({"role": "assistant", "content": response_text})
-
-        audio_path = await text_to_speech_stream(response_text)
-
-        return response_text, audio_path
-    except Exception as e:
-        logging.error(f"Error in chat_with_ai: {e}")
-        return str(e), None
-
-def transcribe_and_chat(audio):
-    if audio is None:
-        return "Sorry, no audio was provided. Please try recording again.", None
-
-    text = whisper_speech_to_text(audio)
-    if not text:
-        return "Sorry, I couldn't understand the audio or there was an error in transcription. Please try again.", None
-
-    response, audio_path = asyncio.run(chat_with_ai(text))
-    return response, audio_path
+# ... (rest of the functions remain the same)
 
 def create_demo():
     with gr.Blocks() as demo:
@@ -110,6 +58,7 @@ def create_demo():
             with gr.Column(scale=1):
                 audio_input = gr.Audio(type="filepath", label="🎤 Record your voice", elem_id="audio-input")
                 clear_button = gr.Button("Clear", variant="secondary", elem_id="clear-button")
+                voice_volume = gr.Slider(minimum=0, maximum=2, value=1, step=0.1, label="Voice Volume", elem_id="voice-volume")
 
             with gr.Column(scale=1):
                 chat_output = gr.Textbox(label="💬 AI Response", elem_id="chat-output", lines=5, interactive=False)
@@ -119,15 +68,17 @@ def create_demo():
         gr.Markdown("---")
 
         # Processing the audio input
-        def process_audio(audio):
+        def process_audio(audio, volume):
             logging.info(f"Received audio: {audio}")
             if audio is None:
                 return "No audio detected. Please try recording again.", None, None
             response, audio_path = transcribe_and_chat(audio)
-            logging.info(f"Response: {response}, Audio path: {audio_path}")
-            return response, audio_path, None  # Return None to clear the audio input
+            # Adjust volume for the response audio
+            adjusted_audio_path = asyncio.run(text_to_speech_stream(response, volume))
+            logging.info(f"Response: {response}, Audio path: {adjusted_audio_path}")
+            return response, adjusted_audio_path, None  # Return None to clear the audio input
 
-        audio_input.change(process_audio, inputs=[audio_input], outputs=[chat_output, audio_output, audio_input])
+        audio_input.change(process_audio, inputs=[audio_input, voice_volume], outputs=[chat_output, audio_output, audio_input])
         clear_button.click(lambda: (None, None, None), None, [chat_output, audio_output, audio_input])
 
         # JavaScript to handle autoplay and automatic submission
@@ -156,6 +107,14 @@ def create_demo():
             document.addEventListener('gradioUpdated', function(event) {
                 setTimeout(playAssistantAudio, 100);
             });
+
+            // Prevent audio from stopping when switching tabs
+            document.addEventListener("visibilitychange", function() {
+                var audioElements = document.querySelectorAll('audio');
+                audioElements.forEach(function(audio) {
+                    audio.play();
+                });
+            });
         }
         """)
 
@@ -164,4 +123,4 @@ def create_demo():
 # Launch the Gradio app
 if __name__ == "__main__":
     demo = create_demo()
-    demo.launch(server_name="0.0.0.0", server_port=7860)
+    demo.launch(server_name="0.0.0.0", server_port=7860)
```
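The volume control added to `text_to_speech_stream` maps the 0.0-2.0 slider value onto a decibel gain for pydub: `20 * voice_volume - 20` gives -20 dB at 0.0, 0 dB (unchanged) at 1.0, and +20 dB at 2.0. A minimal sketch of that mapping, assuming pydub is installed with ffmpeg available for MP3 decoding; `adjust_volume` is an illustrative helper name, not a function in app.py:

```python
import io

from pydub import AudioSegment  # pydub needs ffmpeg to decode MP3 data


def adjust_volume(mp3_bytes: bytes, voice_volume: float) -> AudioSegment:
    """Apply a slider-style volume (0.0-2.0) to raw MP3 bytes as a dB gain."""
    audio = AudioSegment.from_mp3(io.BytesIO(mp3_bytes))
    gain_db = 20 * voice_volume - 20  # 0.0 -> -20 dB, 1.0 -> 0 dB, 2.0 -> +20 dB
    return audio.apply_gain(gain_db)  # same effect as `audio + gain_db`
```

Because the gain is applied to the decoded segment before export, the MP3 written to the temporary file already carries the adjusted loudness.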
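On the UI side, the new `gr.Slider` is just an additional input to `audio_input.change`, so each recording is processed with the slider's current value. A stripped-down sketch of that wiring, assuming Gradio Blocks; the handler below is a placeholder, whereas the real `process_audio` in app.py also transcribes the audio and synthesizes the reply:

```python
import gradio as gr


def process_audio(audio, volume):
    # Placeholder: the real handler transcribes `audio`, queries the chat
    # model, and synthesizes speech at the requested `volume` (0.0-2.0).
    if audio is None:
        return "No audio detected. Please try recording again.", None, None
    return f"Received {audio} at volume {volume:.1f}", None, None


with gr.Blocks() as demo:
    audio_input = gr.Audio(type="filepath", label="Record your voice")
    voice_volume = gr.Slider(minimum=0, maximum=2, value=1, step=0.1,
                             label="Voice Volume")
    chat_output = gr.Textbox(label="AI Response", interactive=False)
    audio_output = gr.Audio(label="Assistant reply")

    # The slider's current value is passed alongside the recorded file
    # whenever the audio input changes; the third output clears the recorder.
    audio_input.change(process_audio,
                       inputs=[audio_input, voice_volume],
                       outputs=[chat_output, audio_output, audio_input])

if __name__ == "__main__":
    demo.launch()
```

Returning `None` as the third output clears the recorder, so the next utterance triggers a fresh `change` event.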
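Finally, `process_audio` is a synchronous Gradio callback while `text_to_speech_stream` is a coroutine, which is why the updated handler drives it with `asyncio.run`. A compact sketch of that pattern, assuming the edge-tts package and the same voice name used in app.py; `synthesize` and `synthesize_blocking` are illustrative names:

```python
import asyncio
import tempfile

import edge_tts


async def synthesize(text: str) -> str:
    """Stream TTS audio from edge_tts and write it to a temporary MP3 file."""
    communicate = edge_tts.Communicate(text, "en-US-AvaMultilingualNeural")
    audio_data = b""
    async for chunk in communicate.stream():
        if chunk["type"] == "audio":
            audio_data += chunk["data"]
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        temp_file.write(audio_data)
        return temp_file.name


def synthesize_blocking(text: str) -> str:
    # Synchronous entry point, e.g. from a Gradio event handler that is not
    # already running inside an event loop.
    return asyncio.run(synthesize(text))


if __name__ == "__main__":
    print(synthesize_blocking("Hello from edge_tts"))
```

Note that `asyncio.run` raises a RuntimeError if the calling thread already has a running event loop, so this pattern only suits plain synchronous callbacks.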