Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,8 @@ from huggingface_hub import InferenceClient
|
|
6 |
import requests
|
7 |
import tempfile
|
8 |
import logging
|
|
|
|
|
9 |
|
10 |
# Set up logging
|
11 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
@@ -35,7 +37,6 @@ async def text_to_speech_stream(text, voice_volume=1.0):
|
|
35 |
audio_data += chunk["data"]
|
36 |
|
37 |
# Adjust volume
|
38 |
-
from pydub import AudioSegment
|
39 |
audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
|
40 |
adjusted_audio = audio + (20 * voice_volume - 20) # Adjust volume (0.0 to 2.0)
|
41 |
|
@@ -43,7 +44,64 @@ async def text_to_speech_stream(text, voice_volume=1.0):
|
|
43 |
adjusted_audio.export(temp_file.name, format="mp3")
|
44 |
return temp_file.name
|
45 |
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
def create_demo():
|
49 |
with gr.Blocks() as demo:
|
|
|
6 |
import requests
|
7 |
import tempfile
|
8 |
import logging
|
9 |
+
import io
|
10 |
+
from pydub import AudioSegment
|
11 |
|
12 |
# Set up logging
|
13 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
37 |
audio_data += chunk["data"]
|
38 |
|
39 |
# Adjust volume
|
|
|
40 |
audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
|
41 |
adjusted_audio = audio + (20 * voice_volume - 20) # Adjust volume (0.0 to 2.0)
|
42 |
|
|
|
44 |
adjusted_audio.export(temp_file.name, format="mp3")
|
45 |
return temp_file.name
|
46 |
|
47 |
+
def whisper_speech_to_text(audio_path):
    """Convert speech to text using the Hugging Face Whisper inference API.

    Args:
        audio_path: Filesystem path to the recorded audio file, or None.

    Returns:
        The transcribed text, or "" when no/invalid audio is supplied or the
        API request fails (callers treat "" as "transcription failed").
    """
    if audio_path is None:
        logging.error("Error: No audio file provided")
        return ""

    if not os.path.exists(audio_path):
        logging.error(f"Error: Audio file not found at {audio_path}")
        return ""

    try:
        # Read the whole file first so the handle is closed before the
        # (potentially slow) network request is made.
        with open(audio_path, "rb") as audio_file:
            data = audio_file.read()
        # Explicit timeout: without it a stalled API call would hang the
        # Gradio UI indefinitely (requests has no default timeout).
        response = requests.post(WHISPER_API_URL, headers=headers, data=data, timeout=60)
        response.raise_for_status()  # Raise an exception for bad status codes
        result = response.json()
        transcribed_text = result.get("text", "")
        logging.info(f"Transcribed text: {transcribed_text}")
        return transcribed_text
    except requests.exceptions.RequestException as e:
        logging.error(f"Error during API request: {e}")
        return ""
    except Exception as e:
        logging.error(f"Unexpected error in whisper_speech_to_text: {e}")
        return ""
|
72 |
+
|
73 |
+
async def chat_with_ai(message):
    """Send a user message to the chat model and return (reply_text, audio_path).

    Appends both the user message and the assistant reply to the module-level
    chat_history; on failure the error text is returned with no audio.
    """
    global chat_history

    chat_history.append({"role": "user", "content": message})

    system_message = {
        "role": "system",
        "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries.",
    }

    try:
        completion = chat_client.chat_completion(
            messages=[system_message] + chat_history,
            max_tokens=800,
            temperature=0.7,
        )

        reply_text = completion.choices[0].message['content']
        chat_history.append({"role": "assistant", "content": reply_text})

        # Synthesize the reply so the UI can play it back.
        reply_audio_path = await text_to_speech_stream(reply_text)

        return reply_text, reply_audio_path
    except Exception as e:
        logging.error(f"Error in chat_with_ai: {e}")
        return str(e), None
|
94 |
+
|
95 |
+
def transcribe_and_chat(audio):
    """Transcribe recorded audio and return the assistant's (text, audio_path) reply.

    Returns an apologetic message with no audio when recording or
    transcription fails.
    """
    # Guard: the recorder component may hand us nothing at all.
    if audio is None:
        return "Sorry, no audio was provided. Please try recording again.", None

    transcript = whisper_speech_to_text(audio)
    if not transcript:
        return "Sorry, I couldn't understand the audio or there was an error in transcription. Please try again.", None

    # chat_with_ai is async; drive it to completion from this sync callback.
    reply_text, reply_audio = asyncio.run(chat_with_ai(transcript))
    return reply_text, reply_audio
|
105 |
|
106 |
def create_demo():
|
107 |
with gr.Blocks() as demo:
|