Spaces:

ar08
/

Voice-assitant

Sleeping

App Files Files Community

ar08 committed on Oct 15, 2024

Commit

e927cd3

verified ·

1 Parent(s): 03c0141

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -20

app.py CHANGED Viewed

@@ -3,21 +3,20 @@ import asyncio
 import edge_tts
 import os
 from huggingface_hub import InferenceClient
-import whisper
-import torch
 import tempfile
 # Get the Hugging Face token from environment variable
 hf_token = os.getenv("HF_TOKEN")
 if not hf_token:
     raise ValueError("HF_TOKEN environment variable is not set")
-# Initialize the Hugging Face Inference Client
-client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407", token=hf_token)
-# Load the Whisper model
-whisper_model = whisper.load_model("tiny.en", device='cuda' if torch.cuda.is_available() else 'cpu')
 # Initialize an empty chat history
 chat_history = []
@@ -35,17 +34,13 @@ async def text_to_speech_stream(text):
         temp_file.write(audio_data)
         return temp_file.name
-def whisper_speech_to_text(audio):
-    """Convert speech to text using Whisper model."""
-    try:
-        result = whisper_model.transcribe(audio)
-        return result['text']
-    except Exception as e:
-        print(f"Whisper Error: {e}")
-        return None
-    finally:
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
 async def chat_with_ai(message):
     global chat_history
@@ -53,7 +48,7 @@ async def chat_with_ai(message):
     chat_history.append({"role": "user", "content": message})
     try:
-        response = client.chat_completion(
             messages=[{"role": "system", "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries."}] + chat_history,
             max_tokens=800,
             temperature=0.7
@@ -71,7 +66,7 @@ async def chat_with_ai(message):
 def transcribe_and_chat(audio):
     text = whisper_speech_to_text(audio)
-    if text is None:
         return "Sorry, I couldn't understand the audio.", None
     response, audio_path = asyncio.run(chat_with_ai(text))

 import edge_tts
 import os
 from huggingface_hub import InferenceClient
+import requests
 import tempfile
 # Get the Hugging Face token from environment variable
 hf_token = os.getenv("HF_TOKEN")
 if not hf_token:
     raise ValueError("HF_TOKEN environment variable is not set")
+# Initialize the Hugging Face Inference Client for chat completion
+chat_client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407", token=hf_token)
+# Whisper API settings
+WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
+headers = {"Authorization": f"Bearer {hf_token}"}
 # Initialize an empty chat history
 chat_history = []
         temp_file.write(audio_data)
         return temp_file.name
+def whisper_speech_to_text(audio_path):
+    """Convert speech to text using Hugging Face Whisper API."""
+    with open(audio_path, "rb") as audio_file:
+        data = audio_file.read()
+    response = requests.post(WHISPER_API_URL, headers=headers, data=data)
+    result = response.json()
+    return result.get("text", "")
 async def chat_with_ai(message):
     global chat_history
     chat_history.append({"role": "user", "content": message})
     try:
+        response = chat_client.chat_completion(
             messages=[{"role": "system", "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries."}] + chat_history,
             max_tokens=800,
             temperature=0.7
 def transcribe_and_chat(audio):
     text = whisper_speech_to_text(audio)
+    if not text:
         return "Sorry, I couldn't understand the audio.", None
     response, audio_path = asyncio.run(chat_with_ai(text))