ar08 committed on
Commit
e927cd3
·
verified ·
1 Parent(s): 03c0141

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -20
app.py CHANGED
@@ -3,21 +3,20 @@ import asyncio
3
  import edge_tts
4
  import os
5
  from huggingface_hub import InferenceClient
6
- import whisper
7
- import torch
8
  import tempfile
9
 
10
-
11
  # Get the Hugging Face token from environment variable
12
  hf_token = os.getenv("HF_TOKEN")
13
  if not hf_token:
14
  raise ValueError("HF_TOKEN environment variable is not set")
15
 
16
- # Initialize the Hugging Face Inference Client
17
- client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407", token=hf_token)
18
 
19
- # Load the Whisper model
20
- whisper_model = whisper.load_model("tiny.en", device='cuda' if torch.cuda.is_available() else 'cpu')
 
21
 
22
  # Initialize an empty chat history
23
  chat_history = []
@@ -35,17 +34,13 @@ async def text_to_speech_stream(text):
35
  temp_file.write(audio_data)
36
  return temp_file.name
37
 
38
- def whisper_speech_to_text(audio):
39
- """Convert speech to text using Whisper model."""
40
- try:
41
- result = whisper_model.transcribe(audio)
42
- return result['text']
43
- except Exception as e:
44
- print(f"Whisper Error: {e}")
45
- return None
46
- finally:
47
- if torch.cuda.is_available():
48
- torch.cuda.empty_cache()
49
 
50
  async def chat_with_ai(message):
51
  global chat_history
@@ -53,7 +48,7 @@ async def chat_with_ai(message):
53
  chat_history.append({"role": "user", "content": message})
54
 
55
  try:
56
- response = client.chat_completion(
57
  messages=[{"role": "system", "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries."}] + chat_history,
58
  max_tokens=800,
59
  temperature=0.7
@@ -71,7 +66,7 @@ async def chat_with_ai(message):
71
 
72
  def transcribe_and_chat(audio):
73
  text = whisper_speech_to_text(audio)
74
- if text is None:
75
  return "Sorry, I couldn't understand the audio.", None
76
 
77
  response, audio_path = asyncio.run(chat_with_ai(text))
 
3
  import edge_tts
4
  import os
5
  from huggingface_hub import InferenceClient
6
+ import requests
 
7
  import tempfile
8
 
 
9
  # Get the Hugging Face token from environment variable
10
  hf_token = os.getenv("HF_TOKEN")
11
  if not hf_token:
12
  raise ValueError("HF_TOKEN environment variable is not set")
13
 
14
+ # Initialize the Hugging Face Inference Client for chat completion
15
+ chat_client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407", token=hf_token)
16
 
17
+ # Whisper API settings
18
+ WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
19
+ headers = {"Authorization": f"Bearer {hf_token}"}
20
 
21
  # Initialize an empty chat history
22
  chat_history = []
 
34
  temp_file.write(audio_data)
35
  return temp_file.name
36
 
37
+ def whisper_speech_to_text(audio_path):
38
+ """Convert speech to text using Hugging Face Whisper API."""
39
+ with open(audio_path, "rb") as audio_file:
40
+ data = audio_file.read()
41
+ response = requests.post(WHISPER_API_URL, headers=headers, data=data)
42
+ result = response.json()
43
+ return result.get("text", "")
 
 
 
 
44
 
45
  async def chat_with_ai(message):
46
  global chat_history
 
48
  chat_history.append({"role": "user", "content": message})
49
 
50
  try:
51
+ response = chat_client.chat_completion(
52
  messages=[{"role": "system", "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries."}] + chat_history,
53
  max_tokens=800,
54
  temperature=0.7
 
66
 
67
  def transcribe_and_chat(audio):
68
  text = whisper_speech_to_text(audio)
69
+ if not text:
70
  return "Sorry, I couldn't understand the audio.", None
71
 
72
  response, audio_path = asyncio.run(chat_with_ai(text))