Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,8 @@ from huggingface_hub import InferenceClient
|
|
6 |
import requests
|
7 |
import tempfile
|
8 |
import logging
|
|
|
|
|
9 |
|
10 |
# Set up logging
|
11 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
@@ -35,7 +37,6 @@ async def text_to_speech_stream(text, voice_volume=1.0):
|
|
35 |
audio_data += chunk["data"]
|
36 |
|
37 |
# Adjust volume
|
38 |
-
from pydub import AudioSegment
|
39 |
audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
|
40 |
adjusted_audio = audio + (20 * voice_volume - 20) # Adjust volume (0.0 to 2.0)
|
41 |
|
@@ -43,7 +44,64 @@ async def text_to_speech_stream(text, voice_volume=1.0):
|
|
43 |
adjusted_audio.export(temp_file.name, format="mp3")
|
44 |
return temp_file.name
|
45 |
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
def create_demo():
|
49 |
with gr.Blocks() as demo:
|
|
|
6 |
import requests
|
7 |
import tempfile
|
8 |
import logging
|
9 |
+
import io
|
10 |
+
from pydub import AudioSegment
|
11 |
|
12 |
# Set up logging
|
13 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
37 |
audio_data += chunk["data"]
|
38 |
|
39 |
# Adjust volume
|
|
|
40 |
audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
|
41 |
adjusted_audio = audio + (20 * voice_volume - 20) # Adjust volume (0.0 to 2.0)
|
42 |
|
|
|
44 |
adjusted_audio.export(temp_file.name, format="mp3")
|
45 |
return temp_file.name
|
46 |
|
47 |
+
def whisper_speech_to_text(audio_path):
    """Convert speech to text using the Hugging Face Whisper inference API.

    Args:
        audio_path: Filesystem path to the recorded audio file, or None.

    Returns:
        The transcribed text, or "" when no/invalid audio is supplied or the
        API request fails (callers treat "" as "transcription failed").
    """
    if audio_path is None:
        logging.error("Error: No audio file provided")
        return ""

    if not os.path.exists(audio_path):
        logging.error(f"Error: Audio file not found at {audio_path}")
        return ""

    try:
        # Read the whole file first so the handle is closed before the
        # (potentially slow) network request is made.
        with open(audio_path, "rb") as audio_file:
            data = audio_file.read()
        # Explicit timeout: without it a stalled API call would hang the
        # Gradio UI indefinitely (requests has no default timeout).
        response = requests.post(WHISPER_API_URL, headers=headers, data=data, timeout=60)
        response.raise_for_status()  # Raise an exception for bad status codes
        result = response.json()
        transcribed_text = result.get("text", "")
        logging.info(f"Transcribed text: {transcribed_text}")
        return transcribed_text
    except requests.exceptions.RequestException as e:
        logging.error(f"Error during API request: {e}")
        return ""
    except Exception as e:
        logging.error(f"Unexpected error in whisper_speech_to_text: {e}")
        return ""
|
72 |
+
|
73 |
+
async def chat_with_ai(message):
    """Send a user message to the chat model and return (reply_text, audio_path).

    Appends both the user message and the assistant reply to the module-level
    chat_history; on failure the error text is returned with no audio.
    """
    global chat_history

    chat_history.append({"role": "user", "content": message})

    system_message = {
        "role": "system",
        "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries.",
    }

    try:
        completion = chat_client.chat_completion(
            messages=[system_message] + chat_history,
            max_tokens=800,
            temperature=0.7,
        )

        reply_text = completion.choices[0].message['content']
        chat_history.append({"role": "assistant", "content": reply_text})

        # Synthesize the reply so the UI can play it back.
        reply_audio_path = await text_to_speech_stream(reply_text)

        return reply_text, reply_audio_path
    except Exception as e:
        logging.error(f"Error in chat_with_ai: {e}")
        return str(e), None
|
94 |
+
|
95 |
+
def transcribe_and_chat(audio):
    """Transcribe recorded audio and return the assistant's (text, audio_path) reply.

    Returns an apologetic message with no audio when recording or
    transcription fails.
    """
    # Guard: the recorder component may hand us nothing at all.
    if audio is None:
        return "Sorry, no audio was provided. Please try recording again.", None

    transcript = whisper_speech_to_text(audio)
    if not transcript:
        return "Sorry, I couldn't understand the audio or there was an error in transcription. Please try again.", None

    # chat_with_ai is async; drive it to completion from this sync callback.
    reply_text, reply_audio = asyncio.run(chat_with_ai(transcript))
    return reply_text, reply_audio
|
105 |
|
106 |
def create_demo():
|
107 |
with gr.Blocks() as demo:
|