Update app.py
app.py CHANGED
```diff
@@ -25,7 +25,7 @@ headers = {"Authorization": f"Bearer {hf_token}"}
 # Initialize an empty chat history
 chat_history = []
 
-async def text_to_speech_stream(text):
+async def text_to_speech_stream(text, voice_volume=1.0):
     """Convert text to speech using edge_tts and return the audio file path."""
     communicate = edge_tts.Communicate(text, "en-US-AvaMultilingualNeural")
     audio_data = b""
@@ -34,68 +34,16 @@ async def text_to_speech_stream(text):
         if chunk["type"] == "audio":
             audio_data += chunk["data"]
 
+    # Adjust volume
+    from pydub import AudioSegment
+    audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
+    adjusted_audio = audio + (20 * voice_volume - 20)  # Adjust volume (0.0 to 2.0)
+
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
-        temp_file.write(audio_data)
+        adjusted_audio.export(temp_file.name, format="mp3")
         return temp_file.name
 
-def whisper_speech_to_text(audio_path):
-    """Convert speech to text using Hugging Face Whisper API."""
-    if audio_path is None:
-        logging.error("Error: No audio file provided")
-        return ""
-
-    if not os.path.exists(audio_path):
-        logging.error(f"Error: Audio file not found at {audio_path}")
-        return ""
-
-    try:
-        with open(audio_path, "rb") as audio_file:
-            data = audio_file.read()
-        response = requests.post(WHISPER_API_URL, headers=headers, data=data)
-        response.raise_for_status()  # Raise an exception for bad status codes
-        result = response.json()
-        transcribed_text = result.get("text", "")
-        logging.info(f"Transcribed text: {transcribed_text}")
-        return transcribed_text
-    except requests.exceptions.RequestException as e:
-        logging.error(f"Error during API request: {e}")
-        return ""
-    except Exception as e:
-        logging.error(f"Unexpected error in whisper_speech_to_text: {e}")
-        return ""
-
-async def chat_with_ai(message):
-    global chat_history
-
-    chat_history.append({"role": "user", "content": message})
-
-    try:
-        response = chat_client.chat_completion(
-            messages=[{"role": "system", "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries."}] + chat_history,
-            max_tokens=800,
-            temperature=0.7
-        )
-
-        response_text = response.choices[0].message['content']
-        chat_history.append({"role": "assistant", "content": response_text})
-
-        audio_path = await text_to_speech_stream(response_text)
-
-        return response_text, audio_path
-    except Exception as e:
-        logging.error(f"Error in chat_with_ai: {e}")
-        return str(e), None
-
-def transcribe_and_chat(audio):
-    if audio is None:
-        return "Sorry, no audio was provided. Please try recording again.", None
-
-    text = whisper_speech_to_text(audio)
-    if not text:
-        return "Sorry, I couldn't understand the audio or there was an error in transcription. Please try again.", None
-
-    response, audio_path = asyncio.run(chat_with_ai(text))
-    return response, audio_path
+# ... (rest of the functions remain the same)
 
 def create_demo():
     with gr.Blocks() as demo:
@@ -110,6 +58,7 @@ def create_demo():
             with gr.Column(scale=1):
                 audio_input = gr.Audio(type="filepath", label="🎤 Record your voice", elem_id="audio-input")
                 clear_button = gr.Button("Clear", variant="secondary", elem_id="clear-button")
+                voice_volume = gr.Slider(minimum=0, maximum=2, value=1, step=0.1, label="Voice Volume", elem_id="voice-volume")
 
             with gr.Column(scale=1):
                 chat_output = gr.Textbox(label="💬 AI Response", elem_id="chat-output", lines=5, interactive=False)
@@ -119,15 +68,17 @@ def create_demo():
         gr.Markdown("---")
 
         # Processing the audio input
-        def process_audio(audio):
+        def process_audio(audio, volume):
             logging.info(f"Received audio: {audio}")
             if audio is None:
                 return "No audio detected. Please try recording again.", None, None
             response, audio_path = transcribe_and_chat(audio)
-            logging.info(f"Response: {response}, Audio path: {audio_path}")
-            return response, audio_path, None  # Return None to clear the audio input
+            # Adjust volume for the response audio
+            adjusted_audio_path = asyncio.run(text_to_speech_stream(response, volume))
+            logging.info(f"Response: {response}, Audio path: {adjusted_audio_path}")
+            return response, adjusted_audio_path, None  # Return None to clear the audio input
 
-        audio_input.change(process_audio, inputs=[audio_input], outputs=[chat_output, audio_output, audio_input])
+        audio_input.change(process_audio, inputs=[audio_input, voice_volume], outputs=[chat_output, audio_output, audio_input])
         clear_button.click(lambda: (None, None, None), None, [chat_output, audio_output, audio_input])
 
         # JavaScript to handle autoplay and automatic submission
@@ -156,6 +107,14 @@ def create_demo():
             document.addEventListener('gradioUpdated', function(event) {
                 setTimeout(playAssistantAudio, 100);
             });
+
+            // Prevent audio from stopping when switching tabs
+            document.addEventListener("visibilitychange", function() {
+                var audioElements = document.querySelectorAll('audio');
+                audioElements.forEach(function(audio) {
+                    audio.play();
+                });
+            });
         }
         """)
 
@@ -164,4 +123,4 @@ def create_demo():
 # Launch the Gradio app
 if __name__ == "__main__":
     demo = create_demo()
-    demo.launch(server_name="0.0.0.0", server_port=7860)
+    demo.launch(server_name="0.0.0.0", server_port=7860)
```
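The volume control added to `text_to_speech_stream` maps the 0.0-2.0 slider value onto a decibel gain for pydub: `20 * voice_volume - 20` gives -20 dB at 0.0, 0 dB (unchanged) at 1.0, and +20 dB at 2.0. A minimal sketch of that mapping, assuming pydub is installed with ffmpeg available for MP3 decoding; `adjust_volume` is an illustrative helper name, not a function in app.py:

```python
import io

from pydub import AudioSegment  # pydub needs ffmpeg to decode MP3 data


def adjust_volume(mp3_bytes: bytes, voice_volume: float) -> AudioSegment:
    """Apply a slider-style volume (0.0-2.0) to raw MP3 bytes as a dB gain."""
    audio = AudioSegment.from_mp3(io.BytesIO(mp3_bytes))
    gain_db = 20 * voice_volume - 20  # 0.0 -> -20 dB, 1.0 -> 0 dB, 2.0 -> +20 dB
    return audio.apply_gain(gain_db)  # same effect as `audio + gain_db`
```

Because the gain is applied to the decoded segment before export, the MP3 written to the temporary file already carries the adjusted loudness.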
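On the UI side, the new `gr.Slider` is just an additional input to `audio_input.change`, so each recording is processed with the slider's current value. A stripped-down sketch of that wiring, assuming Gradio Blocks; the handler below is a placeholder, whereas the real `process_audio` in app.py also transcribes the audio and synthesizes the reply:

```python
import gradio as gr


def process_audio(audio, volume):
    # Placeholder: the real handler transcribes `audio`, queries the chat
    # model, and synthesizes speech at the requested `volume` (0.0-2.0).
    if audio is None:
        return "No audio detected. Please try recording again.", None, None
    return f"Received {audio} at volume {volume:.1f}", None, None


with gr.Blocks() as demo:
    audio_input = gr.Audio(type="filepath", label="Record your voice")
    voice_volume = gr.Slider(minimum=0, maximum=2, value=1, step=0.1,
                             label="Voice Volume")
    chat_output = gr.Textbox(label="AI Response", interactive=False)
    audio_output = gr.Audio(label="Assistant reply")

    # The slider's current value is passed alongside the recorded file
    # whenever the audio input changes; the third output clears the recorder.
    audio_input.change(process_audio,
                       inputs=[audio_input, voice_volume],
                       outputs=[chat_output, audio_output, audio_input])

if __name__ == "__main__":
    demo.launch()
```

Returning `None` as the third output clears the recorder, so the next utterance triggers a fresh `change` event.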
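Finally, `process_audio` is a synchronous Gradio callback while `text_to_speech_stream` is a coroutine, which is why the updated handler drives it with `asyncio.run`. A compact sketch of that pattern, assuming the edge-tts package and the same voice name used in app.py; `synthesize` and `synthesize_blocking` are illustrative names:

```python
import asyncio
import tempfile

import edge_tts


async def synthesize(text: str) -> str:
    """Stream TTS audio from edge_tts and write it to a temporary MP3 file."""
    communicate = edge_tts.Communicate(text, "en-US-AvaMultilingualNeural")
    audio_data = b""
    async for chunk in communicate.stream():
        if chunk["type"] == "audio":
            audio_data += chunk["data"]
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        temp_file.write(audio_data)
        return temp_file.name


def synthesize_blocking(text: str) -> str:
    # Synchronous entry point, e.g. from a Gradio event handler that is not
    # already running inside an event loop.
    return asyncio.run(synthesize(text))


if __name__ == "__main__":
    print(synthesize_blocking("Hello from edge_tts"))
```

Note that `asyncio.run` raises a RuntimeError if the calling thread already has a running event loop, so this pattern only suits plain synchronous callbacks.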