ar08 committed (verified)
Commit 8711dc0 Β· 1 Parent(s): 20c4df5

Update app.py

Files changed (1):
  1. app.py (+24 -65)
app.py CHANGED
@@ -25,7 +25,7 @@ headers = {"Authorization": f"Bearer {hf_token}"}
 # Initialize an empty chat history
 chat_history = []
 
-async def text_to_speech_stream(text):
+async def text_to_speech_stream(text, voice_volume=1.0):
     """Convert text to speech using edge_tts and return the audio file path."""
     communicate = edge_tts.Communicate(text, "en-US-AvaMultilingualNeural")
     audio_data = b""
@@ -34,68 +34,16 @@ async def text_to_speech_stream(text):
         if chunk["type"] == "audio":
             audio_data += chunk["data"]
 
+    # Adjust volume
+    from pydub import AudioSegment
+    audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
+    adjusted_audio = audio + (20 * voice_volume - 20)  # Adjust volume (0.0 to 2.0)
+
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
-        temp_file.write(audio_data)
+        adjusted_audio.export(temp_file.name, format="mp3")
         return temp_file.name
 
-def whisper_speech_to_text(audio_path):
-    """Convert speech to text using Hugging Face Whisper API."""
-    if audio_path is None:
-        logging.error("Error: No audio file provided")
-        return ""
-
-    if not os.path.exists(audio_path):
-        logging.error(f"Error: Audio file not found at {audio_path}")
-        return ""
-
-    try:
-        with open(audio_path, "rb") as audio_file:
-            data = audio_file.read()
-        response = requests.post(WHISPER_API_URL, headers=headers, data=data)
-        response.raise_for_status()  # Raise an exception for bad status codes
-        result = response.json()
-        transcribed_text = result.get("text", "")
-        logging.info(f"Transcribed text: {transcribed_text}")
-        return transcribed_text
-    except requests.exceptions.RequestException as e:
-        logging.error(f"Error during API request: {e}")
-        return ""
-    except Exception as e:
-        logging.error(f"Unexpected error in whisper_speech_to_text: {e}")
-        return ""
-
-async def chat_with_ai(message):
-    global chat_history
-
-    chat_history.append({"role": "user", "content": message})
-
-    try:
-        response = chat_client.chat_completion(
-            messages=[{"role": "system", "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries."}] + chat_history,
-            max_tokens=800,
-            temperature=0.7
-        )
-
-        response_text = response.choices[0].message['content']
-        chat_history.append({"role": "assistant", "content": response_text})
-
-        audio_path = await text_to_speech_stream(response_text)
-
-        return response_text, audio_path
-    except Exception as e:
-        logging.error(f"Error in chat_with_ai: {e}")
-        return str(e), None
-
-def transcribe_and_chat(audio):
-    if audio is None:
-        return "Sorry, no audio was provided. Please try recording again.", None
-
-    text = whisper_speech_to_text(audio)
-    if not text:
-        return "Sorry, I couldn't understand the audio or there was an error in transcription. Please try again.", None
-
-    response, audio_path = asyncio.run(chat_with_ai(text))
-    return response, audio_path
+# ... (rest of the functions remain the same)
 
 def create_demo():
     with gr.Blocks() as demo:
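A note on the volume math in the added lines: pydub overloads `+` on AudioSegment to apply a gain in decibels, so `20 * voice_volume - 20` maps the slider's 0.0 to 2.0 range onto -20 dB to +20 dB, with 1.0 leaving the audio unchanged. (The hunk also uses `io.BytesIO`, which assumes `import io` is already present near the top of app.py.) A minimal standalone sketch of the mapping, assuming pydub and ffmpeg are installed; `adjust_volume` and the sample values are illustrative, not part of the commit:

```python
# Illustrative sketch (not from app.py): pydub's AudioSegment.__add__ with a
# number applies that many dB of gain, i.e. audio + n == audio.apply_gain(n).
import io

from pydub import AudioSegment

def adjust_volume(mp3_bytes: bytes, voice_volume: float) -> AudioSegment:
    """Map a 0.0-2.0 slider value onto a -20 dB..+20 dB gain."""
    audio = AudioSegment.from_mp3(io.BytesIO(mp3_bytes))
    return audio + (20 * voice_volume - 20)

# The mapping itself, without touching any audio:
for v in (0.0, 0.5, 1.0, 1.5, 2.0):
    print(f"slider={v:.1f} -> gain={20 * v - 20:+5.1f} dB")
```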
@@ -110,6 +58,7 @@ def create_demo():
         with gr.Column(scale=1):
             audio_input = gr.Audio(type="filepath", label="🎀 Record your voice", elem_id="audio-input")
             clear_button = gr.Button("Clear", variant="secondary", elem_id="clear-button")
+            voice_volume = gr.Slider(minimum=0, maximum=2, value=1, step=0.1, label="Voice Volume", elem_id="voice-volume")
 
         with gr.Column(scale=1):
             chat_output = gr.Textbox(label="πŸ’¬ AI Response", elem_id="chat-output", lines=5, interactive=False)
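The slider wiring follows the usual Gradio Blocks pattern: every component named in `inputs` has its current value handed to the callback as a plain Python value. A self-contained sketch of that pattern, with hypothetical component names (it is not the app itself):

```python
# Minimal Gradio Blocks sketch showing a Slider value flowing into a handler.
import gradio as gr

def process(text, volume):
    # Gradio passes the slider's current value as a float.
    return f"{text} (volume={volume:.1f})"

with gr.Blocks() as demo:
    text_in = gr.Textbox(label="Input")
    volume = gr.Slider(minimum=0, maximum=2, value=1, step=0.1, label="Voice Volume")
    text_out = gr.Textbox(label="Output")
    # Both components are inputs; only the output textbox is updated.
    text_in.change(process, inputs=[text_in, volume], outputs=[text_out])

if __name__ == "__main__":
    demo.launch()
```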
@@ -119,15 +68,17 @@ def create_demo():
         gr.Markdown("---")
 
         # Processing the audio input
-        def process_audio(audio):
+        def process_audio(audio, volume):
             logging.info(f"Received audio: {audio}")
             if audio is None:
                 return "No audio detected. Please try recording again.", None, None
             response, audio_path = transcribe_and_chat(audio)
-            logging.info(f"Response: {response}, Audio path: {audio_path}")
-            return response, audio_path, None  # Return None to clear the audio input
+            # Adjust volume for the response audio
+            adjusted_audio_path = asyncio.run(text_to_speech_stream(response, volume))
+            logging.info(f"Response: {response}, Audio path: {adjusted_audio_path}")
+            return response, adjusted_audio_path, None  # Return None to clear the audio input
 
-        audio_input.change(process_audio, inputs=[audio_input], outputs=[chat_output, audio_output, audio_input])
+        audio_input.change(process_audio, inputs=[audio_input, voice_volume], outputs=[chat_output, audio_output, audio_input])
         clear_button.click(lambda: (None, None, None), None, [chat_output, audio_output, audio_input])
 
         # JavaScript to handle autoplay and automatic submission
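`process_audio` is a synchronous callback, so the coroutine `text_to_speech_stream` has to be driven to completion with `asyncio.run`, which starts an event loop, runs the coroutine, and tears the loop down on each call. A minimal standalone sketch of that pattern with edge_tts, assuming edge_tts is installed; the voice string is the one app.py uses, and `out.mp3` is an arbitrary output name:

```python
# Sketch of calling an async edge_tts synthesis routine from sync code.
import asyncio

import edge_tts

async def synth(text: str) -> bytes:
    communicate = edge_tts.Communicate(text, "en-US-AvaMultilingualNeural")
    audio = b""
    async for chunk in communicate.stream():
        if chunk["type"] == "audio":
            audio += chunk["data"]
    return audio

if __name__ == "__main__":
    # asyncio.run bridges the sync/async boundary, as process_audio does above.
    data = asyncio.run(synth("Hello from a synchronous caller."))
    with open("out.mp3", "wb") as f:
        f.write(data)
```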
@@ -156,6 +107,14 @@ def create_demo():
             document.addEventListener('gradioUpdated', function(event) {
                 setTimeout(playAssistantAudio, 100);
             });
+
+            // Prevent audio from stopping when switching tabs
+            document.addEventListener("visibilitychange", function() {
+                var audioElements = document.querySelectorAll('audio');
+                audioElements.forEach(function(audio) {
+                    audio.play();
+                });
+            });
         }
         """)
 
@@ -164,4 +123,4 @@ def create_demo():
 # Launch the Gradio app
 if __name__ == "__main__":
     demo = create_demo()
-    demo.launch(server_name="0.0.0.0", server_port=7860)
+    demo.launch(server_name="0.0.0.0", server_port=7860)
 