ar08 committed on
Commit
41fe818
·
verified ·
1 Parent(s): 8711dc0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -2
app.py CHANGED
@@ -6,6 +6,8 @@ from huggingface_hub import InferenceClient
6
  import requests
7
  import tempfile
8
  import logging
 
 
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -35,7 +37,6 @@ async def text_to_speech_stream(text, voice_volume=1.0):
35
  audio_data += chunk["data"]
36
 
37
  # Adjust volume
38
- from pydub import AudioSegment
39
  audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
40
  adjusted_audio = audio + (20 * voice_volume - 20) # Adjust volume (0.0 to 2.0)
41
 
@@ -43,7 +44,64 @@ async def text_to_speech_stream(text, voice_volume=1.0):
43
  adjusted_audio.export(temp_file.name, format="mp3")
44
  return temp_file.name
45
 
46
- # ... (rest of the functions remain the same)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  def create_demo():
49
  with gr.Blocks() as demo:
 
6
  import requests
7
  import tempfile
8
  import logging
9
+ import io
10
+ from pydub import AudioSegment
11
 
12
  # Set up logging
13
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
37
  audio_data += chunk["data"]
38
 
39
  # Adjust volume
 
40
  audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
41
  adjusted_audio = audio + (20 * voice_volume - 20) # Adjust volume (0.0 to 2.0)
42
 
 
44
  adjusted_audio.export(temp_file.name, format="mp3")
45
  return temp_file.name
46
 
47
def whisper_speech_to_text(audio_path):
    """Convert speech to text using the Hugging Face Whisper API.

    Args:
        audio_path: Filesystem path to the recorded audio file, or None.

    Returns:
        str: The transcribed text, or "" when no/invalid audio was given
        or the API request failed (errors are logged, never raised).
    """
    if audio_path is None:
        logging.error("Error: No audio file provided")
        return ""

    if not os.path.exists(audio_path):
        logging.error(f"Error: Audio file not found at {audio_path}")
        return ""

    try:
        with open(audio_path, "rb") as audio_file:
            data = audio_file.read()
        # Bound the request so an unresponsive API cannot hang the app;
        # a timeout surfaces as RequestException and is handled below.
        response = requests.post(WHISPER_API_URL, headers=headers, data=data, timeout=60)
        response.raise_for_status()  # Raise an exception for bad status codes
        result = response.json()
        transcribed_text = result.get("text", "")
        logging.info(f"Transcribed text: {transcribed_text}")
        return transcribed_text
    except requests.exceptions.RequestException as e:
        logging.error(f"Error during API request: {e}")
        return ""
    except Exception as e:
        logging.error(f"Unexpected error in whisper_speech_to_text: {e}")
        return ""
72
+
73
async def chat_with_ai(message):
    """Send *message* to the chat model and return (reply_text, audio_path).

    Appends the user message (and, on success, the assistant reply) to the
    module-level chat_history. On any failure the error text is returned in
    place of the reply, with None for the audio path.
    """
    global chat_history

    chat_history.append({"role": "user", "content": message})

    system_prompt = {
        "role": "system",
        "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries.",
    }

    try:
        completion = chat_client.chat_completion(
            messages=[system_prompt] + chat_history,
            max_tokens=800,
            temperature=0.7,
        )

        reply = completion.choices[0].message['content']
        chat_history.append({"role": "assistant", "content": reply})

        # Synthesize the reply so the UI can play it back.
        speech_path = await text_to_speech_stream(reply)

        return reply, speech_path
    except Exception as e:
        logging.error(f"Error in chat_with_ai: {e}")
        return str(e), None
94
+
95
def transcribe_and_chat(audio):
    """Transcribe recorded audio, then return the chat reply and its TTS audio.

    Returns a (response_text, audio_path) pair; audio_path is None whenever
    transcription or the chat step cannot produce a result.
    """
    if audio is None:
        return "Sorry, no audio was provided. Please try recording again.", None

    transcript = whisper_speech_to_text(audio)
    if not transcript:
        return "Sorry, I couldn't understand the audio or there was an error in transcription. Please try again.", None

    # chat_with_ai is a coroutine; drive it to completion from this sync callback.
    return asyncio.run(chat_with_ai(transcript))
105
 
106
  def create_demo():
107
  with gr.Blocks() as demo: