Pijush2023 committed
Commit f26ca23 · verified · 1 Parent(s): 26117f7

Update app.py

Files changed (1)
  1. app.py +20 -15
app.py CHANGED
@@ -76,7 +76,7 @@ def structured_retriever(question: str) -> str:
 
 # Function to generate audio with Eleven Labs TTS
 def generate_audio_elevenlabs(text):
-    XI_API_KEY = os.environ['ELEVENLABS_API']
+    XI_API_KEY = os.environ.get('ELEVENLABS_API')
     VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
     tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
     headers = {
@@ -93,20 +93,27 @@ def generate_audio_elevenlabs(text):
             "use_speaker_boost": False
         }
     }
-    response = requests.post(tts_url, headers=headers, json=data, stream=True)
-    if response.ok:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
-            for chunk in response.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-            audio_path = f.name
-        logging.debug(f"Audio saved to {audio_path}")
-        return audio_path  # Return audio path for automatic playback
-    else:
-        logging.error(f"Error generating audio: {response.text}")
+
+    try:
+        logging.debug(f"Sending request to Eleven Labs with text: {text[:100]}...")
+        response = requests.post(tts_url, headers=headers, json=data, stream=True)
+
+        if response.ok:
+            logging.debug("Received successful response from Eleven Labs API.")
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
+                for chunk in response.iter_content(chunk_size=1024):
+                    if chunk:
+                        f.write(chunk)
+                audio_path = f.name
+            logging.debug(f"Audio successfully saved to {audio_path}")
+            return audio_path
+        else:
+            logging.error(f"Error generating audio: {response.status_code} - {response.text}")
+            return None
+    except Exception as e:
+        logging.error(f"Exception during audio generation: {str(e)}")
         return None
 
-
 # Define the ASR model with Whisper
 model_id = 'openai/whisper-large-v3'
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -165,12 +172,10 @@ def transcribe_and_respond(audio):
 
     return audio_path, response_text
 
-
 # Function to clear the transcription state
 def clear_transcription_state():
     return None, None
 
-
 # Define the Gradio interface with only audio input and output
 with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     with gr.Row():
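
The main behavioural change in this commit is the switch from os.environ['ELEVENLABS_API'] to os.environ.get('ELEVENLABS_API'), together with the try/except and status-code logging around the TTS request. The snippet below is a minimal sketch (not part of the commit, standard library only) of why that matters: indexing raises KeyError when the variable is unset, while .get() returns None, so generate_audio_elevenlabs can continue, log the failed request, and return None instead of crashing the Gradio app.

import logging
import os

logging.basicConfig(level=logging.DEBUG)

# Old behaviour (before this commit): a missing ELEVENLABS_API aborts the TTS
# call with an unhandled KeyError.
try:
    key = os.environ['ELEVENLABS_API']
    logging.debug("ELEVENLABS_API is set; old and new lookups behave the same.")
except KeyError:
    logging.error("os.environ[...] raised KeyError because ELEVENLABS_API is unset.")

# New behaviour (after this commit): .get() returns None instead of raising, so
# generate_audio_elevenlabs proceeds; the request is then expected to fail, the
# status code and body are logged, and the function returns None for the caller.
key = os.environ.get('ELEVENLABS_API')
if key is None:
    logging.error("ELEVENLABS_API is unset; generate_audio_elevenlabs would log the API error and return None.")

Independently of the key lookup, the new except branch means any network-level exception raised by requests.post is logged and turned into a None return value rather than propagating into the Gradio callback.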