ciyidogan committed on
Commit
5d32118
·
verified ·
1 Parent(s): 292760f

Update audio_routes.py

Browse files
Files changed (1) hide show
  1. audio_routes.py +103 -7
audio_routes.py CHANGED
@@ -141,15 +141,111 @@ async def get_tts_status():
141
  }
142
  }
143
 
144
- # ===================== STT Endpoints (Future) =====================
145
  @router.post("/stt/transcribe")
146
  async def transcribe_audio(request: STTRequest):
147
- """Transcribe audio to text - to be implemented"""
148
- # TODO: Implement when STT factory is ready
149
- raise HTTPException(
150
- status_code=501,
151
- detail="STT transcription not yet implemented"
152
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  @router.get("/stt/status")
155
  async def get_stt_status():
 
141
  }
142
  }
143
 
144
+ # ===================== STT Endpoints =====================
145
  @router.post("/stt/transcribe")
146
  async def transcribe_audio(request: STTRequest):
147
+ """Transcribe audio to text"""
148
+ try:
149
+ from stt_factory import STTFactory
150
+ from stt_interface import STTConfig
151
+ import base64
152
+
153
+ # Create STT provider
154
+ stt_provider = STTFactory.create_provider()
155
+
156
+ if not stt_provider or not stt_provider.supports_realtime():
157
+ log_warning("📵 STT disabled or doesn't support transcription")
158
+ raise HTTPException(
159
+ status_code=503,
160
+ detail="STT service not available"
161
+ )
162
+
163
+ # Get config
164
+ cfg = ConfigProvider.get()
165
+ stt_config = cfg.global_config.stt_provider.settings
166
+
167
+ # Decode audio data
168
+ audio_bytes = base64.b64decode(request.audio_data)
169
+
170
+ # Create STT config
171
+ config = STTConfig(
172
+ language=request.language or stt_config.get("language", "tr-TR"),
173
+ sample_rate=16000,
174
+ encoding=request.format.upper() if request.format else "WEBM_OPUS",
175
+ enable_punctuation=stt_config.get("enable_punctuation", True),
176
+ enable_word_timestamps=False,
177
+ model=stt_config.get("model", "latest_long"),
178
+ use_enhanced=stt_config.get("use_enhanced", True),
179
+ single_utterance=True,
180
+ interim_results=False
181
+ )
182
+
183
+ # Start streaming session
184
+ await stt_provider.start_streaming(config)
185
+
186
+ # Process audio
187
+ transcription = ""
188
+ confidence = 0.0
189
+
190
+ try:
191
+ async for result in stt_provider.stream_audio(audio_bytes):
192
+ if result.is_final:
193
+ transcription = result.text
194
+ confidence = result.confidence
195
+ break
196
+ finally:
197
+ # Stop streaming
198
+ await stt_provider.stop_streaming()
199
+
200
+ log_info(f"✅ STT transcription completed: '{transcription[:50]}...'")
201
+
202
+ return {
203
+ "text": transcription,
204
+ "confidence": confidence,
205
+ "language": request.language,
206
+ "provider": stt_provider.get_provider_name()
207
+ }
208
+
209
+ except HTTPException:
210
+ raise
211
+ except Exception as e:
212
+ log_error("❌ STT transcription error", e)
213
+ raise HTTPException(
214
+ status_code=500,
215
+ detail=f"Transcription failed: {str(e)}"
216
+ )
217
+
218
+ @router.get("/stt/languages")
219
+ async def get_stt_languages():
220
+ """Get supported STT languages"""
221
+ try:
222
+ from stt_factory import STTFactory
223
+
224
+ stt_provider = STTFactory.create_provider()
225
+
226
+ if not stt_provider:
227
+ return {
228
+ "languages": [],
229
+ "provider": "none",
230
+ "enabled": False
231
+ }
232
+
233
+ languages = stt_provider.get_supported_languages()
234
+
235
+ return {
236
+ "languages": languages,
237
+ "provider": stt_provider.get_provider_name(),
238
+ "enabled": True
239
+ }
240
+
241
+ except Exception as e:
242
+ log_error("❌ Error getting STT languages", e)
243
+ return {
244
+ "languages": [],
245
+ "provider": "error",
246
+ "enabled": False,
247
+ "error": str(e)
248
+ }
249
 
250
  @router.get("/stt/status")
251
  async def get_stt_status():