Spaces:

UcsTurkey
/

flare

Building

App Files Community

ciyidogan committed 12 days ago

Commit

7022f3d

verified ·

1 Parent(s): 5c3c2cf

Update stt/stt_google.py

Browse files

Files changed (1) hide show

stt/stt_google.py +38 -86

stt/stt_google.py CHANGED Viewed

@@ -347,10 +347,7 @@ class GoogleCloudSTT(STTInterface):
                 """Generate streaming requests"""
                 chunk_count = 0
                 total_bytes = 0
-                first_chunk_processed = False
-                last_chunk_time = time.time()
-                silence_timeout = 2.0  # 2 saniye sessizlik timeout
                 while not self.stop_event.is_set():
                     try:
                         chunk = self.audio_queue.get(timeout=0.1)
@@ -360,37 +357,23 @@ class GoogleCloudSTT(STTInterface):
                         chunk_count += 1
                         total_bytes += len(chunk)
-                        last_chunk_time = time.time()  # Update last chunk time
                         # İlk chunk'ta audio format kontrolü
                         if chunk_count == 1:
                             log_info(f"📤 First chunk - size: {len(chunk)} bytes")
-                            # Audio header kontrolü (WEBM magic bytes)
-                            if len(chunk) >= 4:
-                                if chunk[:4] == b'\x1a\x45\xdf\xa3':
-                                    log_info("✅ Valid WEBM header detected")
-                                    first_chunk_processed = True
-                                else:
-                                    log_error(f"❌ Invalid audio format, first 4 bytes: {chunk[:4].hex()}")
-                                    # Hatalı format, stream'i durdur
-                                    break
-                        # İlk chunk geçerliyse devam et
-                        if chunk_count == 1 and not first_chunk_processed:
-                            break
-                        # Her 100 chunk'ta durum raporu
-                        if chunk_count % 100 == 0:
-                            avg_chunk_size = total_bytes / chunk_count
-                            log_info(f"📤 Progress: {chunk_count} chunks, {total_bytes/1024:.1f}KB total, avg {avg_chunk_size:.0f} bytes/chunk")
                         yield speech.StreamingRecognizeRequest(audio_content=chunk)
                     except queue.Empty:
-                        # Check for silence timeout
-                        if time.time() - last_chunk_time > silence_timeout:
-                            log_info(f"🔇 Silence timeout reached ({silence_timeout}s), ending stream")
-                            break
                         continue
                     except Exception as e:
                         log_error(f"❌ Error in request generator: {e}")
@@ -398,108 +381,77 @@ class GoogleCloudSTT(STTInterface):
             # Create streaming client
             requests = request_generator()
             log_info("🎤 Creating Google STT streaming client...")
             try:
                 responses = self.client.streaming_recognize(
                     self.streaming_config,
                     requests,
-                    timeout=300 # 5 dakika timeout
                 )
                 log_info("✅ Google STT streaming client created")
-                # Response timeout kontrolü
-                last_response_time = time.time()
-                RESPONSE_TIMEOUT = 30  # 30 saniye içinde response gelmezse
-                # Process responses
-                response_count = 0
-                empty_response_count = 0
                 for response in responses:
-                    last_response_time = time.time()
-                    response_count += 1
-                    # Response type'ı logla
-                    if response_count == 1:
-                        log_info(f"📨 First response received from Google STT")
                     if self.stop_event.is_set():
-                        log_info("🛑 Stop event detected, breaking response loop")
                         break
-                    # Response içeriğini kontrol et
                     if not response.results:
-                        empty_response_count += 1
-                        if empty_response_count == 1:
-                            log_debug("📭 Received empty response (no results)")
                         continue
-                    for i, result in enumerate(response.results):
-                        log_debug(f"📋 Result {i}: is_final={result.is_final}, alternatives={len(result.alternatives)}")
                         if not result.alternatives:
-                            log_debug(f"📋 Result {i} has no alternatives")
                             continue
-                        # İlk alternatifi al
                         alternative = result.alternatives[0]
-                        # Sadece anlamlı text'leri işle
                         if alternative.transcript.strip():
                             # Create transcription result
                             transcription = TranscriptionResult(
                                 text=alternative.transcript,
                                 is_final=result.is_final,
-                                confidence=alternative.confidence if hasattr(alternative, 'confidence') and alternative.confidence else 0.0,
                                 timestamp=datetime.now().timestamp()
                             )
                             # Put result in queue
                             self._put_result(transcription)
-                            # SADECE final result'ları logla
                             if result.is_final:
-                                log_info(f"🎯 GOOGLE STT FINAL: '{alternative.transcript}'")
-                                # ✅ Single utterance modunda stream otomatik kapanacak
                                 if self.streaming_config.single_utterance:
-                                    log_info("🏁 Single utterance completed - Stream will auto-close")
-                                    # Google STT single utterance modda otomatik kapatır
-                                    # Ama biz de clean bir şekilde çıkalım
-                                    self.is_streaming = False
-                                    return
-                        else:
-                            log_debug(f"📋 Result {i} has empty transcript")
-                            continue
-                    if time.time() - last_response_time > RESPONSE_TIMEOUT:
-                        log_error(f"❌ No response from Google STT for {RESPONSE_TIMEOUT} seconds")
-                log_info(f"📊 Google STT stream ended. Total responses: {response_count}, Empty: {empty_response_count}")
             except Exception as e:
                 error_msg = str(e)
-                # Detaylı hata mesajları
-                if "Exceeded maximum allowed stream duration" in error_msg:
-                    log_warning("⚠️ Stream duration limit exceeded (5 minutes). This is expected for long sessions.")
-                elif "Bad language code" in error_msg:
-                    log_error(f"❌ Invalid language code in STT config. Check locale settings.")
-                elif "invalid_argument" in error_msg:
-                    log_error(f"❌ Invalid STT configuration. Check encoding and sample rate.")
-                elif "Deadline Exceeded" in error_msg:
-                    log_error(f"❌ Google STT response timeout - possibly network issue or slow connection")
-                elif "503" in error_msg or "Service Unavailable" in error_msg:
-                    log_error(f"❌ Google STT service temporarily unavailable. Will retry...")
                 else:
-                    log_error(f"❌ Google STT stream error: {error_msg}")
         except Exception as e:
-            log_error(f"❌ Fatal error in STT stream thread", error=str(e), traceback=traceback.format_exc())
         finally:
             log_info("🎤 Google STT stream thread ended")
-            # Thread bittiğinde streaming flag'ini kapat
             self.is_streaming = False

                 """Generate streaming requests"""
                 chunk_count = 0
                 total_bytes = 0
                 while not self.stop_event.is_set():
                     try:
                         chunk = self.audio_queue.get(timeout=0.1)
                         chunk_count += 1
                         total_bytes += len(chunk)
                         # İlk chunk'ta audio format kontrolü
                         if chunk_count == 1:
                             log_info(f"📤 First chunk - size: {len(chunk)} bytes")
+                            if len(chunk) >= 4 and chunk[:4] == b'\x1a\x45\xdf\xa3':
+                                log_info("✅ Valid WEBM header detected")
+                            else:
+                                log_error(f"❌ Invalid audio format")
+                                break
+                        # Her 50 chunk'ta durum raporu
+                        if chunk_count % 50 == 0:
+                            log_info(f"📤 Progress: {chunk_count} chunks, {total_bytes/1024:.1f}KB total")
                         yield speech.StreamingRecognizeRequest(audio_content=chunk)
                     except queue.Empty:
                         continue
                     except Exception as e:
                         log_error(f"❌ Error in request generator: {e}")
             # Create streaming client
             requests = request_generator()
             log_info("🎤 Creating Google STT streaming client...")
             try:
                 responses = self.client.streaming_recognize(
                     self.streaming_config,
                     requests,
+                    timeout=300
                 )
                 log_info("✅ Google STT streaming client created")
                 for response in responses:
                     if self.stop_event.is_set():
+                        log_info("🛑 Stop event detected")
                         break
+                    # Check for speech events (VAD)
+                    if hasattr(response, 'speech_event_type'):
+                        event_type = response.speech_event_type
+                        if event_type == speech.StreamingRecognizeResponse.SpeechEventType.END_OF_SINGLE_UTTERANCE:
+                            log_info("🏁 Google STT: End of single utterance detected")
+                            # Google otomatik olarak stream'i kapatacak
+                            break
+                    # Process results
                     if not response.results:
                         continue
+                    for result in response.results:
                         if not result.alternatives:
                             continue
                         alternative = result.alternatives[0]
                         if alternative.transcript.strip():
                             # Create transcription result
                             transcription = TranscriptionResult(
                                 text=alternative.transcript,
                                 is_final=result.is_final,
+                                confidence=getattr(alternative, 'confidence', 0.0),
                                 timestamp=datetime.now().timestamp()
                             )
                             # Put result in queue
                             self._put_result(transcription)
                             if result.is_final:
+                                log_info(f"🎯 FINAL TRANSCRIPT: '{alternative.transcript}'")
+                                # Single utterance modunda Google STT otomatik kapanır
                                 if self.streaming_config.single_utterance:
+                                    log_info("✅ Single utterance mode - Google STT will close stream")
+                                    # Loop otomatik sonlanacak
+                            else:
+                                log_debug(f"📝 Interim: '{alternative.transcript}'")
+                log_info("📊 Google STT stream ended normally")
             except Exception as e:
                 error_msg = str(e)
+                # Google STT'nin normal kapanma durumları
+                if "iterating requests" in error_msg:
+                    log_info("✅ Google STT stream closed normally (end of utterance)")
+                elif "Exceeded maximum allowed stream duration" in error_msg:
+                    log_warning("⚠️ Stream duration limit (5 min)")
                 else:
+                    log_error(f"❌ Google STT error: {error_msg}")
         except Exception as e:
+            log_error(f"❌ Fatal error in STT stream", error=str(e))
         finally:
             log_info("🎤 Google STT stream thread ended")
             self.is_streaming = False