Spaces:

UcsTurkey
/

flare

Building

App Files Files Community

ciyidogan committed 8 days ago

Commit

89d0af3

verified ·

1 Parent(s): c4954b5

Update stt/stt_google.py

Browse files

Files changed (1) hide show

stt/stt_google.py +85 -85

stt/stt_google.py CHANGED Viewed

@@ -177,96 +177,96 @@ class GoogleSTT(STTInterface):
             log_error(f"❌ Silence trimming failed: {e}")
             return audio_data
-async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
-        """Transcribe audio data using Google Cloud Speech API"""
-        try:
-            # Check if we have audio to transcribe
-            if not audio_data:
-                log_warning("⚠️ No audio data provided")
-                return None
-            log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
-            # ✅ Audio analizi
-            self._analyze_audio_content(audio_data)
-            # ✅ Silence trimming ekle
-            trimmed_audio = self._trim_silence(audio_data)
-            if len(trimmed_audio) < 8000:  # 0.5 saniyeden az
-                log_warning("⚠️ Audio too short after trimming")
-                return None
-            # ✅ Test kodundan EXACT aynı format - wave modülü kullan
-            wav_audio = self._create_wav_like_test(trimmed_audio, config.sample_rate)
-            log_info(f"🔧 WAV conversion: {len(trimmed_audio)} PCM → {len(wav_audio)} WAV")
-            # Configure recognition - TEST KODUNDAN EXACT AYNI
-            recognition_config = RecognitionConfig(
-                encoding=RecognitionConfig.AudioEncoding.LINEAR16,
-                sample_rate_hertz=config.sample_rate,
-                language_code="tr-TR",  # Hardcode tr-TR like test
-                audio_channel_count=1,
-                enable_separate_recognition_per_channel=False,
-            )
-            log_debug(f"Recognition config: language=tr-TR, sample_rate={config.sample_rate}")
-            # ✅ Create audio object with WAV data
-            audio = RecognitionAudio(content=wav_audio)
-            # Perform synchronous recognition
-            log_info(f"🔄 Sending {len(wav_audio)} bytes WAV to Google Cloud Speech API...")
-            response = self.client.recognize(config=recognition_config, audio=audio)
-            # ✅ Detaylı response analizi
-            log_debug(f"API Response: {response}")
-            log_info(f"🔍 Google response details:")
-            log_info(f"- Has results: {bool(response.results)}")
-            log_info(f"- Results count: {len(response.results)}")
-            # ✅ Request ID'yi logla
-            if hasattr(response, '_pb') and hasattr(response._pb, 'request_id'):
-                log_info(f"- Request ID: {response._pb.request_id}")
-            if hasattr(response, 'total_billed_time'):
-                billed_seconds = response.total_billed_time.total_seconds()
-                log_info(f"- Billed time: {billed_seconds}s")
-                # ✅ Eğer billed time 0 ise, Google hiç audio işlememiş demektir
-                if billed_seconds == 0:
-                    log_error("❌ Google didn't process any audio - possible format issue")
                     return None
-            else:
-                log_info(f"- Billed time: 0s (no audio processed)")
-            # Process results
-            if response.results:
-                for i, result in enumerate(response.results):
-                    log_debug(f"Result {i}: {result}")
-                    if result.alternatives:
-                        alternative = result.alternatives[0]
-                        transcription = TranscriptionResult(
-                            text=alternative.transcript,
-                            confidence=alternative.confidence,
-                            timestamp=datetime.now().timestamp(),
-                            language="tr-TR",
-                            word_timestamps=None
-                        )
-                        log_info(f"✅ Transcription: '{alternative.transcript}' (confidence: {alternative.confidence:.2f})")
-                        return transcription
-            log_warning("⚠️ No transcription results - Google couldn't recognize speech")
-            return None
-        except Exception as e:
-            log_error(f"❌ Error during transcription: {str(e)}")
-            import traceback
-            log_error(f"Traceback: {traceback.format_exc()}")
-            return None
     def _create_wav_like_test(self, audio_data: bytes, sample_rate: int) -> bytes:
         """Create WAV exactly like test code using wave module"""

             log_error(f"❌ Silence trimming failed: {e}")
             return audio_data
+    async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
+            """Transcribe audio bytes with the Google Cloud Speech API; return a TranscriptionResult, or None when there is no usable audio, no recognized speech, or an error occurs."""
+            try:
+                # Check if we have audio to transcribe
+                if not audio_data:
+                    log_warning("⚠️ No audio data provided")
+                    return None
+                log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
+                # ✅ Audio analysis
+                self._analyze_audio_content(audio_data)
+                # ✅ Apply silence trimming
+                trimmed_audio = self._trim_silence(audio_data)
+                if len(trimmed_audio) < 8000:  # less than 0.5 s per the original note — holds only for 8 kHz 16-bit mono; TODO confirm
+                    log_warning("⚠️ Audio too short after trimming")
                     return None
+                # ✅ EXACT same format as the test code - use the wave module
+                wav_audio = self._create_wav_like_test(trimmed_audio, config.sample_rate)
+                log_info(f"🔧 WAV conversion: {len(trimmed_audio)} PCM → {len(wav_audio)} WAV")
+                # Configure recognition - EXACTLY THE SAME AS THE TEST CODE
+                recognition_config = RecognitionConfig(
+                    encoding=RecognitionConfig.AudioEncoding.LINEAR16,
+                    sample_rate_hertz=config.sample_rate,
+                    language_code="tr-TR",  # Hardcode tr-TR like test
+                    audio_channel_count=1,
+                    enable_separate_recognition_per_channel=False,
+                )
+                log_debug(f"Recognition config: language=tr-TR, sample_rate={config.sample_rate}")
+                # ✅ Create audio object with WAV data
+                audio = RecognitionAudio(content=wav_audio)
+                # Perform synchronous recognition. NOTE(review): the call is not awaited inside this async def, so it presumably blocks the event loop while the request runs — confirm
+                log_info(f"🔄 Sending {len(wav_audio)} bytes WAV to Google Cloud Speech API...")
+                response = self.client.recognize(config=recognition_config, audio=audio)
+                # ✅ Detailed response analysis
+                log_debug(f"API Response: {response}")
+                log_info(f"🔍 Google response details:")
+                log_info(f"- Has results: {bool(response.results)}")
+                log_info(f"- Results count: {len(response.results)}")
+                # ✅ Log the request ID
+                if hasattr(response, '_pb') and hasattr(response._pb, 'request_id'):
+                    log_info(f"- Request ID: {response._pb.request_id}")
+                if hasattr(response, 'total_billed_time'):
+                    billed_seconds = response.total_billed_time.total_seconds()
+                    log_info(f"- Billed time: {billed_seconds}s")
+                    # ✅ If billed time is 0, Google did not process any audio
+                    if billed_seconds == 0:
+                        log_error("❌ Google didn't process any audio - possible format issue")
+                        return None
+                else:
+                    log_info(f"- Billed time: 0s (no audio processed)")
+                # Process results: return the first result that has alternatives, using its first alternative
+                if response.results:
+                    for i, result in enumerate(response.results):
+                        log_debug(f"Result {i}: {result}")
+                        if result.alternatives:
+                            alternative = result.alternatives[0]
+                            transcription = TranscriptionResult(
+                                text=alternative.transcript,
+                                confidence=alternative.confidence,
+                                timestamp=datetime.now().timestamp(),
+                                language="tr-TR",
+                                word_timestamps=None
+                            )
+                            log_info(f"✅ Transcription: '{alternative.transcript}' (confidence: {alternative.confidence:.2f})")
+                            return transcription
+                log_warning("⚠️ No transcription results - Google couldn't recognize speech")
+                return None
+            except Exception as e:
+                log_error(f"❌ Error during transcription: {str(e)}")
+                import traceback
+                log_error(f"Traceback: {traceback.format_exc()}")
+                return None
     def _create_wav_like_test(self, audio_data: bytes, sample_rate: int) -> bytes:
         """Create WAV exactly like test code using wave module"""