Spaces:
Building
Building
Update stt/stt_google.py
Browse files
- stt/stt_google.py +7 -8
stt/stt_google.py
CHANGED
@@ -65,9 +65,11 @@ class GoogleSTT(STTInterface):
|
|
65 |
|
66 |
# Default to the language itself if not in map
|
67 |
return language_map.get(language, language)
|
68 |
-
|
69 |
async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
|
|
|
70 |
try:
|
|
|
71 |
if not audio_data:
|
72 |
log_warning("⚠️ No audio data provided")
|
73 |
return None
|
@@ -138,10 +140,7 @@ class GoogleSTT(STTInterface):
|
|
138 |
log_warning(f"⚠️ Audio is mostly zeros: {zero_count/total_samples:.1%}")
|
139 |
return None
|
140 |
|
141 |
-
#
|
142 |
-
wav_audio = self._convert_to_wav(audio_data, config.sample_rate)
|
143 |
-
|
144 |
-
# Configure recognition
|
145 |
recognition_config = RecognitionConfig(
|
146 |
encoding=RecognitionConfig.AudioEncoding.LINEAR16,
|
147 |
sample_rate_hertz=16000,
|
@@ -151,11 +150,11 @@ class GoogleSTT(STTInterface):
|
|
151 |
enable_automatic_punctuation=True,
|
152 |
)
|
153 |
|
154 |
-
#
|
155 |
-
audio = RecognitionAudio(content=wav_audio)
|
156 |
|
157 |
# Perform synchronous recognition
|
158 |
-
log_info(f"🔄 Sending
|
159 |
response = self.client.recognize(config=recognition_config, audio=audio)
|
160 |
|
161 |
# ✅ Detaylı response analizi
|
|
|
65 |
|
66 |
# Default to the language itself if not in map
|
67 |
return language_map.get(language, language)
|
68 |
+
|
69 |
async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
|
70 |
+
"""Transcribe audio data using Google Cloud Speech API"""
|
71 |
try:
|
72 |
+
# Check if we have audio to transcribe
|
73 |
if not audio_data:
|
74 |
log_warning("⚠️ No audio data provided")
|
75 |
return None
|
|
|
140 |
log_warning(f"⚠️ Audio is mostly zeros: {zero_count/total_samples:.1%}")
|
141 |
return None
|
142 |
|
143 |
+
# ✅ Configure recognition - RAW PCM için
|
|
|
|
|
|
|
144 |
recognition_config = RecognitionConfig(
|
145 |
encoding=RecognitionConfig.AudioEncoding.LINEAR16,
|
146 |
sample_rate_hertz=16000,
|
|
|
150 |
enable_automatic_punctuation=True,
|
151 |
)
|
152 |
|
153 |
+
# ✅ RAW audio gönder, WAV conversion yapmadan
|
154 |
+
audio = RecognitionAudio(content=audio_data) # Direkt raw PCM
|
155 |
|
156 |
# Perform synchronous recognition
|
157 |
+
log_info(f"🔄 Sending {len(audio_data)} bytes RAW PCM to Google Cloud Speech API...")
|
158 |
response = self.client.recognize(config=recognition_config, audio=audio)
|
159 |
|
160 |
# ✅ Detaylı response analizi
|