ciyidogan committed on
Commit
b728d57
·
verified ·
1 Parent(s): 6abf273

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +7 -8
stt/stt_google.py CHANGED
@@ -65,9 +65,11 @@ class GoogleSTT(STTInterface):
65
 
66
  # Default to the language itself if not in map
67
  return language_map.get(language, language)
68
-
69
  async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
 
70
  try:
 
71
  if not audio_data:
72
  log_warning("⚠️ No audio data provided")
73
  return None
@@ -138,10 +140,7 @@ class GoogleSTT(STTInterface):
138
  log_warning(f"⚠️ Audio is mostly zeros: {zero_count/total_samples:.1%}")
139
  return None
140
 
141
- # Convert to WAV format
142
- wav_audio = self._convert_to_wav(audio_data, config.sample_rate)
143
-
144
- # Configure recognition
145
  recognition_config = RecognitionConfig(
146
  encoding=RecognitionConfig.AudioEncoding.LINEAR16,
147
  sample_rate_hertz=16000,
@@ -151,11 +150,11 @@ class GoogleSTT(STTInterface):
151
  enable_automatic_punctuation=True,
152
  )
153
 
154
- # Create audio object
155
- audio = RecognitionAudio(content=wav_audio)
156
 
157
  # Perform synchronous recognition
158
- log_info(f"🔄 Sending audio to Google Cloud Speech API...")
159
  response = self.client.recognize(config=recognition_config, audio=audio)
160
 
161
  # ✅ Detaylı response analizi
 
65
 
66
  # Default to the language itself if not in map
67
  return language_map.get(language, language)
68
+
69
  async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
70
+ """Transcribe audio data using Google Cloud Speech API"""
71
  try:
72
+ # Check if we have audio to transcribe
73
  if not audio_data:
74
  log_warning("⚠️ No audio data provided")
75
  return None
 
140
  log_warning(f"⚠️ Audio is mostly zeros: {zero_count/total_samples:.1%}")
141
  return None
142
 
143
+ # Configure recognition - RAW PCM için
 
 
 
144
  recognition_config = RecognitionConfig(
145
  encoding=RecognitionConfig.AudioEncoding.LINEAR16,
146
  sample_rate_hertz=16000,
 
150
  enable_automatic_punctuation=True,
151
  )
152
 
153
+ # RAW audio gönder, WAV conversion yapmadan
154
+ audio = RecognitionAudio(content=audio_data) # Direkt raw PCM
155
 
156
  # Perform synchronous recognition
157
+ log_info(f"🔄 Sending {len(audio_data)} bytes RAW PCM to Google Cloud Speech API...")
158
  response = self.client.recognize(config=recognition_config, audio=audio)
159
 
160
  # ✅ Detaylı response analizi