ciyidogan committed on
Commit
98d7635
·
verified ·
1 Parent(s): dcc497e

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +45 -15
stt/stt_google.py CHANGED
@@ -140,7 +140,9 @@ class GoogleSTT(STTInterface):
140
  log_warning(f"⚠️ Audio is mostly zeros: {zero_count/total_samples:.1%}")
141
  return None
142
 
143
- # Configure recognition - RAW PCM için
 
 
144
  recognition_config = RecognitionConfig(
145
  encoding=RecognitionConfig.AudioEncoding.LINEAR16,
146
  sample_rate_hertz=16000,
@@ -150,6 +152,9 @@ class GoogleSTT(STTInterface):
150
  enable_automatic_punctuation=True,
151
  )
152
 
 
 
 
153
  # ✅ RAW audio gönder, WAV conversion yapmadan
154
  audio = RecognitionAudio(content=audio_data) # Direkt raw PCM
155
 
@@ -201,20 +206,45 @@ class GoogleSTT(STTInterface):
201
  return None
202
 
203
  def _convert_to_wav(self, audio_data: bytes, sample_rate: int) -> bytes:
204
- """Convert raw PCM audio to WAV format"""
205
- # Create WAV file in memory
206
- wav_buffer = io.BytesIO()
207
-
208
- with wave.open(wav_buffer, 'wb') as wav_file:
209
- # Set WAV parameters
210
- wav_file.setnchannels(1) # Mono
211
- wav_file.setsampwidth(2) # 16-bit
212
- wav_file.setframerate(sample_rate)
213
- wav_file.writeframes(audio_data)
214
-
215
- # Get WAV data
216
- wav_buffer.seek(0)
217
- return wav_buffer.read()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
  def get_supported_languages(self) -> List[str]:
220
  """Get list of supported language codes"""
 
140
  log_warning(f"⚠️ Audio is mostly zeros: {zero_count/total_samples:.1%}")
141
  return None
142
 
143
+ wav_audio = self._convert_to_wav(audio_data, 16000)
144
+
145
+ # Configure recognition
146
  recognition_config = RecognitionConfig(
147
  encoding=RecognitionConfig.AudioEncoding.LINEAR16,
148
  sample_rate_hertz=16000,
 
152
  enable_automatic_punctuation=True,
153
  )
154
 
155
+ # ✅ WAV audio gönder
156
+ audio = RecognitionAudio(content=wav_audio)
157
+
158
  # ✅ RAW audio gönder, WAV conversion yapmadan
159
  audio = RecognitionAudio(content=audio_data) # Direkt raw PCM
160
 
 
206
  return None
207
 
208
  def _convert_to_wav(self, audio_data: bytes, sample_rate: int) -> bytes:
209
+ """Convert raw PCM to proper WAV format"""
210
+ try:
211
+ import struct
212
+
213
+ # WAV file parameters
214
+ channels = 1
215
+ sample_width = 2 # 16-bit
216
+ frame_rate = sample_rate
217
+ audio_length = len(audio_data)
218
+
219
+ # Create proper WAV header
220
+ wav_header = struct.pack('<4sI4s4sIHHIIHH4sI',
221
+ b'RIFF', # ChunkID
222
+ 36 + audio_length, # ChunkSize
223
+ b'WAVE', # Format
224
+ b'fmt ', # Subchunk1ID
225
+ 16, # Subchunk1Size (PCM)
226
+ 1, # AudioFormat (PCM = 1)
227
+ channels, # NumChannels
228
+ frame_rate, # SampleRate
229
+ frame_rate * channels * sample_width, # ByteRate
230
+ channels * sample_width, # BlockAlign
231
+ sample_width * 8, # BitsPerSample
232
+ b'data', # Subchunk2ID
233
+ audio_length # Subchunk2Size
234
+ )
235
+
236
+ # Combine header and audio data
237
+ wav_data = wav_header + audio_data
238
+
239
+ log_info(f"🔧 WAV conversion: {len(audio_data)} PCM → {len(wav_data)} WAV")
240
+ log_info(f"🔧 WAV specs: {channels}ch, {frame_rate}Hz, {sample_width*8}bit")
241
+
242
+ return wav_data
243
+
244
+ except Exception as e:
245
+ log_error(f"WAV conversion failed: {e}")
246
+ # Fallback to raw PCM
247
+ return audio_data
248
 
249
  def get_supported_languages(self) -> List[str]:
250
  """Get list of supported language codes"""