Spaces:
Building
Building
Update stt/stt_google.py
Browse files- stt/stt_google.py +11 -13
stt/stt_google.py
CHANGED
@@ -343,30 +343,28 @@ class GoogleCloudSTT(STTInterface):
|
|
343 |
self.client = speech.SpeechClient()
|
344 |
log_info("✅ Created new Google Speech client")
|
345 |
|
346 |
-
|
347 |
-
recognition_config = speech.RecognitionConfig(
|
348 |
encoding=self._get_encoding(config.encoding),
|
349 |
sample_rate_hertz=config.sample_rate,
|
350 |
language_code=config.language,
|
351 |
enable_automatic_punctuation=config.enable_punctuation,
|
352 |
-
model=
|
353 |
-
use_enhanced=
|
354 |
-
max_alternatives=1,
|
355 |
-
metadata=speech.RecognitionMetadata(
|
356 |
-
interaction_type=speech.RecognitionMetadata.InteractionType.VOICE_SEARCH,
|
357 |
-
microphone_distance=speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD,
|
358 |
-
recording_device_type=speech.RecognitionMetadata.RecordingDeviceType.PC,
|
359 |
-
)
|
360 |
)
|
361 |
|
362 |
-
# Create streaming config with VAD
|
363 |
self.streaming_config = speech.StreamingRecognitionConfig(
|
364 |
config=recognition_config,
|
365 |
interim_results=config.interim_results,
|
366 |
-
single_utterance=config.single_utterance
|
367 |
-
enable_voice_activity_events=True # ✅ VAD events enabled
|
368 |
)
|
369 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
370 |
log_info(f"π Streaming config created: interim_results={config.interim_results}, "
|
371 |
f"single_utterance={config.single_utterance}, "
|
372 |
f"VAD_events=True")
|
|
|
343 |
self.client = speech.SpeechClient()
|
344 |
log_info("✅ Created new Google Speech client")
|
345 |
|
346 |
+
recognition_config = speech.RecognitionConfig(
|
|
|
347 |
encoding=self._get_encoding(config.encoding),
|
348 |
sample_rate_hertz=config.sample_rate,
|
349 |
language_code=config.language,
|
350 |
enable_automatic_punctuation=config.enable_punctuation,
|
351 |
+
model="latest_long",
|
352 |
+
use_enhanced=True
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
)
|
354 |
|
|
|
355 |
self.streaming_config = speech.StreamingRecognitionConfig(
|
356 |
config=recognition_config,
|
357 |
interim_results=config.interim_results,
|
358 |
+
single_utterance=config.single_utterance
|
|
|
359 |
)
|
360 |
|
361 |
+
self.is_streaming = True
|
362 |
+
self.stop_event.clear()
|
363 |
+
|
364 |
+
# Start streaming thread
|
365 |
+
self.stream_thread = threading.Thread(target=self._run_stream)
|
366 |
+
self.stream_thread.start()
|
367 |
+
|
368 |
log_info(f"π Streaming config created: interim_results={config.interim_results}, "
|
369 |
f"single_utterance={config.single_utterance}, "
|
370 |
f"VAD_events=True")
|