Spaces:
Paused
Paused
Update backend/main.py
Browse files
Passing sampling rate parameter to ASR processor
- backend/main.py +1 -1
backend/main.py
CHANGED
@@ -292,7 +292,7 @@ async def incoming_audio(sid, data, call_id):
|
|
292 |
tgt_sid = next(id for id in rooms[call_id] if id != sid)
|
293 |
tgt_lang = clients[tgt_sid].target_language
|
294 |
# following example from https://github.com/facebookresearch/seamless_communication/blob/main/docs/m4t/README.md#transformers-usage
|
295 |
-
output_tokens = processor(audios=resampled_audio, src_lang=src_lang, return_tensors="pt").to(device)
|
296 |
model_output = model.generate(**output_tokens, tgt_lang=src_lang, generate_speech=False)[0].tolist()[0]
|
297 |
asr_text = processor.decode(model_output, skip_special_tokens=True)
|
298 |
print(f"ASR TEXT = {asr_text}")
|
|
|
292 |
tgt_sid = next(id for id in rooms[call_id] if id != sid)
|
293 |
tgt_lang = clients[tgt_sid].target_language
|
294 |
# following example from https://github.com/facebookresearch/seamless_communication/blob/main/docs/m4t/README.md#transformers-usage
|
295 |
+
output_tokens = processor(audios=resampled_audio, src_lang=src_lang, return_tensors="pt", sampling_rate=TARGET_SAMPLING_RATE).to(device)
|
296 |
model_output = model.generate(**output_tokens, tgt_lang=src_lang, generate_speech=False)[0].tolist()[0]
|
297 |
asr_text = processor.decode(model_output, skip_special_tokens=True)
|
298 |
print(f"ASR TEXT = {asr_text}")
|