Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ import soundfile as sf
|
|
9 |
transcriber = pipeline("automatic-speech-recognition", model="facebook/s2t-small-librispeech-asr")
|
10 |
|
11 |
# Initialize LLM pipeline
|
12 |
-
generator = pipeline("text-generation", model="
|
13 |
|
14 |
# Initialize TTS tokenizer and model
|
15 |
tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
|
@@ -33,7 +33,7 @@ def transcribe_and_generate_audio(audio):
|
|
33 |
outputs = model(**inputs)
|
34 |
waveform = outputs.waveform[0]
|
35 |
waveform_path = "output.wav"
|
36 |
-
sf.write(waveform_path, waveform.numpy(),
|
37 |
|
38 |
return waveform_path
|
39 |
|
|
|
9 |
transcriber = pipeline("automatic-speech-recognition", model="facebook/s2t-small-librispeech-asr")
|
10 |
|
11 |
# Initialize LLM pipeline
|
12 |
+
generator = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct")
|
13 |
|
14 |
# Initialize TTS tokenizer and model
|
15 |
tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
|
|
|
33 |
outputs = model(**inputs)
|
34 |
waveform = outputs.waveform[0]
|
35 |
waveform_path = "output.wav"
|
36 |
+
sf.write(waveform_path, waveform.numpy(), 16000, format='wav')
|
37 |
|
38 |
return waveform_path
|
39 |
|