Update app.py
app.py CHANGED
@@ -1,8 +1,6 @@
 import gradio as gr
 import torch
 from transformers import pipeline
-from nemo.collections.asr.models import EncDecMultiTaskModel
-from transformers import VitsTokenizer, VitsModel
 
 # Load Canary ASR model
 canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
@@ -10,8 +8,8 @@ decode_cfg = canary_model.cfg.decoding
 decode_cfg.beam.beam_size = 1
 canary_model.change_decoding_strategy(decode_cfg)
 
-# Load Phi-3 Mini-
-phi_3_model_id = "microsoft/Phi-3-mini-
+# Load Phi-3 Mini-4K-Instruct LLM model
+phi_3_model_id = "microsoft/Phi-3-mini-4k-instruct"
 phi_3_pipeline = pipeline("text-generation", model=phi_3_model_id, trust_remote_code=True)
 
 # Load VITS TTS model
@@ -23,7 +21,7 @@ def transcribe_audio(audio):
     return transcribed_text
 
 def generate_response(prompt):
-    response = phi_3_pipeline(prompt)[0]['generated_text']
+    response = phi_3_pipeline(prompt, max_length=50, num_return_sequences=1)[0]['generated_text']
     return response
 
 def synthesize_speech(text):
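A note on the generate_response change: in the transformers text-generation pipeline, max_length counts the prompt tokens plus the newly generated tokens, so a long transcription can leave little or no room for the reply. If only the reply length should be capped, max_new_tokens is the usual alternative; a minimal sketch of that variant (not what this commit does):

```python
# Sketch only: same pipeline call as the updated generate_response, but capping
# newly generated tokens instead of the total sequence length.
def generate_response(prompt):
    response = phi_3_pipeline(prompt, max_new_tokens=50, num_return_sequences=1)[0]['generated_text']
    return response
```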
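The body of transcribe_audio sits outside the hunks shown above, and the updated file still calls EncDecMultiTaskModel.from_pretrained even though the NeMo import line was removed, so that import presumably still needs to be present. A minimal sketch of the transcription step, assuming the Gradio audio input is a file path and NeMo's usual transcribe() API:

```python
# Sketch only: assumes `audio` is a file path (e.g. gr.Audio(type="filepath"))
# and that the Canary model exposes NeMo's standard transcribe() method.
from nemo.collections.asr.models import EncDecMultiTaskModel

def transcribe_audio(audio):
    # transcribe() takes a list of audio files and returns one hypothesis per file
    transcribed_text = canary_model.transcribe([audio])[0]
    return transcribed_text
```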
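Similarly, the "# Load VITS TTS model" section and synthesize_speech are not shown, and the commit drops the VitsTokenizer/VitsModel import. A minimal sketch of what that step can look like with the transformers VITS API; the checkpoint name below is an assumption, not something the diff shows:

```python
# Sketch only: the VITS checkpoint is a placeholder assumption.
import torch
from transformers import VitsTokenizer, VitsModel

vits_tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
vits_model = VitsModel.from_pretrained("facebook/mms-tts-eng")

def synthesize_speech(text):
    inputs = vits_tokenizer(text=text, return_tensors="pt")
    with torch.no_grad():
        output = vits_model(**inputs)
    # Gradio audio outputs accept a (sample_rate, numpy_waveform) tuple
    return vits_model.config.sampling_rate, output.waveform[0].numpy()
```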
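Finally, the file imports gradio but the diff does not show how the three functions are wired together. One plausible arrangement, chaining ASR → LLM → TTS behind a single audio-in/audio-out interface (the component choices here are assumptions):

```python
# Sketch only: the actual UI layout in app.py is not shown in the diff.
def voice_assistant(audio):
    prompt = transcribe_audio(audio)
    reply = generate_response(prompt)
    return synthesize_speech(reply)

demo = gr.Interface(
    fn=voice_assistant,
    inputs=gr.Audio(type="filepath", label="Speak"),
    outputs=gr.Audio(label="Assistant reply"),
)

demo.launch()
```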