Spaces:
Runtime error
Runtime error
Update duplex.py
Browse files
duplex.py
CHANGED
@@ -19,6 +19,7 @@ from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2P
|
|
19 |
DEBUG = os.environ.get("DEBUG", "false")[0] in "ty1"
|
20 |
MAX_LENGTH = int(os.environ.get("MAX_LENGTH", 1024))
|
21 |
DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "English")
|
|
|
22 |
|
23 |
HEADER = """
|
24 |
# Poor Man's Duplex
|
@@ -34,8 +35,8 @@ FOOTER = """
|
|
34 |
""".strip()
|
35 |
|
36 |
asr_model_name_es = "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"
|
37 |
-
model_instance_es = AutoModelForCTC.from_pretrained(asr_model_name_es)
|
38 |
-
processor_es = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model_name_es)
|
39 |
asr_es = pipeline(
|
40 |
"automatic-speech-recognition",
|
41 |
model=model_instance_es,
|
@@ -44,7 +45,7 @@ asr_es = pipeline(
|
|
44 |
decoder=processor_es.decoder
|
45 |
)
|
46 |
tts_model_name = "facebook/tts_transformer-es-css10"
|
47 |
-
speak_es = gr.Interface.load(f"huggingface/{tts_model_name}")
|
48 |
transcribe_es = lambda input_file: asr_es(input_file, chunk_length_s=5, stride_length_s=1)["text"]
|
49 |
def generate_es(text, **kwargs):
|
50 |
# max_length=100, top_k=100, top_p=50, temperature=0.95, do_sample=True, do_clean=True
|
@@ -68,13 +69,13 @@ asr_en = pipeline(
|
|
68 |
decoder=processor_en.decoder
|
69 |
)
|
70 |
tts_model_name = "facebook/fastspeech2-en-ljspeech"
|
71 |
-
speak_en = gr.Interface.load(f"huggingface/{tts_model_name}")
|
72 |
transcribe_en = lambda input_file: asr_en(input_file, chunk_length_s=5, stride_length_s=1)["text"]
|
73 |
-
generate_iface = gr.Interface.load("huggingface/EleutherAI/gpt-j-6B")
|
74 |
|
75 |
empty_audio = 'empty.flac'
|
76 |
sf.write(empty_audio, [], 16000)
|
77 |
-
deuncase = gr.Interface.load("huggingface/pere/DeUnCaser")
|
78 |
|
79 |
def generate_en(text, **kwargs):
|
80 |
response = generate_iface(text)
|
|
|
19 |
DEBUG = os.environ.get("DEBUG", "false")[0] in "ty1"
|
20 |
MAX_LENGTH = int(os.environ.get("MAX_LENGTH", 1024))
|
21 |
DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "English")
|
22 |
+
HF_AUTH_TOKEN = os.environ.get("HF_AUTH_TOKEN", None)
|
23 |
|
24 |
HEADER = """
|
25 |
# Poor Man's Duplex
|
|
|
35 |
""".strip()
|
36 |
|
37 |
asr_model_name_es = "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"
|
38 |
+
model_instance_es = AutoModelForCTC.from_pretrained(asr_model_name_es, use_auth_token=HF_AUTH_TOKEN)
|
39 |
+
processor_es = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model_name_es, use_auth_token=HF_AUTH_TOKEN)
|
40 |
asr_es = pipeline(
|
41 |
"automatic-speech-recognition",
|
42 |
model=model_instance_es,
|
|
|
45 |
decoder=processor_es.decoder
|
46 |
)
|
47 |
tts_model_name = "facebook/tts_transformer-es-css10"
|
48 |
+
speak_es = gr.Interface.load(f"huggingface/{tts_model_name}", api_key=HF_AUTH_TOKEN)
|
49 |
transcribe_es = lambda input_file: asr_es(input_file, chunk_length_s=5, stride_length_s=1)["text"]
|
50 |
def generate_es(text, **kwargs):
|
51 |
# max_length=100, top_k=100, top_p=50, temperature=0.95, do_sample=True, do_clean=True
|
|
|
69 |
decoder=processor_en.decoder
|
70 |
)
|
71 |
tts_model_name = "facebook/fastspeech2-en-ljspeech"
|
72 |
+
speak_en = gr.Interface.load(f"huggingface/{tts_model_name}", api_key=HF_AUTH_TOKEN)
|
73 |
transcribe_en = lambda input_file: asr_en(input_file, chunk_length_s=5, stride_length_s=1)["text"]
|
74 |
+
generate_iface = gr.Interface.load("huggingface/EleutherAI/gpt-j-6B", api_key=HF_AUTH_TOKEN)
|
75 |
|
76 |
empty_audio = 'empty.flac'
|
77 |
sf.write(empty_audio, [], 16000)
|
78 |
+
deuncase = gr.Interface.load("huggingface/pere/DeUnCaser", api_key=HF_AUTH_TOKEN)
|
79 |
|
80 |
def generate_en(text, **kwargs):
|
81 |
response = generate_iface(text)
|