add mRASP2
app.py
CHANGED
@@ -19,12 +19,24 @@ language_id_lookup = {
     "French" : "fr",
 }

+# load mRASP2
+os.system("git clone https://github.com/PANXiao1994/mRASP2.git")
+os.system('mv -n mRASP2/* ./')
+os.system("rm -rf mRASP2")
+os.system("pip install -r requirements.txt")
+os.system("git clone https://github.com/pytorch/fairseq")
+os.system("pip install --editable ./fairseq/")
+
+model_name = "6e6d_no_mono.pt"
+os.system("wget https://lf3-nlp-opensource.bytetos.com/obj/nlp-opensource/acl2021/mrasp2/" + model_name)
+os.system("wget https://lf3-nlp-opensource.bytetos.com/obj/nlp-opensource/acl2021/mrasp2/bpe_vocab")
+os.system("wget https://lf3-nlp-opensource.bytetos.com/obj/nlp-opensource/emnlp2020/mrasp/pretrain/dataset/codes.bpe.32000")


 # The predict function. audio, language and mic_audio are all parameters directly passed by gradio
 # which means they are user inputted. They are specified in gr.inputs[] block at the bottom. The
 # gr.outputs[] block will specify the output type.
-def predict(audio, language, mic_audio=None):
+def predict(audio, src_language, tgt_language, mic_audio=None):

     # checks if mic_audio is used, otherwise feeds model uploaded audio
     if mic_audio is not None:
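Note on the setup block above: every os.system call runs unconditionally each time the Space starts, so the repositories, the checkpoint and the BPE files are fetched again on every restart. Below is a minimal sketch of how the downloads could be skipped when the files are already on disk; the filenames and URLs are the ones from the commit, but the guard itself and the use of subprocess are assumptions, not part of app.py.

import os
import subprocess

# Filenames and URLs exactly as in the commit above.
FILES = {
    "6e6d_no_mono.pt": "https://lf3-nlp-opensource.bytetos.com/obj/nlp-opensource/acl2021/mrasp2/6e6d_no_mono.pt",
    "bpe_vocab": "https://lf3-nlp-opensource.bytetos.com/obj/nlp-opensource/acl2021/mrasp2/bpe_vocab",
    "codes.bpe.32000": "https://lf3-nlp-opensource.bytetos.com/obj/nlp-opensource/emnlp2020/mrasp/pretrain/dataset/codes.bpe.32000",
}

for fname, url in FILES.items():
    if not os.path.exists(fname):
        # Shell out the same way app.py does, but fail loudly and
        # only re-download files that are missing.
        subprocess.run(f"wget {url}", shell=True, check=True)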
@@ -43,22 +55,51 @@ def predict(audio, language, mic_audio=None):

     # if model is supposed to detect language, set outLanguage to None
     # otherwise set to specified language
-    if(
-    [old line not recoverable from this render]
+    if(src_language == "Detect Language"):
+        src_language = None
     else:
-    [old line not recoverable from this render]
+        src_language = language_id_lookup[src_language.split()[0]]

     # Runs the audio through the whisper model and gets the DecodingResult object, which has the features:
     # audio_features (Tensor), language, language_probs, tokens, text, avg_logprob, no_speech_prob, temperature, compression_ratio

-    options = whisper.DecodingOptions(fp16 = False, language =
+    options = whisper.DecodingOptions(fp16 = False, language = src_language)
     result = whisper.decode(model, mel, options)
-    [six old lines not recoverable from this render]
+    if src_language is None:
+        src_language = result.language
+
+    with open("input." + src_language, 'w') as w:
+        w.write(result.text)
+    with open("input." + tgt_language, 'w') as w:
+        w.write('LANG_TOK_' + src_language.upper())
+
+    os.system("python fairseq/fairseq_cli/preprocess.py --dataset-impl raw \
+        --srcdict bpe_vocab --tgtdict bpe_vocab --testpref input -s {} -t {}".format( \
+        src_language, tgt_language))
+
+    os.system("python fairseq/fairseq_cli/interactive.py /mnt/data2/siqiouyang/demo/mRASP2/data-bin \
+        --user-dir mcolt \
+        -s zh \
+        -t en \
+        --skip-invalid-size-inputs-valid-test \
+        --path {} \
+        --max-tokens 1024 \
+        --task translation_w_langtok \
+        --lang-prefix-tok \"LANG_TOK_{}\" \
+        --max-source-positions 1024 \
+        --max-target-positions 1024 \
+        --nbest 1 \
+        --bpe subword_nmt \
+        --bpe-codes codes.bpe.32000 \
+        --post-process --tokenizer moses \
+        --input input.{} | grep -E '[D]-[0-9]+' > output".format(
+        model_name, tgt_language.upper(), src_language))
+
+    with open("output", 'r') as r:
+        translation = (' '.join(r.readline().split(' ')[3:])).strip()
+
+    # Returns the text
+    return translation



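The last step of the new predict() reads the first grep-matched hypothesis line from the file "output" and keeps everything after the first three space-separated fields. A self-contained illustration of just that string slicing follows; the sample line is invented for the example, and the real fields emitted by fairseq may differ.

# Invented sample line, only to show what split(' ')[3:] keeps.
sample = "D-0 -0.41 LANG_TOK_FR Bonjour tout le monde"
translation = (' '.join(sample.split(' ')[3:])).strip()
print(translation)   # -> "Bonjour tout le monde"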
@@ -75,14 +116,20 @@ gr.Interface(
     fn=predict,
     inputs=[
         gr.Audio(label="Upload Speech", source="upload", type="filepath"),
-        gr.inputs.Dropdown(['Arabic
-                            'Chinese
-                            'English
-        [five more old lines not recoverable from this render]
+        gr.inputs.Dropdown(['Arabic',
+                            'Chinese',
+                            'English',
+                            'Spanish',
+                            'Russian',
+                            'French',
+                            'Detect Language'], type="value", default='English', label="Select the language of input"),
+        gr.inputs.Dropdown(['Arabic',
+                            'Chinese',
+                            'English',
+                            'Spanish',
+                            'Russian',
+                            'French',
+                            'Detect Language'], type="value", default='English', label="Select the language of output"),
         gr.Audio(label="Record Speech", source="microphone", type="filepath"),
     ],
     # To change to output audio, replace the outputs line with
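The dropdown values added above are full language names such as 'French'. Inside predict(), the source language is reduced to a Whisper language code through the language_id_lookup dict defined near the top of app.py; only its "French": "fr" entry is visible in this diff's context lines. A tiny, self-contained illustration of that mapping, with a one-entry stand-in for the real dict:

# Stand-in for the real language_id_lookup; only the "French" entry
# appears in the context lines of this diff.
language_id_lookup = {"French": "fr"}

label = "French"                                # value coming from the dropdown
code = language_id_lookup[label.split()[0]]     # "fr", later passed to whisper.DecodingOptions
print(code)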
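For completeness, here is a hypothetical way to exercise the new predict() outside the Gradio UI, assuming app.py has already been executed, that a local file "sample.wav" exists, and that the argument strings mirror the dropdown choices; none of this is part of the commit.

# Hypothetical smoke test; "sample.wav" and the argument values are assumptions.
result_text = predict("sample.wav", "English", "French")
print(result_text)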