Spaces:

Dy3257
/

translate

Sleeping

App Files Files Community

Dy3257 committed on May 18

Commit

535a983

•

1 Parent(s): 12daa16

Upload 19 files

Browse files

Files changed (20) hide show

.gitattributes +2 -0
app.py +55 -0
en-zh_model/config.json +10 -0
en-zh_model/model.bin +3 -0
en-zh_model/source_vocabulary.json +0 -0
en-zh_model/target_vocabulary.json +3 -0
en2zh_cmodel/config.json +10 -0
en2zh_cmodel/model.bin +3 -0
en2zh_cmodel/source_vocabulary.json +0 -0
en2zh_cmodel/target_vocabulary.json +0 -0
requirements.txt +1 -0
split.py +21 -0
zh-en_model/config.json +10 -0
zh-en_model/model.bin +3 -0
zh-en_model/source_vocabulary.json +3 -0
zh-en_model/target_vocabulary.json +0 -0
zh2en_cmodel/config.json +10 -0
zh2en_cmodel/model.bin +3 -0
zh2en_cmodel/source_vocabulary.json +0 -0
zh2en_cmodel/target_vocabulary.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+en-zh_model/target_vocabulary.json filter=lfs diff=lfs merge=lfs -text
+zh-en_model/source_vocabulary.json filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,55 @@

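+# This app-creation tool has three areas: a top toolbar, a code pane on the left, and an interactive preview pane on the right. The preview on the right is generated from the code on the left, so the two correspond.
+# Top toolbar: Run, Save, Open in Browser, and a live-preview toggle. Important notes on the Run and Open in Browser options:
+# [Run]: the preview is not updated in real time; after changing the code, click Run to get the latest result.
+# [Open in Browser]: opens a new page to view the interactive result.
+# Below is the example code for the app-creation tool.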
+import gradio as gr
+import ctranslate2
+from split import split_string
+def _load_cpu_translator(model_dir):
+    """Return a ctranslate2.Translator for the model in *model_dir*, on CPU."""
+    return ctranslate2.Translator(model_dir, device="cpu")
+
+
+# Two converted models are loaded per direction: the "*_model" directory
+# and the "*_cmodel" directory. Arguments are the model directory paths.
+translator_zh2en = _load_cpu_translator("zh-en_model/")
+translator2_zh2en = _load_cpu_translator("zh2en_cmodel/")
+translator_en2zh = _load_cpu_translator("en-zh_model/")
+translator2_en2zh = _load_cpu_translator("en2zh_cmodel")
+def _translate_text(translator, text):
+    """Translate one whitespace-tokenized string with *translator*.
+
+    The text is split on whitespace, grouped into segments by
+    ``split_string`` and passed to ``translator.translate_batch`` as a
+    single batch. The first hypothesis of every segment is concatenated
+    in order and the resulting tokens are joined with single spaces.
+    """
+    segments = split_string(text.split())
+    translated = []
+    for result in translator.translate_batch(segments):
+        # Only the first hypothesis of each segment is kept.
+        translated.extend(result.hypotheses[0])
+    return ' '.join(translated)
+
+
+def translate(input_tokens, input_tokens2, mode):
+    """Translate two inputs, each with its own model, in the chosen direction.
+
+    Args:
+        input_tokens: Whitespace-separated source tokens for the first
+            model (``translator_zh2en`` / ``translator_en2zh``).
+        input_tokens2: Whitespace-separated source tokens for the second
+            model (``translator2_zh2en`` / ``translator2_en2zh``).
+        mode: ``"汉译英"`` selects Chinese-to-English; any other value
+            selects English-to-Chinese.
+
+    Returns:
+        A 2-tuple of strings: the space-joined output tokens of the first
+        and of the second model.
+    """
+    if mode == "汉译英":
+        first, second = translator_zh2en, translator2_zh2en
+    else:
+        first, second = translator_en2zh, translator2_en2zh
+    return (
+        _translate_text(first, input_tokens),
+        _translate_text(second, input_tokens2),
+    )
+# Dropdown choices; translate() compares its mode argument against the
+# first label and treats every other value as the second direction.
+_MODE_CHOICES = ["汉译英", "英译汉"]
+
+# Two text inputs plus the direction selector in, two text outputs out.
+demo = gr.Interface(
+    fn=translate,
+    inputs=["text", "text", gr.Dropdown(_MODE_CHOICES)],
+    outputs=["text", "text"],
+)
+demo.launch()

en-zh_model/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "add_source_bos": false,
+  "add_source_eos": false,
+  "bos_token": "<s>",
+  "decoder_start_token": "<s>",
+  "eos_token": "</s>",
+  "layer_norm_epsilon": null,
+  "multi_query_attention": false,
+  "unk_token": "<unk>"
+}

en-zh_model/model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48dcd96223ed75212fa052d1ea1cf19346c3e529e2e7ee21eb9b35dcb71df4d6
+size 3465739918

en-zh_model/source_vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff

en-zh_model/target_vocabulary.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17d939e964beab69401ddbc855fd6122007754cab6313906d4db73c351522894
+size 11621611

en2zh_cmodel/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "add_source_bos": false,
+  "add_source_eos": true,
+  "bos_token": "<s>",
+  "decoder_start_token": "</s>",
+  "eos_token": "</s>",
+  "layer_norm_epsilon": null,
+  "multi_query_attention": false,
+  "unk_token": "<unk>"
+}

en2zh_cmodel/model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:151b3fde80e90cdd20552f0cf34e65a26b61e350405f7ed5d738b912544fd6bf
+size 468479022

en2zh_cmodel/source_vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff

en2zh_cmodel/target_vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ctranslate2==4.1.0

split.py ADDED Viewed

	@@ -0,0 +1,21 @@

+# Tokens that end a segment once the minimum length has been reached.
+_SENTENCE_ENDERS = (".", "。", "!", "！", "?", "？")
+# Past the soft limit a segment may also end on a comma or an ellipsis.
+_CLAUSE_BREAKS = _SENTENCE_ENDERS + (",", "，", "……")
+
+
+def split_string(tokens, min_len=10, soft_limit=30):
+    """Split a token sequence into punctuation-terminated segments.
+
+    Scanning starts ``min_len`` positions into the current segment, so
+    punctuation earlier than that never ends it. Up to and including
+    offset ``soft_limit`` only sentence-ending punctuation ends the
+    segment; beyond it a comma or ellipsis token does as well. The
+    punctuation token stays at the end of the segment it closes.
+
+    Args:
+        tokens: Sliceable sequence of tokens (for example a list of
+            strings). It is not modified.
+        min_len: Offset of the first position at which a segment may end;
+            a remainder of at most ``min_len`` tokens is never split.
+        soft_limit: Offset after which commas and ellipses are also
+            accepted as break points.
+
+    Returns:
+        A list of slices of ``tokens`` that concatenate back to the
+        input. Empty input gives an empty list. When no break point is
+        found, the rest of the sequence is returned as one final segment.
+    """
+    result = []
+    total = len(tokens)
+    start = 0  # index in ``tokens`` where the current segment begins
+    while total - start > min_len:
+        cut = None
+        for pos in range(start + min_len, total):
+            if pos - start > soft_limit:
+                breaks = _CLAUSE_BREAKS
+            else:
+                breaks = _SENTENCE_ENDERS
+            if tokens[pos] in breaks:
+                cut = pos + 1
+                break
+        if cut is None:
+            # No usable break point in the remainder: stop splitting.
+            break
+        result.append(tokens[start:cut])
+        start = cut
+    if start < total:
+        # Hand back the caller's object itself when nothing was cut off.
+        result.append(tokens if start == 0 else tokens[start:])
+    return result

zh-en_model/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "add_source_bos": false,
+  "add_source_eos": false,
+  "bos_token": "<s>",
+  "decoder_start_token": "<s>",
+  "eos_token": "</s>",
+  "layer_norm_epsilon": null,
+  "multi_query_attention": false,
+  "unk_token": "<unk>"
+}

zh-en_model/model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03bfefa51ffe9e9e4e5f588373aaa81440eea4f082b8771a2e1523bfa81b581d
+size 3055339918

zh-en_model/source_vocabulary.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17d939e964beab69401ddbc855fd6122007754cab6313906d4db73c351522894
+size 11621611

zh-en_model/target_vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff

zh2en_cmodel/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "add_source_bos": false,
+  "add_source_eos": true,
+  "bos_token": "<s>",
+  "decoder_start_token": "</s>",
+  "eos_token": "</s>",
+  "layer_norm_epsilon": null,
+  "multi_query_attention": false,
+  "unk_token": "<unk>"
+}

zh2en_cmodel/model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22e8c2c3b0afd5be0941250da5ded3ae3b51dd08667f55d9b072f41fb9a5e8c9
+size 458517550

zh2en_cmodel/source_vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff

zh2en_cmodel/target_vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff