Spaces:
Sleeping
Sleeping
eaysu
committed on
Commit
·
93f6210
1
Parent(s):
e50ecc5
nltk punkt downloaded
Browse files
app.py
CHANGED
@@ -2,9 +2,13 @@ import gradio as gr
|
|
2 |
from transformers import MarianMTModel, MarianTokenizer
|
3 |
import torch
|
4 |
import nltk
|
|
|
5 |
|
6 |
# Download punkt for sentence tokenization
|
7 |
-
|
|
|
|
|
|
|
8 |
|
9 |
from nltk.tokenize import sent_tokenize
|
10 |
|
@@ -59,7 +63,20 @@ def translate_text(model_name, text):
|
|
59 |
model_options = [
|
60 |
("English to Turkish", "Helsinki-NLP/opus-mt-tc-big-en-tr"),
|
61 |
("Turkish to English", "Helsinki-NLP/opus-mt-tc-big-tr-en"),
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
]
|
64 |
|
65 |
# Create Gradio interface
|
|
|
2 |
from transformers import MarianMTModel, MarianTokenizer
|
3 |
import torch
|
4 |
import nltk
|
5 |
+
import os
|
6 |
|
7 |
# Download punkt for sentence tokenization
|
8 |
+
nltk_data_path = "/tmp/nltk_data" # Use "./nltk_data" if you want persistence
|
9 |
+
os.makedirs(nltk_data_path, exist_ok=True)
|
10 |
+
nltk.download('punkt', download_dir=nltk_data_path)
|
11 |
+
nltk.data.path.append(nltk_data_path)
|
12 |
|
13 |
from nltk.tokenize import sent_tokenize
|
14 |
|
|
|
63 |
model_options = [
|
64 |
("English to Turkish", "Helsinki-NLP/opus-mt-tc-big-en-tr"),
|
65 |
("Turkish to English", "Helsinki-NLP/opus-mt-tc-big-tr-en"),
|
66 |
+
("English to French", "Helsinki-NLP/opus-mt-tc-big-en-fr"),
|
67 |
+
("French to English", "Helsinki-NLP/opus-mt-tc-big-fr-en"),
|
68 |
+
("English to German", "Helsinki-NLP/opus-mt-en-de"),
|
69 |
+
("German to English", "Helsinki-NLP/opus-mt-de-en"),
|
70 |
+
("English to Spanish", "Helsinki-NLP/opus-mt-tc-big-en-es"),
|
71 |
+
("Spanish to English", "Helsinki-NLP/opus-mt-es-en"),
|
72 |
+
("English to Arabic", "Helsinki-NLP/opus-mt-tc-big-en-ar"),
|
73 |
+
("Arabic to English", "Helsinki-NLP/opus-mt-tc-big-ar-en"),
|
74 |
+
("English to Urdu", "Helsinki-NLP/opus-mt-en-ur"),
|
75 |
+
("Urdu to English", "Helsinki-NLP/opus-mt-ur-en"),
|
76 |
+
("English to Hindi", "Helsinki-NLP/opus-mt-en-hi"),
|
77 |
+
("Hindi to English", "Helsinki-NLP/opus-mt-hi-en"),
|
78 |
+
("English to Chinese", "Helsinki-NLP/opus-mt-en-zh"),
|
79 |
+
("Chinese to English", "Helsinki-NLP/opus-mt-zh-en")
|
80 |
]
|
81 |
|
82 |
# Create Gradio interface
|