eaysu committed on
Commit
93f6210
·
1 Parent(s): e50ecc5

nltk punkt downloaded

Browse files
Files changed (1) hide show
  1. app.py +19 -2
app.py CHANGED
@@ -2,9 +2,13 @@ import gradio as gr
2
  from transformers import MarianMTModel, MarianTokenizer
3
  import torch
4
  import nltk
 
5
 
6
  # Download punkt for sentence tokenization
7
- nltk.download('punkt')
 
 
 
8
 
9
  from nltk.tokenize import sent_tokenize
10
 
@@ -59,7 +63,20 @@ def translate_text(model_name, text):
59
  model_options = [
60
  ("English to Turkish", "Helsinki-NLP/opus-mt-tc-big-en-tr"),
61
  ("Turkish to English", "Helsinki-NLP/opus-mt-tc-big-tr-en"),
62
- # Add other models here...
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  ]
64
 
65
  # Create Gradio interface
 
2
  from transformers import MarianMTModel, MarianTokenizer
3
  import torch
4
  import nltk
5
+ import os
6
 
7
  # Download punkt for sentence tokenization
8
+ nltk_data_path = "/tmp/nltk_data" # Use "./nltk_data" if you want persistence
9
+ os.makedirs(nltk_data_path, exist_ok=True)
10
+ nltk.download('punkt', download_dir=nltk_data_path)
11
+ nltk.data.path.append(nltk_data_path)
12
 
13
  from nltk.tokenize import sent_tokenize
14
 
 
63
  model_options = [
64
  ("English to Turkish", "Helsinki-NLP/opus-mt-tc-big-en-tr"),
65
  ("Turkish to English", "Helsinki-NLP/opus-mt-tc-big-tr-en"),
66
+ ("English to French", "Helsinki-NLP/opus-mt-tc-big-en-fr"),
67
+ ("French to English", "Helsinki-NLP/opus-mt-tc-big-fr-en"),
68
+ ("English to German", "Helsinki-NLP/opus-mt-en-de"),
69
+ ("German to English", "Helsinki-NLP/opus-mt-de-en"),
70
+ ("English to Spanish", "Helsinki-NLP/opus-mt-tc-big-en-es"),
71
+ ("Spanish to English", "Helsinki-NLP/opus-mt-es-en"),
72
+ ("English to Arabic", "Helsinki-NLP/opus-mt-tc-big-en-ar"),
73
+ ("Arabic to English", "Helsinki-NLP/opus-mt-tc-big-ar-en"),
74
+ ("English to Urdu", "Helsinki-NLP/opus-mt-en-ur"),
75
+ ("Urdu to English", "Helsinki-NLP/opus-mt-ur-en"),
76
+ ("English to Hindi", "Helsinki-NLP/opus-mt-en-hi"),
77
+ ("Hindi to English", "Helsinki-NLP/opus-mt-hi-en"),
78
+ ("English to Chinese", "Helsinki-NLP/opus-mt-en-zh"),
79
+ ("Chinese to English", "Helsinki-NLP/opus-mt-zh-en")
80
  ]
81
 
82
  # Create Gradio interface