BounharAbdelaziz committed on
Commit
70e7462
·
verified ·
1 Parent(s): 2c7bfb2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -18
app.py CHANGED
@@ -3,6 +3,9 @@ from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
3
  import os
4
  import torch
5
  import spaces
 
 
 
6
 
7
  # Define model paths
8
  MODEL_PATHS = {
@@ -12,14 +15,25 @@ MODEL_PATHS = {
12
  "Terjman-Supreme-v2": "BounharAbdelaziz/Terjman-Supreme-v2.0"
13
  }
14
 
15
- # Load environment token
16
  TOKEN = os.environ['TOKEN']
17
 
18
- # Preload models and tokenizers
 
 
 
 
 
 
 
 
 
 
19
  def preload_models():
 
20
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
21
  print(f"[INFO] Using device: {device}")
22
-
23
  # Load Nano and Large models
24
  nano_large_models = {}
25
  for model_name in ["Terjman-Nano-v2", "Terjman-Large-v2"]:
@@ -31,7 +45,7 @@ def preload_models():
31
  device=device if device.startswith("cuda") else -1
32
  )
33
  nano_large_models[model_name] = translator
34
-
35
  # Load Ultra and Supreme models
36
  ultra_supreme_models = {}
37
  for model_name in ["Terjman-Ultra-v2", "Terjman-Supreme-v2"]:
@@ -47,15 +61,46 @@ def preload_models():
47
  tgt_lang="ary_Arab"
48
  )
49
  ultra_supreme_models[model_name] = translator
50
-
51
  return nano_large_models, ultra_supreme_models
52
 
53
- # Preload all models
54
- nano_large_models, ultra_supreme_models = preload_models()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- # Translation function for Nano and Large models
57
  @spaces.GPU
58
  def translate_nano_large(text, model_name):
 
59
  translator = nano_large_models[model_name]
60
  translated = translator(
61
  text,
@@ -70,47 +115,94 @@ def translate_nano_large(text, model_name):
70
  )
71
  return translated[0]["translation_text"]
72
 
73
- # Translation function for Ultra and Supreme models
74
  @spaces.GPU
75
  def translate_ultra_supreme(text, model_name):
 
76
  translator = ultra_supreme_models[model_name]
77
  translation = translator(text)[0]['translation_text']
78
  return translation
79
 
80
- # Main translation function
81
  def translate_text(text, model_choice):
 
 
 
 
 
 
 
 
82
  if model_choice in ["Terjman-Nano-v2", "Terjman-Large-v2"]:
83
- return translate_nano_large(text, model_choice)
84
  elif model_choice in ["Terjman-Ultra-v2", "Terjman-Supreme-v2"]:
85
- return translate_ultra_supreme(text, model_choice)
86
  else:
87
  return "Invalid model selection."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- # Gradio app
90
  def gradio_app():
91
  with gr.Blocks() as app:
92
  gr.Markdown("# 🇲🇦 Terjman-v2")
93
  gr.Markdown("Choose a model and enter the English text you want to translate to Moroccan Darija.")
94
-
95
  model_choice = gr.Dropdown(
96
  label="Select Model",
97
  choices=["Terjman-Nano-v2", "Terjman-Large-v2", "Terjman-Ultra-v2", "Terjman-Supreme-v2"],
98
  value="Terjman-Ultra-v2"
99
  )
 
100
  input_text = gr.Textbox(label="Input Text", placeholder="Enter text to translate...", lines=3)
101
  output_text = gr.Textbox(label="Translated Text", interactive=False, lines=3)
 
102
  translate_button = gr.Button("Translate")
103
-
 
 
 
104
  # Link input and output
 
 
 
 
105
  translate_button.click(
106
- fn=translate_text,
107
  inputs=[input_text, model_choice],
108
- outputs=output_text
109
  )
110
-
 
 
 
 
 
 
 
 
 
111
  return app
112
 
113
  # Run the app
114
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
115
  app = gradio_app()
116
  app.launch()
 
3
  import os
4
  import torch
5
  import spaces
6
+ from datasets import Dataset
7
+ import time
8
+ import datetime
9
 
10
  # Define model paths
11
  MODEL_PATHS = {
 
15
  "Terjman-Supreme-v2": "BounharAbdelaziz/Terjman-Supreme-v2.0"
16
  }
17
 
# Access token read from the environment; a missing TOKEN variable raises
# KeyError as soon as this module is loaded.
TOKEN = os.environ['TOKEN']

# Destination dataset repository for logged translations.
DATASET_REPO = "BounharAbdelaziz/terjman-v2-live-translations"
# Buffer size that triggers a push.
BATCH_SIZE = 10
# Maximum age of the buffer, in seconds, before a push is triggered (1 hour).
UPDATE_INTERVAL = 60 * 60

# In-memory state: records awaiting upload and the time of the last push.
translations_buffer = []
last_push_time = time.time()
31
+
32
  def preload_models():
33
+ """ Preload models and tokenizers """
34
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
35
  print(f"[INFO] Using device: {device}")
36
+
37
  # Load Nano and Large models
38
  nano_large_models = {}
39
  for model_name in ["Terjman-Nano-v2", "Terjman-Large-v2"]:
 
45
  device=device if device.startswith("cuda") else -1
46
  )
47
  nano_large_models[model_name] = translator
48
+
49
  # Load Ultra and Supreme models
50
  ultra_supreme_models = {}
51
  for model_name in ["Terjman-Ultra-v2", "Terjman-Supreme-v2"]:
 
61
  tgt_lang="ary_Arab"
62
  )
63
  ultra_supreme_models[model_name] = translator
64
+
65
  return nano_large_models, ultra_supreme_models
66
 
def push_to_hf_dataset():
    """Upload the buffered translations to the Hugging Face dataset repo.

    Takes a snapshot of ``translations_buffer``, pushes it to ``DATASET_REPO``
    as a new timestamped split, then removes exactly the pushed records from
    the buffer and resets ``last_push_time``. Does nothing when the buffer is
    empty.

    Any exception raised while building or pushing the dataset is logged and
    swallowed; the buffer is left untouched in that case so the same records
    are retried on the next push.
    """
    global last_push_time

    # Snapshot first: the upload is slow and other requests may append to the
    # buffer while it is in flight. Only the snapshot is pushed and removed.
    pending = list(translations_buffer)
    if not pending:
        return

    try:
        print(f"[INFO] Pushing {len(pending)} translations to Hugging Face dataset...")

        # Build a column-oriented dict from the row-oriented records.
        columns = ("source_text", "translated_text", "model_used", "timestamp")
        ds = Dataset.from_dict(
            {column: [item[column] for item in pending] for column in columns}
        )

        # Each push goes to its own split, named after the current local time.
        ds.push_to_hub(
            DATASET_REPO,
            token=TOKEN,
            split=f"translations_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}",
            private=True,
        )

        # Drop only what was uploaded, in place, so records appended during
        # the push survive for the next batch instead of being discarded.
        del translations_buffer[:len(pending)]
        last_push_time = time.time()
        print("[INFO] Successfully pushed translations to Hugging Face dataset")

    except Exception as e:
        # Best-effort logging: a failed upload must not break translation.
        print(f"[ERROR] Failed to push dataset to Hugging Face: {str(e)}")
100
 
 
101
  @spaces.GPU
102
  def translate_nano_large(text, model_name):
103
+ """ Translation function for Nano and Large models """
104
  translator = nano_large_models[model_name]
105
  translated = translator(
106
  text,
 
115
  )
116
  return translated[0]["translation_text"]
117
 
 
@spaces.GPU
def translate_ultra_supreme(text, model_name):
    """Translate ``text`` with the Ultra or Supreme pipeline named ``model_name``.

    Looks the pipeline up in ``ultra_supreme_models`` and returns the
    ``translation_text`` field of its first result.
    """
    selected_pipeline = ultra_supreme_models[model_name]
    first_result = selected_pipeline(text)[0]
    return first_result['translation_text']
124
 
 
def translate_text(text, model_choice):
    """Translate ``text`` with the chosen model and log the result.

    Returns a prompt message when ``text`` is empty or whitespace-only, and
    an error message when ``model_choice`` is not one of the four known
    models. Otherwise the translation is appended to ``translations_buffer``
    and returned; a push to the dataset is triggered once the buffer holds
    ``BATCH_SIZE`` records or ``UPDATE_INTERVAL`` seconds have passed since
    the last push.
    """
    # Guard: nothing to translate.
    if not text or not text.strip():
        return "Please enter text to translate."

    # Pick the backend that serves the requested model.
    if model_choice in ("Terjman-Nano-v2", "Terjman-Large-v2"):
        run_model = translate_nano_large
    elif model_choice in ("Terjman-Ultra-v2", "Terjman-Supreme-v2"):
        run_model = translate_ultra_supreme
    else:
        return "Invalid model selection."

    translation = run_model(text, model_choice)

    # Record the request for later upload.
    record = {
        "source_text": text,
        "translated_text": translation,
        "model_used": model_choice,
        "timestamp": datetime.datetime.now().isoformat(),
    }
    translations_buffer.append(record)

    # Flush when the batch is full or the buffer has become too old.
    now = time.time()
    batch_full = len(translations_buffer) >= BATCH_SIZE
    interval_elapsed = (now - last_push_time) >= UPDATE_INTERVAL
    if batch_full or interval_elapsed:
        push_to_hf_dataset()

    return translation
155
 
 
def gradio_app():
    """Build and return the Gradio Blocks interface.

    The UI has a model dropdown, an input and an output textbox, a Translate
    button, a status line showing the current buffer size, and a collapsed
    "Admin Controls" section with a button that pushes the buffer manually.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks() as app:
        gr.Markdown("# 🇲🇦 Terjman-v2")
        gr.Markdown("Choose a model and enter the English text you want to translate to Moroccan Darija.")

        model_choice = gr.Dropdown(
            label="Select Model",
            choices=["Terjman-Nano-v2", "Terjman-Large-v2", "Terjman-Ultra-v2", "Terjman-Supreme-v2"],
            value="Terjman-Ultra-v2"
        )

        input_text = gr.Textbox(label="Input Text", placeholder="Enter text to translate...", lines=3)
        output_text = gr.Textbox(label="Translated Text", interactive=False, lines=3)

        translate_button = gr.Button("Translate")

        # Status message
        status = gr.Markdown("Translations in buffer: 0")

        def translate_and_update_status(text, model):
            """Translate and return (translation, refreshed status line)."""
            translation = translate_text(text, model)
            return translation, f"Translations in buffer: {len(translations_buffer)} (Will push when reaching {BATCH_SIZE} or after {UPDATE_INTERVAL/3600} hours)"

        translate_button.click(
            fn=translate_and_update_status,
            inputs=[input_text, model_choice],
            outputs=[output_text, status]
        )

        def push_and_report():
            """Push the buffer, then return the status line as a single string.

            The handler feeds exactly one output component, so it must return
            one value; returning a tuple here would hand the tuple itself to
            the status Markdown.
            """
            push_to_hf_dataset()
            return f"Pushed translations to HF. Buffer size: {len(translations_buffer)}"

        # Manual push button for admins
        with gr.Accordion("Admin Controls", open=False):
            push_button = gr.Button("Push Current Buffer to HF Dataset")
            push_button.click(
                fn=push_and_report,
                inputs=[],
                outputs=[status]
            )

    return app
196
 
# Script entry point
if __name__ == "__main__":
    import atexit

    # Flush any translations still buffered when the interpreter exits.
    atexit.register(push_to_hf_dataset)

    # Load every translation pipeline once, before the UI is built.
    nano_large_models, ultra_supreme_models = preload_models()

    # Build the interface and start serving.
    app = gradio_app()
    app.launch()