Spaces:
Running
on
Zero
Running
on
Zero
fix data loss
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
|
3 |
import os
|
4 |
import torch
|
5 |
import spaces
|
6 |
-
from datasets import Dataset
|
7 |
import time
|
8 |
import datetime
|
9 |
|
@@ -65,7 +65,7 @@ def preload_models():
|
|
65 |
return nano_large_models, ultra_supreme_models
|
66 |
|
67 |
def push_to_hf_dataset():
|
68 |
-
""" Save translations in HF dataset for monitoring """
|
69 |
global translations_buffer, last_push_time
|
70 |
|
71 |
if not translations_buffer:
|
@@ -75,18 +75,31 @@ def push_to_hf_dataset():
|
|
75 |
print(f"[INFO] Pushing {len(translations_buffer)} translations to Hugging Face dataset...")
|
76 |
|
77 |
# Create dataset from buffer
|
78 |
-
|
79 |
"source_text": [item["source_text"] for item in translations_buffer],
|
80 |
"translated_text": [item["translated_text"] for item in translations_buffer],
|
81 |
"model_used": [item["model_used"] for item in translations_buffer],
|
82 |
"timestamp": [item["timestamp"] for item in translations_buffer]
|
83 |
})
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
# Push to hub
|
86 |
-
|
87 |
DATASET_REPO,
|
88 |
token=TOKEN,
|
89 |
-
split=
|
90 |
private=True,
|
91 |
)
|
92 |
|
@@ -169,18 +182,15 @@ def gradio_app():
|
|
169 |
|
170 |
translate_button = gr.Button("Translate")
|
171 |
|
172 |
-
# Status message -> hidden
|
173 |
-
# status = gr.Markdown(f"Translations in buffer: 0")
|
174 |
-
|
175 |
# Link input and output
|
176 |
def translate_and_update_status(text, model):
|
177 |
translation = translate_text(text, model)
|
178 |
-
return translation
|
179 |
|
180 |
translate_button.click(
|
181 |
fn=translate_and_update_status,
|
182 |
inputs=[input_text, model_choice],
|
183 |
-
outputs=[output_text]
|
184 |
)
|
185 |
|
186 |
return app
|
|
|
3 |
import os
|
4 |
import torch
|
5 |
import spaces
|
6 |
+
from datasets import Dataset, load_dataset, concatenate_datasets
|
7 |
import time
|
8 |
import datetime
|
9 |
|
|
|
65 |
return nano_large_models, ultra_supreme_models
|
66 |
|
67 |
def push_to_hf_dataset():
|
68 |
+
""" Save translations in HF dataset for monitoring, preserving previous data """
|
69 |
global translations_buffer, last_push_time
|
70 |
|
71 |
if not translations_buffer:
|
|
|
75 |
print(f"[INFO] Pushing {len(translations_buffer)} translations to Hugging Face dataset...")
|
76 |
|
77 |
# Create dataset from buffer
|
78 |
+
new_data = Dataset.from_dict({
|
79 |
"source_text": [item["source_text"] for item in translations_buffer],
|
80 |
"translated_text": [item["translated_text"] for item in translations_buffer],
|
81 |
"model_used": [item["model_used"] for item in translations_buffer],
|
82 |
"timestamp": [item["timestamp"] for item in translations_buffer]
|
83 |
})
|
84 |
|
85 |
+
# Try to load existing dataset
|
86 |
+
try:
|
87 |
+
existing_dataset = load_dataset(DATASET_REPO, split="live_translations", token=TOKEN)
|
88 |
+
print(f"[INFO] Loaded existing dataset with {len(existing_dataset)} entries")
|
89 |
+
|
90 |
+
# Concatenate existing data with new data
|
91 |
+
combined_dataset = concatenate_datasets([existing_dataset, new_data])
|
92 |
+
print(f"[INFO] Combined dataset now has {len(combined_dataset)} entries")
|
93 |
+
except Exception as e:
|
94 |
+
print(f"[INFO] No existing dataset found or error loading: {str(e)}")
|
95 |
+
print(f"[INFO] Creating new dataset")
|
96 |
+
combined_dataset = new_data
|
97 |
+
|
98 |
# Push to hub
|
99 |
+
combined_dataset.push_to_hub(
|
100 |
DATASET_REPO,
|
101 |
token=TOKEN,
|
102 |
+
split="live_translations",
|
103 |
private=True,
|
104 |
)
|
105 |
|
|
|
182 |
|
183 |
translate_button = gr.Button("Translate")
|
184 |
|
|
|
|
|
|
|
185 |
# Link input and output
|
186 |
def translate_and_update_status(text, model):
|
187 |
translation = translate_text(text, model)
|
188 |
+
return translation
|
189 |
|
190 |
translate_button.click(
|
191 |
fn=translate_and_update_status,
|
192 |
inputs=[input_text, model_choice],
|
193 |
+
outputs=[output_text]
|
194 |
)
|
195 |
|
196 |
return app
|