Update upload_suggestions.py
Browse files- upload_suggestions.py +62 -15
upload_suggestions.py
CHANGED
@@ -3,15 +3,13 @@ import sqlite3
|
|
3 |
import json
|
4 |
import hashlib
|
5 |
from datetime import datetime
|
6 |
-
from huggingface_hub import HfApi
|
7 |
|
8 |
# Settings
|
9 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
10 |
-
REPO_ID = "axxam/libretranslate-suggestions"
|
11 |
DEST_PATH_IN_REPO = "suggestions.json"
|
12 |
REPO_TYPE = "dataset"
|
13 |
-
|
14 |
-
# Safe paths
|
15 |
JSON_OUTPUT_PATH = "/tmp/suggestions.json"
|
16 |
CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"
|
17 |
|
@@ -55,23 +53,66 @@ def extract_suggestions(db_path):
|
|
55 |
print(f"SQLite error: {e}")
|
56 |
return suggestions
|
57 |
|
58 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
existing = {}
|
60 |
-
|
|
|
61 |
try:
|
62 |
-
with open(
|
63 |
for item in json.load(f):
|
64 |
-
existing[item["id"]] =
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
except Exception as e:
|
66 |
print(f"Failed to read existing JSON: {e}")
|
67 |
|
|
|
68 |
for s in suggestions:
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
-
with open(
|
72 |
-
json.dump(
|
73 |
|
74 |
-
return
|
75 |
|
76 |
def get_checksum(filepath):
|
77 |
if not os.path.exists(filepath):
|
@@ -80,6 +121,9 @@ def get_checksum(filepath):
|
|
80 |
return hashlib.md5(f.read()).hexdigest()
|
81 |
|
82 |
def upload_if_updated(json_path):
|
|
|
|
|
|
|
83 |
new_checksum = get_checksum(json_path)
|
84 |
old_checksum = None
|
85 |
|
@@ -104,9 +148,11 @@ def upload_if_updated(json_path):
|
|
104 |
except Exception as e:
|
105 |
print("Upload failed:", e)
|
106 |
else:
|
107 |
-
print("No changes in
|
108 |
|
109 |
def main():
|
|
|
|
|
110 |
if not HF_TOKEN:
|
111 |
print("HF_TOKEN not set β skipping upload.")
|
112 |
return
|
@@ -120,8 +166,9 @@ def main():
|
|
120 |
print("No suggestions found β skipping.")
|
121 |
return
|
122 |
|
123 |
-
|
124 |
-
|
|
|
125 |
|
126 |
if __name__ == "__main__":
|
127 |
main()
|
|
|
3 |
import json
|
4 |
import hashlib
|
5 |
from datetime import datetime
|
6 |
+
from huggingface_hub import HfApi, hf_hub_download
|
7 |
|
8 |
# Settings
|
9 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
10 |
+
REPO_ID = "axxam/libretranslate-suggestions"
|
11 |
DEST_PATH_IN_REPO = "suggestions.json"
|
12 |
REPO_TYPE = "dataset"
|
|
|
|
|
13 |
JSON_OUTPUT_PATH = "/tmp/suggestions.json"
|
14 |
CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"
|
15 |
|
|
|
53 |
print(f"SQLite error: {e}")
|
54 |
return suggestions
|
55 |
|
56 |
+
def download_existing_suggestions():
    """Fetch the currently published suggestions file from the Hugging Face Hub.

    Calls ``hf_hub_download`` for ``DEST_PATH_IN_REPO`` in ``REPO_ID``
    (repo type ``REPO_TYPE``), authenticating with ``HF_TOKEN`` and using
    ``/tmp`` as the local directory.

    Returns:
        The path returned by ``hf_hub_download``, or ``None`` when the
        download raised any exception (the error is printed, not re-raised).
    """
    # NOTE(review): every failure is reported as None, so the caller cannot
    # tell "no file published yet" from a transient error - confirm that
    # treating both as "no existing data" is intended.
    try:
        download_args = {
            "repo_id": REPO_ID,
            "repo_type": REPO_TYPE,
            "filename": DEST_PATH_IN_REPO,
            "token": HF_TOKEN,
            "local_dir": "/tmp",
            "local_dir_use_symlinks": False,
        }
        local_copy = hf_hub_download(**download_args)
        print("Downloaded existing suggestions from Hugging Face.")
        return local_copy
    except Exception as e:
        print(f"Could not fetch existing suggestions from HF: {e}")
        return None
|
71 |
+
|
72 |
+
def merge_with_existing(suggestions, existing_json_path):
    """Merge freshly extracted suggestions into the previously stored set.

    Entries already present in the existing file are kept exactly as stored.
    A suggestion is added only when its ``id`` is not in that file yet, and it
    is stamped with the current local time.

    Args:
        suggestions: Iterable of dicts carrying the keys ``id``,
            ``source_text``, ``suggested_text``, ``source_lang`` and
            ``target_lang``.
        existing_json_path: Path to a JSON file holding a list of previously
            stored entries, or ``None`` / a non-existent path when there is
            nothing to merge with.

    Returns:
        ``JSON_OUTPUT_PATH`` after the merged list has been written there, or
        ``None`` when no new suggestion was found (nothing is written then).

    Raises:
        KeyError: If a new suggestion lacks one of the required keys.
        OSError: If the merged file cannot be written.
    """
    existing = {}

    if existing_json_path and os.path.exists(existing_json_path):
        try:
            with open(existing_json_path, "r", encoding="utf-8") as f:
                stored_items = json.load(f)
        except Exception as e:
            print(f"Failed to read existing JSON: {e}")
            stored_items = []

        if not isinstance(stored_items, list):
            print("Failed to read existing JSON: expected a list of entries")
            stored_items = []

        # Validate entries one at a time: a single malformed record must not
        # abort the load, otherwise every entry after it would be dropped
        # from the merged file that is written below.
        for item in stored_items:
            try:
                existing[item["id"]] = {
                    "source_text": item["source_text"],
                    "suggested_text": item["suggested_text"],
                    "source_lang": item["source_lang"],
                    "target_lang": item["target_lang"],
                    "timestamp": item.get("timestamp", datetime.now().isoformat()),
                }
            except (KeyError, TypeError, AttributeError) as e:
                print(f"Skipping malformed existing entry: {e!r}")

    changed = False
    for s in suggestions:
        # Stored entries win: only ids that are not known yet are added.
        if s["id"] in existing:
            continue
        changed = True
        existing[s["id"]] = {
            "source_text": s["source_text"],
            "suggested_text": s["suggested_text"],
            "source_lang": s["source_lang"],
            "target_lang": s["target_lang"],
            "timestamp": datetime.now().isoformat(),
        }

    if not changed:
        # NOTE(review): the "β" below looks like a mis-encoded dash; the
        # message is kept byte-for-byte - confirm against the real file.
        print("No new suggestions β skipping write/upload.")
        return None

    # Write merged file
    final = [{**data, "id": id_} for id_, data in existing.items()]

    with open(JSON_OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(final, f, indent=2, ensure_ascii=False)

    return JSON_OUTPUT_PATH
|
116 |
|
117 |
def get_checksum(filepath):
|
118 |
if not os.path.exists(filepath):
|
|
|
121 |
return hashlib.md5(f.read()).hexdigest()
|
122 |
|
123 |
def upload_if_updated(json_path):
|
124 |
+
if not json_path or not os.path.exists(json_path):
|
125 |
+
return
|
126 |
+
|
127 |
new_checksum = get_checksum(json_path)
|
128 |
old_checksum = None
|
129 |
|
|
|
148 |
except Exception as e:
|
149 |
print("Upload failed:", e)
|
150 |
else:
|
151 |
+
print("No changes in file β skipping upload.")
|
152 |
|
153 |
def main():
|
154 |
+
print(f"===== Application Startup at {datetime.now().isoformat()} =====")
|
155 |
+
|
156 |
if not HF_TOKEN:
|
157 |
print("HF_TOKEN not set β skipping upload.")
|
158 |
return
|
|
|
166 |
print("No suggestions found β skipping.")
|
167 |
return
|
168 |
|
169 |
+
existing_path = download_existing_suggestions()
|
170 |
+
merged_json = merge_with_existing(suggestions, existing_path)
|
171 |
+
upload_if_updated(merged_json)
|
172 |
|
173 |
if __name__ == "__main__":
|
174 |
main()
|