axxam committed on
Commit
30ea2f3
·
verified ·
1 Parent(s): d5f3a77

Update upload_suggestions.py

Files changed (1)
  1. upload_suggestions.py +62 -15
upload_suggestions.py CHANGED
@@ -3,15 +3,13 @@ import sqlite3
 import json
 import hashlib
 from datetime import datetime
-from huggingface_hub import HfApi
+from huggingface_hub import HfApi, hf_hub_download
 
 # Settings
 HF_TOKEN = os.environ.get("HF_TOKEN")
-REPO_ID = "axxam/libretranslate-suggestions" # Dataset repo on HF
+REPO_ID = "axxam/libretranslate-suggestions"
 DEST_PATH_IN_REPO = "suggestions.json"
 REPO_TYPE = "dataset"
-
-# Safe paths
 JSON_OUTPUT_PATH = "/tmp/suggestions.json"
 CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"
 
@@ -55,23 +53,66 @@ def extract_suggestions(db_path):
         print(f"SQLite error: {e}")
     return suggestions
 
-def merge_with_existing(suggestions, json_path):
+def download_existing_suggestions():
+    try:
+        path = hf_hub_download(
+            repo_id=REPO_ID,
+            repo_type=REPO_TYPE,
+            filename=DEST_PATH_IN_REPO,
+            token=HF_TOKEN,
+            local_dir="/tmp",
+            local_dir_use_symlinks=False
+        )
+        print("Downloaded existing suggestions from Hugging Face.")
+        return path
+    except Exception as e:
+        print(f"Could not fetch existing suggestions from HF: {e}")
+        return None
+
+def merge_with_existing(suggestions, existing_json_path):
     existing = {}
-    if os.path.exists(json_path):
+
+    if existing_json_path and os.path.exists(existing_json_path):
         try:
-            with open(json_path, "r", encoding="utf-8") as f:
+            with open(existing_json_path, "r", encoding="utf-8") as f:
                 for item in json.load(f):
-                    existing[item["id"]] = item
+                    existing[item["id"]] = {
+                        "source_text": item["source_text"],
+                        "suggested_text": item["suggested_text"],
+                        "source_lang": item["source_lang"],
+                        "target_lang": item["target_lang"],
+                        "timestamp": item.get("timestamp", datetime.now().isoformat())
+                    }
         except Exception as e:
             print(f"Failed to read existing JSON: {e}")
 
+    changed = False
     for s in suggestions:
-        existing[s["id"]] = s
+        s_clean = {
+            "source_text": s["source_text"],
+            "suggested_text": s["suggested_text"],
+            "source_lang": s["source_lang"],
+            "target_lang": s["target_lang"],
+        }
+
+        existing_entry = existing.get(s["id"])
+        if not existing_entry:
+            changed = True
+            existing[s["id"]] = {**s_clean, "timestamp": datetime.now().isoformat()}
+
+    if not changed:
+        print("No new suggestions — skipping write/upload.")
+        return None
+
+    # Write merged file
+    final = []
+    for id_, data in existing.items():
+        final.append({**data, "id": id_})
 
-    with open(json_path, "w", encoding="utf-8") as f:
-        json.dump(list(existing.values()), f, indent=2, ensure_ascii=False)
+    with open(JSON_OUTPUT_PATH, "w", encoding="utf-8") as f:
+        json.dump(final, f, indent=2, ensure_ascii=False)
 
-    return json_path
+    return JSON_OUTPUT_PATH
 
 def get_checksum(filepath):
     if not os.path.exists(filepath):
@@ -80,6 +121,9 @@ def get_checksum(filepath):
         return hashlib.md5(f.read()).hexdigest()
 
 def upload_if_updated(json_path):
+    if not json_path or not os.path.exists(json_path):
+        return
+
     new_checksum = get_checksum(json_path)
     old_checksum = None
 
@@ -104,9 +148,11 @@ def upload_if_updated(json_path):
     except Exception as e:
         print("Upload failed:", e)
     else:
-        print("No changes in suggestions.json — skipping upload.")
+        print("No changes in file — skipping upload.")
 
 def main():
+    print(f"===== Application Startup at {datetime.now().isoformat()} =====")
+
     if not HF_TOKEN:
         print("HF_TOKEN not set — skipping upload.")
         return
@@ -120,8 +166,9 @@ def main():
         print("No suggestions found — skipping.")
         return
 
-    json_path = merge_with_existing(suggestions, JSON_OUTPUT_PATH)
-    upload_if_updated(json_path)
+    existing_path = download_existing_suggestions()
+    merged_json = merge_with_existing(suggestions, existing_path)
+    upload_if_updated(merged_json)
 
 if __name__ == "__main__":
     main()
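
Note: `upload_if_updated` appears in the hunks above only through its new None-guard and its except/else branches. The sketch below shows how a checksum-gated upload like this is typically wired up with `huggingface_hub`; the guard, the constants, and the skip/failure messages are taken from the diff, while the rest of the body (including the success message) is an assumption rather than the script's actual code.

import hashlib
import os

from huggingface_hub import HfApi

HF_TOKEN = os.environ.get("HF_TOKEN")
REPO_ID = "axxam/libretranslate-suggestions"
DEST_PATH_IN_REPO = "suggestions.json"
REPO_TYPE = "dataset"
CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"

def get_checksum(filepath):
    # Same helper as in the script: MD5 of the file contents, None if the file is missing.
    if not os.path.exists(filepath):
        return None
    with open(filepath, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()

def upload_if_updated(json_path):
    # Guard added in this commit: merge_with_existing() may now return None.
    if not json_path or not os.path.exists(json_path):
        return

    new_checksum = get_checksum(json_path)
    old_checksum = None
    if os.path.exists(CHECKSUM_FILE):
        with open(CHECKSUM_FILE, "r", encoding="utf-8") as f:
            old_checksum = f.read().strip()

    if new_checksum != old_checksum:
        try:
            # upload_file() pushes a single file to the dataset repo in one commit.
            HfApi().upload_file(
                path_or_fileobj=json_path,
                path_in_repo=DEST_PATH_IN_REPO,
                repo_id=REPO_ID,
                repo_type=REPO_TYPE,
                token=HF_TOKEN,
            )
            # Remember what was uploaded so the next run can skip identical content.
            with open(CHECKSUM_FILE, "w", encoding="utf-8") as f:
                f.write(new_checksum)
            print("Uploaded suggestions.json to Hugging Face.")  # assumed message
        except Exception as e:
            print("Upload failed:", e)
    else:
        print("No changes in file — skipping upload.")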