axxam committed on
Commit 26cd20e · verified
1 Parent(s): 49d808e

Update upload_suggestions.py

Files changed (1)
  1. upload_suggestions.py +44 -45
upload_suggestions.py CHANGED
@@ -9,13 +9,14 @@ from huggingface_hub import HfApi, hf_hub_download
 # Settings
 HF_TOKEN = os.environ.get("HF_TOKEN")
 REPO_ID = "axxam/libretranslate-suggestions"
-DEST_PATH_IN_REPO = "suggestions.json"
-DEST_CSV_IN_REPO = "suggestions.csv"
+DEST_JSON_PATH_IN_REPO = "suggestions.json"
+DEST_CSV_PATH_IN_REPO = "suggestions.csv"
 REPO_TYPE = "dataset"
 
 JSON_OUTPUT_PATH = "/tmp/suggestions.json"
 CSV_OUTPUT_PATH = "/tmp/suggestions.csv"
-CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"
+CHECKSUM_FILE_JSON = "/tmp/.last_suggestions_json_checksum"
+CHECKSUM_FILE_CSV = "/tmp/.last_suggestions_csv_checksum"
 
 possible_paths = [
     "/app/db/suggestions.db",
@@ -56,12 +57,12 @@ def extract_suggestions(db_path):
         print(f"SQLite error: {e}")
     return suggestions
 
-def download_existing_suggestions():
+def download_existing_json():
     try:
         path = hf_hub_download(
             repo_id=REPO_ID,
             repo_type=REPO_TYPE,
-            filename=DEST_PATH_IN_REPO,
+            filename=DEST_JSON_PATH_IN_REPO,
             token=HF_TOKEN,
             local_dir="/tmp"
         )
@@ -90,36 +91,43 @@ def merge_with_existing(suggestions, existing_json_path):
 
     changed = False
     for s in suggestions:
-        if s["id"] not in existing:
+        s_clean = {
+            "source_text": s["source_text"],
+            "suggested_text": s["suggested_text"],
+            "source_lang": s["source_lang"],
+            "target_lang": s["target_lang"],
+        }
+
+        existing_entry = existing.get(s["id"])
+        if not existing_entry:
             changed = True
-            existing[s["id"]] = {
-                "source_text": s["source_text"],
-                "suggested_text": s["suggested_text"],
-                "source_lang": s["source_lang"],
-                "target_lang": s["target_lang"],
-                "timestamp": datetime.now().isoformat()
-            }
+            existing[s["id"]] = {**s_clean, "timestamp": datetime.now().isoformat()}
 
     if not changed:
         print("No new suggestions — skipping write/upload.")
         return None
 
-    # Create final list
+    # Write merged JSON
     final = []
     for id_, data in existing.items():
         final.append({**data, "id": id_})
 
-    # Write JSON
     with open(JSON_OUTPUT_PATH, "w", encoding="utf-8") as f:
         json.dump(final, f, indent=2, ensure_ascii=False)
 
-    # Write CSV
-    with open(CSV_OUTPUT_PATH, "w", encoding="utf-8", newline="") as f:
-        writer = csv.DictWriter(f, fieldnames=["id", "source_text", "suggested_text", "source_lang", "target_lang", "timestamp"])
-        writer.writeheader()
-        writer.writerows(final)
+    # Also write CSV
+    write_csv(final, CSV_OUTPUT_PATH)
+
+    return JSON_OUTPUT_PATH
 
-    return JSON_OUTPUT_PATH # Return one of them for checksum comparison
+def write_csv(suggestions, csv_path):
+    with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=[
+            "id", "source_text", "suggested_text", "source_lang", "target_lang", "timestamp"
+        ])
+        writer.writeheader()
+        for item in suggestions:
+            writer.writerow(item)
 
 def get_checksum(filepath):
     if not os.path.exists(filepath):
@@ -127,45 +135,35 @@ def get_checksum(filepath):
     with open(filepath, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
 
-def upload_if_updated(json_path, csv_path):
-    if not json_path or not os.path.exists(json_path):
+def upload_if_updated(filepath, dest_path, checksum_file):
+    if not filepath or not os.path.exists(filepath):
         return
 
-    new_checksum = get_checksum(json_path)
+    new_checksum = get_checksum(filepath)
    old_checksum = None
 
-    if os.path.exists(CHECKSUM_FILE):
-        with open(CHECKSUM_FILE, "r") as f:
+    if os.path.exists(checksum_file):
+        with open(checksum_file, "r") as f:
            old_checksum = f.read().strip()
 
     if new_checksum != old_checksum:
-        print("Uploading updated suggestions.json and suggestions.csv to Hugging Face...")
+        print(f"Uploading updated {os.path.basename(dest_path)} to Hugging Face...")
         try:
             api = HfApi()
-
-            api.upload_file(
-                path_or_fileobj=json_path,
-                path_in_repo=DEST_PATH_IN_REPO,
-                repo_id=REPO_ID,
-                repo_type=REPO_TYPE,
-                token=HF_TOKEN
-            )
             api.upload_file(
-                path_or_fileobj=csv_path,
-                path_in_repo=DEST_CSV_IN_REPO,
+                path_or_fileobj=filepath,
+                path_in_repo=dest_path,
                 repo_id=REPO_ID,
                 repo_type=REPO_TYPE,
                 token=HF_TOKEN
             )
-
-            with open(CHECKSUM_FILE, "w") as f:
+            with open(checksum_file, "w") as f:
                 f.write(new_checksum)
-
-            print(f"Upload successful at {datetime.now().isoformat()}")
+            print(f"Upload successful: {dest_path} at {datetime.now().isoformat()}")
        except Exception as e:
-            print("Upload failed:", e)
+            print(f"Upload failed for {dest_path}:", e)
    else:
-        print("No changes in file — skipping upload.")
+        print(f"No changes in {os.path.basename(dest_path)} — skipping upload.")
 
 def main():
     print(f"===== Application Startup at {datetime.now().isoformat()} =====")
@@ -183,11 +181,12 @@ def main():
         print("No suggestions found — skipping.")
         return
 
-    existing_path = download_existing_suggestions()
+    existing_path = download_existing_json()
     merged_json = merge_with_existing(suggestions, existing_path)
 
     if merged_json:
-        upload_if_updated(JSON_OUTPUT_PATH, CSV_OUTPUT_PATH)
+        upload_if_updated(JSON_OUTPUT_PATH, DEST_JSON_PATH_IN_REPO, CHECKSUM_FILE_JSON)
+        upload_if_updated(CSV_OUTPUT_PATH, DEST_CSV_PATH_IN_REPO, CHECKSUM_FILE_CSV)
 
 if __name__ == "__main__":
     main()
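
The change generalizes the upload step: instead of one hard-coded pair of uploads sharing a single checksum file, upload_if_updated(filepath, dest_path, checksum_file) now handles one artifact at a time, each tracked by its own checksum file, so an unchanged file is skipped independently. A minimal sketch of reusing the helper for a further artifact (the Parquet path, repo filename, and checksum file below are hypothetical, not part of this commit):

# Hypothetical extra artifact reusing the generic helper introduced in this commit.
# These constants do not exist in upload_suggestions.py; they only illustrate the
# per-file (local path, repo destination, checksum file) pattern.
PARQUET_OUTPUT_PATH = "/tmp/suggestions.parquet"
DEST_PARQUET_PATH_IN_REPO = "suggestions.parquet"
CHECKSUM_FILE_PARQUET = "/tmp/.last_suggestions_parquet_checksum"

upload_if_updated(PARQUET_OUTPUT_PATH, DEST_PARQUET_PATH_IN_REPO, CHECKSUM_FILE_PARQUET)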