axxam committed on
Commit
49d808e
·
verified ·
1 Parent(s): 8fcba69

Update upload_suggestions.py

Browse files
Files changed (1) hide show
  1. upload_suggestions.py +29 -36
upload_suggestions.py CHANGED
@@ -10,13 +10,13 @@ from huggingface_hub import HfApi, hf_hub_download
10
  HF_TOKEN = os.environ.get("HF_TOKEN")
11
  REPO_ID = "axxam/libretranslate-suggestions"
12
  DEST_PATH_IN_REPO = "suggestions.json"
13
- DEST_CSV_PATH_IN_REPO = "suggestions.csv"
14
  REPO_TYPE = "dataset"
 
15
  JSON_OUTPUT_PATH = "/tmp/suggestions.json"
16
  CSV_OUTPUT_PATH = "/tmp/suggestions.csv"
17
  CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"
18
 
19
- # Known or expected locations for the DB
20
  possible_paths = [
21
  "/app/db/suggestions.db",
22
  "/app/suggestions.db",
@@ -90,39 +90,36 @@ def merge_with_existing(suggestions, existing_json_path):
90
 
91
  changed = False
92
  for s in suggestions:
93
- s_clean = {
94
- "source_text": s["source_text"],
95
- "suggested_text": s["suggested_text"],
96
- "source_lang": s["source_lang"],
97
- "target_lang": s["target_lang"],
98
- }
99
-
100
- existing_entry = existing.get(s["id"])
101
- if not existing_entry:
102
  changed = True
103
- existing[s["id"]] = {**s_clean, "timestamp": datetime.now().isoformat()}
 
 
 
 
 
 
104
 
105
  if not changed:
106
  print("No new suggestions — skipping write/upload.")
107
  return None
108
 
109
- # Write merged JSON
110
  final = []
111
  for id_, data in existing.items():
112
  final.append({**data, "id": id_})
113
 
 
114
  with open(JSON_OUTPUT_PATH, "w", encoding="utf-8") as f:
115
  json.dump(final, f, indent=2, ensure_ascii=False)
116
 
117
- # Write merged CSV
118
- with open(CSV_OUTPUT_PATH, "w", encoding="utf-8", newline='') as csvfile:
119
- fieldnames = ["id", "source_text", "suggested_text", "source_lang", "target_lang", "timestamp"]
120
- writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
121
  writer.writeheader()
122
- for row in final:
123
- writer.writerow(row)
124
 
125
- return JSON_OUTPUT_PATH, CSV_OUTPUT_PATH
126
 
127
  def get_checksum(filepath):
128
  if not os.path.exists(filepath):
@@ -142,11 +139,10 @@ def upload_if_updated(json_path, csv_path):
142
  old_checksum = f.read().strip()
143
 
144
  if new_checksum != old_checksum:
145
- print("Uploading updated files to Hugging Face...")
146
  try:
147
  api = HfApi()
148
 
149
- # Upload JSON
150
  api.upload_file(
151
  path_or_fileobj=json_path,
152
  path_in_repo=DEST_PATH_IN_REPO,
@@ -154,16 +150,13 @@ def upload_if_updated(json_path, csv_path):
154
  repo_type=REPO_TYPE,
155
  token=HF_TOKEN
156
  )
157
-
158
- # Upload CSV
159
- if csv_path and os.path.exists(csv_path):
160
- api.upload_file(
161
- path_or_fileobj=csv_path,
162
- path_in_repo=DEST_CSV_PATH_IN_REPO,
163
- repo_id=REPO_ID,
164
- repo_type=REPO_TYPE,
165
- token=HF_TOKEN
166
- )
167
 
168
  with open(CHECKSUM_FILE, "w") as f:
169
  f.write(new_checksum)
@@ -191,10 +184,10 @@ def main():
191
  return
192
 
193
  existing_path = download_existing_suggestions()
194
- result = merge_with_existing(suggestions, existing_path)
195
- if result:
196
- merged_json, merged_csv = result
197
- upload_if_updated(merged_json, merged_csv)
198
 
199
  if __name__ == "__main__":
200
  main()
 
10
  HF_TOKEN = os.environ.get("HF_TOKEN")
11
  REPO_ID = "axxam/libretranslate-suggestions"
12
  DEST_PATH_IN_REPO = "suggestions.json"
13
+ DEST_CSV_IN_REPO = "suggestions.csv"
14
  REPO_TYPE = "dataset"
15
+
16
  JSON_OUTPUT_PATH = "/tmp/suggestions.json"
17
  CSV_OUTPUT_PATH = "/tmp/suggestions.csv"
18
  CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"
19
 
 
20
  possible_paths = [
21
  "/app/db/suggestions.db",
22
  "/app/suggestions.db",
 
90
 
91
  changed = False
92
  for s in suggestions:
93
+ if s["id"] not in existing:
 
 
 
 
 
 
 
 
94
  changed = True
95
+ existing[s["id"]] = {
96
+ "source_text": s["source_text"],
97
+ "suggested_text": s["suggested_text"],
98
+ "source_lang": s["source_lang"],
99
+ "target_lang": s["target_lang"],
100
+ "timestamp": datetime.now().isoformat()
101
+ }
102
 
103
  if not changed:
104
  print("No new suggestions — skipping write/upload.")
105
  return None
106
 
107
+ # Create final list
108
  final = []
109
  for id_, data in existing.items():
110
  final.append({**data, "id": id_})
111
 
112
+ # Write JSON
113
  with open(JSON_OUTPUT_PATH, "w", encoding="utf-8") as f:
114
  json.dump(final, f, indent=2, ensure_ascii=False)
115
 
116
+ # Write CSV
117
+ with open(CSV_OUTPUT_PATH, "w", encoding="utf-8", newline="") as f:
118
+ writer = csv.DictWriter(f, fieldnames=["id", "source_text", "suggested_text", "source_lang", "target_lang", "timestamp"])
 
119
  writer.writeheader()
120
+ writer.writerows(final)
 
121
 
122
+ return JSON_OUTPUT_PATH # Return one of them for checksum comparison
123
 
124
  def get_checksum(filepath):
125
  if not os.path.exists(filepath):
 
139
  old_checksum = f.read().strip()
140
 
141
  if new_checksum != old_checksum:
142
+ print("Uploading updated suggestions.json and suggestions.csv to Hugging Face...")
143
  try:
144
  api = HfApi()
145
 
 
146
  api.upload_file(
147
  path_or_fileobj=json_path,
148
  path_in_repo=DEST_PATH_IN_REPO,
 
150
  repo_type=REPO_TYPE,
151
  token=HF_TOKEN
152
  )
153
+ api.upload_file(
154
+ path_or_fileobj=csv_path,
155
+ path_in_repo=DEST_CSV_IN_REPO,
156
+ repo_id=REPO_ID,
157
+ repo_type=REPO_TYPE,
158
+ token=HF_TOKEN
159
+ )
 
 
 
160
 
161
  with open(CHECKSUM_FILE, "w") as f:
162
  f.write(new_checksum)
 
184
  return
185
 
186
  existing_path = download_existing_suggestions()
187
+ merged_json = merge_with_existing(suggestions, existing_path)
188
+
189
+ if merged_json:
190
+ upload_if_updated(JSON_OUTPUT_PATH, CSV_OUTPUT_PATH)
191
 
192
  if __name__ == "__main__":
193
  main()