axxam committed on
Commit
8fcba69
·
verified ·
1 Parent(s): 368e482

Update upload_suggestions.py

Browse files
Files changed (1) hide show
  1. upload_suggestions.py +33 -6
upload_suggestions.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import sqlite3
3
  import json
 
4
  import hashlib
5
  from datetime import datetime
6
  from huggingface_hub import HfApi, hf_hub_download
@@ -9,8 +10,10 @@ from huggingface_hub import HfApi, hf_hub_download
9
  HF_TOKEN = os.environ.get("HF_TOKEN")
10
  REPO_ID = "axxam/libretranslate-suggestions"
11
  DEST_PATH_IN_REPO = "suggestions.json"
 
12
  REPO_TYPE = "dataset"
13
  JSON_OUTPUT_PATH = "/tmp/suggestions.json"
 
14
  CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"
15
 
16
  # Known or expected locations for the DB
@@ -103,7 +106,7 @@ def merge_with_existing(suggestions, existing_json_path):
103
  print("No new suggestions — skipping write/upload.")
104
  return None
105
 
106
- # Write merged file
107
  final = []
108
  for id_, data in existing.items():
109
  final.append({**data, "id": id_})
@@ -111,7 +114,15 @@ def merge_with_existing(suggestions, existing_json_path):
111
  with open(JSON_OUTPUT_PATH, "w", encoding="utf-8") as f:
112
  json.dump(final, f, indent=2, ensure_ascii=False)
113
 
114
- return JSON_OUTPUT_PATH
 
 
 
 
 
 
 
 
115
 
116
  def get_checksum(filepath):
117
  if not os.path.exists(filepath):
@@ -119,7 +130,7 @@ def get_checksum(filepath):
119
  with open(filepath, "rb") as f:
120
  return hashlib.md5(f.read()).hexdigest()
121
 
122
- def upload_if_updated(json_path):
123
  if not json_path or not os.path.exists(json_path):
124
  return
125
 
@@ -131,9 +142,11 @@ def upload_if_updated(json_path):
131
  old_checksum = f.read().strip()
132
 
133
  if new_checksum != old_checksum:
134
- print("Uploading updated suggestions.json to Hugging Face...")
135
  try:
136
  api = HfApi()
 
 
137
  api.upload_file(
138
  path_or_fileobj=json_path,
139
  path_in_repo=DEST_PATH_IN_REPO,
@@ -141,8 +154,20 @@ def upload_if_updated(json_path):
141
  repo_type=REPO_TYPE,
142
  token=HF_TOKEN
143
  )
 
 
 
 
 
 
 
 
 
 
 
144
  with open(CHECKSUM_FILE, "w") as f:
145
  f.write(new_checksum)
 
146
  print(f"Upload successful at {datetime.now().isoformat()}")
147
  except Exception as e:
148
  print("Upload failed:", e)
@@ -166,8 +191,10 @@ def main():
166
  return
167
 
168
  existing_path = download_existing_suggestions()
169
- merged_json = merge_with_existing(suggestions, existing_path)
170
- upload_if_updated(merged_json)
 
 
171
 
172
  if __name__ == "__main__":
173
  main()
 
1
  import os
2
  import sqlite3
3
  import json
4
+ import csv
5
  import hashlib
6
  from datetime import datetime
7
  from huggingface_hub import HfApi, hf_hub_download
 
10
  HF_TOKEN = os.environ.get("HF_TOKEN")
11
  REPO_ID = "axxam/libretranslate-suggestions"
12
  DEST_PATH_IN_REPO = "suggestions.json"
13
+ DEST_CSV_PATH_IN_REPO = "suggestions.csv"
14
  REPO_TYPE = "dataset"
15
  JSON_OUTPUT_PATH = "/tmp/suggestions.json"
16
+ CSV_OUTPUT_PATH = "/tmp/suggestions.csv"
17
  CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"
18
 
19
  # Known or expected locations for the DB
 
106
  print("No new suggestions — skipping write/upload.")
107
  return None
108
 
109
+ # Write merged JSON
110
  final = []
111
  for id_, data in existing.items():
112
  final.append({**data, "id": id_})
 
114
  with open(JSON_OUTPUT_PATH, "w", encoding="utf-8") as f:
115
  json.dump(final, f, indent=2, ensure_ascii=False)
116
 
117
+ # Write merged CSV
118
+ with open(CSV_OUTPUT_PATH, "w", encoding="utf-8", newline='') as csvfile:
119
+ fieldnames = ["id", "source_text", "suggested_text", "source_lang", "target_lang", "timestamp"]
120
+ writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
121
+ writer.writeheader()
122
+ for row in final:
123
+ writer.writerow(row)
124
+
125
+ return JSON_OUTPUT_PATH, CSV_OUTPUT_PATH
126
 
127
  def get_checksum(filepath):
128
  if not os.path.exists(filepath):
 
130
  with open(filepath, "rb") as f:
131
  return hashlib.md5(f.read()).hexdigest()
132
 
133
+ def upload_if_updated(json_path, csv_path):
134
  if not json_path or not os.path.exists(json_path):
135
  return
136
 
 
142
  old_checksum = f.read().strip()
143
 
144
  if new_checksum != old_checksum:
145
+ print("Uploading updated files to Hugging Face...")
146
  try:
147
  api = HfApi()
148
+
149
+ # Upload JSON
150
  api.upload_file(
151
  path_or_fileobj=json_path,
152
  path_in_repo=DEST_PATH_IN_REPO,
 
154
  repo_type=REPO_TYPE,
155
  token=HF_TOKEN
156
  )
157
+
158
+ # Upload CSV
159
+ if csv_path and os.path.exists(csv_path):
160
+ api.upload_file(
161
+ path_or_fileobj=csv_path,
162
+ path_in_repo=DEST_CSV_PATH_IN_REPO,
163
+ repo_id=REPO_ID,
164
+ repo_type=REPO_TYPE,
165
+ token=HF_TOKEN
166
+ )
167
+
168
  with open(CHECKSUM_FILE, "w") as f:
169
  f.write(new_checksum)
170
+
171
  print(f"Upload successful at {datetime.now().isoformat()}")
172
  except Exception as e:
173
  print("Upload failed:", e)
 
191
  return
192
 
193
  existing_path = download_existing_suggestions()
194
+ result = merge_with_existing(suggestions, existing_path)
195
+ if result:
196
+ merged_json, merged_csv = result
197
+ upload_if_updated(merged_json, merged_csv)
198
 
199
  if __name__ == "__main__":
200
  main()