axxam committed on
Commit
63363bc
·
verified ·
1 Parent(s): 4c3f5ca

Update upload_suggestions.py

Browse files
Files changed (1) hide show
  1. upload_suggestions.py +24 -15
upload_suggestions.py CHANGED
@@ -7,11 +7,11 @@ from huggingface_hub import HfApi
7
 
8
  # Settings
9
  HF_TOKEN = os.environ.get("HF_TOKEN")
10
- REPO_ID = "axxam/libretranslate-suggestions" # Dataset repo on HF
11
  DEST_PATH_IN_REPO = "suggestions.json"
12
  REPO_TYPE = "dataset"
13
- JSON_OUTPUT_PATH = "/tmp/suggestions.json"
14
- CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"
15
 
16
  # Known or expected locations for the DB
17
  possible_paths = [
@@ -22,12 +22,11 @@ possible_paths = [
22
  ]
23
 
24
  def find_db():
25
- print(f"Running in CWD: {os.getcwd()}")
26
  for path in possible_paths:
27
  if os.path.exists(path):
28
  print(f"Found suggestions.db at {path}")
29
  return path
30
- print("suggestions.db not found in any known path.")
31
  return None
32
 
33
  def extract_suggestions(db_path):
@@ -61,15 +60,24 @@ def merge_with_existing(suggestions, json_path):
61
  for item in json.load(f):
62
  existing[item["id"]] = item
63
  except Exception as e:
64
- print(f"Failed to read existing JSON: {e}")
65
 
 
 
66
  for s in suggestions:
67
- existing[s["id"]] = s
 
 
68
 
 
 
 
 
 
69
  with open(json_path, "w", encoding="utf-8") as f:
70
  json.dump(list(existing.values()), f, indent=2, ensure_ascii=False)
71
-
72
- return json_path
73
 
74
  def get_checksum(filepath):
75
  if not os.path.exists(filepath):
@@ -86,7 +94,7 @@ def upload_if_updated(json_path):
86
  old_checksum = f.read().strip()
87
 
88
  if new_checksum != old_checksum:
89
- print("Uploading updated suggestions.json to Hugging Face...")
90
  try:
91
  api = HfApi()
92
  api.upload_file(
@@ -98,11 +106,11 @@ def upload_if_updated(json_path):
98
  )
99
  with open(CHECKSUM_FILE, "w") as f:
100
  f.write(new_checksum)
101
- print(f"Upload successful at {datetime.now().isoformat()}")
102
  except Exception as e:
103
  print("Upload failed:", e)
104
  else:
105
- print("No changes in suggestions.json — skipping upload.")
106
 
107
  def main():
108
  if not HF_TOKEN:
@@ -115,11 +123,12 @@ def main():
115
 
116
  suggestions = extract_suggestions(db_path)
117
  if not suggestions:
118
- print("No suggestions found — skipping.")
119
  return
120
 
121
- json_path = merge_with_existing(suggestions, JSON_OUTPUT_PATH)
122
- upload_if_updated(json_path)
 
123
 
124
  if __name__ == "__main__":
125
  main()
 
7
 
8
  # Settings
9
  HF_TOKEN = os.environ.get("HF_TOKEN")
10
+ REPO_ID = "axxam/libretranslate-suggestions"
11
  DEST_PATH_IN_REPO = "suggestions.json"
12
  REPO_TYPE = "dataset"
13
+ JSON_OUTPUT_PATH = "/app/suggestions/suggestions.json"
14
+ CHECKSUM_FILE = "/app/suggestions/.last_suggestions_checksum"
15
 
16
  # Known or expected locations for the DB
17
  possible_paths = [
 
22
  ]
23
 
24
  def find_db():
 
25
  for path in possible_paths:
26
  if os.path.exists(path):
27
  print(f"Found suggestions.db at {path}")
28
  return path
29
+ print("suggestions.db not found.")
30
  return None
31
 
32
  def extract_suggestions(db_path):
 
60
  for item in json.load(f):
61
  existing[item["id"]] = item
62
  except Exception as e:
63
+ print(f"Error loading existing JSON: {e}")
64
 
65
+ # Add only new suggestions
66
+ new_data_added = False
67
  for s in suggestions:
68
+ if s["id"] not in existing:
69
+ existing[s["id"]] = s
70
+ new_data_added = True
71
 
72
+ if not new_data_added:
73
+ print("No new suggestions — nothing to update.")
74
+ return False
75
+
76
+ # Only write if new data was added
77
  with open(json_path, "w", encoding="utf-8") as f:
78
  json.dump(list(existing.values()), f, indent=2, ensure_ascii=False)
79
+ print(f"{len(suggestions)} suggestions processed. JSON updated.")
80
+ return True
81
 
82
  def get_checksum(filepath):
83
  if not os.path.exists(filepath):
 
94
  old_checksum = f.read().strip()
95
 
96
  if new_checksum != old_checksum:
97
+ print("New checksum detected — uploading to Hugging Face.")
98
  try:
99
  api = HfApi()
100
  api.upload_file(
 
106
  )
107
  with open(CHECKSUM_FILE, "w") as f:
108
  f.write(new_checksum)
109
+ print(f"Upload completed at {datetime.now().isoformat()}")
110
  except Exception as e:
111
  print("Upload failed:", e)
112
  else:
113
+ print("Checksum unchanged — skipping upload.")
114
 
115
  def main():
116
  if not HF_TOKEN:
 
123
 
124
  suggestions = extract_suggestions(db_path)
125
  if not suggestions:
126
+ print("No suggestions found.")
127
  return
128
 
129
+ updated = merge_with_existing(suggestions, JSON_OUTPUT_PATH)
130
+ if updated:
131
+ upload_if_updated(JSON_OUTPUT_PATH)
132
 
133
  if __name__ == "__main__":
134
  main()