axxam committed on
Commit
b43e601
·
verified ·
1 Parent(s): dff7871

Update upload_suggestions.py

Browse files
Files changed (1) hide show
  1. upload_suggestions.py +91 -49
upload_suggestions.py CHANGED
@@ -1,80 +1,122 @@
1
  import os
 
 
2
  import hashlib
3
- import time
4
  from huggingface_hub import HfApi
5
 
6
- REPO_ID = "axxam/libretranslate-suggestions"
7
- DEST_PATH_IN_REPO = "suggestions.db"
 
 
8
  REPO_TYPE = "dataset"
9
- HF_TOKEN = os.getenv("HF_TOKEN")
10
  CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"
11
 
12
- def get_suggestions_db_path():
13
- cwd = os.getcwd()
14
- print(f"Running in CWD: {cwd}")
15
-
16
- possible_paths = [
17
- os.path.join(cwd, "db", "suggestions.db"),
18
- "/root/.local/share/db/suggestions.db",
19
- "/home/libretranslate/.local/share/db/suggestions.db",
20
- "/app/.local/share/db/suggestions.db"
21
- ]
22
 
 
 
23
  for path in possible_paths:
24
  if os.path.exists(path):
25
  print(f"Found suggestions.db at {path}")
26
  return path
27
-
28
  print("suggestions.db not found in any known path.")
29
  return None
30
 
31
- def file_checksum(path):
32
- h = hashlib.sha256()
33
- with open(path, "rb") as f:
34
- for chunk in iter(lambda: f.read(4096), b""):
35
- h.update(chunk)
36
- return h.hexdigest()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- def load_last_checksum():
39
  if os.path.exists(CHECKSUM_FILE):
40
  with open(CHECKSUM_FILE, "r") as f:
41
- return f.read().strip()
42
- return None
43
 
44
- def save_checksum(checksum):
45
- with open(CHECKSUM_FILE, "w") as f:
46
- f.write(checksum)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- def upload_if_updated():
49
  if not HF_TOKEN:
50
  print("HF_TOKEN not set — skipping upload.")
51
  return
52
 
53
- db_path = get_suggestions_db_path()
54
  if not db_path:
55
  return
56
 
57
- current_checksum = file_checksum(db_path)
58
- last_checksum = load_last_checksum()
59
-
60
- if current_checksum == last_checksum:
61
- print("suggestions.db unchanged — skipping upload.")
62
  return
63
 
64
- print("Uploading updated suggestions.db to Hugging Face...")
65
- try:
66
- api = HfApi()
67
- api.upload_file(
68
- path_or_fileobj=db_path,
69
- path_in_repo=DEST_PATH_IN_REPO,
70
- repo_id=REPO_ID,
71
- repo_type=REPO_TYPE,
72
- token=HF_TOKEN
73
- )
74
- save_checksum(current_checksum)
75
- print(f"Upload successful at {time.strftime('%Y-%m-%d %H:%M:%S')}")
76
- except Exception as e:
77
- print(f"Upload failed: {e}")
78
 
79
  if __name__ == "__main__":
80
- upload_if_updated()
 
1
  import os
2
+ import sqlite3
3
+ import json
4
  import hashlib
5
+ from datetime import datetime
6
  from huggingface_hub import HfApi
7
 
8
# Hugging Face credentials and upload target.
HF_TOKEN = os.getenv("HF_TOKEN")
REPO_ID = "axxam/libretranslate-suggestions"  # Dataset repo
REPO_TYPE = "dataset"
DEST_PATH_IN_REPO = "suggestions.json"

# Local scratch files: the exported JSON and the checksum of the last upload.
JSON_OUTPUT_PATH = "/tmp/suggestions.json"
CHECKSUM_FILE = "/tmp/.last_suggestions_checksum"

# Candidate locations of the suggestions database, probed in this order.
possible_paths = [
    "/app/db/suggestions.db",
    "/app/suggestions.db",
    "/root/.local/share/db/suggestions.db",
    "/home/libretranslate/.local/share/db/suggestions.db",
]
 
 
 
23
 
24
def find_db():
    """Return the first entry of ``possible_paths`` that exists on disk.

    Prints the current working directory, then either the location that
    was found or a not-found message. Returns ``None`` when no candidate
    path exists.
    """
    print(f"Running in CWD: {os.getcwd()}")
    located = next(
        (candidate for candidate in possible_paths if os.path.exists(candidate)),
        None,
    )
    if located is None:
        print("suggestions.db not found in any known path.")
        return None
    print(f"Found suggestions.db at {located}")
    return located
32
 
33
def extract_suggestions(db_path):
    """Read every row of the ``suggestions`` table into a list of dicts.

    Args:
        db_path: Filesystem path of the SQLite database to read.

    Returns:
        A list of dicts with the keys ``id``, ``source_text``,
        ``suggested_text`` and ``timestamp``, one per table row. An empty
        list is returned when SQLite reports an error (the error is
        printed, not raised).
    """
    suggestions = []
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        rows = conn.execute(
            "SELECT id, source_text, suggested_text, timestamp FROM suggestions"
        ).fetchall()
    except sqlite3.Error as e:
        print(f"SQLite error: {e}")
        return suggestions
    finally:
        # Close the connection on the error path too; previously a failing
        # query skipped close() and leaked the connection.
        if conn is not None:
            conn.close()

    for row_id, source_text, suggested_text, timestamp in rows:
        suggestions.append({
            "id": row_id,
            "source_text": source_text,
            "suggested_text": suggested_text,
            # Rows without a stored timestamp get the extraction time, so
            # their exported value differs from one run to the next.
            "timestamp": timestamp or datetime.now().isoformat(),
        })
    return suggestions
52
+
53
def merge_with_existing(suggestions, json_path):
    """Merge *suggestions* into the JSON file at *json_path*, keyed by ``id``.

    Entries already stored in the file are loaded first; any entry in
    *suggestions* with the same ``id`` replaces the stored one, and new ids
    are appended. A failure while reading the existing file is printed and
    whatever was loaded up to that point is kept. The merged list is
    written back to *json_path*, which is also the return value.
    """
    merged = {}
    if os.path.exists(json_path):
        try:
            with open(json_path, "r", encoding="utf-8") as handle:
                previous = json.load(handle)
            for entry in previous:
                merged[entry["id"]] = entry
        except Exception as e:
            print(f"Failed to read existing JSON: {e}")

    # Fresh rows win over stored ones that share an id.
    merged.update((entry["id"], entry) for entry in suggestions)

    payload = list(merged.values())
    with open(json_path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2, ensure_ascii=False)

    return json_path
70
+
71
def get_checksum(filepath):
    """Return the hexadecimal MD5 digest of the file at *filepath*.

    Returns ``None`` when no file exists at that path.
    """
    if os.path.exists(filepath):
        digest = hashlib.md5()
        with open(filepath, "rb") as stream:
            # Feed the hash in fixed-size pieces; the digest is the same as
            # hashing the whole content at once.
            for piece in iter(lambda: stream.read(65536), b""):
                digest.update(piece)
        return digest.hexdigest()
    return None
76
+
77
def upload_if_updated(json_path):
    """Upload *json_path* to the dataset repo when its checksum has changed.

    The file's current checksum is compared with the one stored in
    ``CHECKSUM_FILE``. When they match, a message is printed and nothing is
    uploaded. Otherwise the file is uploaded and, on success, the new
    checksum is written to ``CHECKSUM_FILE``. Any exception raised during
    the upload or the checksum write is printed rather than propagated.
    """
    current = get_checksum(json_path)

    previous = None
    if os.path.exists(CHECKSUM_FILE):
        with open(CHECKSUM_FILE, "r") as marker:
            previous = marker.read().strip()

    if current == previous:
        print("No changes in suggestions.json — skipping upload.")
        return

    print("Uploading updated suggestions.json to Hugging Face...")
    try:
        HfApi().upload_file(
            path_or_fileobj=json_path,
            path_in_repo=DEST_PATH_IN_REPO,
            repo_id=REPO_ID,
            repo_type=REPO_TYPE,
            token=HF_TOKEN,
        )
        # Record the checksum only after the upload went through, so a
        # failed upload is retried on the next run.
        with open(CHECKSUM_FILE, "w") as marker:
            marker.write(current)
        print(f"Upload successful at {datetime.now().isoformat()}")
    except Exception as err:
        print("Upload failed:", err)
103
 
104
def main():
    """Export the suggestions database to JSON and upload it if it changed.

    Stops early, without uploading, when ``HF_TOKEN`` is unset, when no
    database file is found, or when the database yields no suggestions.
    """
    if not HF_TOKEN:
        print("HF_TOKEN not set — skipping upload.")
        return

    database = find_db()
    if not database:
        return

    rows = extract_suggestions(database)
    if not rows:
        print("No suggestions found — skipping.")
        return

    upload_if_updated(merge_with_existing(rows, JSON_OUTPUT_PATH))
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
  if __name__ == "__main__":
122
+ main()