Update upload_suggestions.py
Browse files
- upload_suggestions.py +10 -7
upload_suggestions.py
CHANGED
@@ -7,7 +7,7 @@ from huggingface_hub import HfApi
|
|
7 |
|
8 |
# Settings
|
9 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
10 |
-
REPO_ID = "axxam/libretranslate-suggestions" # Dataset repo
|
11 |
DEST_PATH_IN_REPO = "suggestions.json"
|
12 |
REPO_TYPE = "dataset"
|
13 |
JSON_OUTPUT_PATH = "/tmp/suggestions.json"
|
@@ -35,16 +35,19 @@ def extract_suggestions(db_path):
|
|
35 |
try:
|
36 |
conn = sqlite3.connect(db_path)
|
37 |
cursor = conn.cursor()
|
38 |
-
cursor.execute("SELECT
|
39 |
rows = cursor.fetchall()
|
40 |
conn.close()
|
41 |
|
42 |
for row in rows:
|
|
|
43 |
suggestions.append({
|
44 |
-
"id":
|
45 |
-
"source_text": row[
|
46 |
-
"suggested_text": row[
|
47 |
-
"
|
|
|
|
|
48 |
})
|
49 |
except sqlite3.Error as e:
|
50 |
print(f"SQLite error: {e}")
|
@@ -65,7 +68,7 @@ def merge_with_existing(suggestions, json_path):
|
|
65 |
|
66 |
with open(json_path, "w", encoding="utf-8") as f:
|
67 |
json.dump(list(existing.values()), f, indent=2, ensure_ascii=False)
|
68 |
-
|
69 |
return json_path
|
70 |
|
71 |
def get_checksum(filepath):
|
|
|
7 |
|
8 |
# Settings
|
9 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
10 |
+
REPO_ID = "axxam/libretranslate-suggestions" # Dataset repo on HF
|
11 |
DEST_PATH_IN_REPO = "suggestions.json"
|
12 |
REPO_TYPE = "dataset"
|
13 |
JSON_OUTPUT_PATH = "/tmp/suggestions.json"
|
|
|
35 |
try:
|
36 |
conn = sqlite3.connect(db_path)
|
37 |
cursor = conn.cursor()
|
38 |
+
cursor.execute("SELECT q, s, source, target FROM suggestions")
|
39 |
rows = cursor.fetchall()
|
40 |
conn.close()
|
41 |
|
42 |
for row in rows:
|
43 |
+
unique_id = hashlib.md5((row[0] + row[1] + row[2] + row[3]).encode()).hexdigest()
|
44 |
suggestions.append({
|
45 |
+
"id": unique_id,
|
46 |
+
"source_text": row[0],
|
47 |
+
"suggested_text": row[1],
|
48 |
+
"source_lang": row[2],
|
49 |
+
"target_lang": row[3],
|
50 |
+
"timestamp": datetime.now().isoformat()
|
51 |
})
|
52 |
except sqlite3.Error as e:
|
53 |
print(f"SQLite error: {e}")
|
|
|
68 |
|
69 |
with open(json_path, "w", encoding="utf-8") as f:
|
70 |
json.dump(list(existing.values()), f, indent=2, ensure_ascii=False)
|
71 |
+
|
72 |
return json_path
|
73 |
|
74 |
def get_checksum(filepath):
|