storage version added
Browse files- modules/storage.py +48 -37
modules/storage.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
# modules/storage.py
|
|
|
2 |
import os
|
3 |
import urllib.parse
|
4 |
import tempfile
|
@@ -106,10 +107,17 @@ def upload_files_to_repo(
|
|
106 |
valid_files.append(f)
|
107 |
|
108 |
if not valid_files:
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
# Create a temporary directory; copy valid files directly into it.
|
112 |
-
with tempfile.TemporaryDirectory() as temp_dir:
|
113 |
for file_path in valid_files:
|
114 |
filename = os.path.basename(file_path)
|
115 |
dest_path = os.path.join(temp_dir, filename)
|
@@ -135,22 +143,32 @@ def upload_files_to_repo(
|
|
135 |
|
136 |
# If permalink creation is requested and exactly 3 valid files are provided,
|
137 |
# try to generate a permalink using generate_permalink().
|
138 |
-
if create_permalink
|
139 |
permalink = generate_permalink(valid_files, base_url_external, permalink_viewer_url)
|
140 |
if permalink:
|
141 |
-
|
142 |
full_url=permalink,
|
143 |
-
repo_id=HF_REPO_ID,
|
144 |
-
json_file=SHORTENER_JSON_FILE
|
145 |
)
|
146 |
-
|
147 |
-
|
|
|
|
|
|
|
|
|
148 |
return {
|
149 |
"response": response,
|
150 |
"permalink": permalink,
|
151 |
"short_permalink": permalink_short
|
152 |
}
|
153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
# Otherwise, return individual tuples for each file.
|
155 |
return [(response, link) for link in individual_links]
|
156 |
|
@@ -190,7 +208,11 @@ def _upload_json_to_repo(data, repo_id, json_file_name, repo_type="dataset"):
|
|
190 |
try:
|
191 |
login(token=HF_API_TOKEN)
|
192 |
api = HfApi()
|
193 |
-
|
|
|
|
|
|
|
|
|
194 |
json.dump(data, tmp_file, indent=2)
|
195 |
tmp_file_path = tmp_file.name
|
196 |
|
@@ -229,7 +251,7 @@ def _find_url_in_json(data, short_url=None, full_url=None):
|
|
229 |
|
230 |
def _add_url_to_json(data, short_url, full_url):
|
231 |
"""Adds a new short_url/full_url pair to the data. Returns updated data."""
|
232 |
-
if data is None:
|
233 |
data = []
|
234 |
data.append({"short_url": short_url, "full_url": full_url})
|
235 |
return data
|
@@ -259,10 +281,7 @@ def gen_full_url(short_url=None, full_url=None, repo_id=None, repo_type="dataset
|
|
259 |
# Case 1: Only short_url provided (lookup full_url)
|
260 |
if short_url and not full_url:
|
261 |
found_full_url = _find_url_in_json(url_data, short_url=short_url)
|
262 |
-
if found_full_url
|
263 |
-
return "success_retrieved_full", found_full_url
|
264 |
-
else:
|
265 |
-
return "not_found_short", None
|
266 |
|
267 |
# Case 2: Only full_url provided (lookup or create short_url)
|
268 |
if full_url and not short_url:
|
@@ -272,9 +291,6 @@ def gen_full_url(short_url=None, full_url=None, repo_id=None, repo_type="dataset
|
|
272 |
else:
|
273 |
# Create new short_url
|
274 |
new_short_id = _generate_short_id()
|
275 |
-
# Construct the short URL using the permalink_viewer_url and the new_short_id as a query parameter or path
|
276 |
-
# For this example, let's assume short URLs are like: https://permalink_viewer_url/?id=new_short_id
|
277 |
-
|
278 |
url_data = _add_url_to_json(url_data, new_short_id, full_url)
|
279 |
if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
|
280 |
return "created_short", new_short_id
|
@@ -286,25 +302,20 @@ def gen_full_url(short_url=None, full_url=None, repo_id=None, repo_type="dataset
|
|
286 |
found_full_for_short = _find_url_in_json(url_data, short_url=short_url)
|
287 |
found_short_for_full = _find_url_in_json(url_data, full_url=full_url)
|
288 |
|
289 |
-
if found_full_for_short
|
290 |
return "exists_match", short_url
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
return "
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
url_data = _add_url_to_json(url_data, short_url, full_url)
|
305 |
-
if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
|
306 |
-
return "created_specific_pair", short_url
|
307 |
-
else:
|
308 |
-
return "error_upload", None
|
309 |
|
310 |
return "error_unhandled_case", None # Should not be reached
|
|
|
1 |
# modules/storage.py
|
2 |
+
__version__ = "0.1.0" # Added version
|
3 |
import os
|
4 |
import urllib.parse
|
5 |
import tempfile
|
|
|
107 |
valid_files.append(f)
|
108 |
|
109 |
if not valid_files:
|
110 |
+
# Return a dictionary with None values for permalinks if create_permalink was True
|
111 |
+
if create_permalink:
|
112 |
+
return {
|
113 |
+
"response": "No valid files to upload.",
|
114 |
+
"permalink": None,
|
115 |
+
"short_permalink": None
|
116 |
+
}
|
117 |
+
return []
|
118 |
|
119 |
# Create a temporary directory; copy valid files directly into it.
|
120 |
+
with tempfile.TemporaryDirectory(dir=os.getenv("TMPDIR", "/tmp")) as temp_dir:
|
121 |
for file_path in valid_files:
|
122 |
filename = os.path.basename(file_path)
|
123 |
dest_path = os.path.join(temp_dir, filename)
|
|
|
143 |
|
144 |
# If permalink creation is requested and exactly 3 valid files are provided,
|
145 |
# try to generate a permalink using generate_permalink().
|
146 |
+
if create_permalink: # No need to check len(valid_files) == 3 here, generate_permalink will handle it
|
147 |
permalink = generate_permalink(valid_files, base_url_external, permalink_viewer_url)
|
148 |
if permalink:
|
149 |
+
status, short_id = gen_full_url(
|
150 |
full_url=permalink,
|
151 |
+
repo_id=HF_REPO_ID, # This comes from constants
|
152 |
+
json_file=SHORTENER_JSON_FILE # This comes from constants
|
153 |
)
|
154 |
+
if status in ["created_short", "success_retrieved_short", "exists_match"]:
|
155 |
+
permalink_short = f"https://{permalink_viewer_url}/?sid={short_id}"
|
156 |
+
else: # Shortening failed or conflict not resolved to a usable short_id
|
157 |
+
permalink_short = None
|
158 |
+
print(f"URL shortening status: {status} for {permalink}")
|
159 |
+
|
160 |
return {
|
161 |
"response": response,
|
162 |
"permalink": permalink,
|
163 |
"short_permalink": permalink_short
|
164 |
}
|
165 |
+
else: # generate_permalink returned None (criteria not met)
|
166 |
+
return {
|
167 |
+
"response": response, # Still return upload response
|
168 |
+
"permalink": None,
|
169 |
+
"short_permalink": None
|
170 |
+
}
|
171 |
+
|
172 |
# Otherwise, return individual tuples for each file.
|
173 |
return [(response, link) for link in individual_links]
|
174 |
|
|
|
208 |
try:
|
209 |
login(token=HF_API_TOKEN)
|
210 |
api = HfApi()
|
211 |
+
# Use a temporary directory specified by TMPDIR or default to system temp
|
212 |
+
temp_dir_for_json = os.getenv("TMPDIR", tempfile.gettempdir())
|
213 |
+
os.makedirs(temp_dir_for_json, exist_ok=True)
|
214 |
+
|
215 |
+
with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json", dir=temp_dir_for_json) as tmp_file:
|
216 |
json.dump(data, tmp_file, indent=2)
|
217 |
tmp_file_path = tmp_file.name
|
218 |
|
|
|
251 |
|
252 |
def _add_url_to_json(data, short_url, full_url):
|
253 |
"""Adds a new short_url/full_url pair to the data. Returns updated data."""
|
254 |
+
if data is None:
|
255 |
data = []
|
256 |
data.append({"short_url": short_url, "full_url": full_url})
|
257 |
return data
|
|
|
281 |
# Case 1: Only short_url provided (lookup full_url)
|
282 |
if short_url and not full_url:
|
283 |
found_full_url = _find_url_in_json(url_data, short_url=short_url)
|
284 |
+
return ("success_retrieved_full", found_full_url) if found_full_url else ("not_found_short", None)
|
|
|
|
|
|
|
285 |
|
286 |
# Case 2: Only full_url provided (lookup or create short_url)
|
287 |
if full_url and not short_url:
|
|
|
291 |
else:
|
292 |
# Create new short_url
|
293 |
new_short_id = _generate_short_id()
|
|
|
|
|
|
|
294 |
url_data = _add_url_to_json(url_data, new_short_id, full_url)
|
295 |
if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
|
296 |
return "created_short", new_short_id
|
|
|
302 |
found_full_for_short = _find_url_in_json(url_data, short_url=short_url)
|
303 |
found_short_for_full = _find_url_in_json(url_data, full_url=full_url)
|
304 |
|
305 |
+
if found_full_for_short == full_url:
|
306 |
return "exists_match", short_url
|
307 |
+
if found_full_for_short is not None and found_full_for_short != full_url:
|
308 |
+
return "error_conflict_short_exists_different_full", short_url
|
309 |
+
if found_short_for_full is not None and found_short_for_full != short_url:
|
310 |
+
return "error_conflict_full_exists_different_short", found_short_for_full
|
311 |
+
|
312 |
+
# If short_url is provided and not found, or full_url is provided and not found,
|
313 |
+
# or neither is found, then create a new entry with the provided short_url and full_url.
|
314 |
+
# This effectively allows specifying a custom short_url if it's not already taken.
|
315 |
+
url_data = _add_url_to_json(url_data, short_url, full_url)
|
316 |
+
if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
|
317 |
+
return "created_specific_pair", short_url
|
318 |
+
else:
|
319 |
+
return "error_upload", None
|
|
|
|
|
|
|
|
|
|
|
320 |
|
321 |
return "error_unhandled_case", None # Should not be reached
|