Surn committed on
Commit
a1d49fc
·
1 Parent(s): 027506b

storage version added

Browse files
Files changed (1) hide show
  1. modules/storage.py +48 -37
modules/storage.py CHANGED
@@ -1,4 +1,5 @@
1
  # modules/storage.py
 
2
  import os
3
  import urllib.parse
4
  import tempfile
@@ -106,10 +107,17 @@ def upload_files_to_repo(
106
  valid_files.append(f)
107
 
108
  if not valid_files:
109
- return [] # or raise an exception
 
 
 
 
 
 
 
110
 
111
  # Create a temporary directory; copy valid files directly into it.
112
- with tempfile.TemporaryDirectory() as temp_dir:
113
  for file_path in valid_files:
114
  filename = os.path.basename(file_path)
115
  dest_path = os.path.join(temp_dir, filename)
@@ -135,22 +143,32 @@ def upload_files_to_repo(
135
 
136
  # If permalink creation is requested and exactly 3 valid files are provided,
137
  # try to generate a permalink using generate_permalink().
138
- if create_permalink and len(valid_files) == 3:
139
  permalink = generate_permalink(valid_files, base_url_external, permalink_viewer_url)
140
  if permalink:
141
- result, short_id = gen_full_url(
142
  full_url=permalink,
143
- repo_id=HF_REPO_ID,
144
- json_file=SHORTENER_JSON_FILE
145
  )
146
- permalink_short = f"https://{permalink_viewer_url}/?sid={short_id}"
147
- print(f"Creating shortened URL: {result} - {short_id}")
 
 
 
 
148
  return {
149
  "response": response,
150
  "permalink": permalink,
151
  "short_permalink": permalink_short
152
  }
153
-
 
 
 
 
 
 
154
  # Otherwise, return individual tuples for each file.
155
  return [(response, link) for link in individual_links]
156
 
@@ -190,7 +208,11 @@ def _upload_json_to_repo(data, repo_id, json_file_name, repo_type="dataset"):
190
  try:
191
  login(token=HF_API_TOKEN)
192
  api = HfApi()
193
- with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as tmp_file:
 
 
 
 
194
  json.dump(data, tmp_file, indent=2)
195
  tmp_file_path = tmp_file.name
196
 
@@ -229,7 +251,7 @@ def _find_url_in_json(data, short_url=None, full_url=None):
229
 
230
  def _add_url_to_json(data, short_url, full_url):
231
  """Adds a new short_url/full_url pair to the data. Returns updated data."""
232
- if data is None: # Initialize if data is None
233
  data = []
234
  data.append({"short_url": short_url, "full_url": full_url})
235
  return data
@@ -259,10 +281,7 @@ def gen_full_url(short_url=None, full_url=None, repo_id=None, repo_type="dataset
259
  # Case 1: Only short_url provided (lookup full_url)
260
  if short_url and not full_url:
261
  found_full_url = _find_url_in_json(url_data, short_url=short_url)
262
- if found_full_url:
263
- return "success_retrieved_full", found_full_url
264
- else:
265
- return "not_found_short", None
266
 
267
  # Case 2: Only full_url provided (lookup or create short_url)
268
  if full_url and not short_url:
@@ -272,9 +291,6 @@ def gen_full_url(short_url=None, full_url=None, repo_id=None, repo_type="dataset
272
  else:
273
  # Create new short_url
274
  new_short_id = _generate_short_id()
275
- # Construct the short URL using the permalink_viewer_url and the new_short_id as a query parameter or path
276
- # For this example, let's assume short URLs are like: https://permalink_viewer_url/?id=new_short_id
277
-
278
  url_data = _add_url_to_json(url_data, new_short_id, full_url)
279
  if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
280
  return "created_short", new_short_id
@@ -286,25 +302,20 @@ def gen_full_url(short_url=None, full_url=None, repo_id=None, repo_type="dataset
286
  found_full_for_short = _find_url_in_json(url_data, short_url=short_url)
287
  found_short_for_full = _find_url_in_json(url_data, full_url=full_url)
288
 
289
- if found_full_for_short and found_full_for_short == full_url: # Both exist and match
290
  return "exists_match", short_url
291
- elif found_short_for_full and found_short_for_full == short_url: # Both exist and match (redundant check, but safe)
292
- return "exists_match", short_url
293
- elif found_full_for_short and found_full_for_short != full_url: # short_url exists but maps to a different full_url
294
- return "error_conflict_short_exists", short_url # Or perhaps update, depending on desired behavior
295
- elif found_short_for_full and found_short_for_full != short_url: # full_url exists but maps to a different short_url
296
- # This implies the user provided a short_url that is *not* the one already associated with this full_url.
297
- # We should probably return the *existing* short_url for that full_url.
298
- return "exists_full_maps_to_different_short", found_short_for_full
299
- else: # Neither exists, or one exists but not the pair. Create new entry.
300
- # Check if the provided short_url is already in use by another full_url
301
- if _find_url_in_json(url_data, short_url=short_url) is not None:
302
- return "error_conflict_short_id_taken", short_url
303
-
304
- url_data = _add_url_to_json(url_data, short_url, full_url)
305
- if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
306
- return "created_specific_pair", short_url
307
- else:
308
- return "error_upload", None
309
 
310
  return "error_unhandled_case", None # Should not be reached
 
1
  # modules/storage.py
2
+ __version__ = "0.1.0" # Added version
3
  import os
4
  import urllib.parse
5
  import tempfile
 
107
  valid_files.append(f)
108
 
109
  if not valid_files:
110
+ # Return a dictionary with None values for permalinks if create_permalink was True
111
+ if create_permalink:
112
+ return {
113
+ "response": "No valid files to upload.",
114
+ "permalink": None,
115
+ "short_permalink": None
116
+ }
117
+ return []
118
 
119
  # Create a temporary directory; copy valid files directly into it.
120
+ with tempfile.TemporaryDirectory(dir=os.getenv("TMPDIR", "/tmp")) as temp_dir:
121
  for file_path in valid_files:
122
  filename = os.path.basename(file_path)
123
  dest_path = os.path.join(temp_dir, filename)
 
143
 
144
  # If permalink creation is requested and exactly 3 valid files are provided,
145
  # try to generate a permalink using generate_permalink().
146
+ if create_permalink: # No need to check len(valid_files) == 3 here, generate_permalink will handle it
147
  permalink = generate_permalink(valid_files, base_url_external, permalink_viewer_url)
148
  if permalink:
149
+ status, short_id = gen_full_url(
150
  full_url=permalink,
151
+ repo_id=HF_REPO_ID, # This comes from constants
152
+ json_file=SHORTENER_JSON_FILE # This comes from constants
153
  )
154
+ if status in ["created_short", "success_retrieved_short", "exists_match"]:
155
+ permalink_short = f"https://{permalink_viewer_url}/?sid={short_id}"
156
+ else: # Shortening failed or conflict not resolved to a usable short_id
157
+ permalink_short = None
158
+ print(f"URL shortening status: {status} for {permalink}")
159
+
160
  return {
161
  "response": response,
162
  "permalink": permalink,
163
  "short_permalink": permalink_short
164
  }
165
+ else: # generate_permalink returned None (criteria not met)
166
+ return {
167
+ "response": response, # Still return upload response
168
+ "permalink": None,
169
+ "short_permalink": None
170
+ }
171
+
172
  # Otherwise, return individual tuples for each file.
173
  return [(response, link) for link in individual_links]
174
 
 
208
  try:
209
  login(token=HF_API_TOKEN)
210
  api = HfApi()
211
+ # Use a temporary directory specified by TMPDIR or default to system temp
212
+ temp_dir_for_json = os.getenv("TMPDIR", tempfile.gettempdir())
213
+ os.makedirs(temp_dir_for_json, exist_ok=True)
214
+
215
+ with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json", dir=temp_dir_for_json) as tmp_file:
216
  json.dump(data, tmp_file, indent=2)
217
  tmp_file_path = tmp_file.name
218
 
 
251
 
252
  def _add_url_to_json(data, short_url, full_url):
253
  """Adds a new short_url/full_url pair to the data. Returns updated data."""
254
+ if data is None:
255
  data = []
256
  data.append({"short_url": short_url, "full_url": full_url})
257
  return data
 
281
  # Case 1: Only short_url provided (lookup full_url)
282
  if short_url and not full_url:
283
  found_full_url = _find_url_in_json(url_data, short_url=short_url)
284
+ return ("success_retrieved_full", found_full_url) if found_full_url else ("not_found_short", None)
 
 
 
285
 
286
  # Case 2: Only full_url provided (lookup or create short_url)
287
  if full_url and not short_url:
 
291
  else:
292
  # Create new short_url
293
  new_short_id = _generate_short_id()
 
 
 
294
  url_data = _add_url_to_json(url_data, new_short_id, full_url)
295
  if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
296
  return "created_short", new_short_id
 
302
  found_full_for_short = _find_url_in_json(url_data, short_url=short_url)
303
  found_short_for_full = _find_url_in_json(url_data, full_url=full_url)
304
 
305
+ if found_full_for_short == full_url:
306
  return "exists_match", short_url
307
+ if found_full_for_short is not None and found_full_for_short != full_url:
308
+ return "error_conflict_short_exists_different_full", short_url
309
+ if found_short_for_full is not None and found_short_for_full != short_url:
310
+ return "error_conflict_full_exists_different_short", found_short_for_full
311
+
312
+ # If short_url is provided and not found, or full_url is provided and not found,
313
+ # or neither is found, then create a new entry with the provided short_url and full_url.
314
+ # This effectively allows specifying a custom short_url if it's not already taken.
315
+ url_data = _add_url_to_json(url_data, short_url, full_url)
316
+ if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
317
+ return "created_specific_pair", short_url
318
+ else:
319
+ return "error_upload", None
 
 
 
 
 
320
 
321
  return "error_unhandled_case", None # Should not be reached