"""API custom file upload call wrappers."""

import hashlib
import os
import shutil
import tempfile
from copy import copy
from datetime import datetime
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile

import requests
import typer
from folding_studio_data_models import CustomFileType
from rich import print
from tqdm import tqdm
from tqdm.utils import CallbackIOWrapper

from folding_studio.config import API_URL, REQUEST_TIMEOUT
|
|
def _upload_file_to_signed_url(
    signed_url: str,
    src: str | Path,
    headers: dict[str, str],
) -> requests.Response:
    """Upload a local file to a GCS bucket using a signed URL.

    Use a PUT request.

    Args:
        signed_url (str): The signed URL corresponding to the GCS path.
        src (str | Path): The local file path.
        headers (dict[str, str]): HTTP request headers.

    Raises:
        requests.exceptions.HTTPError: If something went wrong during the upload.

    Returns:
        A response to the PUT request.
    """
|
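    # Work on a copy so the caller's headers are left untouched; the binary
    # content type matches the raw bytes streamed below.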
    put_headers = copy(headers)
    put_headers["Content-type"] = "application/octet-stream"
    file_size = os.path.getsize(src)
    with open(src, "rb") as fd:
        with tqdm(
            desc=f"Uploading {src}",
            total=file_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
        ) as t:
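            # Wrapping the file's read() lets requests stream the body while
            # tqdm advances the progress bar on every chunk read.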
            reader_wrapper = CallbackIOWrapper(t.update, fd, "read")
            response = requests.put(
                url=signed_url,
                data=reader_wrapper,
                headers=put_headers,
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()
    return response
|
|
def _get_blob_name_from_file_content(src: str | Path) -> str:
    """Get a unique file name based on its content.

    This file name is used as the blob name when uploading the file to a bucket.

    Args:
        src (str | Path): Path to local file.

    Returns:
        The unique blob name.
    """
    src = Path(src)
    file_hash = hashlib.md5()
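    # Hash in 8 KiB chunks so large files never have to fit in memory.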
    with src.open("rb") as fd:
        while chunk := fd.read(8192):
            file_hash.update(chunk)

    hexcode = file_hash.hexdigest()[:8]

    file_stem = src.name.split(".")[0]
    suffix = "".join(src.suffixes)
    return f"{file_stem}_{hexcode}{suffix}"
|
|
def _copy_and_zip_files(
    file_list: list[Path],
    temp_dir: str,
    zip_name: str = "files.zip",
) -> str:
    """Copy a list of files to a temporary directory and zip them into one
    archive with a high compression level.

    Args:
        file_list (list[Path]): List of file paths to be copied and zipped.
        temp_dir (str): Path to the temporary directory.
        zip_name (str): Name of the resulting zip file.

    Returns:
        str: Path to the created zip file.
    """
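    # Copy each file under its content-derived blob name so identical content
    # always produces the same entry name inside the archive.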
    to_zip = []
    for file_path in file_list:
        if file_path.is_file():
            blob_name = _get_blob_name_from_file_content(src=file_path)
            dest_file = os.path.join(temp_dir, blob_name)
            shutil.copy(file_path, dest_file)
            to_zip.append(dest_file)
        else:
            print(f"Warning: {file_path} does not exist or is not a file.")

    zip_path = os.path.join(temp_dir, zip_name)
    with ZipFile(zip_path, "w", compression=ZIP_DEFLATED, compresslevel=7) as zipf:
        for file_name in to_zip:
            zipf.write(file_name, arcname=Path(file_name).name)
    return zip_path
|
|
def _get_blob_zip_name(file_type: str) -> str:
    """Build a unique, timestamped zip blob name for the given file type."""
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    return f"{file_type}_files_{timestamp}.zip"
|
|
def upload_custom_files(
    headers: dict[str, str],
    paths: list[Path],
    file_type: CustomFileType,
) -> dict[str, str]:
    """Upload custom files to the GCS bucket.

    Args:
        headers (dict[str, str]): HTTP request headers.
        paths (list[Path]): List of custom template file paths.
        file_type (CustomFileType): Type of file to upload.

    Raises:
        typer.Exit: If an error occurs during the API call.

    Returns:
        dict[str, str]: Mapping of local filenames to GCS paths.
    """
|
    url = API_URL + "getUploadSignedURL"

    paths = list(dict.fromkeys(paths))  # Deduplicate while preserving order.
    print(f"Uploading {len(paths)} file(s): {tuple(str(p) for p in paths)}.")
|
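    # Compute the content-addressed blob name of every file up front; the same
    # names are reused for the returned local-to-GCS mapping.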
    blobs = [_get_blob_name_from_file_content(src=file) for file in paths]

    blob_zip = _get_blob_zip_name(file_type.value)
    with tempfile.TemporaryDirectory() as temp_dir:
        zip_path = _copy_and_zip_files(
            file_list=paths,
            temp_dir=temp_dir,
            zip_name=blob_zip,
        )
|
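        # Ask the API for a signed URL authorizing an upload of this zip blob.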
        url_response = requests.get(
            url,
            params={
                "blob_name": blob_zip,
                "file_type": file_type.value,
            },
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
|
        if not url_response.ok:
            print(f"Error while generating signed URL: {url_response.content.decode()}")
            raise typer.Exit(code=1)
|
        json_response = url_response.json()
        signed_url = json_response["signed_url"]
|
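        # Stream the zip to GCS. _upload_file_to_signed_url() already raises
        # on HTTP errors, so the .ok check below is only a defensive fallback.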
        upload_response = _upload_file_to_signed_url(
            signed_url=signed_url, src=zip_path, headers=headers
        )
        if not upload_response.ok:
            print(f"Error while uploading {zip_path}.")
            raise typer.Exit(code=1)
|
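        # Have the server expand the archive so each file becomes its own blob
        # in the destination bucket.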
        unzip_response = requests.post(
            API_URL + "unzipFileInBucket",
            params={
                "zip_file_path": json_response["destination_file"],
            },
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if not unzip_response.ok:
            print(f"Error while unzipping custom files: {unzip_response.content.decode()}")
            raise typer.Exit(code=1)
|
        local_to_gcs = {
            str(file): f"{json_response['destination_bucket']}/{blob_name}"
            for file, blob_name in zip(paths, blobs)
        }
        print("Custom files successfully uploaded.")
        return local_to_gcs
|
|
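# Example usage (a minimal sketch; the Authorization scheme and the
# CustomFileType member below are assumptions, adapt them to your setup):
#
#     from pathlib import Path
#
#     headers = {"Authorization": "Bearer <token>"}
#     local_to_gcs = upload_custom_files(
#         headers=headers,
#         paths=[Path("templates/model_a.cif")],
#         file_type=CustomFileType.TEMPLATE,  # hypothetical member name
#     )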