Spaces:
Sleeping
Sleeping
File size: 4,031 Bytes
73c784b d1d1d97 73c784b d7f3fa0 73c784b d1d1d97 d7f3fa0 73c784b d1d1d97 73c784b d1d1d97 73c784b a874957 73c784b d7f3fa0 73c784b a874957 d1d1d97 73c784b a874957 dcc55fe d1d1d97 dcc55fe d1d1d97 dcc55fe 73c784b dcc55fe 73c784b d1d1d97 73c784b a874957 dcc55fe d1d1d97 73c784b dcc55fe a874957 73c784b a874957 d1d1d97 a874957 73c784b d1d1d97 73c784b a874957 73c784b a874957 73c784b a874957 73c784b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import os
import requests
import shutil
import gradio as gr
from concurrent.futures import ThreadPoolExecutor
from zipfile import ZipFile
import logging
# Configure the root logger once at import time: INFO and above, each record
# prefixed with a timestamp and its level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Base URL that every manifest and image request in this file is built on.
# Alternate host kept for reference: "https://iiifintern.ra.se"
# NOTE(review): presumably an internal-network endpoint — confirm before switching.
IIIF_URL = "https://lbiiif.riksarkivet.se"
def get_image_ids(batch_id: str, timeout: float = 30.0) -> list[str]:
    """
    Return the image IDs listed in the manifest of the given batch

    Arguments:
        batch_id: Batch identifier inserted into the manifest URL
        timeout: Seconds to wait for the server before giving up

    Returns:
        One ID per entry of the manifest's "items" list: the first 14
        characters of the segment that follows the first "!" in the
        entry's "id" value.

    Raises:
        requests.HTTPError: The server answered with an error status
        requests.Timeout: The server did not respond within `timeout`
        KeyError, IndexError: The manifest does not have the expected shape
    """
    logging.info(f"Fetching image IDs for batch {batch_id}")
    # Without a timeout a stalled server would block this call forever.
    response = requests.get(f"{IIIF_URL}/arkis!{batch_id}/manifest", timeout=timeout)
    response.raise_for_status()
    manifest = response.json()
    image_ids = [item["id"].split("!")[1][:14] for item in manifest["items"]]
    logging.info(f"Found {len(image_ids)} images in batch {batch_id}")
    return image_ids
def download_image(url: str, dest: str, timeout: float = 30.0) -> None:
    """
    Download an image

    Arguments:
        url: Image url
        dest: Destination file name
        timeout: Seconds to wait for the server to connect or to send
            more data before giving up

    Raises:
        requests.HTTPError: The server answered with an error status;
            nothing is written to `dest` in that case
        requests.RequestException: Any other network failure
        OSError: `dest` could not be opened or written
    """
    logging.info(f"Downloading image from {url} to {dest}")
    # The context manager returns the connection to the pool even when the
    # transfer or the file write fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before opening the destination so that an HTTP
        # error page is never saved under an image file name.
        response.raise_for_status()
        try:
            with open(dest, "wb") as out_file:
                # iter_content applies any Content-Encoding, unlike the raw
                # socket stream, and keeps memory use bounded.
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    out_file.write(chunk)
        except Exception:
            # Do not leave a truncated image behind for later steps to pick up.
            if os.path.exists(dest):
                os.remove(dest)
            raise
def download_image_by_image_id(image_id: str):
    """
    Fetch the image that belongs to one image ID

    The first eight characters of the image ID are taken as the batch ID.
    A directory with that name is created if it does not exist yet, and
    the `full/max/0/default.jpg` rendition of the image is stored in it
    as `<image ID>.jpg`.
    """
    target_dir = image_id[:8]
    os.makedirs(target_dir, exist_ok=True)
    target_file = os.path.join(target_dir, f"{image_id}.jpg")
    download_image(
        f"{IIIF_URL}/arkis!{image_id}/full/max/0/default.jpg",
        target_file,
    )
def download_batch_images(batch_id: str, workers: int = 2, progress=None):
    """
    Download every image of a batch and pack the files into a zip archive

    Arguments:
        batch_id: Batch whose images are fetched
        workers: Maximum number of concurrent download threads
        progress: Optional callable invoked as `progress(fraction, desc=...)`
            to report status; None disables reporting

    Returns:
        File name of the archive, `<batch_id>.zip`, written relative to the
        current working directory

    Raises:
        Exception: The first download failure is re-raised after downloads
            that have not started yet are cancelled
    """
    # Imported here because the module header only pulls in ThreadPoolExecutor.
    from concurrent.futures import as_completed

    logging.info(f"Starting download for batch {batch_id}")
    image_ids = get_image_ids(batch_id)
    total_images = len(image_ids)
    # Compare against None instead of relying on truthiness: a progress
    # object may define its own length and evaluate as false.
    report = progress is not None

    if report:
        progress(0, desc=f"Starting download for {batch_id}...")

    with ThreadPoolExecutor(max_workers=workers) as executor:
        pending = {
            executor.submit(download_image_by_image_id, image_id): image_id
            for image_id in image_ids
        }
        # Progress is reported from the calling thread and counts finished
        # downloads, so the fraction only ever grows.
        for finished, future in enumerate(as_completed(pending), start=1):
            image_id = pending[future]
            try:
                # result() re-raises whatever the worker raised; without it
                # a failed download would go unnoticed.
                future.result()
            except Exception:
                logging.error(f"Failed to download image {image_id}")
                for other in pending:
                    other.cancel()
                raise
            logging.info(f"Downloaded image {image_id}")
            if report:
                progress(finished / total_images, desc=f"Downloading {image_id}...")

    logging.info(f"Zipping downloaded images for batch {batch_id}")
    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, 'w') as zipf:
        for image_id in image_ids:
            # NOTE: download_image_by_image_id stores files under
            # image_id[:8]; this lookup assumes that prefix equals batch_id.
            img_path = os.path.join(batch_id, f"{image_id}.jpg")
            zipf.write(img_path, arcname=os.path.basename(img_path))

    if report:
        progress(1, desc=f"Completed {batch_id}")
    logging.info(f"Completed download and zip for batch {batch_id}")
    return zip_filename
def gradio_interface(batch_ids_input, progress=gr.Progress()):
    """
    Gradio event handler: download and zip every batch named in the textbox

    Arguments:
        batch_ids_input: Text with one batch ID per line; blank lines and
            surrounding whitespace are ignored
        progress: Gradio progress tracker; the default instance lets Gradio
            supply the tracker for the running event

    Returns:
        List with one zip file name per batch, in input order

    Raises:
        gr.Error: Any failure while processing a batch, carrying the
            original error message for display in the UI
    """
    batch_ids = [
        stripped
        for line in (batch_ids_input or "").split("\n")
        if (stripped := line.strip())
    ]
    zip_files = []
    try:
        for batch_id in progress.tqdm(batch_ids, desc="Processing batches"):
            logging.info(f"Processing batch {batch_id}")
            zip_files.append(download_batch_images(batch_id, progress=progress))
    except Exception as e:
        logging.exception(f"Error processing batches: {e}")
        # The output is a file component, so returning the message as a
        # string would be treated as a file path. Raising gr.Error shows
        # the message to the user instead.
        raise gr.Error(str(e)) from e
    return zip_files  # Return the list of zip files for download
# User interface: a heading, then one row with two columns. The left column
# holds the batch ID textbox and the trigger button, the right column the
# downloadable zip files.
with gr.Blocks() as app:
    gr.Markdown(value="# Batch Image Downloader")

    with gr.Row():
        with gr.Column():
            batch_ids_input = gr.Textbox(
                label="Batch IDs (one per line)",
                placeholder="Enter batch IDs, one per line.",
            )
            download_button = gr.Button(value="Download Images")
        with gr.Column():
            output_files = gr.File(
                label="Download Zip Files",
                file_count="multiple",
            )

    # Clicking the button feeds the textbox content to the handler and shows
    # the files it returns.
    download_button.click(
        fn=gradio_interface,
        inputs=[batch_ids_input],
        outputs=[output_files],
    )

# Enable the request queue, then start the server.
app.queue().launch()
|