iiif_downloader / app.py
Gabriel's picture
Update app.py
d1d1d97 verified
raw
history blame
4.03 kB
import os
import requests
import shutil
import gradio as gr
from concurrent.futures import ThreadPoolExecutor
from zipfile import ZipFile
import logging
# Configure the root logger: INFO level, each record prefixed with a
# timestamp and its severity.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Base URL of the IIIF server; manifest and image URLs are built from it.
# NOTE(review): the commented-out URL below looks like an alternative
# (internal?) host -- confirm before switching.
IIIF_URL = "https://lbiiif.riksarkivet.se" #"https://iiifintern.ra.se"
def get_image_ids(batch_id: str, timeout: float = 30.0) -> list[str]:
    """Return the image IDs listed in the IIIF manifest of a batch.

    Requests ``{IIIF_URL}/arkis!{batch_id}/manifest`` and, for every entry of
    the manifest's ``items`` list, takes the segment of its ``id`` that
    follows the first ``!`` and keeps the first 14 characters of it.

    Arguments:
        batch_id: Batch identifier used to build the manifest URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The image IDs, in manifest order.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    logging.info(f"Fetching image IDs for batch {batch_id}")
    # requests never times out on its own; without this a stalled server
    # would block the caller indefinitely.
    response = requests.get(f"{IIIF_URL}/arkis!{batch_id}/manifest", timeout=timeout)
    response.raise_for_status()
    manifest = response.json()
    image_ids = [item["id"].split("!")[1][:14] for item in manifest["items"]]
    logging.info(f"Found {len(image_ids)} images in batch {batch_id}")
    return image_ids
def download_image(url: str, dest: str, timeout: float = 60.0) -> None:
    """Download an image and write it to a file.

    Arguments:
        url: Image url
        dest: Destination file name
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status; in
            that case ``dest`` is not created.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    logging.info(f"Downloading image from {url} to {dest}")
    # The context manager returns the connection to the pool even when an
    # error occurs part-way through the transfer.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail before opening the file so an error page is never saved as
        # if it were an image.
        response.raise_for_status()
        with open(dest, "wb") as out_file:
            # iter_content undoes any transfer compression (gzip/deflate),
            # which reading response.raw directly would not.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                out_file.write(chunk)
def download_image_by_image_id(image_id: str):
    """
    Fetch a single image and store it in a folder named after its batch.

    The batch ID is the first 8 characters of the image ID. The folder is
    created when missing and the image is saved inside it as
    ``<image_id>.jpg``.
    """
    target_dir = image_id[:8]
    os.makedirs(target_dir, exist_ok=True)
    download_image(
        f"{IIIF_URL}/arkis!{image_id}/full/max/0/default.jpg",
        os.path.join(target_dir, f"{image_id}.jpg"),
    )
def download_batch_images(batch_id: str, workers: int = 2, progress=None):
    """Download every image of a batch and pack them into ``<batch_id>.zip``.

    Arguments:
        batch_id: Batch whose images are looked up with ``get_image_ids``.
        workers: Maximum number of concurrent download threads.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``
            to report progress; pass ``None`` to disable reporting.

    Returns:
        The name of the zip file that was written.

    Raises:
        Exception: Any error raised while fetching the manifest, downloading
            an image or writing the archive is propagated to the caller.
    """
    logging.info(f"Starting download for batch {batch_id}")
    image_ids = get_image_ids(batch_id)
    total_images = len(image_ids)
    # Test against None explicitly so the decision does not depend on how
    # the progress object happens to implement truthiness.
    report = progress is not None

    if report:
        progress(0, desc=f"Starting download for {batch_id}...")

    def track_download(image_id):
        download_image_by_image_id(image_id)
        logging.info(f"Downloaded image {image_id}")
        return image_id

    with ThreadPoolExecutor(max_workers=workers) as executor:
        # Consuming map() re-raises any worker exception here instead of
        # dropping it, and lets progress be reported from this thread as a
        # monotonically increasing count of finished images.
        finished = executor.map(track_download, image_ids)
        for done_count, image_id in enumerate(finished, start=1):
            if report:
                progress(done_count / total_images, desc=f"Downloading {image_id}...")

    logging.info(f"Zipping downloaded images for batch {batch_id}")
    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, 'w') as zipf:
        for image_id in image_ids:
            img_path = os.path.join(batch_id, f"{image_id}.jpg")
            # Store files flat in the archive, without the batch folder.
            zipf.write(img_path, arcname=os.path.basename(img_path))

    if report:
        progress(1, desc=f"Completed {batch_id}")

    logging.info(f"Completed download and zip for batch {batch_id}")
    return zip_filename
def gradio_interface(batch_ids_input, progress=gr.Progress()):
    """Gradio callback: download each listed batch and return the zip files.

    Arguments:
        batch_ids_input: Text with one batch ID per line; blank lines and
            surrounding whitespace are ignored.
        progress: Gradio progress tracker. The default instance is what
            makes Gradio supply a tracker for the call.

    Returns:
        A list with the zip file name produced for each batch, in input order.

    Raises:
        gr.Error: If any batch fails, so the message is shown in the UI
            rather than being handed to the file output as if it were a path.
    """
    batch_ids = [line.strip() for line in batch_ids_input.split("\n") if line.strip()]
    zip_files = []
    try:
        for batch_id in progress.tqdm(batch_ids, desc="Processing batches"):
            logging.info(f"Processing batch {batch_id}")
            zip_files.append(download_batch_images(batch_id, progress=progress))
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the log and surface a
        # readable message to the user.
        logging.exception(f"Error processing batches: {e}")
        raise gr.Error(f"Error processing batches: {e}") from e
    return zip_files  # Return the list of zip files for download
# UI definition: a text box and button on the left, the resulting zip files
# on the right.
# NOTE(review): nesting was reconstructed from a source without indentation;
# confirm the button belongs in the first column.
with gr.Blocks() as app:
    gr.Markdown("# Batch Image Downloader")

    with gr.Row():
        with gr.Column():
            batch_ids_input = gr.Textbox(
                label="Batch IDs (one per line)",
                placeholder="Enter batch IDs, one per line.",
            )
            download_button = gr.Button("Download Images")

        with gr.Column():
            output_files = gr.File(label="Download Zip Files", file_count="multiple")

    # Clicking the button runs the download callback and feeds its result
    # to the file component.
    download_button.click(gradio_interface, inputs=[batch_ids_input], outputs=[output_files])

# Enable the request queue, then start the server.
app.queue()
app.launch()