File size: 4,031 Bytes
73c784b
 
 
 
 
 
d1d1d97
 
 
73c784b
d7f3fa0
 
73c784b
 
d1d1d97
d7f3fa0
73c784b
 
d1d1d97
 
 
73c784b
 
 
 
 
 
 
 
d1d1d97
73c784b
 
 
 
 
a874957
73c784b
 
 
 
 
 
 
d7f3fa0
73c784b
 
 
a874957
d1d1d97
73c784b
 
 
a874957
 
dcc55fe
 
 
d1d1d97
dcc55fe
 
d1d1d97
dcc55fe
 
73c784b
 
dcc55fe
73c784b
d1d1d97
73c784b
 
 
 
 
 
a874957
 
dcc55fe
d1d1d97
73c784b
 
dcc55fe
a874957
 
 
 
73c784b
a874957
d1d1d97
a874957
 
 
73c784b
d1d1d97
73c784b
 
 
 
 
 
a874957
 
 
 
 
73c784b
 
 
 
a874957
 
73c784b
 
a874957
73c784b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import requests
import shutil
import gradio as gr
from concurrent.futures import ThreadPoolExecutor
from zipfile import ZipFile
import logging

# Configure the root logger once at import time: INFO level, with timestamp
# and level name in front of every message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Base URL that every manifest and image request in this module is built on.
# The trailing comment keeps an alternative endpoint for reference
# (NOTE(review): presumably an internal host -- confirm before switching).
IIIF_URL = "https://lbiiif.riksarkivet.se" #"https://iiifintern.ra.se"

def get_image_ids(batch_id: str, timeout: float = 30) -> list[str]:
    """Return the image IDs listed in the manifest of the given batch.

    Arguments:
        batch_id: Batch identifier, interpolated into the manifest URL
            ``{IIIF_URL}/arkis!{batch_id}/manifest``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the caller forever.

    Returns:
        One ID per entry of the manifest's ``"items"`` list, in manifest
        order.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.RequestException: The request failed or timed out.
        KeyError, IndexError: The manifest does not have the expected
            ``items`` / ``id`` layout.
    """
    logging.info(f"Fetching image IDs for batch {batch_id}")
    response = requests.get(f"{IIIF_URL}/arkis!{batch_id}/manifest", timeout=timeout)
    response.raise_for_status()
    manifest = response.json()
    # The image ID is the first 14 characters of whatever follows the first
    # "!" in each item's "id" field.
    image_ids = [item["id"].split("!")[1][:14] for item in manifest["items"]]
    logging.info(f"Found {len(image_ids)} images in batch {batch_id}")
    return image_ids

def download_image(url: str, dest: str, timeout: float = 60) -> None:
    """
    Download an image to a local file.

    The body is streamed into a temporary ``<dest>.part`` file that is moved
    to ``dest`` only after the transfer completed, so ``dest`` never ends up
    holding an HTTP error page or a truncated image.

    Arguments:
        url: Image url
        dest: Destination file name
        timeout: Seconds to wait for the server to connect / send data
            before giving up

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.RequestException: The request failed or timed out.
        OSError: The destination could not be written.
    """
    logging.info(f"Downloading image from {url} to {dest}")
    partial_dest = f"{dest}.part"
    try:
        # The context manager releases the connection even when the
        # transfer fails half-way.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Check the status before touching the disk: an error response
            # must not be saved as if it were the image.
            response.raise_for_status()
            with open(partial_dest, "wb") as out_file:
                # iter_content applies any content-encoding (gzip/deflate)
                # decoding, which reading response.raw directly would skip.
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    out_file.write(chunk)
        os.replace(partial_dest, dest)
    finally:
        # After a successful os.replace the partial file is gone; on any
        # failure this removes the incomplete download.
        if os.path.exists(partial_dest):
            os.remove(partial_dest)

def download_image_by_image_id(image_id: str):
    """
    Fetch the full-size JPEG for ``image_id`` and store it on disk.

    The first eight characters of the image ID are used as the batch ID;
    a directory with that name is created if needed and the image is saved
    inside it as ``<image_id>.jpg``.
    """
    target_dir = image_id[:8]
    os.makedirs(target_dir, exist_ok=True)
    download_image(
        f"{IIIF_URL}/arkis!{image_id}/full/max/0/default.jpg",
        os.path.join(target_dir, f"{image_id}.jpg"),
    )

def download_batch_images(batch_id: str, workers: int = 2, progress=None):
    """
    Download every image of a batch concurrently and pack them into a zip.

    Arguments:
        batch_id: Batch whose manifest lists the images to fetch.
        workers: Maximum number of concurrent download threads.
        progress: Optional callable invoked as
            ``progress(fraction, desc=...)`` with ``fraction`` in [0, 1].
            It is only ever called from the calling thread.

    Returns:
        File name of the created archive, ``"<batch_id>.zip"``, relative to
        the current working directory.

    Raises:
        Exception: Any error raised while fetching the manifest or
            downloading an image is propagated; no archive is written in
            that case.
    """
    logging.info(f"Starting download for batch {batch_id}")
    image_ids = get_image_ids(batch_id)
    total_images = len(image_ids)

    # Compare against None rather than relying on truthiness, so a progress
    # object that defines its own length/truthiness is still used.
    report = progress is not None

    if report:
        progress(0, desc=f"Starting download for {batch_id}...")

    with ThreadPoolExecutor(max_workers=workers) as executor:
        # map() yields results in submission order and re-raises a worker's
        # exception when its result is retrieved, so a failed download is
        # no longer silently dropped.
        outcomes = executor.map(download_image_by_image_id, image_ids)
        for completed, (image_id, _) in enumerate(zip(image_ids, outcomes), start=1):
            logging.info(f"Downloaded image {image_id}")
            if report:
                # A running count keeps the reported fraction monotonic and
                # avoids a linear list search per image.
                progress(completed / total_images, desc=f"Downloading {image_id}...")

    logging.info(f"Zipping downloaded images for batch {batch_id}")
    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, 'w') as zipf:
        for image_id in image_ids:
            # NOTE(review): download_image_by_image_id stores files under
            # image_id[:8]; this lookup assumes that prefix equals batch_id
            # -- confirm against real manifests.
            img_path = os.path.join(batch_id, f"{image_id}.jpg")
            zipf.write(img_path, arcname=os.path.basename(img_path))

    if report:
        progress(1, desc=f"Completed {batch_id}")

    logging.info(f"Completed download and zip for batch {batch_id}")
    return zip_filename


def gradio_interface(batch_ids_input, progress=gr.Progress()):
    """
    Click handler: download and zip every batch listed in the textbox.

    Arguments:
        batch_ids_input: Raw textbox content with one batch ID per line.
            Surrounding whitespace and blank lines are ignored.
        progress: Progress tracker; it wraps the batch loop and is passed on
            to ``download_batch_images`` for per-image updates.

    Returns:
        The zip file names, one per batch, in input order.

    Raises:
        gr.Error: A batch failed. The output of this handler is a file
            component, so returning the error text would be treated as a
            file path; raising lets the UI show the message instead.
    """
    batch_ids = [line.strip() for line in batch_ids_input.split("\n") if line.strip()]

    zip_files = []
    try:
        for batch_id in progress.tqdm(batch_ids, desc="Processing batches"):
            logging.info(f"Processing batch {batch_id}")
            zip_files.append(download_batch_images(batch_id, progress=progress))
    except Exception as e:
        # logging.exception keeps the traceback in the server log.
        logging.exception(f"Error processing batches: {e}")
        raise gr.Error(str(e)) from e
    return zip_files  # Return the list of zip files for download

# Build the UI. Components are attached to whichever layout context
# (Blocks / Row / Column) is open when they are created, so the nesting
# and order below define the page layout.
with gr.Blocks() as app:
    gr.Markdown("# Batch Image Downloader")

    with gr.Row():
        # Left column: batch ID input and the button that starts the download.
        with gr.Column():
            batch_ids_input = gr.Textbox(label="Batch IDs (one per line)", placeholder="Enter batch IDs, one per line.")
            download_button = gr.Button("Download Images")
        # Right column: the resulting zip files, offered for download.
        with gr.Column():
            output_files = gr.File(label="Download Zip Files", file_count="multiple")


    # Clicking the button passes the textbox content to gradio_interface and
    # shows whatever it returns in the file component.
    download_button.click(
        gradio_interface,
        inputs=[batch_ids_input],
        outputs=[output_files]
    )

# NOTE(review): queue() is presumably required for gr.Progress updates to
# reach the browser -- confirm for the installed Gradio version.
app.queue()
# Starts the web server as soon as this module is executed.
app.launch()