File size: 3,111 Bytes
73c784b
 
 
 
 
 
 
d7f3fa0
 
73c784b
 
d7f3fa0
73c784b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a874957
73c784b
 
 
 
 
 
 
 
d7f3fa0
73c784b
 
 
a874957
73c784b
 
 
a874957
 
73c784b
 
 
a874957
73c784b
 
 
 
 
 
 
 
a874957
 
 
73c784b
 
a874957
 
 
 
73c784b
a874957
 
 
 
73c784b
 
 
 
 
 
 
a874957
 
 
 
 
73c784b
 
 
 
a874957
 
73c784b
 
a874957
73c784b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import requests
import shutil
import gradio as gr
from concurrent.futures import ThreadPoolExecutor
from zipfile import ZipFile

# Base URL of the IIIF server; every manifest and image URL below is built on it.
# Alternative base URL kept for reference: "https://iiifintern.ra.se"
# (presumably an internal host -- confirm before switching to it).
IIIF_URL = "https://lbiiif.riksarkivet.se"

def get_image_ids(batch_id: str) -> list[str]:
    """
    Return the image IDs listed in the IIIF manifest of the given batch

    Arguments:
        batch_id: ID of the batch whose manifest is fetched

    Returns:
        One image ID per manifest item, in manifest order. Each ID is the
        first 14 characters of the "!"-delimited segment that follows the
        first "!" in the item's "id" field.

    Raises:
        requests.HTTPError: If the server answers with an error status
        requests.Timeout: If the server does not respond in time
    """
    # requests waits indefinitely when no timeout is given, so a stalled
    # server would block the caller forever.
    response = requests.get(f"{IIIF_URL}/arkis!{batch_id}/manifest", timeout=30)
    response.raise_for_status()
    manifest = response.json()
    return [item["id"].split("!")[1][:14] for item in manifest["items"]]

def download_image(url: str, dest: str) -> None:
    """
    Download an image

    The response body is streamed to disk instead of being loaded into
    memory. The destination file is only created after the server has
    answered with a success status, so an HTTP error never leaves an error
    page saved under an image file name.

    Arguments:
        url: Image url
        dest: Destination file name

    Raises:
        requests.HTTPError: If the server answers with an error status
        requests.Timeout: If the server does not respond in time
        OSError: If the destination file cannot be written
    """
    # The context manager releases the connection even if writing fails; the
    # timeout keeps a stalled server from hanging the calling thread forever.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        # Let urllib3 undo any Content-Encoding (gzip/deflate) so the bytes
        # written are the image itself rather than a compressed stream.
        response.raw.decode_content = True
        with open(dest, "wb") as out_file:
            shutil.copyfileobj(response.raw, out_file)

def download_image_by_image_id(image_id: str):
    """
    Fetch the full-size JPEG for one image ID and store it on disk

    The first eight characters of the image ID are taken as the batch ID.
    A directory with that name is created (if it does not exist yet) and
    the image is written to ``<batch_id>/<image_id>.jpg``.

    Arguments:
        image_id: ID of the image to download
    """
    target_dir = image_id[:8]
    os.makedirs(target_dir, exist_ok=True)
    download_image(
        f"{IIIF_URL}/arkis!{image_id}/full/max/0/default.jpg",
        os.path.join(target_dir, f"{image_id}.jpg"),
    )

def download_batch_images(batch_id: str, workers: int = 2, progress=None):
    """
    Download every image of a batch and pack the images into a zip archive

    The image IDs come from ``get_image_ids``; each image is fetched with
    ``download_image_by_image_id`` on a thread pool. The images are then
    read from ``<batch_id>/<image_id>.jpg`` and stored flat (file name only)
    in ``<batch_id>.zip`` in the current working directory.

    Arguments:
        batch_id: ID of the batch to download
        workers: Number of download threads
        progress: Optional callable invoked as ``progress(fraction, desc=...)``
            with a fraction between 0 and 1; pass None to disable reporting

    Returns:
        The file name of the zip archive that was written

    Raises:
        Exception: The first error raised while fetching the manifest or
            downloading an image is propagated unchanged
    """
    image_ids = get_image_ids(batch_id)
    total_images = len(image_ids)

    # Compare against None explicitly: a progress object may define its own
    # truthiness (for example through __len__) and must still be used.
    if progress is not None:
        progress(0, desc=f"Starting download for {batch_id}...")

    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [
            executor.submit(download_image_by_image_id, image_id)
            for image_id in image_ids
        ]
        try:
            for finished, future in enumerate(futures, start=1):
                # result() re-raises whatever the worker raised. Without it
                # a failed download goes unnoticed until the zip step trips
                # over the missing file.
                future.result()
                if progress is not None:
                    progress(
                        finished / total_images,
                        desc=f"Downloading {batch_id} ({finished}/{total_images})",
                    )
        except Exception:
            # Do not start downloads that are still queued once one failed.
            for future in futures:
                future.cancel()
            raise

    # Zip the folder with downloaded images
    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, "w") as zipf:
        for image_id in image_ids:
            img_path = os.path.join(batch_id, f"{image_id}.jpg")
            zipf.write(img_path, arcname=os.path.basename(img_path))

    if progress is not None:
        progress(1, desc=f"Completed {batch_id}")

    return zip_filename

def gradio_interface(batch_ids_input, progress=gr.Progress()):
    """
    Gradio event handler: download the listed batches and return their zips

    Arguments:
        batch_ids_input: Text with one batch ID per line; blank lines and
            surrounding whitespace are ignored
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            the documented way to request progress tracking from Gradio and
            is intentional, not an accidental shared default

    Returns:
        The zip file names, one per batch, in input order

    Raises:
        gr.Error: If any batch fails. The output is a file component, so a
            returned error string would be treated as a file path; raising
            gr.Error is what makes the message visible to the user
    """
    batch_ids = [
        stripped
        for line in batch_ids_input.split("\n")
        if (stripped := line.strip())
    ]

    zip_files = []
    try:
        for batch_id in progress.tqdm(batch_ids, desc="Processing batches"):
            zip_files.append(download_batch_images(batch_id, progress=progress))
    except Exception as e:
        # Top-level UI boundary: surface any failure as a Gradio error.
        raise gr.Error(str(e)) from e
    return zip_files  # Return the list of zip files for download

# Build the UI. This runs at import time: there is no
# `if __name__ == "__main__"` guard around the construction or the launch.
with gr.Blocks() as app:
    gr.Markdown("# Batch Image Downloader")

    with gr.Row():
        # First column: batch ID input and the button that starts the download.
        with gr.Column():
            batch_ids_input = gr.Textbox(label="Batch IDs (one per line)", placeholder="Enter batch IDs, one per line.")
            download_button = gr.Button("Download Images")
        # Second column: the resulting zip files (gradio_interface returns
        # one per batch, hence file_count="multiple").
        with gr.Column():
            output_files = gr.File(label="Download Zip Files", file_count="multiple")


    # Clicking the button passes the text box content to gradio_interface
    # and sends its return value to the file component.
    download_button.click(
        gradio_interface,
        inputs=[batch_ids_input],
        outputs=[output_files]
    )

# Turn on Gradio's request queue, then start the app.
# NOTE(review): progress tracking appears to require the queue in older
# Gradio releases -- confirm for the installed version.
app.queue()
app.launch()