dsgt-snakeclef / generate_dummy_testset.py
Anthony Miyaguchi
Add integration testing
867532a
raw
history blame
858 Bytes
import PIL.Image
import pandas as pd
import tempfile
from pathlib import Path
import zipfile
import numpy as np
# Defaults reproduce the values the script originally hard-coded.
METADATA_PATH = "metadata-subset.csv"
OUTPUT_PATH = "/tmp/data/private_testset.zip"
ARCHIVE_ROOT = "private_testset"
IMAGE_SHAPE = (224, 224, 3)  # height, width, RGB channels


def write_random_images(filenames, target_dir, shape=IMAGE_SHAPE):
    """Write one random-noise image per entry of *filenames* under *target_dir*.

    Args:
        filenames: Iterable of file names, relative to *target_dir*. The
            file extension is what Pillow uses to pick the image format.
        target_dir: Directory that receives the images.
        shape: Array shape of every generated image.
    """
    target_dir = Path(target_dir)
    for name in filenames:
        # `high` is exclusive in np.random.randint; 256 is required for the
        # noise to cover the full uint8 range 0..255.
        pixels = np.random.randint(0, 256, shape, dtype=np.uint8)
        destination = target_dir / name
        # A filename may contain sub-directories; saving would fail if the
        # parent directory did not exist yet.
        destination.parent.mkdir(parents=True, exist_ok=True)
        PIL.Image.fromarray(pixels).save(destination)


def zip_directory(source_dir, output_path, archive_root=ARCHIVE_ROOT):
    """Zip every file below *source_dir* into *output_path*.

    Members are stored as ``<archive_root>/<path relative to source_dir>``
    and written in sorted order so repeated runs yield the same member
    listing.

    Args:
        source_dir: Directory whose files are archived (searched recursively).
        output_path: Destination zip file; missing parent directories are
            created.
        archive_root: Folder name that prefixes every member in the archive.

    Returns:
        The destination as a ``Path``.
    """
    source_dir = Path(source_dir)
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    members = sorted(p for p in source_dir.rglob("*") if p.is_file())
    with zipfile.ZipFile(output_path, "w") as archive:
        for member in members:
            relative_name = member.relative_to(source_dir).as_posix()
            archive.write(member, f"{archive_root}/{relative_name}")
    return output_path


def main(metadata_path=METADATA_PATH, output_path=OUTPUT_PATH):
    """Build a dummy test-set archive of random images named by the metadata.

    Reads the ``filename`` column of the CSV at *metadata_path*, generates a
    random image for each row inside a temporary directory, and zips the
    result to *output_path*.
    """
    metadata = pd.read_csv(metadata_path)
    print(metadata)
    # The images only need to live long enough to be zipped.
    with tempfile.TemporaryDirectory() as tmpdir:
        write_random_images(metadata["filename"], tmpdir)
        zip_directory(tmpdir, output_path)


if __name__ == "__main__":
    main()