Spaces:
Sleeping
Sleeping
import os | |
import glob | |
import pandas as pd | |
from PIL import Image | |
from itertools import islice | |
import earthview as ev | |
import gradio as gr | |
# Path of the single parquet file that is loaded below: the first file of the
# first shard. Point this at another file to work on different data.
filename = os.path.join(
    "dataset",
    "satellogic",
    "train-00000-of-00065.parquet",
)

# To enumerate every parquet file instead, glob the directory. Note that
# glob.glob returns a list whose order is arbitrary (it is not sorted):
# filenames = glob.glob(os.path.join("dataset", "satellogic", "*.parquet"))

# Read that one file into a pandas DataFrame. For larger datasets `dask` is
# significantly faster; pandas is used here to keep the example simple and to
# rely only on dependencies that are already imported.
data = pd.read_parquet(path=filename)
# transforms a metadata sample to bounds and timestamps, handling revisits | |
def item_to_bounds_timestamps(sample):
    """Split a sample's nested metadata into two parallel flat lists.

    Reads ``sample["metadata"]["bounds"]`` and
    ``sample["metadata"]["timestamp"]`` and pairs their entries positionally.

    Returns:
        A ``(bounds, timestamps)`` tuple of two new lists. Pairing stops at
        the shorter of the two inputs, so both lists always have equal length.
    """
    metadata_bounds = sample["metadata"]["bounds"]
    metadata_timestamps = sample["metadata"]["timestamp"]

    # Materialise the pairs once so both output lists are built from the same
    # positional pairing.
    paired = list(zip(metadata_bounds, metadata_timestamps))
    return [bound for bound, _ in paired], [stamp for _, stamp in paired]
def _prompt_rating():
    """Ask on stdin until the user enters 0 or 1, then return it as an int."""
    while True:
        try:
            rating = int(input("Rate the image (0 or 1): "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if rating in (0, 1):
            return rating
        print("Invalid rating. Please enter 0 or 1.")


# `display` is not defined by this file or any of its imports; it is
# presumably the builtin that IPython/Jupyter provides. Resolve it once and
# fall back to `print` so the script does not die with a NameError when run
# as a plain Python script.
# NOTE(review): the fallback only prints the object's text form - confirm
# whether a real image viewer is wanted outside a notebook.
try:
    _show_image = display
except NameError:
    _show_image = print

# Collected results. The three lists grow in lockstep: one entry per rated image.
ratings = []
image_id = 0  # ID of the next image; equals the number of images rated so far
bounds = []
timestamps = []

# Limit the number of dataset rows to process for the test. Each row can hold
# several images (one per entry of its "rgb" column), so more than this many
# images may be rated.
num_images_to_process = 5

# Iterate through the samples, display, rate, and store info
data_iter = islice(data.iterrows(), num_images_to_process)
for _, sample in data_iter:
    rgb = sample["rgb"]
    bounds_sample, timestamps_sample = item_to_bounds_timestamps(sample)

    # Iterate through the RGB images (revisits) of this sample
    for i, img in enumerate(rgb):
        print("Image ID:", image_id)
        _show_image(img)

        rating = _prompt_rating()

        # Store the rating together with the matching bounds and timestamp
        ratings.append(rating)
        bounds.append(bounds_sample[i])
        timestamps.append(timestamps_sample[i])

        image_id += 1

# Create a DataFrame with one row per rated image
df = pd.DataFrame({
    "image_id": range(image_id),
    "rating": ratings,
    "bounds": bounds,
    "timestamp": timestamps,
})

# Save the DataFrame to a CSV file
df.to_csv("image_ratings_test.csv", index=False)
print("Test complete. Ratings saved to image_ratings_test.csv")