Spaces:
Sleeping
Sleeping
File size: 2,748 Bytes
465c443 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import os
import glob
import pandas as pd
from PIL import Image
from itertools import islice
import earthview as ev
import gradio as gr
# Only one parquet shard is loaded here; point `filename` at a different
# shard (or loop over several) to work with more of the dataset.
shard_dir = os.path.join("dataset", "satellogic")
filename = os.path.join(shard_dir, "train-00000-of-00065.parquet")
# To collect every shard instead (glob.glob returns a list in arbitrary order):
# filenames = glob.glob(os.path.join("dataset", "satellogic", "*.parquet"))
# pandas reads this single file in one call. The original author's note
# recommends `dask` for larger datasets; pandas is used here so that no
# dependency beyond the ones already imported is needed.
data = pd.read_parquet(filename)
# transforms a metadata sample to bounds and timestamps, handling revisits
def item_to_bounds_timestamps(sample):
    """Return the bounds and timestamps of one sample as two flat lists.

    Args:
        sample: A mapping-like object (a dict or a pandas row both work)
            whose ``"metadata"`` entry holds two parallel sequences under
            the keys ``"bounds"`` and ``"timestamp"``.

    Returns:
        A ``(bounds, timestamps)`` tuple of plain lists. Entry ``i`` of each
        list comes from position ``i`` of the corresponding metadata
        sequence.

    Raises:
        KeyError: If ``"metadata"``, ``"bounds"`` or ``"timestamp"`` is
            missing from the sample.
    """
    metadata = sample["metadata"]
    # zip() pairs the entries positionally and stops at the shorter sequence,
    # so the two returned lists always have the same length.
    pairs = list(zip(metadata["bounds"], metadata["timestamp"]))
    bounds = [b for b, _ in pairs]
    timestamps = [t for _, t in pairs]
    return bounds, timestamps
# Accumulators for the collected ratings and the metadata of each rated image
ratings = []
image_id = 0  # running ID, incremented once per rated image
bounds = []
timestamps = []
# Limit the number of samples (rows) to process for the test; one sample can
# contribute several images because every entry of its "rgb" field is rated.
num_images_to_process = 5


def _prompt_rating():
    """Ask on stdin until the user enters 0 or 1 and return it as an int."""
    while True:
        # Only the int() conversion can raise ValueError, so keep the try narrow.
        try:
            rating = int(input("Rate the image (0 or 1): "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if rating in (0, 1):
            return rating
        print("Invalid rating. Please enter 0 or 1.")


# Iterate through the samples, display, rate, and store info
data_iter = islice(data.iterrows(), num_images_to_process)
for index, sample in data_iter:
    rgb = sample["rgb"]
    bounds_sample, timestamps_sample = item_to_bounds_timestamps(sample)
    # iterate through the RGB images (revisits)
    for i, img in enumerate(rgb):
        print("Image ID:", image_id)
        # NOTE(review): `display` is not imported anywhere in this file; it is
        # presumably the IPython/Jupyter builtin, so this is expected to fail
        # with NameError when run as a plain script - confirm the runtime.
        display(img)
        # Get the rating from the user
        rating = _prompt_rating()
        # Store the rating together with the matching bounds and timestamp
        ratings.append(rating)
        bounds.append(bounds_sample[i])
        timestamps.append(timestamps_sample[i])
        image_id += 1  # Increment image ID

# Create a DataFrame from the collected data; image_id now equals the number
# of rated images, so range(image_id) lines up with the three lists.
df = pd.DataFrame({
    "image_id": range(image_id),
    "rating": ratings,
    "bounds": bounds,
    "timestamp": timestamps,
})
# Save the DataFrame to a CSV file
df.to_csv("image_ratings_test.csv", index=False)
print("Test complete. Ratings saved to image_ratings_test.csv")