TerraNomaly / app.py
dropbop's picture
Create app.py test version (small scale example)
465c443 verified
raw
history blame
2.75 kB
import os
import glob
import pandas as pd
from PIL import Image
from itertools import islice
import earthview as ev
import gradio as gr
# Path of the single parquet shard this test reads (the file named
# train-00000-of-00065 under dataset/satellogic); edit this line to load a different shard.
filename = os.path.join("dataset", "satellogic", "train-00000-of-00065.parquet")
# Alternative: glob.glob returns a list (in no guaranteed order) of every shard path
# filenames = glob.glob(os.path.join("dataset", "satellogic", "*.parquet"))
# Load the one shard above into a pandas DataFrame.
# For larger datasets the original author suggests `dask` as a faster option;
# pandas is used here to keep this simple version on dependencies that are already imported.
data = pd.read_parquet(filename)
# transforms a metadata sample to bounds and timestamps, handling revisits
def item_to_bounds_timestamps(sample):
    """Return the bounds and timestamps of a sample as two flat lists.

    Args:
        sample: Mapping-like object (e.g. a dataset row) whose ``"metadata"``
            entry holds two parallel sequences under ``"bounds"`` and
            ``"timestamp"``.

    Returns:
        A ``(bounds, timestamps)`` tuple of new lists. Element ``k`` of each
        list comes from position ``k`` of the corresponding metadata
        sequence. If the two sequences differ in length, both results are
        cut to the shorter one so they always stay aligned.

    Raises:
        KeyError: If ``"metadata"``, ``"bounds"`` or ``"timestamp"`` is missing.
    """
    metadata = sample["metadata"]
    # Pair the entries up first so both output lists have the same length.
    pairs = list(zip(metadata["bounds"], metadata["timestamp"]))
    bounds = [bound for bound, _ in pairs]
    timestamps = [stamp for _, stamp in pairs]
    return bounds, timestamps
def _show_image(img):
    """Show one image to the rater with whatever viewer this runtime offers."""
    try:
        # `display` is not imported by this file; it only exists when the
        # script runs inside IPython/Jupyter, which provides it as a builtin.
        viewer = display
    except NameError:
        viewer = None

    if viewer is not None:
        viewer(img)
    elif hasattr(img, "show"):
        # Plain-Python fallback for objects with their own viewer
        # (e.g. PIL images expose a .show() method).
        img.show()
    else:
        # No viewer available: report the object type rather than crashing.
        print(type(img).__name__)


def _ask_rating():
    """Prompt on stdin until the user enters 0 or 1, then return it as an int."""
    while True:
        try:
            rating = int(input("Rate the image (0 or 1): "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if rating in (0, 1):
            return rating
        print("Invalid rating. Please enter 0 or 1.")


# Collected results: one entry per rated image, kept in lockstep
ratings = []
image_id = 0  # running image counter; also the number of images rated so far
bounds = []
timestamps = []

# Limit the number of dataset rows to process for the test
num_images_to_process = 5

# Iterate through the samples, display, rate, and store info
data_iter = islice(data.iterrows(), num_images_to_process)
for index, sample in data_iter:
    rgb = sample["rgb"]
    bounds_sample, timestamps_sample = item_to_bounds_timestamps(sample)

    # Walk the RGB images (revisits) together with their metadata. zip keeps the
    # three sequences aligned and stops at the shortest, so an image without a
    # metadata entry is skipped instead of raising IndexError after it was rated.
    for img, img_bounds, img_timestamp in zip(rgb, bounds_sample, timestamps_sample):
        print("Image ID:", image_id)
        _show_image(img)

        # Get the rating from the user
        rating = _ask_rating()

        # Store the rating together with the image's bounds and timestamp
        ratings.append(rating)
        bounds.append(img_bounds)
        timestamps.append(img_timestamp)
        image_id += 1  # Increment image ID

# Create a DataFrame from the collected data (one row per rated image)
df = pd.DataFrame({
    "image_id": range(image_id),
    "rating": ratings,
    "bounds": bounds,
    "timestamp": timestamps,
})

# Save the DataFrame to a CSV file
df.to_csv("image_ratings_test.csv", index=False)
print("Test complete. Ratings saved to image_ratings_test.csv")