Spaces:

dropbop
/

TerraNomaly

Sleeping

App Files Files Community

TerraNomaly / app.py

dropbop

Create app.py test version (small scale example)

465c443 verified 7 months ago

raw

history blame

2.75 kB

	import os
	import glob
	import pandas as pd
	from PIL import Image
	from itertools import islice
	import earthview as ev
	import gradio as gr

	# Path of the single parquet shard this script reads: the first shard of the
	# "satellogic" subset. Edit the file name to load a different shard.
	filename = os.path.join(
	    "dataset",
	    "satellogic",
	    "train-00000-of-00065.parquet",
	)
	# To collect every shard instead, glob the directory. glob.glob returns a
	# list, and its order is arbitrary (not sorted):
	# filenames = glob.glob(os.path.join("dataset", "satellogic", "*.parquet"))

	# Read that one shard into a pandas DataFrame.
	# pandas is used so that no dependency beyond the existing imports is needed;
	# the original author recommends `dask` for larger datasets.
	data = pd.read_parquet(path=filename)

	def item_to_bounds_timestamps(sample):
	    """Return the bounds and timestamps stored in a sample's metadata.

	    Args:
	        sample: Mapping-like object (e.g. a DataFrame row) where
	            ``sample["metadata"]["bounds"]`` and
	            ``sample["metadata"]["timestamp"]`` are iterables.
	            Presumably there is one entry per revisit of the location;
	            confirm against the dataset schema.

	    Returns:
	        A ``(bounds, timestamps)`` tuple of two flat lists of equal length.
	        Entries are paired positionally; if the two inputs differ in length,
	        both outputs are truncated to the shorter one.
	    """
	    metadata = sample["metadata"]
	    # Pair the entries up first so that both outputs always have the same
	    # length, even when one input is longer than the other.
	    pairs = list(zip(metadata["bounds"], metadata["timestamp"]))
	    bounds = [bound for bound, _ in pairs]
	    timestamps = [stamp for _, stamp in pairs]
	    return bounds, timestamps

	# Interactive rating session: show each image, ask the user for a 0/1 rating on
	# stdin, and write the ratings with their bounds and timestamps to a CSV file.


	def _show_image(img):
	    """Show *img* with whatever viewer is available in this environment.

	    `display` exists only where IPython/Jupyter has injected it; under a plain
	    Python interpreter the name is undefined, so it is looked up defensively
	    instead of being called directly.
	    """
	    try:
	        viewer = display  # noqa: F821 - provided by IPython/Jupyter only
	    except NameError:
	        viewer = None

	    if viewer is not None:
	        viewer(img)
	    elif isinstance(img, Image.Image):
	        # PIL images can open themselves in the system's image viewer.
	        img.show()
	    else:
	        # NOTE(review): the element type of sample["rgb"] is not visible in this
	        # file; convert it to a PIL image here once the type is confirmed.
	        print("Inline display unavailable; image object of type", type(img).__name__)


	def _ask_rating():
	    """Prompt on stdin until the user enters 0 or 1, then return it as an int."""
	    while True:
	        try:
	            rating = int(input("Rate the image (0 or 1): "))
	        except ValueError:
	            print("Invalid input. Please enter a number.")
	            continue
	        if rating in (0, 1):
	            return rating
	        print("Invalid rating. Please enter 0 or 1.")


	# Parallel lists: position k in each list describes image number k.
	ratings = []
	image_id = 0  # running count of images rated so far; doubles as the next ID
	bounds = []
	timestamps = []

	# Number of dataset rows (samples) to process in this test run. A row can hold
	# several images, so the number of images rated may be larger than this.
	num_images_to_process = 5

	data_iter = islice(data.iterrows(), num_images_to_process)
	for _, sample in data_iter:
	    rgb = sample["rgb"]
	    bounds_sample, timestamps_sample = item_to_bounds_timestamps(sample)
	    # Each row may contain several images; the i-th image is recorded with the
	    # i-th bounds/timestamp entry of the same row.
	    for i, img in enumerate(rgb):
	        print("Image ID:", image_id)
	        _show_image(img)

	        ratings.append(_ask_rating())
	        bounds.append(bounds_sample[i])
	        timestamps.append(timestamps_sample[i])

	        image_id += 1

	# One output row per rated image.
	df = pd.DataFrame({
	    "image_id": range(image_id),
	    "rating": ratings,
	    "bounds": bounds,
	    "timestamp": timestamps,
	})

	# index=False keeps the DataFrame's own row index out of the CSV.
	df.to_csv("image_ratings_test.csv", index=False)

	print("Test complete. Ratings saved to image_ratings_test.csv")