# TerraNomaly / app.py
# Hosting-page header captured with the file (not part of the program):
#   dropbop's picture
#   Update app.py
#   fb2c0c4 verified
import gradio as gr
import earthview as ev
import utils
import random
import pandas as pd
import os
from itertools import islice
# --- Settings ---------------------------------------------------------------
chunk_size = 100  # how many stream items are pulled and shuffled together
# labels.csv is kept next to this script.
label_file = os.path.join(os.path.dirname(__file__), "labels.csv")

# --- Streaming dataset state ------------------------------------------------
dataset = ev.load_dataset("satellogic", streaming=True)
data_iter = iter(dataset)
shuffled_chunk = []  # items of the chunk currently being served
chunk_iter = None  # iterator over shuffled_chunk; None until a chunk is loaded

# --- Label store ------------------------------------------------------------
# Resume from an existing labels.csv, otherwise start with an empty table.
labels_df = (
    pd.read_csv(label_file)
    if os.path.exists(label_file)
    else pd.DataFrame(columns=["image_id", "bounds", "rating", "google_maps_link"])
)
def get_next_image():
    """Return the next image that has not been labeled yet.

    Items are pulled from the stream ``chunk_size`` at a time, shuffled within
    the chunk and served one by one. An item is skipped when its ID (the
    string form of its bounds) already appears in ``labels_df["image_id"]``.

    When the stream runs dry it is restarted at most once per call. If the
    pass after the restart also finds nothing, the call gives up instead of
    rescanning the dataset forever (which would happen once every item has
    been labeled).

    Returns:
        A ``(image, image_id, bounds, google_maps_link)`` tuple, or
        ``(None, "Dataset exhausted", None, None)`` when no unlabeled item
        could be found.
    """
    global data_iter, labels_df, shuffled_chunk, chunk_iter

    already_reset = False  # guards against endless restart/rescan cycles
    while True:
        # No chunk loaded (or the previous one was used up): fetch a new one.
        if not shuffled_chunk or chunk_iter is None:
            chunk = list(islice(data_iter, chunk_size))
            if not chunk and not already_reset:
                print("Dataset exhausted, resetting iterator.")
                reset_dataset_iterator()
                already_reset = True
                chunk = list(islice(data_iter, chunk_size))
            if not chunk:
                print("Still no data after reset.")
                return None, "Dataset exhausted", None, None
            random.shuffle(chunk)
            shuffled_chunk = chunk
            chunk_iter = iter(shuffled_chunk)

        try:
            sample = next(chunk_iter)
        except StopIteration:
            # Chunk used up: clear it so the next pass loads a new chunk.
            shuffled_chunk = []
            chunk_iter = None
            continue

        sample = ev.item_to_images("satellogic", sample)
        image = sample["rgb"][0]
        bounds = sample["metadata"]["bounds"]
        google_maps_link = utils.get_google_map_link(sample, "satellogic")
        image_id = str(bounds)

        # Serve the item unless it has been labeled before.
        if labels_df is None or image_id not in labels_df["image_id"].values:
            return image, image_id, bounds, google_maps_link
def rate_image(image_id, bounds, rating):
    """Store a rating for the displayed image and move on to the next one.

    The rating is appended to ``labels_df`` and the whole table is rewritten
    to ``label_file`` so that progress survives a restart.

    Args:
        image_id: ID of the image being rated (string form of its bounds).
        bounds: Bounds of the image, as shown in the hidden "Bounds" textbox.
        rating: The selected rating; the interface offers "Cool" and "Not Cool".

    Returns:
        The ``(image, image_id, bounds, google_maps_link)`` tuple from
        ``get_next_image``.

    Raises:
        gr.Error: If no image is loaded or no rating has been selected.
            Nothing is recorded in either case.
    """
    global labels_df

    # Reject submissions that would write a junk row to the label file.
    if not image_id:
        raise gr.Error("No image is loaded, so there is nothing to rate.")
    if not rating:
        raise gr.Error("Please select a rating before submitting.")

    new_row = pd.DataFrame(
        {
            "image_id": [image_id],
            "bounds": [bounds],
            "rating": [rating],
            # The link is not passed to this function, so it is stored empty.
            "google_maps_link": [""],
        }
    )
    labels_df = pd.concat([labels_df, new_row], ignore_index=True)
    labels_df.to_csv(label_file, index=False)

    return get_next_image()
def save_labels_parquet():
    """Export the collected labels to a Parquet file.

    Uses ``DataFrame.to_parquet`` so that no names beyond the pandas import
    are needed (the ``pa``/``pq`` names used previously were never imported).
    pandas still needs a Parquet engine such as pyarrow to be installed.

    Returns:
        The path of the written file (``'labeled_data.parquet'``), or ``None``
        when there are no labels to export.
    """
    if labels_df is None or labels_df.empty:
        return None

    output_path = 'labeled_data.parquet'
    labels_df.to_parquet(output_path)
    return output_path
def reset_dataset_iterator():
    """Reopen the "satellogic" stream and forget the chunk being served."""
    global data_iter, shuffled_chunk, chunk_iter
    fresh_stream = ev.load_dataset("satellogic", streaming=True)
    data_iter = iter(fresh_stream)
    # With no chunk loaded, the next get_next_image call reads a new one.
    shuffled_chunk, chunk_iter = [], None
def load_different_batch():
    """Drop the chunk being served and show an image from the next chunk.

    Returns:
        The ``(image, image_id, bounds, google_maps_link)`` tuple from
        ``get_next_image``.
    """
    global shuffled_chunk, chunk_iter
    print("Loading a different batch of images...")
    # Clearing the chunk makes get_next_image read the next chunk_size items
    # from the stream. Reopening the stream instead would start over from its
    # beginning and is expected to serve the same leading items again.
    # NOTE(review): assumes a reopened stream yields items in the same order
    # each time - confirm against the earthview/datasets loader.
    shuffled_chunk = []
    chunk_iter = None
    return get_next_image()  # First unlabeled image of the next chunk
# Gradio interface
with gr.Blocks() as iface:
    image_out = gr.Image(label="Satellite Image")
    # Hidden textboxes carry the current image's ID and bounds back to
    # rate_image when the rating is submitted.
    image_id_out = gr.Textbox(label="Image ID", visible=False)
    bounds_out = gr.Textbox(label="Bounds", visible=False)
    google_maps_link_out = gr.Textbox(label="Google Maps Link", visible=True)
    rating_radio = gr.Radio(["Cool", "Not Cool"], label="Rating")

    with gr.Row():
        submit_button = gr.Button("Submit Rating")
        different_batch_button = gr.Button("Load Different Batch")
        download_button = gr.Button("Download Labels (Parquet)")

    download_output = gr.File(label="Download Labeled Data")

    submit_button.click(
        fn=rate_image,
        inputs=[image_id_out, bounds_out, rating_radio],
        outputs=[image_out, image_id_out, bounds_out, google_maps_link_out],
    )

    download_button.click(
        fn=save_labels_parquet,
        inputs=[],
        outputs=[download_output],
    )

    different_batch_button.click(
        fn=load_different_batch,
        inputs=[],
        outputs=[image_out, image_id_out, bounds_out, google_maps_link_out],
    )

    # Fetch an unlabeled image on every page load. Replaying one image fetched
    # at start-up would show an already-rated image again after a reload.
    iface.load(
        fn=get_next_image,
        inputs=None,
        outputs=[image_out, image_id_out, bounds_out, google_maps_link_out],
    )

iface.launch(share=True)