TerraNomaly / app.py
dropbop's picture
Update app.py
fb2c0c4 verified
raw
history blame
5.25 kB
import gradio as gr
import earthview as ev
import utils
import random
import pandas as pd
import os
from itertools import islice
# Configuration
chunk_size = 100  # Number of streamed samples pulled and shuffled together
label_file = os.path.join(os.path.dirname(__file__), "labels.csv")  # Labels CSV sits next to this script

# Streaming state for the Satellogic dataset
dataset = ev.load_dataset("satellogic", streaming=True)
data_iter = iter(dataset)
shuffled_chunk = []  # Samples of the chunk currently being served
chunk_iter = None  # Iterator over shuffled_chunk; None until a chunk has been drawn

# Resume from previously saved labels when the CSV exists, otherwise start empty
labels_df = (
    pd.read_csv(label_file)
    if os.path.exists(label_file)
    else pd.DataFrame(columns=["image_id", "bounds", "rating", "google_maps_link"])
)
def get_next_image():
    """Return the next image that does not yet have a rating.

    Samples are drawn from the streaming dataset ``chunk_size`` at a time;
    each chunk is shuffled and then served one sample per loop iteration.
    A sample is skipped when its ID (the string form of its bounds) already
    appears in the ``image_id`` column of ``labels_df``.

    Returns:
        A 4-tuple ``(image, image_id, bounds, google_maps_link)``. When the
        stream yields nothing even after being reset, the tuple is
        ``(None, "Dataset exhausted", None, None)``.
    """
    global data_iter, labels_df, shuffled_chunk, chunk_iter

    while True:
        # Refill whenever there is no active chunk: first call, after a reset,
        # or after the previous chunk ran out.
        if chunk_iter is None or not shuffled_chunk:
            fresh_chunk = list(islice(data_iter, chunk_size))
            if not fresh_chunk:
                # The stream is drained; start it over and try once more.
                print("Dataset exhausted, resetting iterator.")
                reset_dataset_iterator()
                fresh_chunk = list(islice(data_iter, chunk_size))
                if not fresh_chunk:
                    print("Still no data after reset.")
                    return None, "Dataset exhausted", None, None
            random.shuffle(fresh_chunk)
            shuffled_chunk = fresh_chunk
            chunk_iter = iter(shuffled_chunk)

        try:
            raw_sample = next(chunk_iter)
            converted = ev.item_to_images("satellogic", raw_sample)
            picture = converted["rgb"][0]
            bounds = converted["metadata"]["bounds"]
            maps_link = utils.get_google_map_link(converted, "satellogic")
            image_id = str(bounds)

            # Serve the sample unless it has already been labelled; with no
            # labels table at all, every sample counts as unlabelled.
            if labels_df is None or image_id not in labels_df["image_id"].values:
                return picture, image_id, bounds, maps_link
        except StopIteration:
            # Chunk consumed: clear the state so the next pass pulls a new one.
            shuffled_chunk, chunk_iter = [], None
def rate_image(image_id, bounds, rating):
    """Store a rating for the displayed image and fetch the next one.

    The rating is appended to ``labels_df`` and the whole table is written
    back to ``label_file`` as CSV on every call.

    Args:
        image_id: Identifier of the image being rated.
        bounds: Bounds value associated with that image.
        rating: The rating chosen for the image.

    Returns:
        The 4-tuple produced by ``get_next_image()`` for the next image.
    """
    global labels_df

    columns = ("image_id", "bounds", "rating", "google_maps_link")
    # The link column is left blank here; this function never receives it.
    values = (image_id, bounds, rating, "")
    rated_row = pd.DataFrame({column: [value] for column, value in zip(columns, values)})

    labels_df = pd.concat([labels_df, rated_row], ignore_index=True)
    labels_df.to_csv(label_file, index=False)

    return get_next_image()
def save_labels_parquet():
    """Export the collected labels to a Parquet file.

    Returns:
        The path ``'labeled_data.parquet'`` after the file has been written,
        or ``None`` when ``labels_df`` is missing or holds no rows.
    """
    if labels_df is None or labels_df.empty:
        return None

    output_path = 'labeled_data.parquet'
    # The names `pa` / `pq` are not imported anywhere in this file, so the
    # previous pyarrow calls raised NameError. pandas' own writer produces the
    # same file through whichever Parquet engine is installed.
    labels_df.to_parquet(output_path)
    return output_path
def reset_dataset_iterator():
    """Restart the dataset stream and discard the chunk being served.

    Rebinds the module-level ``data_iter`` to an iterator over a newly loaded
    streaming dataset, then clears ``shuffled_chunk`` and ``chunk_iter``.
    """
    global data_iter, shuffled_chunk, chunk_iter

    fresh_stream = ev.load_dataset("satellogic", streaming=True)
    data_iter = iter(fresh_stream)
    shuffled_chunk, chunk_iter = [], None
def load_different_batch():
    """Reset the dataset stream and return the first image served afterwards.

    Returns:
        The 4-tuple produced by ``get_next_image()`` after the reset.
    """
    print("Loading a different batch of images...")
    reset_dataset_iterator()
    first_of_batch = get_next_image()
    return first_of_batch
# Gradio interface
# NOTE(review): indentation was lost in the copy this was rebuilt from, so the
# nesting below (which buttons share the row, and what sits inside the Blocks
# context) is a reconstruction -- confirm against the deployed layout.
with gr.Blocks() as iface:
    image_out = gr.Image(label="Satellite Image")
    # Hidden fields: they feed the current image's ID and bounds back into
    # rate_image when a rating is submitted.
    image_id_out = gr.Textbox(label="Image ID", visible=False)
    bounds_out = gr.Textbox(label="Bounds", visible=False)
    google_maps_link_out = gr.Textbox(label="Google Maps Link", visible=True)
    rating_radio = gr.Radio(["Cool", "Not Cool"], label="Rating")

    with gr.Row():
        submit_button = gr.Button("Submit Rating")
        different_batch_button = gr.Button("Load Different Batch")
        download_button = gr.Button("Download Labels (Parquet)")

    download_output = gr.File(label="Download Labeled Data")

    # Both image-producing callbacks refresh the same four components.
    submit_button.click(
        fn=rate_image,
        inputs=[image_id_out, bounds_out, rating_radio],
        outputs=[image_out, image_id_out, bounds_out, google_maps_link_out],
    )
    download_button.click(
        fn=save_labels_parquet,
        inputs=[],
        outputs=[download_output],
    )
    different_batch_button.click(
        fn=load_different_batch,
        inputs=[],
        outputs=[image_out, image_id_out, bounds_out, google_maps_link_out],
    )

    # Load the first image
    initial_image, initial_image_id, initial_bounds, initial_google_maps_link = get_next_image()

    # Set initial values. Compare against None explicitly: get_next_image()
    # signals "no data" with None, and truth-testing an image object is
    # unreliable (a multi-element NumPy array raises ValueError).
    if initial_image is not None:
        iface.load(
            lambda: (initial_image, initial_image_id, initial_bounds, initial_google_maps_link),
            inputs=None,
            outputs=[image_out, image_id_out, bounds_out, google_maps_link_out],
        )

iface.launch(share=True)