File size: 2,748 Bytes
465c443
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import glob
import pandas as pd
from PIL import Image
from itertools import islice
import earthview as ev
import gradio as gr

# Path to a single parquet file (shard 00000 of 00065) under dataset/satellogic.
# Only this one file is loaded - edit this line to point at a different shard.
filename = os.path.join("dataset", "satellogic", "train-00000-of-00065.parquet")
# Alternative: collect the paths of all parquet files in that directory.
# glob.glob returns a list whose order is arbitrary (not sorted).
# filenames = glob.glob(os.path.join("dataset", "satellogic", "*.parquet"))

# Load the single file into a pandas DataFrame (the whole file is read into memory).
# Original author's note: for larger datasets `dask` is suggested as a faster option;
# pandas is used here to keep to dependencies that are already imported.
data = pd.read_parquet(filename)

# transforms a metadata sample to bounds and timestamps, handling revisits
def item_to_bounds_timestamps(sample):
    """Return the bounds and timestamps stored in a sample's metadata.

    Reads ``sample["metadata"]["bounds"]`` and
    ``sample["metadata"]["timestamp"]``, walks them in lockstep and returns
    two new flat lists ``(bounds, timestamps)``. If the two sequences differ
    in length, both results are cut to the shorter one.
    """
    meta = sample["metadata"]
    # Pair the entries up once; zip stops at the shorter of the two inputs.
    paired = list(zip(meta["bounds"], meta["timestamp"]))
    bounds = [entry_bounds for entry_bounds, _ in paired]
    timestamps = [entry_time for _, entry_time in paired]
    return bounds, timestamps

def _prompt_binary_rating():
    """Ask on stdin until the user enters 0 or 1, then return it as an int."""
    while True:
        # Keep the try body to the one call that can raise ValueError.
        try:
            answer = int(input("Rate the image (0 or 1): "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if answer in (0, 1):
            return answer
        print("Invalid rating. Please enter 0 or 1.")


# Parallel result lists: entry k of each list describes the image whose ID is k.
ratings = []
image_id = 0  # Running image ID, counted across all rows and revisits
bounds = []
timestamps = []

# Limit the number of DataFrame rows to process for the test.
# Each row can hold several images (revisits), so more images than this may be rated.
num_images_to_process = 5

# Iterate through the samples, display, rate, and store info
data_iter = islice(data.iterrows(), num_images_to_process)
for index, sample in data_iter:
    rgb = sample["rgb"]
    bounds_sample, timestamps_sample = item_to_bounds_timestamps(sample)
    # iterate through the RGB images (revisits)
    for i, img in enumerate(rgb):
        # Resolve the metadata BEFORE prompting: if the metadata has fewer
        # entries than there are images, the IndexError is raised before a
        # rating is recorded, so the three result lists never get out of step.
        image_bounds = bounds_sample[i]
        image_timestamp = timestamps_sample[i]

        print("Image ID:", image_id)
        # NOTE(review): `display` is not imported in the visible source; presumably
        # this runs in an IPython/Jupyter session where it is a builtin - confirm.
        display(img)

        # Get the rating from the user
        rating = _prompt_binary_rating()

        # Store the rating together with its bounds and timestamp
        ratings.append(rating)
        bounds.append(image_bounds)
        timestamps.append(image_timestamp)

        image_id += 1  # Increment image ID

# Create a DataFrame from the collected data; image_id now equals the number
# of rated images, so range(image_id) lines up with the result lists.
df = pd.DataFrame({
    "image_id": range(image_id),
    "rating": ratings,
    "bounds": bounds,
    "timestamp": timestamps
})

# Save the DataFrame to a CSV file
df.to_csv("image_ratings_test.csv", index=False)

print("Test complete. Ratings saved to image_ratings_test.csv")