dropbop committed on
Commit
465c443
·
verified ·
1 Parent(s): 1e627ff

Create app.py test version (small scale example)

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import pandas as pd
4
+ from PIL import Image
5
+ from itertools import islice
6
+ import earthview as ev
7
+ import gradio as gr
8
+
9
# Only the first file of the first shard is read; change the shard name below
# to load a different file.
dataset_dir = os.path.join("dataset", "satellogic")
filename = os.path.join(dataset_dir, "train-00000-of-00065.parquet")
# To collect every shard instead (glob.glob returns an unsorted list of paths):
# filenames = glob.glob(os.path.join("dataset", "satellogic", "*.parquet"))

# Read that single parquet file into a pandas DataFrame.
# For larger datasets `dask` would be significantly faster, but this simple
# version sticks to dependencies that are already imported.
data = pd.read_parquet(filename)
18
+
19
+ # transforms a metadata sample to bounds and timestamps, handling revisits
20
def item_to_bounds_timestamps(sample):
    """Split a sample's nested metadata into two parallel flat lists.

    Reads ``sample["metadata"]["bounds"]`` and
    ``sample["metadata"]["timestamp"]`` and pairs their entries positionally.
    If the two sequences differ in length, the extra entries of the longer
    one are dropped.

    Returns:
        A ``(bounds, timestamps)`` tuple of lists of equal length.
    """
    metadata = sample["metadata"]
    # Pair entries up first so both outputs are cut to the shorter input.
    paired = list(zip(metadata["bounds"], metadata["timestamp"]))
    bounds = [bound for bound, _ in paired]
    timestamps = [stamp for _, stamp in paired]
    return bounds, timestamps
32
+
33
# Per-image results collected during the session; each list gains one entry
# per rated image and they become the columns of the output CSV.
ratings = []
bounds = []
timestamps = []
image_id = 0  # running ID, incremented once per rated image

# Keep the test run small. This caps the number of dataset rows that are read;
# a row contributes one image per entry in its "rgb" field.
num_images_to_process = 5


def _ask_rating():
    """Prompt on stdin until the user enters 0 or 1, then return that int."""
    while True:
        try:
            answer = int(input("Rate the image (0 or 1): "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if answer in (0, 1):
            return answer
        print("Invalid rating. Please enter 0 or 1.")


# Walk the first rows of the dataset; show, rate and record every image.
data_iter = islice(data.iterrows(), num_images_to_process)
for _, row in data_iter:
    rgb = row["rgb"]
    bounds_sample, timestamps_sample = item_to_bounds_timestamps(row)
    # One entry in "rgb" per revisit of the same location.
    for revisit, frame in enumerate(rgb):
        print("Image ID:", image_id)
        # NOTE(review): `display` is not imported in this file; it is only
        # predefined inside IPython/Jupyter sessions. Confirm the intended
        # runtime before running this as a plain script.
        display(frame)

        rating = _ask_rating()

        # Record the rating together with the matching metadata entry.
        ratings.append(rating)
        bounds.append(bounds_sample[revisit])
        timestamps.append(timestamps_sample[revisit])

        image_id += 1

# Assemble the collected columns into a table and write it out as CSV.
columns = {
    "image_id": range(image_id),
    "rating": ratings,
    "bounds": bounds,
    "timestamp": timestamps,
}
df = pd.DataFrame(columns)
df.to_csv("image_ratings_test.csv", index=False)

print("Test complete. Ratings saved to image_ratings_test.csv")