dropbop committed on
Commit
09f8048
·
verified ·
1 Parent(s): df0f588

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -1
app.py CHANGED
@@ -6,7 +6,73 @@ import pandas as pd
6
  import os
7
  from itertools import islice
8
 
9
- # ... (rest of your code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Gradio interface (remove initial_outputs from launch)
12
  iface = gr.Interface(
 
6
  import os
7
  from itertools import islice
8
 
9
+ # Configuration
10
+ chunk_size = 100 # Size of the chunks to shuffle
11
+ label_file = "labels.csv"
12
+
13
+ # Load the Satellogic dataset (streaming)
14
+ dataset = ev.load_dataset("satellogic", streaming=True)
15
+ data_iter = iter(dataset)
16
+ shuffled_chunk = [] # Initialize an empty list to hold the current chunk
17
+ chunk_iter = None # Initialize the chunk iterator
18
+
19
+ # Initialize or load labels DataFrame
20
+ if os.path.exists(label_file):
21
+ labels_df = pd.read_csv(label_file)
22
+ else:
23
+ labels_df = pd.DataFrame(columns=["image_id", "bounds", "rating", "google_maps_link"])
24
+
25
+ def get_next_image():
26
+ global data_iter, labels_df, shuffled_chunk, chunk_iter
27
+
28
+ while True:
29
+ # If we don't have a current chunk or it's exhausted, get a new one
30
+ if not shuffled_chunk or chunk_iter is None:
31
+ chunk = list(islice(data_iter, chunk_size))
32
+ if not chunk: # If the dataset is exhausted, reset the iterator
33
+ print("Dataset exhausted, resetting iterator.")
34
+ data_iter = iter(ev.load_dataset("satellogic", streaming=True))
35
+ chunk = list(islice(data_iter, chunk_size))
36
+ if not chunk:
37
+ print("Still no data after reset.")
38
+ return None, "Dataset exhausted", None, None
39
+
40
+ random.shuffle(chunk)
41
+ shuffled_chunk = chunk
42
+ chunk_iter = iter(shuffled_chunk)
43
+
44
+ try:
45
+ sample = next(chunk_iter)
46
+ sample = ev.item_to_images("satellogic", sample)
47
+ image = sample["rgb"][0]
48
+ metadata = sample["metadata"]
49
+ bounds = metadata["bounds"]
50
+ google_maps_link = utils.get_google_map_link(sample, "satellogic")
51
+ image_id = str(bounds)
52
+
53
+ if image_id not in labels_df["image_id"].values:
54
+ return image, image_id, bounds, google_maps_link
55
+ except StopIteration:
56
+ # Current chunk is exhausted, reset chunk variables to get a new one in the next iteration
57
+ shuffled_chunk = []
58
+ chunk_iter = None
59
+
60
+ def rate_image(image_id, bounds, rating, google_maps_link):
61
+ global labels_df
62
+
63
+ new_row = pd.DataFrame(
64
+ {
65
+ "image_id": [image_id],
66
+ "bounds": [bounds],
67
+ "rating": [rating],
68
+ "google_maps_link": [google_maps_link],
69
+ }
70
+ )
71
+ labels_df = pd.concat([labels_df, new_row], ignore_index=True)
72
+ labels_df.to_csv(label_file, index=False)
73
+
74
+ next_image, next_image_id, next_bounds, next_google_maps_link = get_next_image()
75
+ return next_image, next_image_id, next_bounds, next_google_maps_link
76
 
77
  # Gradio interface (remove initial_outputs from launch)
78
  iface = gr.Interface(