dropbop committed on
Commit
5ec1d6a
·
verified ·
1 Parent(s): 7d5835f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -36
app.py CHANGED
@@ -4,61 +4,77 @@ import utils
4
  import random
5
  import pandas as pd
6
  import os
 
7
 
8
- # Load the Satellogic dataset
9
- dataset = ev.load_dataset("satellogic", streaming=True).shuffle(seed=42)
10
- data_iter = iter(dataset)
11
-
12
- # File to store labels (will create if it doesn't exist)
13
  label_file = "labels.csv"
14
 
15
- # Initialize a DataFrame to hold labels (or load existing)
 
 
 
 
 
 
16
  if os.path.exists(label_file):
17
  labels_df = pd.read_csv(label_file)
18
  else:
19
  labels_df = pd.DataFrame(columns=["image_id", "bounds", "rating", "google_maps_link"])
20
 
21
  def get_next_image():
22
- global data_iter, labels_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- while True: # Keep iterating until we find an unlabeled image
25
  try:
26
- sample = next(data_iter)
 
 
 
 
 
 
 
 
 
27
  except StopIteration:
28
- #refresh the dataset if we reach the end
29
- dataset = ev.load_dataset("satellogic", streaming=True).shuffle(seed=random.randint(0, 1000000))
30
- data_iter = iter(dataset)
31
- continue
32
-
33
- sample = ev.item_to_images("satellogic", sample)
34
- image = sample["rgb"][0] # Get the first RGB image
35
- metadata = sample["metadata"]
36
-
37
- bounds = metadata["bounds"]
38
- google_maps_link = utils.get_google_map_link(sample, "satellogic")
39
- #generate a unique image ID:
40
- image_id = (str(bounds))
41
-
42
- # Check if image is already labeled
43
- if image_id not in labels_df["image_id"].values:
44
- return image, image_id, bounds, google_maps_link
45
 
46
  def rate_image(image_id, bounds, rating, google_maps_link):
47
  global labels_df
48
 
49
- # Add the rating to the DataFrame
50
- new_row = pd.DataFrame({"image_id": [image_id], "bounds": [bounds], "rating": [rating], "google_maps_link": [google_maps_link]})
 
 
 
 
 
 
51
  labels_df = pd.concat([labels_df, new_row], ignore_index=True)
52
-
53
- # Save the DataFrame to CSV
54
  labels_df.to_csv(label_file, index=False)
55
 
56
- # Get the next image and its details
57
  next_image, next_image_id, next_bounds, next_google_maps_link = get_next_image()
58
-
59
  return next_image, next_image_id, next_bounds, next_google_maps_link
60
 
61
- # Define the Gradio interface
62
  iface = gr.Interface(
63
  fn=rate_image,
64
  inputs=[
@@ -78,10 +94,8 @@ iface = gr.Interface(
78
  live=False,
79
  )
80
 
81
- # Get the first image and its details
82
  initial_image, initial_image_id, initial_bounds, initial_google_maps_link = get_next_image()
83
 
84
- # Set the initial values for the output components
85
  iface.launch(
86
  share=True,
87
  initial_outputs=[
@@ -90,5 +104,4 @@ iface.launch(
90
  initial_bounds,
91
  initial_google_maps_link,
92
  ],
93
-
94
  )
 
4
  import random
5
  import pandas as pd
6
  import os
7
+ from itertools import islice
8
 
9
+ # Configuration
10
+ chunk_size = 100 # Size of the chunks to shuffle
 
 
 
11
  label_file = "labels.csv"
12
 
13
+ # Load the Satellogic dataset (streaming)
14
+ dataset = ev.load_dataset("satellogic", streaming=True)
15
+ data_iter = iter(dataset)
16
+ shuffled_chunk = [] # Initialize an empty list to hold the current chunk
17
+ chunk_iter = None # Initialize the chunk iterator
18
+
19
+ # Initialize or load labels DataFrame
20
  if os.path.exists(label_file):
21
  labels_df = pd.read_csv(label_file)
22
  else:
23
  labels_df = pd.DataFrame(columns=["image_id", "bounds", "rating", "google_maps_link"])
24
 
25
  def get_next_image():
26
+ global data_iter, labels_df, shuffled_chunk, chunk_iter
27
+
28
+ while True:
29
+ # If we don't have a current chunk or it's exhausted, get a new one
30
+ if not shuffled_chunk or chunk_iter is None:
31
+ chunk = list(islice(data_iter, chunk_size))
32
+ if not chunk: # If the dataset is exhausted, reset the iterator
33
+ print("Dataset exhausted, resetting iterator.")
34
+ data_iter = iter(ev.load_dataset("satellogic", streaming=True))
35
+ chunk = list(islice(data_iter, chunk_size))
36
+ if not chunk:
37
+ print("Still no data after reset.")
38
+ return None, "Dataset exhausted", None, None
39
+
40
+ random.shuffle(chunk)
41
+ shuffled_chunk = chunk
42
+ chunk_iter = iter(shuffled_chunk)
43
 
 
44
  try:
45
+ sample = next(chunk_iter)
46
+ sample = ev.item_to_images("satellogic", sample)
47
+ image = sample["rgb"][0]
48
+ metadata = sample["metadata"]
49
+ bounds = metadata["bounds"]
50
+ google_maps_link = utils.get_google_map_link(sample, "satellogic")
51
+ image_id = str(bounds)
52
+
53
+ if image_id not in labels_df["image_id"].values:
54
+ return image, image_id, bounds, google_maps_link
55
  except StopIteration:
56
+ # Current chunk is exhausted, reset chunk variables to get a new one in the next iteration
57
+ shuffled_chunk = []
58
+ chunk_iter = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def rate_image(image_id, bounds, rating, google_maps_link):
61
  global labels_df
62
 
63
+ new_row = pd.DataFrame(
64
+ {
65
+ "image_id": [image_id],
66
+ "bounds": [bounds],
67
+ "rating": [rating],
68
+ "google_maps_link": [google_maps_link],
69
+ }
70
+ )
71
  labels_df = pd.concat([labels_df, new_row], ignore_index=True)
 
 
72
  labels_df.to_csv(label_file, index=False)
73
 
 
74
  next_image, next_image_id, next_bounds, next_google_maps_link = get_next_image()
 
75
  return next_image, next_image_id, next_bounds, next_google_maps_link
76
 
77
+ # Gradio interface (no changes needed here)
78
  iface = gr.Interface(
79
  fn=rate_image,
80
  inputs=[
 
94
  live=False,
95
  )
96
 
 
97
  initial_image, initial_image_id, initial_bounds, initial_google_maps_link = get_next_image()
98
 
 
99
  iface.launch(
100
  share=True,
101
  initial_outputs=[
 
104
  initial_bounds,
105
  initial_google_maps_link,
106
  ],
 
107
  )