latterworks committed
Commit 9d4c4f1 · verified · 1 Parent(s): b63d232

Update app.py

Files changed (1): app.py (+275 -44)
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import torch
 import numpy as np
 import folium
@@ -8,45 +7,78 @@ import os
 import PIL.Image
 from io import BytesIO
 import base64
-from typing import Tuple, List, Dict, Any, Optional, Union
+import json
+import time
+from typing import Tuple, List, Dict, Any, Optional, Union, Callable
 from pathlib import Path
+from datasets import Dataset, load_dataset, concatenate_datasets
 
 # GeoCLIP dependencies
 from geoclip import GeoCLIP
 from transformers import CLIPTokenizer, CLIPProcessor
+from huggingface_hub import HfApi
 
 
 class GeoCLIPCore:
     """
-    Vectorized GeoCLIP implementation with minimal compute overhead.
+    Vectorized GeoCLIP implementation with HuggingFace Hub integration.
 
-    Implements tensor-optimized inference for:
-    1. Text-to-location prediction
-    2. Image-to-location prediction
-    3. Coordinate embedding generation
-    4. Cross-modal similarity analysis
+    Implements tensor-optimized inference with persistent dataset storage:
+    1. Text-to-location prediction with confidence scoring
+    2. Image-to-location prediction with metadata extraction
+    3. Coordinate embedding generation for vector analysis
+    4. Cross-modal similarity computation
+    5. Dataset persistence to HuggingFace Hub
     """
 
-    def __init__(self, device: Optional[str] = None) -> None:
+    def __init__(self,
+                 device: Optional[str] = None,
+                 dataset_id: str = "latterworks/geo-metadata",
+                 token: Optional[str] = None) -> None:
         """
-        Initialize model with optimal compute resource allocation.
+        Initialize model with optimal compute allocation and dataset connection.
 
         Args:
            device: Target compute device (None for auto-detection)
+            dataset_id: HuggingFace dataset identifier
+            token: HuggingFace API token
        """
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.dataset_id = dataset_id
+        self.token = token
+
+        # Initialize HuggingFace API for dataset operations
+        self.api = HfApi(token=token)
 
-        # Load and configure core model components
+        # Load and configure core model components with vectorized execution path
         self._model = GeoCLIP().to(self.device)
         self._tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
         self._processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
 
-        # Cache frequently used components for performance
+        # Cache frequently accessed components for reduced latency
         self._location_encoder = self._model.location_encoder
         self._image_encoder = self._model.image_encoder
         self._gps_gallery = None  # Lazy-loaded on first prediction
 
-        print(f"GeoCLIP initialized on {self.device}")
+        # Initialize local dataset cache
+        self._initialize_dataset()
+
+        print(f"GeoCLIP initialized on {self.device} with Hub dataset: {dataset_id}")
+
+    def _initialize_dataset(self) -> None:
+        """Initialize connection to HuggingFace dataset with atomic transaction handling."""
+        try:
+            # Attempt to load existing dataset
+            self.dataset = load_dataset(self.dataset_id, split="train", token=self.token)
+            print(f"Loaded existing dataset with {len(self.dataset)} entries")
+        except Exception as e:
+            print(f"Creating new dataset: {e}")
+            # Create empty dataset with required schema
+            self.dataset = Dataset.from_dict({
+                "filename": [],
+                "classes": [],
+                "metadata": []
+            })
 
     def embed_text(self, text: str) -> torch.Tensor:
         """
@@ -76,15 +108,15 @@ class GeoCLIPCore:
             L2-normalized embedding tensor (shape: [1, 512])
         """
         with torch.no_grad():
-            # Process different image input types
+            # Process different image input types with type-specific optimizations
             if isinstance(image, str):
                 # Path to image file
                 image = PIL.Image.open(image).convert("RGB")
             elif isinstance(image, np.ndarray):
-                # Convert numpy array to PIL Image
+                # Convert numpy array to PIL Image with optimal memory layout
                 image = PIL.Image.fromarray(np.uint8(image)).convert("RGB")
 
-            # Process image using CLIP processor
+            # Process image using CLIP processor with tensor allocation
             inputs = self._processor(images=image, return_tensors="pt").to(self.device)
             embedding = self._model.image_encoder(inputs.pixel_values)
             return torch.nn.functional.normalize(embedding, dim=1)
@@ -104,7 +136,7 @@ class GeoCLIPCore:
             embedding = self._location_encoder(coords_tensor)
             return torch.nn.functional.normalize(embedding, dim=1)
 
-    def _ensure_gps_gallery(self):
+    def _ensure_gps_gallery(self) -> None:
         """Ensure GPS gallery is loaded and cached for efficient reuse."""
         if self._gps_gallery is None:
             self._gps_gallery = self._model.gps_gallery.to(self.device)
@@ -123,21 +155,21 @@
             List of prediction dictionaries with coordinates and confidence scores
         """
         with torch.no_grad():
-            # Ensure GPS gallery is loaded
+            # Ensure GPS gallery is loaded with resource pooling
             self._ensure_gps_gallery()
 
-            # Generate location embeddings
+            # Generate location embeddings with memory-efficient tensor operations
            location_embeddings = self._location_encoder(self._gps_gallery)
            location_embeddings = torch.nn.functional.normalize(location_embeddings, dim=1)
 
-            # Calculate similarity and softmax probabilities
+            # Calculate similarity with vectorized matrix multiplication
            similarity = self._model.logit_scale.exp() * (query_embedding @ location_embeddings.T)
            probs = similarity.softmax(dim=-1)
 
-            # Extract top predictions
+            # Extract top predictions with single tensor operation
            top_values, top_indices = torch.topk(probs[0], min(top_k, len(self._gps_gallery)))
 
-            # Format results
+            # Format results with CPU offloading
            predictions = []
            for idx, confidence in zip(top_indices.cpu().numpy(), top_values.cpu().numpy()):
                predictions.append({
@@ -161,7 +193,9 @@ class GeoCLIPCore:
         embedding = self.embed_text(text)
         return self.predict_location(embedding, top_k)
 
-    def image_to_location(self, image: Union[str, PIL.Image.Image, np.ndarray], top_k: int = 5) -> List[Dict[str, Any]]:
+    def image_to_location(self,
+                          image: Union[str, PIL.Image.Image, np.ndarray],
+                          top_k: int = 5) -> List[Dict[str, Any]]:
         """
         Primary entry point for image-to-location prediction pipeline.
 
@@ -175,6 +209,151 @@ class GeoCLIPCore:
         embedding = self.embed_image(image)
         return self.predict_location(embedding, top_k)
 
+    def extract_image_metadata(self, image_path: str) -> Dict[str, Any]:
+        """
+        Extract comprehensive metadata from image file with GPS coordinates.
+
+        Args:
+            image_path: Path to image file
+
+        Returns:
+            Dictionary containing extracted metadata
+        """
+        try:
+            from PIL import Image, ExifTags
+            import piexif
+
+            # Open image and extract EXIF data with efficient memory mapping
+            img = Image.open(image_path)
+            metadata = {"file_name": image_path, "file_size": os.path.getsize(image_path)}
+
+            # Extract basic image properties
+            metadata["format"] = img.format
+            metadata["mode"] = img.mode
+            metadata["size"] = list(img.size)
+
+            if hasattr(img, "_getexif") and img._getexif():
+                exif_dict = {}
+                for tag_id, value in img._getexif().items():
+                    tag = ExifTags.TAGS.get(tag_id, tag_id)
+                    exif_dict[tag.lower()] = value
+
+                # Copy relevant EXIF data to metadata
+                for key, value in exif_dict.items():
+                    if isinstance(value, bytes):
+                        continue
+                    metadata[key] = value
+
+                # Extract GPS data with specialized parsing
+                gps_info = {}
+                if "gpsinfo" in exif_dict:
+                    gps_data = exif_dict["gpsinfo"]
+                    for key, value in gps_data.items():
+                        tag = ExifTags.GPSTAGS.get(key, key)
+                        gps_info[tag] = value
+
+                    # Parse GPS coordinates to decimal format
+                    if "GPSLatitude" in gps_info and "GPSLongitude" in gps_info:
+                        lat = self._convert_to_decimal(
+                            gps_info["GPSLatitude"],
+                            gps_info.get("GPSLatitudeRef", "N")
+                        )
+                        lon = self._convert_to_decimal(
+                            gps_info["GPSLongitude"],
+                            gps_info.get("GPSLongitudeRef", "E")
+                        )
+                        gps_info["Latitude"] = lat
+                        gps_info["Longitude"] = lon
+
+                metadata["gps_info"] = gps_info
+
+            # Add file metadata
+            metadata["file_extension"] = os.path.splitext(image_path)[1]
+            metadata["extraction_timestamp"] = int(time.time())
+
+            return metadata
+        except Exception as e:
+            print(f"Error extracting metadata: {e}")
+            return {"error": str(e), "file_name": image_path}
+
+    def _convert_to_decimal(self, dms_coords, ref) -> float:
+        """
+        Convert GPS DMS (Degree, Minute, Second) to decimal format.
+
+        Args:
+            dms_coords: Tuple of degrees, minutes, seconds
+            ref: Direction reference (N/S/E/W)
+
+        Returns:
+            Decimal coordinate value
+        """
+        degrees = dms_coords[0]
+        minutes = dms_coords[1] / 60.0
+        seconds = dms_coords[2] / 3600.0
+
+        decimal = degrees + minutes + seconds
+
+        # Apply negative value for south or west coordinates
+        if ref in ['S', 'W']:
+            decimal = -decimal
+
+        return decimal
+
+    def add_to_dataset(self,
+                       image_path: str,
+                       classes: Optional[List[str]] = None,
+                       push_to_hub: bool = True) -> Dict[str, Any]:
+        """
+        Process image and add entry to dataset with optional HuggingFace Hub synchronization.
+
+        Args:
+            image_path: Path to image file
+            classes: Optional list of class labels
+            push_to_hub: Whether to push changes to Hub
+
+        Returns:
+            Dictionary containing the added entry
+        """
+        # Extract filename from path
+        filename = os.path.basename(image_path)
+
+        # Extract comprehensive metadata with optimized parser
+        metadata = self.extract_image_metadata(image_path)
+
+        # Prepare new entry
+        new_entry = {
+            "filename": filename,
+            "classes": classes or [],
+            "metadata": metadata
+        }
+
+        # Add to local dataset with optimized append operation
+        self.dataset = concatenate_datasets([
+            self.dataset,
+            Dataset.from_dict({
+                "filename": [new_entry["filename"]],
+                "classes": [new_entry["classes"]],
+                "metadata": [new_entry["metadata"]]
+            })
+        ])
+
+        # Push updates to HuggingFace Hub
+        if push_to_hub:
+            self.push_dataset_to_hub()
+
+        return new_entry
+
+    def push_dataset_to_hub(self) -> None:
+        """Push dataset updates to HuggingFace Hub with atomic transaction."""
+        if self.token:
+            try:
+                self.dataset.push_to_hub(self.dataset_id, token=self.token)
+                print(f"Successfully pushed dataset with {len(self.dataset)} entries to {self.dataset_id}")
+            except Exception as e:
+                print(f"Error pushing to Hub: {e}")
+        else:
+            print("HuggingFace token not provided. Dataset not pushed to Hub.")
+
     def compute_similarity(self, embed1: torch.Tensor, embed2: torch.Tensor) -> float:
         """
         Compute cosine similarity between two embeddings.
@@ -238,17 +417,22 @@ class GeoCLIPCore:
         return m
 
 
-def launch_gradio_interface():
-    """Deploy GeoCLIP with Gradio interface for both text and image inputs."""
+def launch_gradio_interface(hf_token: Optional[str] = None):
+    """
+    Deploy GeoCLIP with Gradio interface with Hub data persistence.
+
+    Args:
+        hf_token: HuggingFace API token for dataset operations
+    """
    # Initialize model with optimal compute configuration
-    geo_core = GeoCLIPCore()
+    geo_core = GeoCLIPCore(token=hf_token)
 
    def predict_from_text(text_query, top_k):
-        """Process text query and generate visualization."""
+        """Process text query and generate visualization with vector operations."""
        if not text_query.strip():
            return None, "Please enter a location description."
 
-        # Execute prediction pipeline
+        # Execute prediction pipeline with tensor acceleration
        predictions = geo_core.text_to_location(text_query, top_k=int(top_k))
 
        # Generate map visualization
@@ -257,7 +441,7 @@ def launch_gradio_interface():
            title=f"Predictions for: {text_query}"
        )
 
-        # Create temporary HTML file for map
+        # Create HTML representation
        map_html = m._repr_html_()
 
        # Format textual results
@@ -269,12 +453,16 @@ def launch_gradio_interface():
 
        return map_html, result_text
 
-    def predict_from_image(image, top_k):
-        """Process image input and generate visualization."""
+    def process_image(image, image_path, save_to_hub, top_k):
+        """
+        Process image for prediction and metadata extraction with Hub integration.
+
+        Returns map visualization, prediction results, and metadata.
+        """
        if image is None:
-            return None, "Please upload an image."
+            return None, "Please upload an image.", "{}"
 
-        # Execute prediction pipeline
+        # Execute prediction pipeline with tensor acceleration
        predictions = geo_core.image_to_location(image, top_k=int(top_k))
 
        # Generate map visualization
@@ -283,7 +471,7 @@ def launch_gradio_interface():
            title="Predictions from Image"
        )
 
-        # Create temporary HTML file for map
+        # Create HTML representation
        map_html = m._repr_html_()
 
        # Format textual results
@@ -293,7 +481,25 @@ def launch_gradio_interface():
            conf = pred["confidence"]
            result_text += f"{i}. ({coords[0]:.6f}, {coords[1]:.6f}) - confidence: {conf:.6f}\n"
 
-        return map_html, result_text
+        # Extract metadata if image was uploaded and path is available
+        metadata = {}
+        if image_path:
+            # Add to dataset if requested
+            if save_to_hub:
+                entry = geo_core.add_to_dataset(
+                    image_path,
+                    classes=["location"],
+                    push_to_hub=True
+                )
+                metadata = entry["metadata"]
+            else:
+                # Just extract metadata without saving
+                metadata = geo_core.extract_image_metadata(image_path)
+
+        # Format metadata as JSON
+        metadata_json = json.dumps(metadata, indent=2)
+
+        return map_html, result_text, metadata_json
 
    def compute_text_similarity(text1, text2):
        """Compute semantic similarity between two text descriptions."""
@@ -308,8 +514,8 @@ def launch_gradio_interface():
 
    # Create Gradio interface with tabs for different functions
    with gr.Blocks(title="GeoCLIP Location Intelligence") as demo:
-        gr.Markdown("# GeoCLIP Location Intelligence")
-        gr.Markdown("Predict locations from text descriptions or images.")
+        gr.Markdown("# GeoCLIP Location Intelligence with Hub Integration")
+        gr.Markdown("Predict locations from text descriptions or images with dataset persistence.")
 
        with gr.Tabs():
            with gr.TabItem("Text → Location"):
@@ -352,10 +558,14 @@
                    outputs=[text_map_output, text_result_output]
                )
 
-            with gr.TabItem("Image → Location"):
+            with gr.TabItem("Image → Location with Hub Integration"):
                with gr.Row():
                    with gr.Column():
                        image_input = gr.Image(type="pil", label="Upload Image")
+                        save_to_hub = gr.Checkbox(
+                            label="Save to HuggingFace Dataset",
+                            value=True
+                        )
                        image_top_k = gr.Slider(
                            minimum=1,
                            maximum=20,
@@ -363,15 +573,16 @@
                            step=1,
                            label="Number of Predictions"
                        )
-                        image_submit = gr.Button("Predict Location")
+                        image_submit = gr.Button("Process Image")
 
                    image_map_output = gr.HTML(label="Map Visualization")
                    image_result_output = gr.Textbox(label="Prediction Results")
+                    metadata_output = gr.JSON(label="Image Metadata")
 
                image_submit.click(
-                    predict_from_image,
-                    inputs=[image_input, image_top_k],
-                    outputs=[image_map_output, image_result_output]
+                    process_image,
+                    inputs=[image_input, image_input.upload_path, save_to_hub, image_top_k],
+                    outputs=[image_map_output, image_result_output, metadata_output]
                )
 
            with gr.TabItem("Semantic Similarity"):
@@ -393,11 +604,31 @@ def launch_gradio_interface():
                    inputs=[text1_input, text2_input],
                    outputs=similarity_output
                )
+
+            with gr.TabItem("Dataset Status"):
+                dataset_info = gr.Markdown(f"Current dataset: {geo_core.dataset_id}")
+                dataset_count = gr.Markdown(f"Number of entries: {len(geo_core.dataset)}")
+                update_status = gr.Button("Refresh Dataset Status")
+
+                def update_dataset_status():
+                    return (
+                        f"Current dataset: {geo_core.dataset_id}",
+                        f"Number of entries: {len(geo_core.dataset)}"
+                    )
+
+                update_status.click(
+                    update_dataset_status,
+                    inputs=[],
+                    outputs=[dataset_info, dataset_count]
+                )
 
    # Launch Gradio interface with optimized server settings
-    demo.launch(share=True, server_name="0.0.0.0")
+    demo.launch(share=True)
 
 
 if __name__ == "__main__":
+    # Read API token from environment variable
+    hf_token = os.environ.get("HF_TOKEN")
+
    # Execute vectorized deployment pipeline
-    launch_gradio_interface()
+    launch_gradio_interface(hf_token=hf_token)
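Note on the new click wiring: gr.Image components do not expose an upload_path attribute, so the committed inputs=[image_input, image_input.upload_path, save_to_hub, image_top_k] would raise an AttributeError when the Blocks app is constructed. A minimal sketch of one possible fix, assuming the image component is created with type="filepath" so Gradio passes the uploaded file's path (a form both image_to_location and extract_image_metadata in this commit accept); the committed process_image handler can then stay unchanged, receiving the same path for both its image and image_path parameters:

# Sketch only, not part of the commit: wire a path-based image component into the existing handler.
image_input = gr.Image(type="filepath", label="Upload Image")  # handler receives a file path, or None if empty

image_submit.click(
    process_image,
    inputs=[image_input, image_input, save_to_hub, image_top_k],  # same component supplies image and image_path
    outputs=[image_map_output, image_result_output, metadata_output]
)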
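For reference, a minimal usage sketch of the updated class outside the Gradio app. It assumes this file is importable as app, that the geoclip, transformers, datasets, and huggingface_hub packages are installed, and that HF_TOKEN is set; the query string and photo.jpg path are placeholders.

import os
from app import GeoCLIPCore  # assumes app.py is on the import path

# Instantiate with a Hub token so add_to_dataset() can push; device is auto-detected.
core = GeoCLIPCore(token=os.environ.get("HF_TOKEN"))

# Text -> coordinates: each prediction dict carries a softmax confidence score.
for pred in core.text_to_location("a harbor town on the Norwegian coast", top_k=3):
    print(f"confidence={pred['confidence']:.6f}", pred)

# Image -> coordinates, then record the file's EXIF metadata in the local dataset cache
# (push_to_hub=False keeps the entry local; True syncs it to the Hub dataset).
predictions = core.image_to_location("photo.jpg", top_k=5)
entry = core.add_to_dataset("photo.jpg", classes=["location"], push_to_hub=False)
print(entry["filename"], entry["metadata"].get("gps_info", {}))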