Runtime error
Update app.py
app.py
CHANGED
@@ -1,4 +1,3 @@
-
import torch
import numpy as np
import folium
@@ -8,45 +7,78 @@ import os
import PIL.Image
from io import BytesIO
import base64
-
+import json
+import time
+from typing import Tuple, List, Dict, Any, Optional, Union, Callable
from pathlib import Path
+from datasets import Dataset, load_dataset, concatenate_datasets

# GeoCLIP dependencies
from geoclip import GeoCLIP
from transformers import CLIPTokenizer, CLIPProcessor
+from huggingface_hub import HfApi


class GeoCLIPCore:
    """
-    Vectorized GeoCLIP implementation with
+    Vectorized GeoCLIP implementation with HuggingFace Hub integration.

-    Implements tensor-optimized inference
-    1. Text-to-location prediction
-    2. Image-to-location prediction
-    3. Coordinate embedding generation
-    4. Cross-modal similarity
+    Implements tensor-optimized inference with persistent dataset storage:
+    1. Text-to-location prediction with confidence scoring
+    2. Image-to-location prediction with metadata extraction
+    3. Coordinate embedding generation for vector analysis
+    4. Cross-modal similarity computation
+    5. Dataset persistence to HuggingFace Hub
    """

-    def __init__(self,
+    def __init__(self,
+                 device: Optional[str] = None,
+                 dataset_id: str = "latterworks/geo-metadata",
+                 token: Optional[str] = None) -> None:
        """
-        Initialize model with optimal compute
+        Initialize model with optimal compute allocation and dataset connection.

        Args:
            device: Target compute device (None for auto-detection)
+            dataset_id: HuggingFace dataset identifier
+            token: HuggingFace API token
        """
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.dataset_id = dataset_id
+        self.token = token
+
+        # Initialize HuggingFace API for dataset operations
+        self.api = HfApi(token=token)

-        # Load and configure core model components
+        # Load and configure core model components with vectorized execution path
        self._model = GeoCLIP().to(self.device)
        self._tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
        self._processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

-        # Cache frequently
+        # Cache frequently accessed components for reduced latency
        self._location_encoder = self._model.location_encoder
        self._image_encoder = self._model.image_encoder
        self._gps_gallery = None  # Lazy-loaded on first prediction

-
+        # Initialize local dataset cache
+        self._initialize_dataset()
+
+        print(f"GeoCLIP initialized on {self.device} with Hub dataset: {dataset_id}")
+
+    def _initialize_dataset(self) -> None:
+        """Initialize connection to HuggingFace dataset with atomic transaction handling."""
+        try:
+            # Attempt to load existing dataset
+            self.dataset = load_dataset(self.dataset_id, split="train", token=self.token)
+            print(f"Loaded existing dataset with {len(self.dataset)} entries")
+        except Exception as e:
+            print(f"Creating new dataset: {e}")
+            # Create empty dataset with required schema
+            self.dataset = Dataset.from_dict({
+                "filename": [],
+                "classes": [],
+                "metadata": []
+            })

    def embed_text(self, text: str) -> torch.Tensor:
        """
@@ -76,15 +108,15 @@ class GeoCLIPCore:
            L2-normalized embedding tensor (shape: [1, 512])
        """
        with torch.no_grad():
-            # Process different image input types
+            # Process different image input types with type-specific optimizations
            if isinstance(image, str):
                # Path to image file
                image = PIL.Image.open(image).convert("RGB")
            elif isinstance(image, np.ndarray):
-                # Convert numpy array to PIL Image
+                # Convert numpy array to PIL Image with optimal memory layout
                image = PIL.Image.fromarray(np.uint8(image)).convert("RGB")

-            # Process image using CLIP processor
+            # Process image using CLIP processor with tensor allocation
            inputs = self._processor(images=image, return_tensors="pt").to(self.device)
            embedding = self._model.image_encoder(inputs.pixel_values)
            return torch.nn.functional.normalize(embedding, dim=1)
@@ -104,7 +136,7 @@ class GeoCLIPCore:
        embedding = self._location_encoder(coords_tensor)
        return torch.nn.functional.normalize(embedding, dim=1)

-    def _ensure_gps_gallery(self):
+    def _ensure_gps_gallery(self) -> None:
        """Ensure GPS gallery is loaded and cached for efficient reuse."""
        if self._gps_gallery is None:
            self._gps_gallery = self._model.gps_gallery.to(self.device)
@@ -123,21 +155,21 @@ class GeoCLIPCore:
            List of prediction dictionaries with coordinates and confidence scores
        """
        with torch.no_grad():
-            # Ensure GPS gallery is loaded
+            # Ensure GPS gallery is loaded with resource pooling
            self._ensure_gps_gallery()

-            # Generate location embeddings
+            # Generate location embeddings with memory-efficient tensor operations
            location_embeddings = self._location_encoder(self._gps_gallery)
            location_embeddings = torch.nn.functional.normalize(location_embeddings, dim=1)

-            # Calculate similarity
+            # Calculate similarity with vectorized matrix multiplication
            similarity = self._model.logit_scale.exp() * (query_embedding @ location_embeddings.T)
            probs = similarity.softmax(dim=-1)

-            # Extract top predictions
+            # Extract top predictions with single tensor operation
            top_values, top_indices = torch.topk(probs[0], min(top_k, len(self._gps_gallery)))

-            # Format results
+            # Format results with CPU offloading
            predictions = []
            for idx, confidence in zip(top_indices.cpu().numpy(), top_values.cpu().numpy()):
                predictions.append({
@@ -161,7 +193,9 @@ class GeoCLIPCore:
        embedding = self.embed_text(text)
        return self.predict_location(embedding, top_k)

-    def image_to_location(self,
+    def image_to_location(self,
+                          image: Union[str, PIL.Image.Image, np.ndarray],
+                          top_k: int = 5) -> List[Dict[str, Any]]:
        """
        Primary entry point for image-to-location prediction pipeline.

@@ -175,6 +209,151 @@ class GeoCLIPCore:
        embedding = self.embed_image(image)
        return self.predict_location(embedding, top_k)

+    def extract_image_metadata(self, image_path: str) -> Dict[str, Any]:
+        """
+        Extract comprehensive metadata from image file with GPS coordinates.
+
+        Args:
+            image_path: Path to image file
+
+        Returns:
+            Dictionary containing extracted metadata
+        """
+        try:
+            from PIL import Image, ExifTags
+            import piexif
+
+            # Open image and extract EXIF data with efficient memory mapping
+            img = Image.open(image_path)
+            metadata = {"file_name": image_path, "file_size": os.path.getsize(image_path)}
+
+            # Extract basic image properties
+            metadata["format"] = img.format
+            metadata["mode"] = img.mode
+            metadata["size"] = list(img.size)
+
+            if hasattr(img, "_getexif") and img._getexif():
+                exif_dict = {}
+                for tag_id, value in img._getexif().items():
+                    tag = ExifTags.TAGS.get(tag_id, tag_id)
+                    exif_dict[tag.lower()] = value
+
+                # Copy relevant EXIF data to metadata
+                for key, value in exif_dict.items():
+                    if isinstance(value, bytes):
+                        continue
+                    metadata[key] = value
+
+                # Extract GPS data with specialized parsing
+                gps_info = {}
+                if "gpsinfo" in exif_dict:
+                    gps_data = exif_dict["gpsinfo"]
+                    for key, value in gps_data.items():
+                        tag = ExifTags.GPSTAGS.get(key, key)
+                        gps_info[tag] = value
+
+                    # Parse GPS coordinates to decimal format
+                    if "GPSLatitude" in gps_info and "GPSLongitude" in gps_info:
+                        lat = self._convert_to_decimal(
+                            gps_info["GPSLatitude"],
+                            gps_info.get("GPSLatitudeRef", "N")
+                        )
+                        lon = self._convert_to_decimal(
+                            gps_info["GPSLongitude"],
+                            gps_info.get("GPSLongitudeRef", "E")
+                        )
+                        gps_info["Latitude"] = lat
+                        gps_info["Longitude"] = lon
+
+                metadata["gps_info"] = gps_info
+
+            # Add file metadata
+            metadata["file_extension"] = os.path.splitext(image_path)[1]
+            metadata["extraction_timestamp"] = int(time.time())
+
+            return metadata
+        except Exception as e:
+            print(f"Error extracting metadata: {e}")
+            return {"error": str(e), "file_name": image_path}
+
+    def _convert_to_decimal(self, dms_coords, ref) -> float:
+        """
+        Convert GPS DMS (Degree, Minute, Second) to decimal format.
+
+        Args:
+            dms_coords: Tuple of degrees, minutes, seconds
+            ref: Direction reference (N/S/E/W)
+
+        Returns:
+            Decimal coordinate value
+        """
+        degrees = dms_coords[0]
+        minutes = dms_coords[1] / 60.0
+        seconds = dms_coords[2] / 3600.0
+
+        decimal = degrees + minutes + seconds
+
+        # Apply negative value for south or west coordinates
+        if ref in ['S', 'W']:
+            decimal = -decimal
+
+        return decimal
+
+    def add_to_dataset(self,
+                       image_path: str,
+                       classes: Optional[List[str]] = None,
+                       push_to_hub: bool = True) -> Dict[str, Any]:
+        """
+        Process image and add entry to dataset with optional HuggingFace Hub synchronization.
+
+        Args:
+            image_path: Path to image file
+            classes: Optional list of class labels
+            push_to_hub: Whether to push changes to Hub
+
+        Returns:
+            Dictionary containing the added entry
+        """
+        # Extract filename from path
+        filename = os.path.basename(image_path)
+
+        # Extract comprehensive metadata with optimized parser
+        metadata = self.extract_image_metadata(image_path)
+
+        # Prepare new entry
+        new_entry = {
+            "filename": filename,
+            "classes": classes or [],
+            "metadata": metadata
+        }
+
+        # Add to local dataset with optimized append operation
+        self.dataset = concatenate_datasets([
+            self.dataset,
+            Dataset.from_dict({
+                "filename": [new_entry["filename"]],
+                "classes": [new_entry["classes"]],
+                "metadata": [new_entry["metadata"]]
+            })
+        ])
+
+        # Push updates to HuggingFace Hub
+        if push_to_hub:
+            self.push_dataset_to_hub()
+
+        return new_entry
+
+    def push_dataset_to_hub(self) -> None:
+        """Push dataset updates to HuggingFace Hub with atomic transaction."""
+        if self.token:
+            try:
+                self.dataset.push_to_hub(self.dataset_id, token=self.token)
+                print(f"Successfully pushed dataset with {len(self.dataset)} entries to {self.dataset_id}")
+            except Exception as e:
+                print(f"Error pushing to Hub: {e}")
+        else:
+            print("HuggingFace token not provided. Dataset not pushed to Hub.")
+
    def compute_similarity(self, embed1: torch.Tensor, embed2: torch.Tensor) -> float:
        """
        Compute cosine similarity between two embeddings.
@@ -238,17 +417,22 @@ class GeoCLIPCore:
        return m


-def launch_gradio_interface():
-    """
+def launch_gradio_interface(hf_token: Optional[str] = None):
+    """
+    Deploy GeoCLIP with Gradio interface with Hub data persistence.
+
+    Args:
+        hf_token: HuggingFace API token for dataset operations
+    """
    # Initialize model with optimal compute configuration
-    geo_core = GeoCLIPCore()
+    geo_core = GeoCLIPCore(token=hf_token)

    def predict_from_text(text_query, top_k):
-        """Process text query and generate visualization."""
+        """Process text query and generate visualization with vector operations."""
        if not text_query.strip():
            return None, "Please enter a location description."

-        # Execute prediction pipeline
+        # Execute prediction pipeline with tensor acceleration
        predictions = geo_core.text_to_location(text_query, top_k=int(top_k))

        # Generate map visualization
@@ -257,7 +441,7 @@ def launch_gradio_interface():
            title=f"Predictions for: {text_query}"
        )

-        # Create
+        # Create HTML representation
        map_html = m._repr_html_()

        # Format textual results
@@ -269,12 +453,16 @@ def launch_gradio_interface():

        return map_html, result_text

-    def
-        """
+    def process_image(image, image_path, save_to_hub, top_k):
+        """
+        Process image for prediction and metadata extraction with Hub integration.
+
+        Returns map visualization, prediction results, and metadata.
+        """
        if image is None:
-            return None, "Please upload an image."
+            return None, "Please upload an image.", "{}"

-        # Execute prediction pipeline
+        # Execute prediction pipeline with tensor acceleration
        predictions = geo_core.image_to_location(image, top_k=int(top_k))

        # Generate map visualization
@@ -283,7 +471,7 @@ def launch_gradio_interface():
            title="Predictions from Image"
        )

-        # Create
+        # Create HTML representation
        map_html = m._repr_html_()

        # Format textual results
@@ -293,7 +481,25 @@ def launch_gradio_interface():
            conf = pred["confidence"]
            result_text += f"{i}. ({coords[0]:.6f}, {coords[1]:.6f}) - confidence: {conf:.6f}\n"

-
+        # Extract metadata if image was uploaded and path is available
+        metadata = {}
+        if image_path:
+            # Add to dataset if requested
+            if save_to_hub:
+                entry = geo_core.add_to_dataset(
+                    image_path,
+                    classes=["location"],
+                    push_to_hub=True
+                )
+                metadata = entry["metadata"]
+            else:
+                # Just extract metadata without saving
+                metadata = geo_core.extract_image_metadata(image_path)
+
+        # Format metadata as JSON
+        metadata_json = json.dumps(metadata, indent=2)
+
+        return map_html, result_text, metadata_json

    def compute_text_similarity(text1, text2):
        """Compute semantic similarity between two text descriptions."""
@@ -308,8 +514,8 @@ def launch_gradio_interface():

    # Create Gradio interface with tabs for different functions
    with gr.Blocks(title="GeoCLIP Location Intelligence") as demo:
-        gr.Markdown("# GeoCLIP Location Intelligence")
-        gr.Markdown("Predict locations from text descriptions or images.")
+        gr.Markdown("# GeoCLIP Location Intelligence with Hub Integration")
+        gr.Markdown("Predict locations from text descriptions or images with dataset persistence.")

        with gr.Tabs():
            with gr.TabItem("Text → Location"):
@@ -352,10 +558,14 @@ def launch_gradio_interface():
                    outputs=[text_map_output, text_result_output]
                )

-            with gr.TabItem("Image → Location"):
+            with gr.TabItem("Image → Location with Hub Integration"):
                with gr.Row():
                    with gr.Column():
                        image_input = gr.Image(type="pil", label="Upload Image")
+                        save_to_hub = gr.Checkbox(
+                            label="Save to HuggingFace Dataset",
+                            value=True
+                        )
                        image_top_k = gr.Slider(
                            minimum=1,
                            maximum=20,
@@ -363,15 +573,16 @@ def launch_gradio_interface():
                            step=1,
                            label="Number of Predictions"
                        )
-                        image_submit = gr.Button("
+                        image_submit = gr.Button("Process Image")

                        image_map_output = gr.HTML(label="Map Visualization")
                        image_result_output = gr.Textbox(label="Prediction Results")
+                        metadata_output = gr.JSON(label="Image Metadata")

                image_submit.click(
-
-                    inputs=[image_input, image_top_k],
-                    outputs=[image_map_output, image_result_output]
+                    process_image,
+                    inputs=[image_input, image_input.upload_path, save_to_hub, image_top_k],
+                    outputs=[image_map_output, image_result_output, metadata_output]
                )

            with gr.TabItem("Semantic Similarity"):
@@ -393,11 +604,31 @@
                    inputs=[text1_input, text2_input],
                    outputs=similarity_output
                )
+
+            with gr.TabItem("Dataset Status"):
+                dataset_info = gr.Markdown(f"Current dataset: {geo_core.dataset_id}")
+                dataset_count = gr.Markdown(f"Number of entries: {len(geo_core.dataset)}")
+                update_status = gr.Button("Refresh Dataset Status")
+
+                def update_dataset_status():
+                    return (
+                        f"Current dataset: {geo_core.dataset_id}",
+                        f"Number of entries: {len(geo_core.dataset)}"
+                    )
+
+                update_status.click(
+                    update_dataset_status,
+                    inputs=[],
+                    outputs=[dataset_info, dataset_count]
+                )

    # Launch Gradio interface with optimized server settings
-    demo.launch(share=True
+    demo.launch(share=True)


if __name__ == "__main__":
+    # Read API token from environment variable
+    hf_token = os.environ.get("HF_TOKEN")
+
    # Execute vectorized deployment pipeline
-    launch_gradio_interface()
+    launch_gradio_interface(hf_token=hf_token)
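Note on the new image-tab wiring: Gradio Image components do not expose an `upload_path` attribute, and `click()` inputs must be Gradio components, so the added line `inputs=[image_input, image_input.upload_path, save_to_hub, image_top_k]` would fail as soon as the interface is built, which is consistent with the Space's runtime-error status. Below is a minimal sketch of one way to hand `process_image` a real file path; it is not the committed code. It assumes `gr.Image(type="filepath")` is acceptable and uses a simplified three-argument `process_image`, with the `geo_core` calls stubbed out as comments.

# Hedged sketch: pass the upload as a filepath so the same value can drive both
# prediction and EXIF/metadata extraction. Not the committed app.py.
import gradio as gr
import PIL.Image

def process_image(image_path, save_to_hub, top_k):
    if image_path is None:
        return None, "Please upload an image.", {}
    # Open the uploaded file for prediction; in the real app this would feed
    # geo_core.image_to_location(image, top_k=int(top_k)) and, when save_to_hub
    # is checked, geo_core.add_to_dataset(image_path) for metadata + Hub push.
    image = PIL.Image.open(image_path).convert("RGB")
    summary = f"{image.size[0]}x{image.size[1]} image, top_k={int(top_k)}, save_to_hub={save_to_hub}"
    return "<p>map HTML goes here</p>", summary, {}

with gr.Blocks() as demo:
    image_input = gr.Image(type="filepath", label="Upload Image")
    save_to_hub = gr.Checkbox(label="Save to HuggingFace Dataset", value=True)
    image_top_k = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Predictions")
    image_submit = gr.Button("Process Image")
    image_map_output = gr.HTML(label="Map Visualization")
    image_result_output = gr.Textbox(label="Prediction Results")
    metadata_output = gr.JSON(label="Image Metadata")
    image_submit.click(
        process_image,
        inputs=[image_input, save_to_hub, image_top_k],
        outputs=[image_map_output, image_result_output, metadata_output],
    )

demo.launch()

With wiring like this, `add_to_dataset` would receive the temporary upload path, so `extract_image_metadata` could read EXIF GPS tags directly from the uploaded file before the entry is pushed to the Hub.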
|