Spaces:

latterworks
/

geo-metadata-extractor-gradio

Runtime error

App Files Community

latterworks committed on Mar 18

Commit

f5dce4b

verified ·

1 Parent(s): 4658311

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -117

app.py CHANGED Viewed

@@ -1,74 +1,77 @@
 import torch
-import numpy as np
 import folium
 from folium.plugins import HeatMap
 import gradio as gr
-import os
-import PIL.Image
-import json
-import time
-from typing import Dict, Any, Optional, Union
-from pathlib import Path
-from datasets import Dataset, load_dataset, concatenate_datasets
-from huggingface_hub import HfApi
 # GeoCLIP dependencies
 from geoclip import GeoCLIP
-from transformers import CLIPTokenizer, CLIPProcessor
-# Initialize GeoCLIP core with vectorized execution path
-class GeoCLIPCore:
-    def __init__(self, device=None, dataset_id="latterworks/geo-metadata", token=None):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
-        self.dataset_id = dataset_id
-        self.token = token
         self._model = GeoCLIP().to(self.device)
         self._tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-        self._processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
-        self._location_encoder = self._model.location_encoder
-        self._gps_gallery = None  # Lazy-loaded for memory optimization
-        # Initialize dataset connection with error handling
-        try:
-            self.dataset = load_dataset(self.dataset_id, split="train", token=self.token)
-        except Exception:
-            self.dataset = Dataset.from_dict({"filename": [], "classes": [], "metadata": []})
-    # Core tensor operations for embedding generation
-    def text_to_location(self, text, top_k=5):
         with torch.no_grad():
             tokens = self._tokenizer(text, return_tensors="pt", padding=True).to(self.device)
-            embedding = self._model.image_encoder.mlp(
                 self._model.image_encoder.CLIP.get_text_features(**tokens)
             )
-            embedding = torch.nn.functional.normalize(embedding, dim=1)
-            # Ensure gallery is loaded with memory pooling
             if self._gps_gallery is None:
                 self._gps_gallery = self._model.gps_gallery.to(self.device)
-            # Execute vectorized similarity computation
-            location_embeddings = self._location_encoder(self._gps_gallery)
-            location_embeddings = torch.nn.functional.normalize(location_embeddings, dim=1)
-            similarity = self._model.logit_scale.exp() * (embedding @ location_embeddings.T)
             probs = similarity.softmax(dim=-1)
-            # Extract predictions with single tensor operation
             top_values, top_indices = torch.topk(probs[0], min(top_k, len(self._gps_gallery)))
             return [
                 {"coordinates": tuple(self._gps_gallery[idx].cpu().numpy()),
-                 "confidence": float(conf)}
                 for idx, conf in zip(top_indices.cpu().numpy(), top_values.cpu().numpy())
             ]
-    # Generate map visualization with optimized rendering
-    def create_map_visualization(self, predictions, title=""):
-        m = folium.Map(location=predictions[0]["coordinates"], zoom_start=5)
-        if title:
-            m.get_root().html.add_child(folium.Element(f'<h3 style="text-align:center">{title}</h3>'))
-        # Add markers with confidence metadata
         for i, pred in enumerate(predictions):
             color = 'red' if i == 0 else 'blue' if i < 3 else 'green'
             folium.Marker(
@@ -76,8 +79,7 @@ class GeoCLIPCore:
                 popup=f"Prediction #{i+1}<br>Confidence: {pred['confidence']:.6f}",
                 icon=folium.Icon(color=color)
             ).add_to(m)
-        # Add heatmap for density visualization
         if len(predictions) >= 3:
             heat_data = [[p["coordinates"][0], p["coordinates"][1], p["confidence"]]
                         for p in predictions]
@@ -85,82 +87,59 @@ class GeoCLIPCore:
         return m
-# Initialize GeoCLIP and codebase exemplars
-def initialize_gradio_interface(hf_token=None):
-    python_code = """def fib(n):
-    if n <= 0:
-        return 0
-    elif n == 1:
-        return 1
-    else:
-        return fib(n-1) + fib(n-2)
-"""
-    js_code = """function fib(n) {
-    if (n <= 0) return 0;
-    if (n === 1) return 1;
-    return fib(n - 1) + fib(n - 2);
-}
-"""
-    # Initialize GeoCLIP with optimized resource allocation
-    geo_core = GeoCLIPCore(token=hf_token)
-    # Message handler with multimodal dispatch logic
-    def chat(message, history):
-        if "python" in message.lower():
-            return "Type Python or JavaScript to see the code.", gr.Code(language="python", value=python_code)
-        elif "javascript" in message.lower():
-            return "Type Python or JavaScript to see the code.", gr.Code(language="javascript", value=js_code)
-        elif any(kw in message.lower() for kw in ["location", "where", "place", "predict"]):
-            # Extract location query with pattern matching
-            for term in ["location", "where", "place", "find", "predict"]:
-                if term in message.lower():
-                    query = message.lower().split(term, 1)[1].strip()
-                    if not query:
-                        return "Please provide a location description.", None
-                    # Execute prediction with tensor acceleration
-                    predictions = geo_core.text_to_location(query, top_k=5)
-                    m = geo_core.create_map_visualization(predictions, f"Predictions for: {query}")
-                    # Format response with structured data
-                    result = f"Top predictions for: '{query}'\n\n"
-                    for i, pred in enumerate(predictions, 1):
-                        coords = pred["coordinates"]
-                        result += f"{i}. ({coords[0]:.6f}, {coords[1]:.6f}) - conf: {pred['confidence']:.6f}\n"
-                    return result, gr.HTML(value=m._repr_html_())
-            return "Couldn't process your location query. Please try again.", None
-        else:
-            return "I can show code examples or predict locations. Try 'Where is the Eiffel Tower?'", None
-    # Build gradio blocks with structured layout
-    with gr.Blocks() as demo:
-        code = gr.Code(render=False)
-        map_output = gr.HTML(render=False)
-        with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown("<center><h1>GeoCLIP + Code Examples</h1></center>")
-                chatbot = gr.ChatInterface(
-                    chat,
-                    examples=["Python", "JavaScript", "Where is the Eiffel Tower?"],
-                    additional_outputs=[code, map_output]
-                )
-            with gr.Column(scale=1):
-                gr.Markdown("<center><h1>Output Artifacts</h1></center>")
-                with gr.Tab("Code"):
-                    code.render()
-                with gr.Tab("Location Map"):
-                    map_output.render()
-        gr.Markdown(f"<center>Connected to dataset: {geo_core.dataset_id}</center>")
-    return demo
-# Entry point with environmental token acquisition
 if __name__ == "__main__":
-    hf_token = os.environ.get("HF_TOKEN")
-    demo = initialize_gradio_interface(hf_token)
-    demo.launch()

+import os
 import torch
 import folium
 from folium.plugins import HeatMap
 import gradio as gr
+from typing import Dict, List, Any
+from functools import lru_cache
 # GeoCLIP dependencies
 from geoclip import GeoCLIP
+from transformers import CLIPTokenizer
+# Singleton pattern for GeoCLIP engine
+class GeoCLIPEngine:
+    _instance = None
+    def __new__(cls, *args, **kwargs):
+        if cls._instance is None:
+            cls._instance = super(GeoCLIPEngine, cls).__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+    def __init__(self, device=None):
+        if self._initialized:
+            return
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"Initializing GeoCLIP on {self.device}")
         self._model = GeoCLIP().to(self.device)
         self._tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
+        self._gps_gallery = None  # Lazy-loaded on first prediction
+        self._initialized = True
+    @lru_cache(maxsize=32)
+    def predict_location(self, text: str, top_k: int = 5) -> List[Dict[str, Any]]:
+        """Vectorized text-to-location prediction with tensor optimization."""
         with torch.no_grad():
+            # Generate text embedding with optimal tensor allocation
             tokens = self._tokenizer(text, return_tensors="pt", padding=True).to(self.device)
+            text_features = self._model.image_encoder.mlp(
                 self._model.image_encoder.CLIP.get_text_features(**tokens)
             )
+            text_features = torch.nn.functional.normalize(text_features, dim=1)
+            # Ensure GPS gallery is loaded with resource pooling
             if self._gps_gallery is None:
                 self._gps_gallery = self._model.gps_gallery.to(self.device)
+            # Generate location embeddings with memory-efficient tensor operations
+            location_features = self._model.location_encoder(self._gps_gallery)
+            location_features = torch.nn.functional.normalize(location_features, dim=1)
+            # Calculate similarity with vectorized matrix multiplication
+            similarity = self._model.logit_scale.exp() * (text_features @ location_features.T)
             probs = similarity.softmax(dim=-1)
+            # Extract top predictions
             top_values, top_indices = torch.topk(probs[0], min(top_k, len(self._gps_gallery)))
             return [
                 {"coordinates": tuple(self._gps_gallery[idx].cpu().numpy()),
+                 "confidence": float(conf)}
                 for idx, conf in zip(top_indices.cpu().numpy(), top_values.cpu().numpy())
             ]
+    def create_map_visualization(self, predictions: List[Dict[str, Any]], title: str = "") -> folium.Map:
+        """Generate geospatial visualization."""
+        # Initialize map centered on highest confidence prediction
+        center_coords = predictions[0]["coordinates"]
+        m = folium.Map(location=center_coords, zoom_start=5, tiles="OpenStreetMap")
+        # Add markers and heatmap
         for i, pred in enumerate(predictions):
             color = 'red' if i == 0 else 'blue' if i < 3 else 'green'
             folium.Marker(
                 popup=f"Prediction #{i+1}<br>Confidence: {pred['confidence']:.6f}",
                 icon=folium.Icon(color=color)
             ).add_to(m)
         if len(predictions) >= 3:
             heat_data = [[p["coordinates"][0], p["coordinates"][1], p["confidence"]]
                         for p in predictions]
         return m
+# Initialize global singleton
+engine = GeoCLIPEngine()
+# Fixed chat function with proper output handling
+def loc_chat(message, history):
+    """Chat function that avoids returning Code objects."""
+    # Process location queries
+    if any(term in message.lower() for term in ["location", "where", "place", "find"]):
+        try:
+            # Execute prediction with tensor acceleration
+            predictions = engine.predict_location(message, top_k=5)
+            # Generate map visualization
+            m = engine.create_map_visualization(predictions, f"Predictions for: {message}")
+            # Format response with structured information
+            result_text = f"Top predictions for: '{message}'\n\n"
+            for i, pred in enumerate(predictions, 1):
+                coords = pred["coordinates"]
+                conf = pred["confidence"]
+                result_text += f"{i}. ({coords[0]:.6f}, {coords[1]:.6f}) - confidence: {conf:.6f}\n"
+            # Return only string and HTML types to avoid validation errors
+            return result_text, gr.HTML(value=m._repr_html_())
+        except Exception as e:
+            return f"Error: {str(e)}", None
+    return "Ask about a location like 'Where is the Eiffel Tower?'", None
+# Interface with minimal dependencies
+with gr.Blocks() as demo:
+    map_output = gr.HTML(render=False)
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("<h1>GeoCLIP Location Intelligence</h1>")
+            chatbot = gr.ChatInterface(
+                loc_chat,
+                examples=["Where is the Eiffel Tower?", "Find ancient pyramids in desert"],
+                additional_outputs=[map_output],
+                type="messages"  # Critical: use messages type to avoid deprecation
+            )
+        with gr.Column():
+            gr.Markdown("<h1>Map Visualization</h1>")
+            map_output.render()
+# Main entrypoint with error mitigation configuration
 if __name__ == "__main__":
+    demo.launch(
+        share=True,
+        server_name="0.0.0.0",
+        cache_examples=False,  # Critical: Disable example caching
+        show_error=True
+    )