latterworks committed
Commit 997cbe9 · verified · 1 Parent(s): 4d7f662

Update app.py

Files changed (1):
  1. app.py +586 -225

app.py CHANGED
@@ -1,243 +1,604 @@
  from pathlib import Path
- from PIL import Image, ExifTags
  import json
  import sys
  import os
- import gradio as gr
  import logging
- from datasets import Dataset
- from typing import Dict, List, Any, Optional
  import traceback

- # Logging setup
  logging.basicConfig(
      level=logging.INFO,
-     format="%(asctime)s [%(levelname)s] %(message)s",
-     handlers=[logging.StreamHandler(sys.stdout)]
  )
- logger = logging.getLogger(__name__)
- 
- # Config with defaults (editable via UI or env vars)
- DEFAULT_IMAGE_DIR = Path(os.environ.get("IMAGE_DIR", "./images"))
- DEFAULT_OUTPUT_FILE = Path(os.environ.get("OUTPUT_METADATA_FILE", "./metadata.jsonl"))
- HF_USERNAME = os.environ.get("HF_USERNAME", "latterworks")
- DATASET_NAME = os.environ.get("DATASET_NAME", "geo-metadata")
- 
- SUPPORTED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.tiff', '.bmp', '.webp'}
- 
- # Convert GPS coordinates to decimal degrees
- def convert_to_degrees(value: tuple) -> Optional[float]:
-     try:
-         if not isinstance(value, (tuple, list)) or len(value) != 3:
-             raise ValueError("GPS value must be a tuple of 3 elements")
-         d, m, s = value
-         degrees = float(d) + (float(m) / 60.0) + (float(s) / 3600.0)
-         if not -180 <= degrees <= 180:
-             raise ValueError("GPS degrees out of valid range")
-         return degrees
-     except (TypeError, ValueError) as e:
-         logger.error(f"Failed to convert GPS coordinates: {e}")
-         return None
- 
- # Extract and format GPS metadata
- def extract_gps_info(gps_info: Dict[int, Any]) -> Optional[Dict[str, Any]]:
-     if not isinstance(gps_info, dict):
-         logger.warning("GPSInfo ain’t a dict, skipping")
-         return None
- 
-     gps_data = {}
-     try:
-         for key, val in gps_info.items():
-             tag_name = ExifTags.GPSTAGS.get(key, f"unknown_gps_tag_{key}")
-             gps_data[tag_name] = val
- 
-         if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data:
-             lat = convert_to_degrees(gps_data['GPSLatitude'])
-             lon = convert_to_degrees(gps_data['GPSLongitude'])
-             if lat is None or lon is None:
-                 logger.error("Failed to convert lat/lon, skipping GPS")
-                 return None
- 
-             lat_ref = gps_data.get('GPSLatitudeRef', 'N')
-             lon_ref = gps_data.get('GPSLongitudeRef', 'E')
-             if lat_ref not in {'N', 'S'} or lon_ref not in {'E', 'W'}:
-                 logger.warning(f"Bad GPS ref: {lat_ref}, {lon_ref}")
-             else:
-                 if lat_ref == 'S':
-                     lat = -lat
-                 if lon_ref == 'W':
-                     lon = -lon
- 
-             gps_data['Latitude'] = lat
-             gps_data['Longitude'] = lon
- 
-         return gps_data
-     except Exception as e:
-         logger.error(f"GPS extraction crashed: {traceback.format_exc()}")
-         return None
- 
- # Make stuff JSON-serializable
- def make_serializable(value: Any) -> Any:
-     try:
-         if hasattr(value, 'numerator') and hasattr(value, 'denominator'):
-             return float(value.numerator) / float(value.denominator)
-         elif isinstance(value, (tuple, list)):
-             return [make_serializable(item) for item in value]
-         elif isinstance(value, dict):
-             return {str(k): make_serializable(v) for k, v in value.items()}
-         elif isinstance(value, bytes):
-             return value.decode('utf-8', errors='replace')
-         json.dumps(value)
-         return value
-     except Exception as e:
-         logger.warning(f"Serialization failed, stringin’ it: {e}")
-         return str(value)
- 
- # Extract metadata from one image
- def get_image_metadata(image_path: Path) -> Dict[str, Any]:
-     metadata = {"file_name": str(image_path.absolute())}
-     try:
-         with Image.open(image_path) as image:
              metadata.update({
-                 "format": image.format or "unknown",
-                 "size": list(image.size),
-                 "mode": image.mode or "unknown"
              })

-             exif_data = None
-             try:
-                 exif_data = image._getexif()
-             except AttributeError:
-                 metadata["exif_error"] = "No EXIF data"
-             except Exception as e:
-                 metadata["exif_error"] = f"EXIF crashed: {str(e)}"
- 
-             if exif_data and isinstance(exif_data, dict):
-                 for tag_id, value in exif_data.items():
-                     tag_name = ExifTags.TAGS.get(tag_id, f"tag_{tag_id}").lower()
-                     if tag_name == "gpsinfo":
-                         gps_info = extract_gps_info(value)
-                         if gps_info:
-                             metadata["gps_info"] = make_serializable(gps_info)
-                     else:
-                         metadata[tag_name] = make_serializable(value)
- 
-         metadata["file_size"] = image_path.stat().st_size
-         metadata["file_extension"] = image_path.suffix.lower()
-         return metadata
-     except Exception as e:
-         logger.error(f"Image {image_path} crashed: {traceback.format_exc()}")
-         return {"file_name": str(image_path.absolute()), "error": str(e)}
- 
- # Process images (single file or directory)
- def process_images(input_data: str | Path) -> List[Dict[str, Any]]:
-     metadata_list = []
-     input_path = Path(input_data)
- 
-     if input_path.is_file() and input_path.suffix.lower() in SUPPORTED_EXTENSIONS:
-         logger.info(f"Processing single image: {input_path}")
-         metadata = get_image_metadata(input_path)
-         if metadata:
-             metadata_list.append(metadata)
-     elif input_path.is_dir():
-         logger.info(f"Processing directory: {input_path}")
-         for image_path in input_path.rglob("*"):
-             if image_path.is_file() and image_path.suffix.lower() in SUPPORTED_EXTENSIONS:
-                 logger.info(f"Processing: {image_path}")
-                 metadata = get_image_metadata(image_path)
-                 if metadata:
-                     metadata_list.append(metadata)
-     else:
-         logger.error(f"Invalid input: {input_data}")
-         return [{"error": f"Invalid input: {input_data}"}]
- 
-     return metadata_list
- 
- # Save to JSONL
- def save_metadata_to_jsonl(metadata_list: List[Dict[str, Any]], output_file: Path) -> bool:
-     try:
-         output_file.parent.mkdir(parents=True, exist_ok=True)
-         with output_file.open('w', encoding='utf-8') as f:
-             for entry in metadata_list:
-                 f.write(json.dumps(entry, ensure_ascii=False) + '\n')
-         logger.info(f"Saved {len(metadata_list)} entries to {output_file}")
-         return True
-     except Exception as e:
-         logger.error(f"Save crashed: {traceback.format_exc()}")
-         return False
- 
- # Upload to Hugging Face
- def upload_to_huggingface(metadata_file: Path, username: str, dataset_name: str) -> str:
-     try:
          metadata_list = []
-         with metadata_file.open('r', encoding='utf-8') as f:
-             for line in f:
-                 metadata_list.append(json.loads(line))
- 
-         if not metadata_list:
-             return "No metadata to upload, fam!"
- 
-         dataset = Dataset.from_dict({
-             "images": [entry.get("file_name") for entry in metadata_list],
-             "metadata": metadata_list
-         })
-         dataset.push_to_hub(f"{username}/{dataset_name}", private=False)
-         return f"Uploaded to {username}/{dataset_name} with {len(metadata_list)} entries!"
-     except Exception as e:
-         logger.error(f"Upload crashed: {traceback.format_exc()}")
-         return f"Upload failed: {str(e)}"
- 
- # Gradio processing function
- def gradio_process(image_file, dir_path: str, username: str, dataset_name: str) -> str:
-     output = []
-     metadata_list = []
- 
-     # Process single image if uploaded
-     if image_file:
-         image_path = Path(image_file.name) # Gradio gives temp file path
-         metadata_list = process_images(image_path)
-         output.append("Single Image Metadata:")
-         for entry in metadata_list:
-             output.append(json.dumps(entry, indent=2))
- 
-     # Process directory if provided
-     if dir_path:
-         dir_path = Path(dir_path)
-         if dir_path.is_dir():
-             metadata_list.extend(process_images(dir_path))
-             output.append("Directory Metadata:")
-             for entry in metadata_list[-len(process_images(dir_path)):]:
-                 output.append(json.dumps(entry, indent=2))
          else:
-             output.append(f"Error: {dir_path} ain’t a directory, fam!")
- 
-     # Save and upload if we got metadata
-     if metadata_list:
-         temp_output_file = Path("temp_metadata.jsonl")
-         if save_metadata_to_jsonl(metadata_list, temp_output_file):
-             output.append(f"Saved metadata to {temp_output_file}")
-             upload_result = upload_to_huggingface(temp_output_file, username, dataset_name)
-             output.append(upload_result)
          else:
-             output.append("Save failed, dawg!")
- 
-     return "\n\n".join(output) if output else "Drop an image or dir, fam!"
- 
- # Gradio interface
- demo = gr.Interface(
-     fn=gradio_process,
-     inputs=[
-         gr.File(label="Upload Image", file_types=list(SUPPORTED_EXTENSIONS)),
-         gr.Textbox(label="Image Directory", placeholder=str(DEFAULT_IMAGE_DIR), value=str(DEFAULT_IMAGE_DIR)),
-         gr.Textbox(label="Hugging Face Username", value=HF_USERNAME),
-         gr.Textbox(label="Dataset Name", value=DATASET_NAME)
-     ],
-     outputs=gr.Textbox(label="Metadata Output"),
-     title="Geo-Metadata Extractor",
-     description="Upload an image or point to a directory to extract metadata and push to Hugging Face, Bay Area style!",
-     allow_flagging="never"
- )

  if __name__ == "__main__":
-     logger.info("Firin’ up the Gradio geo-metadata extractor...")
-     demo.launch(server_name="0.0.0.0", server_port=7860)
 
  from pathlib import Path
  import json
  import sys
  import os
  import logging
  import traceback
+ from typing import Dict, List, Any, Optional, Union, Tuple
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ import time

+ # Third-party imports with robust error handling
+ try:
+     from PIL import Image, ExifTags
+     HAS_PIL = True
+ except ImportError:
+     HAS_PIL = False
+     logging.warning("PIL not installed - image processing disabled")
+ 
+ try:
+     import gradio as gr
+     HAS_GRADIO = True
+ except ImportError:
+     HAS_GRADIO = False
+     logging.warning("Gradio not installed - UI disabled")
+ 
+ try:
+     from datasets import Dataset
+     HAS_DATASETS = True
+ except ImportError:
+     HAS_DATASETS = False
+     logging.warning("Datasets library not installed - HF upload disabled")
+ 
+ # Advanced logging configuration
  logging.basicConfig(
      level=logging.INFO,
+     format="%(asctime)s [%(levelname)s] %(name)s:%(lineno)d - %(message)s",
+     handlers=[
+         logging.StreamHandler(sys.stdout),
+         logging.FileHandler("geo_extractor.log")
+     ]
  )
+ logger = logging.getLogger("geo_metadata_extractor")
+ 
+ # Configurable settings with environment variable overrides and validation
+ class Config:
+     """Configuration container with validation and defaults"""
+ 
+     DEFAULT_IMAGE_DIR = Path(os.environ.get("IMAGE_DIR", "./images"))
+     DEFAULT_OUTPUT_FILE = Path(os.environ.get("OUTPUT_METADATA_FILE", "./metadata.jsonl"))
+     HF_USERNAME = os.environ.get("HF_USERNAME", "latterworks")
+     DATASET_NAME = os.environ.get("DATASET_NAME", "geo-metadata")
+     MAX_WORKERS = int(os.environ.get("MAX_WORKERS", "4"))
+     BATCH_SIZE = int(os.environ.get("BATCH_SIZE", "100"))
+ 
+     # Image formats with EXIF support prioritized first
+     SUPPORTED_EXTENSIONS = {
+         # Primary formats with good EXIF support
+         '.jpg', '.jpeg', '.tiff', '.tif',
+         # Secondary formats with limited metadata support
+         '.png', '.heic', '.bmp', '.webp'
+     }
+ 
+     @classmethod
+     def validate(cls) -> List[str]:
+         """Validate configuration settings and return warnings"""
+         warnings = []
+ 
+         if cls.MAX_WORKERS < 1:
+             cls.MAX_WORKERS = 1
+             warnings.append(f"Invalid MAX_WORKERS value, reset to {cls.MAX_WORKERS}")
+ 
+         if cls.BATCH_SIZE < 10:
+             cls.BATCH_SIZE = 10
+             warnings.append(f"BATCH_SIZE too small, reset to {cls.BATCH_SIZE}")
+ 
+         return warnings
+ 
+ # Run config validation at import time
+ config_warnings = Config.validate()
+ for warning in config_warnings:
+     logger.warning(warning)
+ 
+ class GeoMetadataExtractor:
+     """Core metadata extraction logic with advanced error handling"""
+ 
+     @staticmethod
+     def convert_to_degrees(value: Union[tuple, list]) -> Optional[float]:
+         """
+         Convert GPS coordinates (degrees, minutes, seconds) to decimal degrees
+ 
+         Args:
+             value: Tuple of degrees, minutes, seconds
+ 
+         Returns:
+             Decimal degrees as float, or None if conversion fails
+         """
+         try:
+             if not isinstance(value, (tuple, list)) or len(value) != 3:
+                 raise ValueError(f"GPS value must be a tuple of 3 elements, got {type(value)}")
+ 
+             d, m, s = value
+             degrees = float(d) + (float(m) / 60.0) + (float(s) / 3600.0)
+ 
+             # Validate range
+             if not -180 <= degrees <= 180:
+                 raise ValueError(f"GPS degrees out of valid range: {degrees}")
+ 
+             return degrees
+         except (TypeError, ValueError, ZeroDivisionError) as e:
+             logger.error(f"Failed to convert GPS coordinates: {e}")
+             return None
+ 
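+     # Worked example of the DMS-to-decimal conversion above (hypothetical
+     # coordinates, added here for reference only):
+     #   convert_to_degrees((37, 46, 29.64))
+     #   = 37 + 46/60 + 29.64/3600 ≈ 37.7749 decimal degrees
+ 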
+     @staticmethod
+     def extract_gps_info(gps_info: Dict[int, Any]) -> Optional[Dict[str, Any]]:
+         """
+         Extract and format GPS metadata from EXIF
+ 
+         Args:
+             gps_info: Dictionary of GPS EXIF tags
+ 
+         Returns:
+             Formatted GPS data including decimal latitude/longitude
+         """
+         if not isinstance(gps_info, dict):
+             logger.warning("GPS info is not a dictionary, skipping")
+             return None
+ 
+         gps_data = {}
+         try:
+             # Extract tag data
+             for key, val in gps_info.items():
+                 tag_name = ExifTags.GPSTAGS.get(key, f"unknown_gps_tag_{key}")
+                 gps_data[tag_name] = val
+ 
+             # Process coordinates if available
+             if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data:
+                 lat = GeoMetadataExtractor.convert_to_degrees(gps_data['GPSLatitude'])
+                 lon = GeoMetadataExtractor.convert_to_degrees(gps_data['GPSLongitude'])
+ 
+                 if lat is None or lon is None:
+                     logger.error("Failed to convert latitude/longitude, skipping GPS data")
+                     return None
+ 
+                 # Apply hemispheric references
+                 lat_ref = gps_data.get('GPSLatitudeRef', 'N')
+                 lon_ref = gps_data.get('GPSLongitudeRef', 'E')
+ 
+                 if lat_ref not in {'N', 'S'} or lon_ref not in {'E', 'W'}:
+                     logger.warning(f"Invalid GPS reference values: lat_ref={lat_ref}, lon_ref={lon_ref}")
+                 else:
+                     if lat_ref == 'S':
+                         lat = -lat
+                     if lon_ref == 'W':
+                         lon = -lon
+ 
+                 # Add calculated decimal coordinates
+                 gps_data['Latitude'] = round(lat, 6) # 6 decimal places ≈ 10cm precision
+                 gps_data['Longitude'] = round(lon, 6)
+ 
+             # Add additional derived fields
+             if 'GPSAltitude' in gps_data:
+                 try:
+                     altitude = gps_data['GPSAltitude']
+                     if hasattr(altitude, 'numerator') and hasattr(altitude, 'denominator'):
+                         gps_data['AltitudeMeters'] = float(altitude.numerator) / float(altitude.denominator)
+                 except Exception as e:
+                     logger.warning(f"Failed to process altitude: {e}")
+ 
+             return gps_data
+         except Exception as e:
+             stack_trace = traceback.format_exc()
+             logger.error(f"GPS extraction error: {e}\n{stack_trace}")
+             return None
+ 
+     @staticmethod
+     def make_serializable(value: Any) -> Any:
+         """
+         Recursively convert non-serializable types to JSON-compatible values
+ 
+         Args:
+             value: Any value to convert
+ 
+         Returns:
+             JSON-serializable representation of value
+         """
+         try:
+             # Handle rational numbers (fractions)
+             if hasattr(value, 'numerator') and hasattr(value, 'denominator'):
+                 if value.denominator == 0:
+                     return "undefined (division by zero)"
+                 return float(value.numerator) / float(value.denominator)
+ 
+             # Handle nested structures
+             elif isinstance(value, (tuple, list)):
+                 return [GeoMetadataExtractor.make_serializable(item) for item in value]
+ 
+             elif isinstance(value, dict):
+                 return {str(k): GeoMetadataExtractor.make_serializable(v) for k, v in value.items()}
+ 
+             # Handle binary data
+             elif isinstance(value, bytes):
+                 return value.decode('utf-8', errors='replace')
+ 
+             # Test if directly serializable
+             json.dumps(value)
+             return value
+ 
+         except Exception as e:
+             logger.warning(f"Value serialization failed, converting to string: {e}")
+             return str(value)
+ 
+     @staticmethod
+     def get_image_metadata(image_path: Path) -> Dict[str, Any]:
+         """
+         Extract comprehensive metadata from an image file
+ 
+         Args:
+             image_path: Path to image file
+ 
+         Returns:
+             Dictionary of extracted metadata
+         """
+         # Core metadata with absolute file path
+         metadata = {
+             "file_name": str(image_path.absolute()),
+             "extraction_time": time.strftime("%Y-%m-%d %H:%M:%S")
+         }
+ 
+         try:
+             # Process file system metadata first (always available)
+             stat_info = image_path.stat()
              metadata.update({
+                 "file_size": stat_info.st_size,
+                 "file_extension": image_path.suffix.lower(),
+                 "last_modified": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(stat_info.st_mtime)),
+                 "creation_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(stat_info.st_ctime))
              })
+ 
+             # Exit early if PIL not available
+             if not HAS_PIL:
+                 metadata["error"] = "PIL library not available"
+                 return metadata
+ 
+             # Extract image and EXIF data
+             with Image.open(image_path) as image:
+                 # Basic image properties
+                 metadata.update({
+                     "format": image.format or "unknown",
+                     "size": list(image.size),
+                     "width": image.width,
+                     "height": image.height,
+                     "mode": image.mode or "unknown",
+                     "aspect_ratio": round(image.width / image.height, 3) if image.height > 0 else None
+                 })

+                 # Extract EXIF data if available
+                 exif_data = None
+                 try:
+                     # Different methods depending on image format
+                     if hasattr(image, '_getexif'):
+                         exif_data = image._getexif()
+                     elif hasattr(image, 'getexif'):
+                         exif_data = image.getexif()
+ 
+                     # Some formats like PNG store metadata differently
+                     if not exif_data and image.format == 'PNG' and 'exif' in image.info:
+                         exif_data = image.info.get('exif')
+                         metadata["exif_source"] = "PNG info block"
+                 except AttributeError:
+                     metadata["exif_error"] = "No EXIF extraction method available"
+                 except Exception as e:
+                     metadata["exif_error"] = f"EXIF extraction failed: {str(e)}"
+ 
+                 # Process EXIF data if found
+                 if exif_data and isinstance(exif_data, dict):
+                     for tag_id, value in exif_data.items():
+                         # Handle GPS data specially
+                         if tag_id in ExifTags.TAGS and ExifTags.TAGS[tag_id] == "GPSInfo":
+                             gps_info = GeoMetadataExtractor.extract_gps_info(value)
+                             if gps_info:
+                                 metadata["gps_info"] = GeoMetadataExtractor.make_serializable(gps_info)
+                         else:
+                             # Get tag name or use numeric ID with tag_ prefix
+                             tag_name = ExifTags.TAGS.get(tag_id, f"tag_{tag_id}").lower()
+                             metadata[tag_name] = GeoMetadataExtractor.make_serializable(value)
+ 
+                 # Add camera model and date taken for convenience if available
+                 if 'model' in metadata:
+                     metadata["camera_model"] = metadata['model']
+                 if 'datetimeoriginal' in metadata:
+                     metadata["date_taken"] = metadata['datetimeoriginal']
+ 
+             return metadata
+         except Exception as e:
+             # Capture full stack trace for debugging
+             stack_trace = traceback.format_exc()
+             logger.error(f"Image {image_path} processing failed: {e}\n{stack_trace}")
+ 
+             # Return partial metadata with error information
+             metadata["error"] = str(e)
+             metadata["error_trace"] = stack_trace
+             return metadata
+ 
+ class MetadataProcessor:
+     """Handles batch processing and file operations"""
+ 
+     @staticmethod
+     def process_images(input_path: Union[str, Path]) -> List[Dict[str, Any]]:
+         """
+         Process image files to extract metadata
+ 
+         Args:
+             input_path: Path to image file or directory
+ 
+         Returns:
+             List of metadata dictionaries for all processed images
+         """
          metadata_list = []
+         input_path = Path(input_path)
+         start_time = time.time()
+ 
+         # Handle single file case
+         if input_path.is_file() and input_path.suffix.lower() in Config.SUPPORTED_EXTENSIONS:
+             logger.info(f"Processing single image: {input_path}")
+             metadata = GeoMetadataExtractor.get_image_metadata(input_path)
+             if metadata:
+                 metadata_list.append(metadata)
+ 
+         # Handle directory case
+         elif input_path.is_dir():
+             logger.info(f"Processing directory: {input_path}")
+ 
+             # Collect all image files first
+             image_paths = [
+                 path for path in input_path.rglob("*")
+                 if path.is_file() and path.suffix.lower() in Config.SUPPORTED_EXTENSIONS
+             ]
+ 
+             total_images = len(image_paths)
+             logger.info(f"Found {total_images} images to process")
+ 
+             # Process in parallel with progress tracking
+             if total_images > 0:
+                 processed = 0
+                 with ThreadPoolExecutor(max_workers=Config.MAX_WORKERS) as executor:
+                     # Submit all tasks
+                     future_to_path = {
+                         executor.submit(GeoMetadataExtractor.get_image_metadata, path): path
+                         for path in image_paths
+                     }
+ 
+                     # Process as they complete
+                     for future in as_completed(future_to_path):
+                         path = future_to_path[future]
+                         try:
+                             metadata = future.result()
+                             if metadata:
+                                 metadata_list.append(metadata)
+ 
+                             # Update progress
+                             processed += 1
+                             if processed % 10 == 0 or processed == total_images:
+                                 elapsed = time.time() - start_time
+                                 rate = processed / elapsed if elapsed > 0 else 0
+                                 logger.info(f"Processed {processed}/{total_images} images ({processed/total_images*100:.1f}%) - {rate:.2f} images/sec")
+ 
+                         except Exception as e:
+                             logger.error(f"Error processing {path}: {e}")
+             else:
+                 logger.warning(f"No images found in directory: {input_path}")
          else:
+             logger.error(f"Invalid input: {input_path} is not a file or directory")
+             return [{"error": f"Invalid input: {input_path} is not a file or directory"}]
+ 
+         # Summarize results
+         elapsed = time.time() - start_time
+         images_per_second = len(metadata_list) / elapsed if elapsed > 0 else 0
+         logger.info(f"Completed processing {len(metadata_list)} images in {elapsed:.2f} seconds ({images_per_second:.2f} images/sec)")
+ 
+         return metadata_list
+ 
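+     # Usage sketch (hypothetical path, for reference only):
+     #   MetadataProcessor.process_images("./images") walks the directory tree,
+     #   fans extraction out over Config.MAX_WORKERS threads, and returns one
+     #   metadata dict per image.
+ 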
+     @staticmethod
+     def save_metadata_to_jsonl(metadata_list: List[Dict[str, Any]], output_file: Path) -> bool:
+         """
+         Save metadata to JSONL format with error handling
+ 
+         Args:
+             metadata_list: List of metadata dictionaries
+             output_file: Path to output file
+ 
+         Returns:
+             True if save was successful, False otherwise
+         """
+         try:
+             # Create directory if needed
+             output_file.parent.mkdir(parents=True, exist_ok=True)
+ 
+             # Write to file
+             with output_file.open('w', encoding='utf-8') as f:
+                 for entry in metadata_list:
+                     f.write(json.dumps(entry, ensure_ascii=False) + '\n')
+ 
+             logger.info(f"Successfully saved {len(metadata_list)} entries to {output_file}")
+             return True
+ 
+         except Exception as e:
+             stack_trace = traceback.format_exc()
+             logger.error(f"Failed to save metadata: {e}\n{stack_trace}")
+             return False
+ 
+     @staticmethod
+     def upload_to_huggingface(metadata_file: Path, username: str, dataset_name: str) -> str:
+         """
+         Upload metadata to Hugging Face as a dataset
+ 
+         Args:
+             metadata_file: Path to JSONL file
+             username: Hugging Face username
+             dataset_name: Dataset name to create/update
+ 
+         Returns:
+             Status message
+         """
+         if not HAS_DATASETS:
+             return "Hugging Face datasets library not installed"
+ 
+         try:
+             # Read metadata
+             metadata_list = []
+             with metadata_file.open('r', encoding='utf-8') as f:
+                 for line in f:
+                     metadata_list.append(json.loads(line))
+ 
+             if not metadata_list:
+                 return "No metadata to upload"
+ 
+             # Create dataset
+             logger.info(f"Creating dataset with {len(metadata_list)} entries")
+             dataset = Dataset.from_dict({
+                 "images": [entry.get("file_name", "unknown") for entry in metadata_list],
+                 "metadata": metadata_list
+             })
+ 
+             # Push to Hub
+             dataset_path = f"{username}/{dataset_name}"
+             logger.info(f"Pushing dataset to {dataset_path}")
+             dataset.push_to_hub(dataset_path, private=False)
+ 
+             return f"Successfully uploaded to {dataset_path} with {len(metadata_list)} entries"
+ 
+         except Exception as e:
+             stack_trace = traceback.format_exc()
+             logger.error(f"Upload failed: {e}\n{stack_trace}")
+             return f"Upload failed: {str(e)}"
+ 
+ class GradioInterface:
+     """Gradio UI interface"""
+ 
+     @staticmethod
+     def create_interface():
+         """
+         Create the Gradio interface
+ 
+         Returns:
+             Gradio interface object
+         """
+         if not HAS_GRADIO:
+             logger.error("Gradio not installed, cannot create interface")
+             return None
+ 
+         def process_input(image_file, dir_path: str, username: str, dataset_name: str) -> str:
+             """
+             Process inputs from Gradio UI
+ 
+             Args:
+                 image_file: Uploaded file object or None
+                 dir_path: Directory path string
+                 username: Hugging Face username
+                 dataset_name: Dataset name
+ 
+             Returns:
+                 Results as formatted text
+             """
+             output_lines = []
+             metadata_list = []
+ 
+             # Handle single image upload
+             if image_file:
+                 image_path = Path(image_file.name)
+                 output_lines.append(f"## Processing Single Image: {image_path.name}")
+ 
+                 single_metadata = MetadataProcessor.process_images(image_path)
+                 metadata_list.extend(single_metadata)
+ 
+                 # Format first entry for display
+                 if single_metadata:
+                     output_lines.append("### Image Metadata:")
+                     output_lines.append("```json")
+                     output_lines.append(json.dumps(single_metadata[0], indent=2))
+                     output_lines.append("```")
+ 
+             # Handle directory processing
+             if dir_path:
+                 dir_path = Path(dir_path)
+                 if dir_path.is_dir():
+                     output_lines.append(f"## Processing Directory: {dir_path}")
+                     dir_metadata = MetadataProcessor.process_images(dir_path)
+ 
+                     # Add to full list
+                     metadata_list.extend(dir_metadata)
+ 
+                     # Summarize results
+                     output_lines.append(f"### Directory Results:")
+                     output_lines.append(f"- Processed {len(dir_metadata)} images")
+ 
+                     # Location data summary
+                     location_count = sum(1 for entry in dir_metadata if entry.get("gps_info") is not None)
+ output_lines.append(f"- Found location data in {location_count} images ({location_count/len(dir_metadata)*100:.1f}% if len(dir_metadata) > 0 else 0}%)")
+ 
+                     # Show a few examples if available
+                     if dir_metadata:
+                         output_lines.append("\n### Sample Entry:")
+                         output_lines.append("```json")
+                         output_lines.append(json.dumps(dir_metadata[0], indent=2))
+                         output_lines.append("```")
+                 else:
+                     output_lines.append(f"⚠️ Error: {dir_path} is not a directory")
+ 
+             # Save and upload if we have metadata
+             if metadata_list:
+                 temp_output_file = Path("temp_metadata.jsonl")
+                 output_lines.append(f"\n## Saving and Uploading")
+ 
+                 if MetadataProcessor.save_metadata_to_jsonl(metadata_list, temp_output_file):
+                     output_lines.append(f"✅ Saved metadata to {temp_output_file}")
+ 
+                     # Upload to Hugging Face
+                     upload_result = MetadataProcessor.upload_to_huggingface(
+                         temp_output_file, username, dataset_name
+                     )
+                     output_lines.append(f"📤 {upload_result}")
+                 else:
+                     output_lines.append("❌ Failed to save metadata")
+ 
+             return "\n".join(output_lines) if output_lines else "Please upload an image or provide a directory path"
+ 
+         # Create the interface
+         demo = gr.Interface(
+             fn=process_input,
+             inputs=[
+                 gr.File(label="Upload Image", file_types=list(Config.SUPPORTED_EXTENSIONS)),
+                 gr.Textbox(label="Image Directory", placeholder=str(Config.DEFAULT_IMAGE_DIR), value=str(Config.DEFAULT_IMAGE_DIR)),
+                 gr.Textbox(label="Hugging Face Username", value=Config.HF_USERNAME),
+                 gr.Textbox(label="Dataset Name", value=Config.DATASET_NAME)
+             ],
+             outputs=gr.Markdown(label="Results"),
+             title="Enhanced Geo-Metadata Extractor",
+             description=(
+                 "Upload an image or process a directory to extract location metadata and other EXIF data. "
+                 "Results can be automatically uploaded to Hugging Face Datasets."
+             ),
+             allow_flagging="never",
+             examples=[
+                 [None, "sample_images", Config.HF_USERNAME, "sample-geo-metadata"]
+             ]
+         )
+ 
+         return demo
+ 
+ def main():
+     """Main entry point"""
+     logger.info("Starting Geo-Metadata Extractor")
+ 
+     # Check dependencies
+     if not HAS_PIL:
+         logger.error("PIL is required for image processing. Please install: pip install pillow")
+         sys.exit(1)
+ 
+     # Create and launch the UI if running directly
+     if HAS_GRADIO:
+         logger.info("Creating Gradio interface")
+         demo = GradioInterface.create_interface()
+         if demo:
+             logger.info("Launching Gradio interface")
+             demo.launch(server_name="0.0.0.0", server_port=7860)
          else:
+             logger.error("Failed to create Gradio interface")
+     else:
+         logger.warning("Gradio not installed, running in CLI mode")
+ 
+         # Process default directory as fallback
+         if Config.DEFAULT_IMAGE_DIR.exists():
+             logger.info(f"Processing default directory: {Config.DEFAULT_IMAGE_DIR}")
+             metadata = MetadataProcessor.process_images(Config.DEFAULT_IMAGE_DIR)
+ 
+             if metadata:
+                 logger.info(f"Saving {len(metadata)} entries to {Config.DEFAULT_OUTPUT_FILE}")
+                 MetadataProcessor.save_metadata_to_jsonl(metadata, Config.DEFAULT_OUTPUT_FILE)
+                 logger.info(f"Metadata saved to {Config.DEFAULT_OUTPUT_FILE}")
+         else:
+             logger.error(f"Default directory not found: {Config.DEFAULT_IMAGE_DIR}")

  if __name__ == "__main__":
+     main()
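
The headless path introduced in main() can also be driven directly. A minimal sketch, assuming the module is importable as app from the working directory and that an ./images folder exists (both are assumptions, not guaranteed by this commit):

    from pathlib import Path
    from app import Config, MetadataProcessor  # assumed import path

    # Hypothetical headless run: extract metadata and write JSONL without the UI.
    entries = MetadataProcessor.process_images(Path("./images"))
    if entries:
        MetadataProcessor.save_metadata_to_jsonl(entries, Config.DEFAULT_OUTPUT_FILE)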