Spaces:
Runtime error

Update app.py
app.py
CHANGED
@@ -1,556 +1,170 @@
-import gradio as gr
-from pathlib import Path
-from PIL import Image, ExifTags
-import json
 import os
-import logging
 import time
-
-from huggingface_hub import HfApi, create_repo, repository_exists, CommitOperationAdd
-from huggingface_hub.utils import tqdm
 import threading
 import sys
-
-# Setup logging
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(message)s",
-    handlers=[
-        logging.StreamHandler(),
-    ]
-)
-logger = logging.getLogger(__name__)
-
-# Constants - edit these for your setup
-HF_TOKEN = os.environ.get("HF_TOKEN")
-HF_USERNAME = os.environ.get("HF_USERNAME", "latterworks")
-DATASET_NAME = os.environ.get("DATASET_NAME", "geo-metadata")
 SUPPORTED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.tiff', '.bmp', '.webp'}
-CHECK_INTERVAL = int(os.environ.get("CHECK_INTERVAL", "3600"))  # Check for files hourly by default
-
-# Global stats tracking
-STATS = {
-    "total_files": 0,
-    "files_with_gps": 0,
-    "uploads": 0,
-    "last_upload": 0,
-    "startup_time": int(time.time())
-}
-
-def ensure_dataset_exists():
-    """Create dataset repository if it doesn't exist"""
-    if not HF_TOKEN:
-        logger.error("HF_TOKEN not set. Cannot create or check dataset.")
-        return False
-
-    try:
-        api = HfApi(token=HF_TOKEN)
-        repo_id = f"{HF_USERNAME}/{DATASET_NAME}"
-
-        # Check if repo exists
-        if not repository_exists(repo_id, repo_type="dataset", token=HF_TOKEN):
-            logger.info(f"Creating dataset repository: {repo_id}")
-            create_repo(
-                repo_id=repo_id,
-                repo_type="dataset",
-                private=False,
-                token=HF_TOKEN
-            )
-
-            # Create initial README
-            readme_content = f"""# {DATASET_NAME}
-
-Automatically collected geo-metadata from images using the Geo-Metadata Extractor.
-
-## Statistics
-- Total files processed: 0
-- Files with GPS data: 0
-- Last updated: {time.strftime('%Y-%m-%d %H:%M:%S')}
-
-## Data Format
-Each entry contains:
-- Basic image metadata (size, format, mode)
-- EXIF data when available
-- GPS coordinates extracted from EXIF when available
-"""
-
-            # Upload README
-            api.upload_file(
-                path_or_fileobj=readme_content.encode(),
-                path_in_repo="README.md",
-                repo_id=repo_id,
-                repo_type="dataset",
-                token=HF_TOKEN,
-                commit_message="Initial commit with README"
-            )
-
-            # Create folder structure
-            for folder in ["batches", "images", "scripts"]:
-                api.upload_file(
-                    path_or_fileobj=b"",
-                    path_in_repo=f"{folder}/.gitkeep",
-                    repo_id=repo_id,
-                    repo_type="dataset",
-                    token=HF_TOKEN,
-                    commit_message=f"Create {folder} directory"
-                )
-
-            # Upload this script to the repository
-            try:
-                script_path = os.path.abspath(sys.argv[0])
-                if os.path.exists(script_path):
-                    with open(script_path, "rb") as f:
-                        script_content = f.read()
-
-                    api.upload_file(
-                        path_or_fileobj=script_content,
-                        path_in_repo="scripts/geo_metadata_extractor.py",
-                        repo_id=repo_id,
-                        repo_type="dataset",
-                        token=HF_TOKEN,
-                        commit_message="Upload metadata extractor script"
-                    )
-            except Exception as e:
-                logger.error(f"Failed to upload script: {e}")
-
-            logger.info(f"Dataset repository created: {repo_id}")
-        else:
-            logger.info(f"Dataset repository already exists: {repo_id}")
-
         return True
-    except Exception as e:
-        logger.error(f"Error ensuring dataset exists: {e}")
         return False
-
-def update_readme_stats():
-    """Update the dataset README with current statistics"""
-    try:
-        api = HfApi(token=HF_TOKEN)
-        repo_id = f"{HF_USERNAME}/{DATASET_NAME}"
-
-        # Create updated README content
-        readme_content = f"""# {DATASET_NAME}
-
-Automatically collected geo-metadata from images using the Geo-Metadata Extractor.
-
-## Statistics
-- Total files processed: {STATS["total_files"]}
-- Files with GPS data: {STATS["files_with_gps"]}
-- Upload batches: {STATS["uploads"]}
-- Last updated: {time.strftime('%Y-%m-%d %H:%M:%S')}
-- Uptime: {format_duration(int(time.time()) - STATS["startup_time"])}
-
-## Data Format
-Each entry contains:
-- Basic image metadata (size, format, mode)
-- EXIF data when available
-- GPS coordinates extracted from EXIF when available
-"""
-
-        # Upload updated README
-        api.upload_file(
-            path_or_fileobj=readme_content.encode(),
-            path_in_repo="README.md",
-            repo_id=repo_id,
-            repo_type="dataset",
-            token=HF_TOKEN,
-            commit_message="Update statistics"
-        )
-
-        logger.info("Updated README with current statistics")
-    except Exception as e:
-        logger.error(f"Error updating README: {e}")
-
 def format_duration(seconds):
-    """Convert seconds into a human-readable duration string"""
-    days, remainder = divmod(seconds, 86400)
-    hours, remainder = divmod(remainder, 3600)
-    minutes, seconds = divmod(remainder, 60)
-
-    parts = []
-    if days > 0:
-        parts.append(f"{days}d")
-    if hours > 0:
-        parts.append(f"{hours}h")
-    if minutes > 0:
-        parts.append(f"{minutes}m")
-    parts.append(f"{seconds}s")
-
-    return " ".join(parts)

 def convert_to_degrees(value):
-    """Convert GPS coordinates to decimal degrees"""
     try:
-        d, m, s = value
-        # Convert from rational numbers if needed
-        d = d.numerator / d.denominator if hasattr(d, 'numerator') else float(d)
-        m = m.numerator / m.denominator if hasattr(m, 'numerator') else float(m)
-        s = s.numerator / s.denominator if hasattr(s, 'numerator') else float(s)
-
-        degrees = d + (m / 60.0) + (s / 3600.0)
-        if not -180 <= degrees <= 180:
-            logger.warning(f"GPS out of bounds: {degrees}°")
-        return degrees
-    except Exception as e:
-        logger.error(f"GPS conversion failed: {e}, value={value}")
         return None

 def extract_gps_info(gps_info):
-    """Extract and process GPS data from EXIF"""
     if not isinstance(gps_info, dict):
         return None
-
-    gps_data = {}
     try:
-        for key, val in gps_info.items():
-            tag_name = ExifTags.GPSTAGS.get(key, f"gps_{key}")
-            gps_data[tag_name] = val
-
-        # Process lat/long if present
         if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data:
-            lat = convert_to_degrees(gps_data['GPSLatitude'])
-            lon = convert_to_degrees(gps_data['GPSLongitude'])
-
-            lat_ref = gps_data.get('GPSLatitudeRef', 'N')
-            lon_ref = gps_data.get('GPSLongitudeRef', 'E')
-
-            # Flip signs based on hemisphere
-            if lat_ref == 'S':
-                lat = -lat
-            if lon_ref == 'W':
-                lon = -lon
-
-            # Store clean coords with proper precision
-            gps_data['Latitude'] = round(lat, 6)
-            gps_data['Longitude'] = round(lon, 6)
-
         return gps_data
-    except Exception as e:
-        logger.error(f"GPS extraction error: {e}")
-        return None
-
-def make_serializable(value):
-    """Make any value JSON serializable"""
-    try:
-        if hasattr(value, 'numerator') and hasattr(value, 'denominator'):
-            return float(value.numerator) / float(value.denominator)
-        elif isinstance(value, (tuple, list)):
-            return [make_serializable(item) for item in value]
-        elif isinstance(value, dict):
-            return {str(k): make_serializable(v) for k, v in value.items()}
-        elif isinstance(value, bytes):
-            return value.decode('utf-8', errors='replace')
-        json.dumps(value)  # Test if serializable
-        return value
     except Exception:
-        return str(value)

 def get_image_metadata(image_path):
-    """Extract metadata from an image file"""
     file_path = Path(image_path)
-    metadata = {
-        "file_name": str(file_path.absolute()),
-        "file_basename": file_path.name,
-        "image_path_in_repo": f"images/{file_path.name}"  # Path where image will be stored in repo
-    }
-
     try:
-        with Image.open(image_path) as image:
-            metadata.update({
-                "format": image.format,
-                "size": list(image.size),
-                "mode": image.mode
-            })
-
-            # Extract EXIF if available
-            exif_data = None
-            try:
-                exif_data = image._getexif()
-            except (AttributeError, Exception) as e:
-                metadata["exif_error"] = str(e)
-
-            if exif_data and isinstance(exif_data, dict):
-                for tag_id, value in exif_data.items():
-                    try:
-                        tag_name = ExifTags.TAGS.get(tag_id, f"tag_{tag_id}").lower()
-                        if tag_name == "gpsinfo":
-                            gps_info = extract_gps_info(value)
-                            if gps_info:
-                                metadata["gps_info"] = make_serializable(gps_info)
-                        else:
-                            metadata[tag_name] = make_serializable(value)
-                    except Exception as e:
-                        metadata[f"error_tag_{tag_id}"] = str(e)
-
-        # Add file details
         metadata["file_size"] = os.path.getsize(image_path)
-        metadata["file_extension"] = file_path.suffix.lower()
-        metadata["extraction_timestamp"] = int(time.time())
-
-        # Test serialization
-        json.dumps(metadata)
         return metadata
-    except Exception as e:
-        logger.error(f"Metadata extraction failed for {image_path}: {e}")
-        return {"file_name": str(file_path.absolute()), "error": str(e)}

-def process_images(image_files):
-    """Process images, extract metadata, and upload results to Hugging Face"""
-    if not image_files:
-        return "🚫 Upload some fucking images first! 📷", None
-
-    # Ensure dataset exists
-    if not ensure_dataset_exists():
-        return "❌ Failed to create or verify dataset repository. Check logs.", None
-
-    # Create temp directory for storing files if needed
-    os.makedirs("temp_uploads", exist_ok=True)
-
-    # Reset stats for this batch
-    batch_stats = {
-        "processed": 0,
-        "skipped": 0,
-        "errors": 0,
-        "with_gps": 0
-    }
-
-    metadata_list = []
-    filenames = []
-
-    # Process each image
-    for image_file in image_files:
-        if not image_file or not os.path.exists(image_file.name):
-            continue
-
-        file_ext = Path(image_file.name).suffix.lower()
-        if file_ext not in SUPPORTED_EXTENSIONS:
-            logger.info(f"Skipping unsupported file: {image_file.name}")
-            batch_stats["skipped"] += 1
-            continue
-
-        logger.info(f"Processing: {image_file.name}")
-        try:
-            metadata = get_image_metadata(image_file.name)
-            if metadata:
-                if "gps_info" in metadata:
-                    batch_stats["with_gps"] += 1
-                    STATS["files_with_gps"] += 1
-                metadata_list.append(metadata)
-                filenames.append(Path(image_file.name).name)
-                batch_stats["processed"] += 1
-            else:
-                batch_stats["errors"] += 1
-        except Exception as e:
-            logger.error(f"Failed on {image_file.name}: {e}")
-            batch_stats["errors"] += 1
-
-    # Exit if nothing processed
     if not metadata_list:
-        return
-
-    # [garbled in source: STATS["total_files"] is updated and metadata_list is
-    #  written to a timestamped JSONL file, setting the `timestamp` and
-    #  `output_file` values used below]
-
-    # Upload to HF
-    upload_status = "not uploaded (no token)"
-    if HF_TOKEN:
         try:
-            # Build a Dataset from the collected metadata
-            from datasets import Dataset
-            dataset = Dataset.from_dict({
-                "metadata": metadata_list
-            })
-
-            # Push to hub
-            dataset.push_to_hub(
-                f"{HF_USERNAME}/{DATASET_NAME}",
-                token=HF_TOKEN,
-                commit_message=f"Added metadata for {len(metadata_list)} images"
-            )
-
-            # Upload raw JSONL file
-            api = HfApi(token=HF_TOKEN)
-            api.upload_file(
-                path_or_fileobj=output_file,
-                path_in_repo=f"batches/metadata_{timestamp}.jsonl",
-                repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
-                repo_type="dataset",
-                token=HF_TOKEN,
-                commit_message=f"Raw metadata batch {timestamp}"
-            )
-
-            # Upload the actual image files
-            logger.info(f"Uploading {len(image_files)} image files...")
-            operations = []
-
-            # Process images in batches to avoid memory issues with large datasets
-            MAX_BATCH_SIZE = 20  # Maximum images per commit
-            total_uploaded = 0
-
-            # Group image files into batches
-            image_batches = [image_files[i:i+MAX_BATCH_SIZE] for i in range(0, len(image_files), MAX_BATCH_SIZE)]
-
-            for batch_idx, img_batch in enumerate(image_batches):
-                operations = []
-
-                for img_file in tqdm(img_batch, desc=f"Preparing batch {batch_idx+1}/{len(image_batches)}"):
-                    try:
-                        file_path = img_file.name
-                        file_name = os.path.basename(file_path)
-                        target_path = f"images/{file_name}"
-
-                        # Add file to operations list
-                        with open(file_path, "rb") as f:
-                            content = f.read()
-                        operations.append(
-                            CommitOperationAdd(
-                                path_in_repo=target_path,
-                                path_or_fileobj=content
-                            )
-                        )
-                    except Exception as e:
-                        logger.error(f"Error preparing image {img_file.name} for upload: {e}")
-
-                # Commit this batch of images
-                if operations:
-                    try:
-                        logger.info(f"Committing batch {batch_idx+1}/{len(image_batches)} with {len(operations)} images...")
-                        api.create_commit(
-                            repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
-                            repo_type="dataset",
-                            operations=operations,
-                            commit_message=f"Upload {len(operations)} images (batch {batch_idx+1}/{len(image_batches)}) from upload {timestamp}"
-                        )
-                        total_uploaded += len(operations)
-                        logger.info(f"Successfully uploaded batch {batch_idx+1} ({total_uploaded}/{len(image_files)} total)")
-                    except Exception as e:
-                        logger.error(f"Failed to upload image batch {batch_idx+1}: {e}")
-
-            logger.info(f"Image upload complete: {total_uploaded}/{len(image_files)} files uploaded")
-
-            # Update stats
-            STATS["uploads"] += 1
-            STATS["last_upload"] = timestamp
-            upload_status = "✅ success"
-
-            # Update README in background thread
-            threading.Thread(target=update_readme_stats).start()
-
-        except Exception as e:
-            logger.error(f"HF upload failed: {e}")
-            upload_status = f"❌ failed: {str(e)[:100]}..."
-
-    # Return stats with all info
-    result = (
-        f"🔥 BATCH STATS 🔥\n"
-        f"✓ Processed: {batch_stats['processed']} images\n"
-        f"🌍 With GPS: {batch_stats['with_gps']}\n"
-        f"🚫 Skipped: {batch_stats['skipped']}\n"
-        f"⚠️ Errors: {batch_stats['errors']}\n"
-        f"☁️ Upload: {upload_status}\n\n"
-        f"📊 TOTAL STATS 📊\n"
-        f"Total files: {STATS['total_files']}\n"
-        f"Files with GPS: {STATS['files_with_gps']}\n"
-        f"Upload batches: {STATS['uploads']}\n"
-        f"Uptime: {format_duration(int(time.time()) - STATS['startup_time'])}"
-    )
-
-    return result, output_file

-def process_directory(directory_path):  # [function name garbled in source]
-    """Scan a directory and process any images found"""
-    if not os.path.isdir(directory_path):
-        return
-
-    logger.info(f"Scanning directory: {directory_path}")
-    image_files = []
-
-    # Find all image files in directory
-    for root, _, files in os.walk(directory_path):
-        for file in files:
-            file_path = os.path.join(root, file)
-            if Path(file_path).suffix.lower() in SUPPORTED_EXTENSIONS:
-                image_files.append(file_path)
-
-    if not image_files:
-        logger.info(f"No image files found in {directory_path}")
-        return
-
-    logger.info(f"Found {len(image_files)} image files in {directory_path}")
-
-    # Create file-like objects for processing
-    class FileObject:
-        def __init__(self, path):
-            self.name = path
-
-    # [garbled in source: the collected paths are wrapped in FileObject
-    #  instances and passed to process_images]

 def schedule_directory_scan():
-    """Periodically scan a directory for new images"""
-    watch_dir = os.environ.get("WATCH_DIRECTORY")
-
     if watch_dir and os.path.isdir(watch_dir):
-        logger.info(f"Running scheduled scan of {watch_dir}")
-        process_directory(watch_dir)  # [call garbled in source]
-
-    # Schedule next check
     threading.Timer(CHECK_INTERVAL, schedule_directory_scan).start()

-# Create the UI
-demo = gr.Interface(
-    fn=process_images,
-    inputs=gr.Files(label="DROP IMAGES HERE 📸", file_types=["image"], file_count="multiple"),
-    outputs=[
-        gr.Textbox(label="Status Report", lines=10),
-        gr.File(label="Download Metadata JSONL")
-    ],
-    title="🌍 Geo-Metadata Extractor 🔥",
-    description=(
-        f"Upload images to extract all metadata including GPS coordinates. "
-        f"Supported formats: {', '.join(sorted(ext[1:] for ext in SUPPORTED_EXTENSIONS))}. "
-        f"Data automatically uploads to {HF_USERNAME}/{DATASET_NAME} on Hugging Face."
-    ),
-    allow_flagging="never",
-    theme="huggingface"
-)
-
-# Launch app and start background processes
 if __name__ == "__main__":
     ensure_dataset_exists()
-
-    # Start directory watcher if configured
-    if os.environ.get("WATCH_DIRECTORY"):
         threading.Thread(target=schedule_directory_scan).start()
-
-    # Log startup info
-    logger.info(f"=== Application Startup at {time.strftime('%Y-%m-%d %H:%M:%S')} ===")
-    logger.info(f"Dataset: {HF_USERNAME}/{DATASET_NAME}")
-    logger.info(f"Token available: {bool(HF_TOKEN)}")
-
-    # Launch Gradio app
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import os
+import json
 import time
+import logging
+import logging.handlers  # RotatingFileHandler lives in logging.handlers, so the submodule needs an explicit import
 import threading
 import sys
+from pathlib import Path
+from concurrent.futures import ThreadPoolExecutor
+from datasets import Dataset
+from huggingface_hub import HfApi, create_repo, CommitOperationAdd
+from PIL import Image, ExifTags
+import gradio as gr

+# ----------------- CONFIGURATION -----------------
+HF_USERNAME = os.getenv("HF_USERNAME", "latterworks")
+DATASET_NAME = os.getenv("DATASET_NAME", "geo-metadata")
+HF_TOKEN = os.getenv("HF_TOKEN")
+CHECK_INTERVAL = int(os.getenv("CHECK_INTERVAL", "3600"))  # Check every hour
+MAX_BATCH_SIZE = int(os.getenv("MAX_BATCH_SIZE", "20"))
+MAX_LOG_SIZE_MB = int(os.getenv("MAX_LOG_SIZE_MB", "10"))
 SUPPORTED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.tiff', '.bmp', '.webp'}

+# Logging Setup
+os.makedirs("logs", exist_ok=True)
+log_handler = logging.handlers.RotatingFileHandler("logs/uploader.log", maxBytes=MAX_LOG_SIZE_MB * 1024 * 1024, backupCount=5)
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.StreamHandler(), log_handler])
+logger = logging.getLogger(__name__)

+# Global State
+STATS = {"uploads": 0, "total_files": 0, "files_with_gps": 0, "startup_time": int(time.time())}

+# Initialize HF API once
+api = HfApi(token=HF_TOKEN)

+# ----------------- UTILITIES -----------------
+def repository_exists(repo_id, repo_type="dataset"):
+    """Check if a Hugging Face dataset repo exists."""
+    try:
+        api.repo_info(repo_id=repo_id, repo_type=repo_type)
         return True
+    except Exception:
         return False
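Note: recent releases of huggingface_hub also expose this check directly on the client as HfApi.repo_exists; if the installed version ships it, the helper above could be reduced to a one-liner (a sketch, not verified against the version pinned in this Space):

    exists = api.repo_exists(repo_id, repo_type="dataset")  # assumes a huggingface_hub version that provides repo_exists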

+def ensure_dataset_exists():
+    """Ensure dataset repository exists or create it."""
+    repo_id = f"{HF_USERNAME}/{DATASET_NAME}"
+    if not repository_exists(repo_id):
+        logger.info(f"Creating dataset repository: {repo_id}")
+        create_repo(repo_id=repo_id, repo_type="dataset", private=False, token=HF_TOKEN)
+        api.upload_file(path_or_fileobj=b"", path_in_repo="images/.gitkeep", repo_id=repo_id, repo_type="dataset", commit_message="Initialize images folder")
+    return True

 def format_duration(seconds):
+    """Convert seconds to human-readable duration."""
+    d, h, m, s = seconds // 86400, (seconds % 86400) // 3600, (seconds % 3600) // 60, seconds % 60
+    return f"{d}d {h}h {m}m {s}s" if d else f"{h}h {m}m {s}s" if h else f"{m}m {s}s"

 def convert_to_degrees(value):
+    """Convert GPS coordinates to decimal degrees."""
     try:
+        d, m, s = [float(x.numerator) / float(x.denominator) if hasattr(x, 'numerator') else float(x) for x in value]
+        return d + (m / 60.0) + (s / 3600.0)
+    except Exception:
         return None
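As a worked example, EXIF encodes 40° 26′ 46.302″ as a (degrees, minutes, seconds) triple, and the helper reduces it to 40 + 26/60 + 46.302/3600:

    print(convert_to_degrees((40, 26, 46.302)))  # ≈ 40.446195 decimal degrees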

 def extract_gps_info(gps_info):
+    """Extract and process GPS data from EXIF."""
     if not isinstance(gps_info, dict):
         return None
     try:
+        gps_data = {ExifTags.GPSTAGS.get(k, f"gps_{k}"): v for k, v in gps_info.items()}
         if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data:
+            lat, lon = convert_to_degrees(gps_data['GPSLatitude']), convert_to_degrees(gps_data['GPSLongitude'])
+            if lat is not None and lon is not None:  # 0.0 is a valid coordinate, so test against None rather than truthiness
+                if gps_data.get('GPSLatitudeRef', 'N') == 'S':
+                    lat = -lat
+                if gps_data.get('GPSLongitudeRef', 'E') == 'W':
+                    lon = -lon
+                gps_data.update({'Latitude': round(lat, 6), 'Longitude': round(lon, 6)})
         return gps_data
     except Exception:
+        return None
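A quick sanity check of the hemisphere handling, using the raw numeric GPS IFD tag ids that ExifTags.GPSTAGS maps to names (1/2 are the latitude reference and value, 3/4 the longitude; the coordinates are illustrative):

    tags = {1: 'S', 2: (33, 52, 4.8), 3: 'E', 4: (151, 12, 36.0)}
    print(extract_gps_info(tags))  # includes Latitude ≈ -33.868, Longitude ≈ 151.21 (the 'S' reference flips the sign)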

 def get_image_metadata(image_path):
+    """Extract metadata from an image file."""
     file_path = Path(image_path)
+    metadata = {"file_name": str(file_path.absolute()), "file_extension": file_path.suffix.lower()}
     try:
+        with Image.open(image_path) as img:
+            metadata.update({"format": img.format, "size": list(img.size), "mode": img.mode})
+            # Not every format exposes _getexif (e.g. PNG); without this guard the AttributeError
+            # would hit the bare except below and discard the metadata already collected
+            exif_data = img._getexif() if hasattr(img, "_getexif") else None
+            if exif_data:
+                metadata.update({ExifTags.TAGS.get(k, f"tag_{k}").lower(): v for k, v in exif_data.items()})
+                if 'gpsinfo' in metadata:
+                    metadata["gps_info"] = extract_gps_info(metadata.pop('gpsinfo'))
         metadata["file_size"] = os.path.getsize(image_path)
+        metadata["timestamp"] = int(time.time())
         return metadata
+    except Exception:
+        return None
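For a typical geotagged JPEG the function returns a flat dict along these lines (illustrative values, not real output):

    {"file_name": "/data/IMG_0001.jpg", "file_extension": ".jpg",
     "format": "JPEG", "size": [4032, 3024], "mode": "RGB",
     "gps_info": {"Latitude": -33.868013, "Longitude": 151.21, ...},
     "file_size": 2483091, "timestamp": 1709000000}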

+# ----------------- UPLOADING -----------------
+def upload_metadata(metadata_list):
+    """Upload metadata to Hugging Face."""
     if not metadata_list:
+        return "No metadata to upload"
+    repo_id = f"{HF_USERNAME}/{DATASET_NAME}"
+    dataset = Dataset.from_dict({"metadata": metadata_list})
+    dataset.push_to_hub(repo_id, token=HF_TOKEN, commit_message=f"Add {len(metadata_list)} image metadata entries")  # pass the token explicitly; a Space has no cached login
+    return "Upload successful"
+
+def upload_images(image_paths):
+    """Upload images to Hugging Face."""
+    repo_id = f"{HF_USERNAME}/{DATASET_NAME}"
+    operations = []
+    for image_path in image_paths:
         try:
+            with open(image_path, "rb") as f:
+                operations.append(CommitOperationAdd(path_in_repo=f"images/{Path(image_path).name}", path_or_fileobj=f.read()))
+        except Exception:
+            continue
+    if operations:
+        api.create_commit(repo_id=repo_id, repo_type="dataset", operations=operations, commit_message="Batch upload images")
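Once pushed, the collected metadata can be read back with the datasets library (a sketch assuming the default HF_USERNAME/DATASET_NAME values above):

    from datasets import load_dataset
    ds = load_dataset("latterworks/geo-metadata", split="train")  # push_to_hub writes a "train" split by default
    print(ds[0]["metadata"])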

+# ----------------- PROCESSING -----------------
+def process_images(image_files):
+    """Process images, extract metadata, and upload to Hugging Face."""
+    if not ensure_dataset_exists():
+        return "Dataset creation failed."

+    metadata_list = []
+    image_paths = []
+    # Accept both Gradio file objects (which expose .name) and plain path strings from the directory scanner
+    paths = [getattr(f, "name", f) for f in image_files]
+    with ThreadPoolExecutor(max_workers=MAX_BATCH_SIZE) as executor:
+        results = executor.map(get_image_metadata, paths)
+        for result, path in zip(results, paths):
+            if result:
+                metadata_list.append(result)
+                image_paths.append(path)
+
+    if metadata_list:
+        upload_metadata(metadata_list)
+        upload_images(image_paths)
+        return f"Processed {len(metadata_list)} images, uploaded metadata & images."
+    return "No valid images processed."

+# ----------------- GRADIO UI -----------------
+demo = gr.Interface(
+    fn=process_images,
+    inputs=gr.Files(label="Upload Images"),
+    outputs=gr.Textbox(label="Status Report"),
+    title="Geo-Metadata Uploader",
+    description=f"Upload images for automatic metadata extraction and upload to Hugging Face ({HF_USERNAME}/{DATASET_NAME}).",
+    allow_flagging="never"
+)

+# ----------------- AUTO-SCHEDULING -----------------
 def schedule_directory_scan():
+    """Periodically scan a directory for new images."""
+    watch_dir = os.getenv("WATCH_DIRECTORY")
     if watch_dir and os.path.isdir(watch_dir):
+        # Pass plain path strings: a Path object's .name is only the basename, which
+        # process_images would then fail to open
+        image_files = [str(Path(watch_dir) / f) for f in os.listdir(watch_dir) if f.lower().endswith(tuple(SUPPORTED_EXTENSIONS))]
+        process_images(image_files)
     threading.Timer(CHECK_INTERVAL, schedule_directory_scan).start()

 if __name__ == "__main__":
+    logger.info(f"Starting uploader for {HF_USERNAME}/{DATASET_NAME}...")
     ensure_dataset_exists()
+    if os.getenv("WATCH_DIRECTORY"):
         threading.Thread(target=schedule_directory_scan).start()
+    demo.launch(server_name="0.0.0.0", server_port=7860)
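For a local run outside Spaces, the same entry point applies (hypothetical token value; the port matches demo.launch above):

    # HF_TOKEN=hf_xxx HF_USERNAME=latterworks DATASET_NAME=geo-metadata python app.py
    # The Gradio UI is then served on http://0.0.0.0:7860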