latterworks committed on
Commit
fb4d504
·
verified ·
1 Parent(s): 2adce56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -168
app.py CHANGED
@@ -1,172 +1,29 @@
1
- import os
2
- import json
3
- import time
4
- import logging
5
- import threading
6
- import sys
7
- from pathlib import Path
8
- from concurrent.futures import ThreadPoolExecutor
9
- from datasets import Dataset
10
- from huggingface_hub import HfApi, create_repo, CommitOperationAdd, hf_hub_download
11
- from PIL import Image, ExifTags
12
  import gradio as gr
13
- import logging.handlers
14
-
15
- # ----------------- CONFIGURATION -----------------
16
- HF_USERNAME = os.getenv("HF_USERNAME", "latticeworks")
17
- DATASET_NAME = os.getenv("DATASET_NAME", "geo-metadata")
18
- HF_TOKEN = os.getenv("HF_TOKEN")
19
- CHECK_INTERVAL = int(os.getenv("CHECK_INTERVAL", "3600")) # Check every hour
20
- MAX_BATCH_SIZE = int(os.getenv("MAX_BATCH_SIZE", "20"))
21
- MAX_LOG_SIZE_MB = int(os.getenv("MAX_LOG_SIZE_MB", "10"))
22
- SUPPORTED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.tiff', '.bmp', '.webp'}
23
-
24
- # Logging Setup
25
- os.makedirs("logs", exist_ok=True)
26
- log_handler = logging.handlers.RotatingFileHandler("logs/uploader.log", maxBytes=MAX_LOG_SIZE_MB * 1024 * 1024, backupCount=5)
27
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.StreamHandler(), log_handler])
28
- logger = logging.getLogger(__name__)
29
-
30
- # Global State
31
- STATS = {"uploads": 0, "total_files": 0, "files_with_gps": 0, "startup_time": int(time.time())}
32
-
33
- # Initialize HF API once
34
- api = HfApi()
35
-
36
- # ----------------- UTILITIES -----------------
37
- def repository_exists(repo_id, repo_type="dataset"):
38
- """Check if a Hugging Face dataset repo exists."""
39
- try:
40
- api.repo_info(repo_id=repo_id, repo_type=repo_type)
41
- return True
42
- except Exception:
43
- return False
44
-
45
- def ensure_dataset_exists():
46
- """Ensure dataset repository exists or create it."""
47
- repo_id = f"{HF_USERNAME}/{DATASET_NAME}"
48
- if not repository_exists(repo_id):
49
- logger.info(f"Creating dataset repository: {repo_id}")
50
- create_repo(repo_id=repo_id, repo_type="dataset", private=False, token=HF_TOKEN)
51
- api.upload_file(path_or_fileobj=b"", path_in_repo="images/.gitkeep", repo_id=repo_id, repo_type="dataset", commit_message="Initialize images folder")
52
- return True
53
-
54
- def format_duration(seconds):
55
- """Convert seconds to human-readable duration."""
56
- d, h, m, s = seconds // 86400, (seconds % 86400) // 3600, (seconds % 3600) // 60, seconds % 60
57
- return f"{d}d {h}h {m}m {s}s" if d else f"{h}h {m}m {s}s" if h else f"{m}m {s}s"
58
-
59
- def convert_to_degrees(value):
60
- """Convert GPS coordinates to decimal degrees."""
61
- try:
62
- d, m, s = [float(x.numerator) / float(x.denominator) if hasattr(x, 'numerator') else float(x) for x in value]
63
- return d + (m / 60.0) + (s / 3600.0)
64
- except Exception:
65
- return None
66
-
67
- def extract_gps_info(gps_info):
68
- """Extract and process GPS data from EXIF."""
69
- if not isinstance(gps_info, dict):
70
- return None
71
- try:
72
- gps_data = {ExifTags.GPSTAGS.get(k, f"gps_{k}"): v for k, v in gps_info.items()}
73
- if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data:
74
- lat, lon = convert_to_degrees(gps_data['GPSLatitude']), convert_to_degrees(gps_data['GPSLongitude'])
75
- if lat and lon:
76
- if gps_data.get('GPSLatitudeRef', 'N') == 'S':
77
- lat = -lat
78
- if gps_data.get('GPSLongitudeRef', 'E') == 'W':
79
- lon = -lon
80
- gps_data.update({'Latitude': round(lat, 6), 'Longitude': round(lon, 6)})
81
- return gps_data
82
- except Exception:
83
- return None
84
-
85
- def get_image_metadata(image_path):
86
- """Extract metadata from an image file."""
87
- file_path = Path(image_path)
88
- metadata = {"file_name": str(file_path.absolute()), "file_extension": file_path.suffix.lower()}
89
- try:
90
- with Image.open(image_path) as img:
91
- metadata.update({"format": img.format, "size": list(img.size), "mode": img.mode})
92
- exif_data = img._getexif()
93
- if exif_data:
94
- metadata.update({ExifTags.TAGS.get(k, f"tag_{k}").lower(): v for k, v in exif_data.items()})
95
- if 'gpsinfo' in metadata:
96
- metadata["gps_info"] = extract_gps_info(metadata.pop('gpsinfo'))
97
- metadata["file_size"] = os.path.getsize(image_path)
98
- metadata["timestamp"] = int(time.time())
99
- return metadata
100
- except Exception:
101
- return None
102
-
103
- # ----------------- UPLOADING -----------------
104
- def upload_metadata(metadata_list):
105
- """Upload metadata to Hugging Face."""
106
- if not metadata_list:
107
- return "No metadata to upload"
108
- repo_id = f"{HF_USERNAME}/{DATASET_NAME}"
109
- dataset = Dataset.from_dict({"metadata": metadata_list})
110
- dataset.push_to_hub(repo_id, commit_message=f"Add {len(metadata_list)} image metadata entries", token=HF_TOKEN)
111
- return "Upload successful"
112
-
113
- def upload_images(image_paths):
114
- """Upload images to Hugging Face."""
115
- repo_id = f"{HF_USERNAME}/{DATASET_NAME}"
116
- operations = []
117
- for image_path in image_paths:
118
- try:
119
- with open(image_path, "rb") as f:
120
- operations.append(CommitOperationAdd(path_in_repo=f"images/{Path(image_path).name}", path_or_fileobj=f.read()))
121
- except Exception as e:
122
- logger.error(f"Failed to process image {image_path}: {e}")
123
- continue
124
- if operations:
125
- api.create_commit(repo_id=repo_id, repo_type="dataset", operations=operations, commit_message="Batch upload images", token=HF_TOKEN)
126
-
127
- # ----------------- PROCESSING -----------------
128
- def process_images(image_files):
129
- """Process images, extract metadata, and upload to Hugging Face."""
130
- if not ensure_dataset_exists():
131
- return "Dataset creation failed."
132
-
133
- metadata_list = []
134
- image_paths = []
135
- with ThreadPoolExecutor(max_workers=MAX_BATCH_SIZE) as executor:
136
- results = executor.map(get_image_metadata, [file.name for file in image_files])
137
- for result, file in zip(results, image_files):
138
- if result:
139
- metadata_list.append(result)
140
- image_paths.append(file.name)
141
-
142
- if metadata_list:
143
- upload_metadata(metadata_list)
144
- upload_images(image_paths)
145
- return f"Processed {len(metadata_list)} images, uploaded metadata & images."
146
- return "No valid images processed."
147
-
148
- # ----------------- GRADIO UI -----------------
149
- demo = gr.Interface(
150
- fn=process_images,
151
- inputs=gr.Files(label="Upload Images"),
152
- outputs=gr.Textbox(label="Status Report"),
153
- title="Geo-Metadata Uploader",
154
- description=f"Upload images for automatic metadata extraction and upload to Hugging Face ({HF_USERNAME}/{DATASET_NAME}).",
155
- allow_flagging="never"
156
  )
157
 
158
- # ----------------- AUTO-SCHEDULING -----------------
159
- def schedule_directory_scan():
160
- """Periodically scan a directory for new images."""
161
- watch_dir = os.getenv("WATCH_DIRECTORY")
162
- if watch_dir and os.path.isdir(watch_dir):
163
- image_files = [Path(watch_dir) / f for f in os.listdir(watch_dir) if f.lower().endswith(tuple(SUPPORTED_EXTENSIONS))]
164
- process_images(image_files)
165
- threading.Timer(CHECK_INTERVAL, schedule_directory_scan).start()
166
-
167
  if __name__ == "__main__":
168
- logger.info(f"Starting uploader for {HF_USERNAME}/{DATASET_NAME}...")
169
- ensure_dataset_exists()
170
- if os.getenv("WATCH_DIRECTORY"):
171
- threading.Thread(target=schedule_directory_scan).start()
172
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import torch
3
+ from geoclip import GeoCLIP
4
+
5
# Instantiate GeoCLIP once at import time so every prediction reuses the
# same model instance.
# NOTE(review): constructing GeoCLIP() presumably loads pretrained weights
# and may be slow on first run — confirm against the geoclip package docs.
model = GeoCLIP()
7
+
8
# Geolocation prediction backing the Gradio interface.
def predict_location(image_path, top_k=5):
    """Predict the most likely GPS coordinates for an image via GeoCLIP.

    Args:
        image_path: Filesystem path to the image to geolocate.
        top_k: How many top predictions to request and report (default 5;
            kept as a parameter so the count is not hard-coded twice).

    Returns:
        A newline-separated string with one line per prediction:
        "Prediction N: (lat, lon) | Probability: p".
    """
    # model.predict returns two parallel sequences: GPS (lat, lon) pairs
    # and their probabilities, ordered from most to least likely.
    top_pred_gps, top_pred_prob = model.predict(image_path, top_k=top_k)
    # Iterate over what was actually returned instead of a hard-coded
    # range(5): this cannot IndexError if fewer results come back, and it
    # stays in sync with top_k automatically.
    lines = []
    for rank, ((lat, lon), prob) in enumerate(zip(top_pred_gps, top_pred_prob), start=1):
        lines.append(f"Prediction {rank}: ({lat:.6f}, {lon:.6f}) | Probability: {prob:.6f}")
    return "\n".join(lines)
17
+
18
# Wire the predictor into a simple Gradio UI: one image in, one text box out.
interface = gr.Interface(
    fn=predict_location,
    inputs=gr.Image(type="filepath", label="Upload Image"),
    outputs=gr.Textbox(label="Predicted Locations"),
    title="GeoCLIP Geolocation",
    description="Upload an image, and GeoCLIP will predict the top 5 GPS locations.",
)
26
 
27
# Start the Gradio server only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    interface.launch()