Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,68 +1,98 @@
|
|
1 |
-
|
2 |
-
# Geo-Metadata Extractor (v1742324215)
|
3 |
import gradio as gr
|
4 |
from pathlib import Path
|
5 |
from PIL import Image, ExifTags
|
6 |
import json
|
7 |
import os
|
8 |
import logging
|
|
|
9 |
from datasets import Dataset
|
10 |
from huggingface_hub import HfApi
|
11 |
|
12 |
-
logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
15 |
-
#
|
16 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
17 |
-
HF_USERNAME = "latterworks"
|
18 |
-
DATASET_NAME = "geo-metadata"
|
19 |
SUPPORTED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.tiff', '.bmp', '.webp'}
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def convert_to_degrees(value):
|
|
|
22 |
try:
|
23 |
if not isinstance(value, (tuple, list)) or len(value) != 3:
|
24 |
-
raise ValueError("GPS
|
|
|
25 |
d, m, s = value
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
27 |
if not -180 <= degrees <= 180:
|
28 |
-
|
29 |
return degrees
|
30 |
except Exception as e:
|
31 |
-
logger.error(f"
|
32 |
return None
|
33 |
|
34 |
def extract_gps_info(gps_info):
|
|
|
35 |
if not isinstance(gps_info, dict):
|
36 |
-
logger.warning("GPSInfo is not a dictionary, skipping")
|
37 |
return None
|
|
|
38 |
gps_data = {}
|
39 |
try:
|
|
|
40 |
for key, val in gps_info.items():
|
41 |
-
tag_name = ExifTags.GPSTAGS.get(key, f"
|
42 |
gps_data[tag_name] = val
|
|
|
|
|
43 |
if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data:
|
44 |
lat = convert_to_degrees(gps_data['GPSLatitude'])
|
45 |
lon = convert_to_degrees(gps_data['GPSLongitude'])
|
|
|
46 |
if lat is None or lon is None:
|
47 |
-
logger.error("Failed to convert latitude or longitude")
|
48 |
return None
|
|
|
|
|
49 |
lat_ref = gps_data.get('GPSLatitudeRef', 'N')
|
50 |
lon_ref = gps_data.get('GPSLongitudeRef', 'E')
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
gps_data['
|
|
|
|
|
60 |
return gps_data
|
61 |
except Exception as e:
|
62 |
-
logger.error(f"
|
63 |
return None
|
64 |
|
65 |
def make_serializable(value):
|
|
|
66 |
try:
|
67 |
if hasattr(value, 'numerator') and hasattr(value, 'denominator'):
|
68 |
return float(value.numerator) / float(value.denominator)
|
@@ -72,13 +102,13 @@ def make_serializable(value):
|
|
72 |
return {str(k): make_serializable(v) for k, v in value.items()}
|
73 |
elif isinstance(value, bytes):
|
74 |
return value.decode('utf-8', errors='replace')
|
75 |
-
json.dumps(value)
|
76 |
return value
|
77 |
-
except Exception
|
78 |
-
logger.warning(f"Converting to string due to serialization failure: {e}")
|
79 |
return str(value)
|
80 |
|
81 |
def get_image_metadata(image_path):
|
|
|
82 |
metadata = {"file_name": str(Path(image_path).absolute())}
|
83 |
try:
|
84 |
with Image.open(image_path) as image:
|
@@ -87,13 +117,14 @@ def get_image_metadata(image_path):
|
|
87 |
"size": list(image.size),
|
88 |
"mode": image.mode or "unknown"
|
89 |
})
|
|
|
|
|
90 |
exif_data = None
|
91 |
try:
|
92 |
exif_data = image._getexif()
|
93 |
-
except AttributeError:
|
94 |
-
metadata["exif_error"] =
|
95 |
-
|
96 |
-
metadata["exif_error"] = f"EXIF extraction failed: {str(e)}"
|
97 |
if exif_data and isinstance(exif_data, dict):
|
98 |
for tag_id, value in exif_data.items():
|
99 |
try:
|
@@ -106,8 +137,13 @@ def get_image_metadata(image_path):
|
|
106 |
metadata[tag_name] = make_serializable(value)
|
107 |
except Exception as e:
|
108 |
metadata[f"error_tag_{tag_id}"] = str(e)
|
|
|
|
|
109 |
metadata["file_size"] = os.path.getsize(image_path)
|
110 |
metadata["file_extension"] = Path(image_path).suffix.lower()
|
|
|
|
|
|
|
111 |
json.dumps(metadata)
|
112 |
return metadata
|
113 |
except Exception as e:
|
@@ -115,43 +151,132 @@ def get_image_metadata(image_path):
|
|
115 |
return {"file_name": str(Path(image_path).absolute()), "error": str(e)}
|
116 |
|
117 |
def process_images(image_files):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
metadata_list = []
|
|
|
|
|
|
|
119 |
for image_file in image_files:
|
120 |
-
if image_file
|
121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
metadata = get_image_metadata(image_file.name)
|
123 |
if metadata:
|
|
|
|
|
|
|
124 |
metadata_list.append(metadata)
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
with open(output_file, 'w', encoding='utf-8') as f:
|
127 |
for entry in metadata_list:
|
128 |
f.write(json.dumps(entry, ensure_ascii=False) + '\n')
|
129 |
|
130 |
-
# Upload to
|
|
|
131 |
if HF_TOKEN:
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
|
|
|
148 |
demo = gr.Interface(
|
149 |
fn=process_images,
|
150 |
-
inputs=gr.Files(label="
|
151 |
-
outputs=[
|
152 |
-
|
153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
)
|
155 |
|
|
|
156 |
if __name__ == "__main__":
|
157 |
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
from pathlib import Path
|
3 |
from PIL import Image, ExifTags
|
4 |
import json
|
5 |
import os
|
6 |
import logging
|
7 |
+
import time
|
8 |
from datasets import Dataset
|
9 |
from huggingface_hub import HfApi
|
10 |
|
11 |
+
# Log every record, with a timestamp, to both the console and a local file.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler(),
        # Explicit UTF-8: log messages in this module contain non-ASCII text
        # (e.g. the degree sign), which a platform-default encoding may reject.
        logging.FileHandler("metadata_uploader.log", encoding="utf-8"),
    ],
)
logger = logging.getLogger(__name__)

# Deployment constants: edit these to target your own Hub account and dataset.
HF_TOKEN = os.environ.get("HF_TOKEN")  # None when the variable is unset
HF_USERNAME = "latterworks"  # Hub user that owns the dataset repo
DATASET_NAME = "geo-metadata"  # Dataset repo name under HF_USERNAME
SUPPORTED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.tiff', '.bmp', '.webp'}

# Running totals, accumulated across batches for as long as the process lives.
STATS = {
    "uploads": 0,
    "total_files": 0,
    "files_with_gps": 0,
}
|
34 |
+
|
35 |
def _gps_component_to_float(component):
    """Return one GPS component (degrees, minutes or seconds) as a float.

    Accepts rational-like objects exposing ``numerator``/``denominator``
    (ints and fractions included), legacy ``(numerator, denominator)`` pairs,
    and anything ``float()`` understands. Raises on a zero denominator or an
    unconvertible value; the caller turns that into ``None``.
    """
    if hasattr(component, 'numerator') and hasattr(component, 'denominator'):
        return component.numerator / component.denominator
    if isinstance(component, (tuple, list)) and len(component) == 2:
        numerator, denominator = component
        return float(numerator) / float(denominator)
    return float(component)


def convert_to_degrees(value):
    """Convert a (degrees, minutes, seconds) GPS triple to decimal degrees.

    Args:
        value: A tuple or list of exactly three components; each may be a
            number, a rational-like object, or a (numerator, denominator) pair.

    Returns:
        The unsigned decimal-degree value as a float, or ``None`` when the
        input is malformed or cannot be converted (the failure is logged).
        A result outside [-180, 180] is logged as a warning but still returned.
    """
    try:
        if not isinstance(value, (tuple, list)) or len(value) != 3:
            raise ValueError(f"GPS needs 3 values, got {type(value)}")

        d, m, s = (_gps_component_to_float(part) for part in value)

        degrees = d + (m / 60.0) + (s / 3600.0)
        if not -180 <= degrees <= 180:
            logger.warning(f"GPS out of bounds: {degrees}°")
        return degrees
    except Exception as e:
        # Best effort: one bad coordinate must not abort the whole image.
        logger.error(f"GPS conversion failed: {e}, value={value}")
        return None
|
54 |
|
55 |
def _hemisphere_letter(ref, default):
    """Return the upper-case first letter of a hemisphere ref, or *default*.

    Refs can arrive as ``bytes``, in lower case, or padded with NULs or
    whitespace; all of those are reduced to a single letter such as ``'S'``.
    """
    if isinstance(ref, bytes):
        ref = ref.decode('ascii', errors='ignore')
    if not isinstance(ref, str):
        return default
    cleaned = ref.strip('\x00 \t\r\n').upper()
    return cleaned[:1] or default


def extract_gps_info(gps_info):
    """Extract and process GPS data from an EXIF GPSInfo mapping.

    Args:
        gps_info: Mapping of numeric GPS tag ids to raw values.

    Returns:
        A dict keyed by GPS tag name (unknown ids become ``gps_<id>``). When
        both ``GPSLatitude`` and ``GPSLongitude`` are present, signed decimal
        ``Latitude``/``Longitude`` entries rounded to 6 places are added.
        Returns ``None`` if *gps_info* is not a dict, if either coordinate
        cannot be converted, or if any unexpected error occurs.
    """
    if not isinstance(gps_info, dict):
        return None

    gps_data = {}
    try:
        # Translate numeric tag ids into readable names.
        for key, val in gps_info.items():
            tag_name = ExifTags.GPSTAGS.get(key, f"gps_{key}")
            gps_data[tag_name] = val

        # Process lat/long if present.
        if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data:
            lat = convert_to_degrees(gps_data['GPSLatitude'])
            lon = convert_to_degrees(gps_data['GPSLongitude'])

            if lat is None or lon is None:
                return None

            # Missing refs default to the northern / eastern hemisphere.
            lat_ref = _hemisphere_letter(gps_data.get('GPSLatitudeRef', 'N'), 'N')
            lon_ref = _hemisphere_letter(gps_data.get('GPSLongitudeRef', 'E'), 'E')

            # South and West are negative in signed decimal degrees.
            if lat_ref == 'S':
                lat = -lat
            if lon_ref == 'W':
                lon = -lon

            gps_data['Latitude'] = round(lat, 6)
            gps_data['Longitude'] = round(lon, 6)

        return gps_data
    except Exception as e:
        logger.error(f"GPS extraction error: {e}")
        return None
|
93 |
|
94 |
def make_serializable(value):
|
95 |
+
"""Make any value JSON serializable"""
|
96 |
try:
|
97 |
if hasattr(value, 'numerator') and hasattr(value, 'denominator'):
|
98 |
return float(value.numerator) / float(value.denominator)
|
|
|
102 |
return {str(k): make_serializable(v) for k, v in value.items()}
|
103 |
elif isinstance(value, bytes):
|
104 |
return value.decode('utf-8', errors='replace')
|
105 |
+
json.dumps(value) # Test if serializable
|
106 |
return value
|
107 |
+
except Exception:
|
|
|
108 |
return str(value)
|
109 |
|
110 |
def get_image_metadata(image_path):
|
111 |
+
"""Extract all metadata from an image file"""
|
112 |
metadata = {"file_name": str(Path(image_path).absolute())}
|
113 |
try:
|
114 |
with Image.open(image_path) as image:
|
|
|
117 |
"size": list(image.size),
|
118 |
"mode": image.mode or "unknown"
|
119 |
})
|
120 |
+
|
121 |
+
# Extract EXIF if available
|
122 |
exif_data = None
|
123 |
try:
|
124 |
exif_data = image._getexif()
|
125 |
+
except (AttributeError, Exception) as e:
|
126 |
+
metadata["exif_error"] = str(e)
|
127 |
+
|
|
|
128 |
if exif_data and isinstance(exif_data, dict):
|
129 |
for tag_id, value in exif_data.items():
|
130 |
try:
|
|
|
137 |
metadata[tag_name] = make_serializable(value)
|
138 |
except Exception as e:
|
139 |
metadata[f"error_tag_{tag_id}"] = str(e)
|
140 |
+
|
141 |
+
# Add file details
|
142 |
metadata["file_size"] = os.path.getsize(image_path)
|
143 |
metadata["file_extension"] = Path(image_path).suffix.lower()
|
144 |
+
metadata["extraction_timestamp"] = int(time.time())
|
145 |
+
|
146 |
+
# Test serialization
|
147 |
json.dumps(metadata)
|
148 |
return metadata
|
149 |
except Exception as e:
|
|
|
151 |
return {"file_name": str(Path(image_path).absolute()), "error": str(e)}
|
152 |
|
153 |
def _uploaded_path(image_file):
    """Return the filesystem path of one upload entry, or None if it has none.

    Handles both file-like wrappers exposing ``.name`` and plain path values.
    """
    if not image_file:
        return None
    candidate = getattr(image_file, "name", image_file)
    return candidate if isinstance(candidate, (str, os.PathLike)) else None


def _upload_batch(filenames, metadata_list, output_file, timestamp):
    """Push one batch to the Hub dataset repo; raises on any failure."""
    repo_id = f"{HF_USERNAME}/{DATASET_NAME}"

    # Dataset object holding both the bare file names and the full metadata.
    # NOTE(review): push_to_hub presumably replaces the repo's existing split
    # with this batch rather than appending - confirm that is intended.
    dataset = Dataset.from_dict({
        "filename": filenames,
        "metadata": metadata_list
    })
    dataset.push_to_hub(
        repo_id,
        token=HF_TOKEN,
        commit_message=f"Added metadata for {len(metadata_list)} images"
    )

    # Also keep the raw JSONL for this batch under batches/.
    api = HfApi()
    api.upload_file(
        path_or_fileobj=output_file,
        path_in_repo=f"batches/metadata_{timestamp}.jsonl",
        repo_id=repo_id,
        repo_type="dataset",
        token=HF_TOKEN,
        commit_message=f"Raw metadata batch {timestamp}"
    )


def process_images(image_files):
    """Extract metadata from uploaded images, save it as JSONL and upload it.

    Args:
        image_files: Iterable of upload entries; each is either an object with
            a ``.name`` path attribute or a plain path. May be empty or None.

    Returns:
        A ``(status_text, jsonl_path)`` tuple. ``jsonl_path`` is ``None`` when
        no input was given or no image produced metadata.
    """
    # NOTE(review): the non-ASCII glyphs in the user-facing strings below were
    # reconstructed from mis-encoded text; confirm them against the live app.
    if not image_files:
        return "🚫 Upload some fucking images first! 📷", None

    # Per-batch counters (STATS holds the process-wide totals).
    batch_stats = {
        "processed": 0,
        "skipped": 0,
        "errors": 0,
        "with_gps": 0
    }

    metadata_list = []
    filenames = []

    for image_file in image_files:
        image_path = _uploaded_path(image_file)
        if image_path is None or not os.path.exists(image_path):
            # Count these instead of dropping them silently.
            logger.warning(f"Skipping missing or invalid upload entry: {image_file!r}")
            batch_stats["skipped"] += 1
            continue

        file_ext = Path(image_path).suffix.lower()
        if file_ext not in SUPPORTED_EXTENSIONS:
            logger.info(f"Skipping unsupported file: {image_path}")
            batch_stats["skipped"] += 1
            continue

        logger.info(f"Processing: {image_path}")
        try:
            metadata = get_image_metadata(image_path)
            if metadata:
                if "gps_info" in metadata:
                    batch_stats["with_gps"] += 1
                    STATS["files_with_gps"] += 1
                metadata_list.append(metadata)
                filenames.append(Path(image_path).name)
                batch_stats["processed"] += 1
            else:
                batch_stats["errors"] += 1
        except Exception as e:
            logger.error(f"Failed on {image_path}: {e}")
            batch_stats["errors"] += 1

    # Exit if nothing processed.
    if not metadata_list:
        return f"❌ No valid images. Skipped: {batch_stats['skipped']}, Errors: {batch_stats['errors']}", None

    # The second-resolution timestamp names both the local file and the Hub copy.
    timestamp = int(time.time())
    STATS["total_files"] += batch_stats["processed"]
    output_file = f"metadata_{timestamp}.jsonl"

    # Save locally, one JSON object per line.
    with open(output_file, 'w', encoding='utf-8') as f:
        for entry in metadata_list:
            f.write(json.dumps(entry, ensure_ascii=False) + '\n')

    # Upload to HF; a failure is reported in the status text, not raised.
    upload_status = "not uploaded (no token)"
    if HF_TOKEN:
        try:
            logger.info(f"Uploading to {HF_USERNAME}/{DATASET_NAME}...")
            _upload_batch(filenames, metadata_list, output_file, timestamp)
            STATS["uploads"] += 1
            upload_status = "✅ success"
        except Exception as e:
            logger.error(f"HF upload failed: {e}")
            upload_status = f"❌ failed: {str(e)[:100]}..."

    result = (
        f"🔥 BATCH STATS 🔥\n"
        f"✓ Processed: {batch_stats['processed']} images\n"
        f"📍 With GPS: {batch_stats['with_gps']}\n"
        f"🚫 Skipped: {batch_stats['skipped']}\n"
        f"⚠️ Errors: {batch_stats['errors']}\n"
        f"☁️ Upload: {upload_status}\n\n"
        f"📊 TOTAL STATS 📊\n"
        f"Total files: {STATS['total_files']}\n"
        f"Files with GPS: {STATS['files_with_gps']}\n"
        f"Upload batches: {STATS['uploads']}"
    )

    return result, output_file
|
261 |
|
262 |
+
# Gradio UI: multi-file image upload in, status text plus the JSONL file out.
# NOTE(review): non-ASCII glyphs in the label/title were reconstructed from
# mis-encoded text; confirm them against the live app.
# NOTE(review): `allow_flagging` and `theme="huggingface"` may be deprecated or
# ignored depending on the installed Gradio version - confirm against the
# version pinned for this Space.
demo = gr.Interface(
    fn=process_images,
    inputs=gr.Files(label="DROP IMAGES HERE 📸", file_types=["image"], file_count="multiple"),
    outputs=[
        gr.Textbox(label="Status Report", lines=10),
        gr.File(label="Download Metadata JSONL")
    ],
    title="🌍 Geo-Metadata Extractor 🔥",
    description=(
        "Upload images to extract all metadata including GPS coordinates. "
        f"Supported formats: {', '.join(sorted(ext[1:] for ext in SUPPORTED_EXTENSIONS))}. "
        f"Data automatically uploads to {HF_USERNAME}/{DATASET_NAME} on Hugging Face."
    ),
    allow_flagging="never",
    theme="huggingface"
)

# Only launch when run directly; binds to all interfaces on port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
|