latterworks committed
Commit b9f8627 · verified · 1 Parent(s): 03d4d27

Update app.py

Files changed (1)
  1. app.py +178 -53
app.py CHANGED
@@ -1,68 +1,98 @@
-
- # Geo-Metadata Extractor (v1742324215)
  import gradio as gr
  from pathlib import Path
  from PIL import Image, ExifTags
  import json
  import os
  import logging
+ import time
  from datasets import Dataset
  from huggingface_hub import HfApi

- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
+ # Setup logging with timestamp
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(asctime)s [%(levelname)s] %(message)s",
+     handlers=[
+         logging.StreamHandler(),
+         logging.FileHandler("metadata_uploader.log")
+     ]
+ )
  logger = logging.getLogger(__name__)

- # Token should be set as a secret in Space settings
+ # Constants - set these for your account
  HF_TOKEN = os.environ.get("HF_TOKEN")
- HF_USERNAME = "latterworks"
- DATASET_NAME = "geo-metadata"
+ HF_USERNAME = "latterworks"  # Your username
+ DATASET_NAME = "geo-metadata"  # Your dataset name
  SUPPORTED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.tiff', '.bmp', '.webp'}

+ # Status tracking
+ STATS = {
+     "uploads": 0,
+     "total_files": 0,
+     "files_with_gps": 0
+ }
+
  def convert_to_degrees(value):
+     """Convert GPS coordinates to decimal degrees - handles all the edge cases"""
      try:
          if not isinstance(value, (tuple, list)) or len(value) != 3:
-             raise ValueError("GPS value must be a tuple of 3 elements")
+             raise ValueError(f"GPS needs 3 values, got {type(value)}")
+
          d, m, s = value
-         degrees = float(d) + (float(m) / 60.0) + (float(s) / 3600.0)
+         # Convert from rational numbers if needed
+         d = d.numerator / d.denominator if hasattr(d, 'numerator') else float(d)
+         m = m.numerator / m.denominator if hasattr(m, 'numerator') else float(m)
+         s = s.numerator / s.denominator if hasattr(s, 'numerator') else float(s)
+
+         degrees = d + (m / 60.0) + (s / 3600.0)
          if not -180 <= degrees <= 180:
-             raise ValueError("GPS degrees out of valid range")
+             logger.warning(f"GPS out of bounds: {degrees}°")
          return degrees
      except Exception as e:
-         logger.error(f"Failed to convert GPS coordinates: {e}")
+         logger.error(f"GPS conversion failed: {e}, value={value}")
          return None

  def extract_gps_info(gps_info):
+     """Extract and process GPS data from EXIF"""
      if not isinstance(gps_info, dict):
-         logger.warning("GPSInfo is not a dictionary, skipping")
          return None
+
      gps_data = {}
      try:
+         # Extract tags
          for key, val in gps_info.items():
-             tag_name = ExifTags.GPSTAGS.get(key, f"unknown_gps_tag_{key}")
+             tag_name = ExifTags.GPSTAGS.get(key, f"gps_{key}")
              gps_data[tag_name] = val
+
+         # Process lat/long if present
          if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data:
              lat = convert_to_degrees(gps_data['GPSLatitude'])
              lon = convert_to_degrees(gps_data['GPSLongitude'])
+
              if lat is None or lon is None:
-                 logger.error("Failed to convert latitude or longitude")
                  return None
+
+             # Apply N/S/E/W reference
              lat_ref = gps_data.get('GPSLatitudeRef', 'N')
             lon_ref = gps_data.get('GPSLongitudeRef', 'E')
-             if lat_ref not in {'N', 'S'} or lon_ref not in {'E', 'W'}:
-                 logger.warning(f"Invalid GPS reference: {lat_ref}, {lon_ref}")
-             else:
-                 if lat_ref == 'S':
-                     lat = -lat
-                 if lon_ref == 'W':
-                     lon = -lon
-                 gps_data['Latitude'] = lat
-                 gps_data['Longitude'] = lon
+
+             # Flip signs based on hemisphere
+             if lat_ref == 'S':
+                 lat = -lat
+             if lon_ref == 'W':
+                 lon = -lon
+
+             # Store clean coords with proper precision
+             gps_data['Latitude'] = round(lat, 6)
+             gps_data['Longitude'] = round(lon, 6)
+
          return gps_data
      except Exception as e:
-         logger.error(f"Error extracting GPS info: {e}")
+         logger.error(f"GPS extraction error: {e}")
          return None

  def make_serializable(value):
+     """Make any value JSON serializable"""
      try:
          if hasattr(value, 'numerator') and hasattr(value, 'denominator'):
              return float(value.numerator) / float(value.denominator)
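As a sanity check on the revised conversion above, the following standalone sketch (not part of the commit) reproduces the degrees + minutes/60 + seconds/3600 math on EXIF-style rational values; fractions.Fraction stands in here for Pillow's IFDRational, since both expose .numerator and .denominator.

from fractions import Fraction

def dms_to_degrees(value):
    d, m, s = value
    d = d.numerator / d.denominator if hasattr(d, 'numerator') else float(d)
    m = m.numerator / m.denominator if hasattr(m, 'numerator') else float(m)
    s = s.numerator / s.denominator if hasattr(s, 'numerator') else float(s)
    return d + (m / 60.0) + (s / 3600.0)

# 40 deg 26' 46.302" N  ->  approximately 40.446195 decimal degrees
lat = dms_to_degrees((Fraction(40), Fraction(26), Fraction(46302, 1000)))
print(round(lat, 6))    # 40.446195
print(round(-lat, 6))   # what the 'S' hemisphere sign flip would produce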
@@ -72,13 +102,13 @@ def make_serializable(value):
              return {str(k): make_serializable(v) for k, v in value.items()}
          elif isinstance(value, bytes):
              return value.decode('utf-8', errors='replace')
-         json.dumps(value)
+         json.dumps(value)  # Test if serializable
          return value
-     except Exception as e:
-         logger.warning(f"Converting to string due to serialization failure: {e}")
+     except Exception:
          return str(value)

  def get_image_metadata(image_path):
+     """Extract all metadata from an image file"""
      metadata = {"file_name": str(Path(image_path).absolute())}
      try:
          with Image.open(image_path) as image:
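One caveat around the Image.open() call above: .heic is whitelisted in SUPPORTED_EXTENSIONS, but stock Pillow ships without a HEIF decoder, so HEIC uploads will typically fall into the error branch of get_image_metadata. A minimal sketch of a possible workaround using the pillow-heif package (an assumption, not something this commit adds):

from pillow_heif import register_heif_opener  # assumed extra dependency
from PIL import Image

register_heif_opener()                         # registers a HEIF plugin with Pillow

with Image.open("photo.heic") as img:          # hypothetical local file
    print(img.size, img.mode)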
@@ -87,13 +117,14 @@ def get_image_metadata(image_path):
                  "size": list(image.size),
                  "mode": image.mode or "unknown"
              })
+
+             # Extract EXIF if available
              exif_data = None
              try:
                  exif_data = image._getexif()
-             except AttributeError:
-                 metadata["exif_error"] = "No EXIF data available"
-             except Exception as e:
-                 metadata["exif_error"] = f"EXIF extraction failed: {str(e)}"
+             except (AttributeError, Exception) as e:
+                 metadata["exif_error"] = str(e)
+
              if exif_data and isinstance(exif_data, dict):
                  for tag_id, value in exif_data.items():
                      try:
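The hunk above keeps using the private Image._getexif() helper. For reference, a standalone sketch of Pillow's public EXIF API (assuming Pillow 9.4 or newer for ExifTags.IFD), which reaches the GPS IFD without touching private attributes:

from PIL import Image, ExifTags

with Image.open("photo.jpg") as img:                  # hypothetical local file
    exif = img.getexif()
    gps_ifd = exif.get_ifd(ExifTags.IFD.GPSInfo)      # empty mapping when no GPS data
    gps = {ExifTags.GPSTAGS.get(k, f"gps_{k}"): v for k, v in gps_ifd.items()}
    print(gps.get("GPSLatitude"), gps.get("GPSLatitudeRef"))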
@@ -106,8 +137,13 @@ def get_image_metadata(image_path):
                          metadata[tag_name] = make_serializable(value)
                      except Exception as e:
                          metadata[f"error_tag_{tag_id}"] = str(e)
+
+             # Add file details
              metadata["file_size"] = os.path.getsize(image_path)
              metadata["file_extension"] = Path(image_path).suffix.lower()
+             metadata["extraction_timestamp"] = int(time.time())
+
+             # Test serialization
              json.dumps(metadata)
          return metadata
      except Exception as e:
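The json.dumps(metadata) probe added above relies on the same pattern make_serializable uses: attempt serialization and fall back to str() on failure. A standalone sketch of that pattern (hypothetical helper name, not from the commit):

import json
from fractions import Fraction

def to_jsonable(value):
    try:
        if hasattr(value, 'numerator') and hasattr(value, 'denominator'):
            return float(value.numerator) / float(value.denominator)
        if isinstance(value, bytes):
            return value.decode('utf-8', errors='replace')
        json.dumps(value)            # raises TypeError if not serializable
        return value
    except Exception:
        return str(value)

print(to_jsonable(Fraction(1, 3)))   # 0.3333333333333333
print(to_jsonable(b'GPS \xff'))      # decoded string with a replacement character
print(to_jsonable({1, 2, 3}))        # falls back to the string '{1, 2, 3}'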
@@ -115,43 +151,132 @@ def get_image_metadata(image_path):
          return {"file_name": str(Path(image_path).absolute()), "error": str(e)}

  def process_images(image_files):
+     """Process images and upload metadata to Hugging Face"""
+     if not image_files:
+         return "🚫 Upload some images first! 📷", None
+
+     # Reset stats for this batch
+     batch_stats = {
+         "processed": 0,
+         "skipped": 0,
+         "errors": 0,
+         "with_gps": 0
+     }
+
      metadata_list = []
+     filenames = []
+
+     # Process each image
      for image_file in image_files:
-         if image_file and Path(image_file.name).suffix.lower() in SUPPORTED_EXTENSIONS:
-             logger.info(f"Processing: {image_file.name}")
+         if not image_file or not os.path.exists(image_file.name):
+             continue
+
+         file_ext = Path(image_file.name).suffix.lower()
+         if file_ext not in SUPPORTED_EXTENSIONS:
+             logger.info(f"Skipping unsupported file: {image_file.name}")
+             batch_stats["skipped"] += 1
+             continue
+
+         logger.info(f"Processing: {image_file.name}")
+         try:
              metadata = get_image_metadata(image_file.name)
             if metadata:
+                 if "gps_info" in metadata:
+                     batch_stats["with_gps"] += 1
+                     STATS["files_with_gps"] += 1
                  metadata_list.append(metadata)
-     output_file = "metadata.jsonl"
+                 filenames.append(Path(image_file.name).name)
+                 batch_stats["processed"] += 1
+             else:
+                 batch_stats["errors"] += 1
+         except Exception as e:
+             logger.error(f"Failed on {image_file.name}: {e}")
+             batch_stats["errors"] += 1
+
+     # Exit if nothing processed
+     if not metadata_list:
+         return f"❌ No valid images. Skipped: {batch_stats['skipped']}, Errors: {batch_stats['errors']}", None
+
+     # Generate unique filename
+     timestamp = int(time.time())
+     STATS["total_files"] += batch_stats["processed"]
+     output_file = f"metadata_{timestamp}.jsonl"
+
+     # Save locally
      with open(output_file, 'w', encoding='utf-8') as f:
          for entry in metadata_list:
              f.write(json.dumps(entry, ensure_ascii=False) + '\n')

- # Upload to Hugging Face
+     # Upload to HF
+     upload_status = "not uploaded (no token)"
      if HF_TOKEN:
-         dataset = Dataset.from_dict({
-             "images": [entry.get("file_name") for entry in metadata_list],
-             "metadata": metadata_list
-         })
-         dataset.push_to_hub(f"latterworks/geo-metadata", token=HF_TOKEN)
-         api = HfApi()
-         api.upload_file(
-             path_or_fileobj=output_file,
-             path_in_repo=f"metadata_{int(time.time())}.jsonl",
-             repo_id=f"latterworks/geo-metadata",
-             repo_type="dataset",
-             token=HF_TOKEN
-         )
-         return f"Processed {len(metadata_list)} images. Metadata saved to {output_file} and uploaded to latterworks/geo-metadata", output_file
-     return f"Processed {len(metadata_list)} images. Metadata saved to {output_file} but not uploaded (no token)", output_file
+         try:
+             logger.info(f"Uploading to {HF_USERNAME}/{DATASET_NAME}...")
+
+             # Create dataset object with both filenames and full metadata
+             dataset = Dataset.from_dict({
+                 "filename": filenames,
+                 "metadata": metadata_list
+             })
+
+             # Push to hub
+             dataset.push_to_hub(
+                 f"{HF_USERNAME}/{DATASET_NAME}",
+                 token=HF_TOKEN,
+                 commit_message=f"Added metadata for {len(metadata_list)} images"
+             )
+
+             # Upload raw JSONL file
+             api = HfApi()
+             api.upload_file(
+                 path_or_fileobj=output_file,
+                 path_in_repo=f"batches/metadata_{timestamp}.jsonl",
+                 repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
+                 repo_type="dataset",
+                 token=HF_TOKEN,
+                 commit_message=f"Raw metadata batch {timestamp}"
+             )
+
+             STATS["uploads"] += 1
+             upload_status = "✅ success"
+         except Exception as e:
+             logger.error(f"HF upload failed: {e}")
+             upload_status = f"❌ failed: {str(e)[:100]}..."
+
+     # Return stats with all info
+     result = (
+         f"🔥 BATCH STATS 🔥\n"
+         f"✓ Processed: {batch_stats['processed']} images\n"
+         f"🌍 With GPS: {batch_stats['with_gps']}\n"
+         f"🚫 Skipped: {batch_stats['skipped']}\n"
+         f"⚠️ Errors: {batch_stats['errors']}\n"
+         f"☁️ Upload: {upload_status}\n\n"
+         f"📊 TOTAL STATS 📊\n"
+         f"Total files: {STATS['total_files']}\n"
+         f"Files with GPS: {STATS['files_with_gps']}\n"
+         f"Upload batches: {STATS['uploads']}"
+     )
+
+     return result, output_file

+ # Create the Gradio UI
  demo = gr.Interface(
      fn=process_images,
-     inputs=gr.Files(label="Upload Images", file_types=["image"]),
-     outputs=[gr.Textbox(label="Status"), gr.File(label="Download Metadata")],
-     title="Geo-Metadata Extractor",
-     description=f"Upload images to extract metadata (including GPS) and automatically upload to latterworks/geo-metadata on Hugging Face Hub."
+     inputs=gr.Files(label="DROP IMAGES HERE 📸", file_types=["image"], file_count="multiple"),
+     outputs=[
+         gr.Textbox(label="Status Report", lines=10),
+         gr.File(label="Download Metadata JSONL")
+     ],
+     title="🌍 Geo-Metadata Extractor 🔥",
+     description=(
+         f"Upload images to extract all metadata including GPS coordinates. "
+         f"Supported formats: {', '.join(sorted(ext[1:] for ext in SUPPORTED_EXTENSIONS))}. "
+         f"Data automatically uploads to {HF_USERNAME}/{DATASET_NAME} on Hugging Face."
+     ),
+     allow_flagging="never",
+     theme="huggingface"
  )

+ # Only launch when run directly
  if __name__ == "__main__":
      demo.launch(server_name="0.0.0.0", server_port=7860)
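After the new upload path has run at least once, the pushed rows and the raw JSONL batches can be read back from the Hub. A standalone sketch, assuming the latterworks/geo-metadata dataset is readable with the current credentials:

from datasets import load_dataset
from huggingface_hub import HfApi

repo_id = "latterworks/geo-metadata"

ds = load_dataset(repo_id, split="train")   # columns pushed above: "filename", "metadata"
print(len(ds), ds.column_names)

api = HfApi()
batches = [f for f in api.list_repo_files(repo_id, repo_type="dataset")
           if f.startswith("batches/") and f.endswith(".jsonl")]
print(batches)                              # e.g. ['batches/metadata_<timestamp>.jsonl']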
 
 
 