Upload app.py with huggingface_hub
app.py
ADDED
@@ -0,0 +1,154 @@
import gradio as gr
from pathlib import Path
from PIL import Image, ExifTags
import json
import os
import logging
from datasets import Dataset
from huggingface_hub import HfApi

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

HF_TOKEN = os.environ.get("HF_TOKEN")
HF_USERNAME = "latterworks"
DATASET_NAME = "geo-metadata"
SUPPORTED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.tiff', '.bmp', '.webp'}
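
# Convert an EXIF GPS coordinate given as a (degrees, minutes, seconds) triple into
# decimal degrees, e.g. (40, 26, 46.3) -> 40.446..., returning None on malformed input.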
def convert_to_degrees(value):
    try:
        if not isinstance(value, (tuple, list)) or len(value) != 3:
            raise ValueError("GPS value must be a tuple of 3 elements")
        d, m, s = value
        degrees = float(d) + (float(m) / 60.0) + (float(s) / 3600.0)
        if not -180 <= degrees <= 180:
            raise ValueError("GPS degrees out of valid range")
        return degrees
    except Exception as e:
        logger.error(f"Failed to convert GPS coordinates: {e}")
        return None
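
# Map numeric EXIF GPSInfo keys to readable tag names and, when both coordinates are
# present, add signed decimal 'Latitude'/'Longitude' entries (negative for S and W).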
def extract_gps_info(gps_info):
    if not isinstance(gps_info, dict):
        logger.warning("GPSInfo is not a dictionary, skipping")
        return None
    gps_data = {}
    try:
        for key, val in gps_info.items():
            tag_name = ExifTags.GPSTAGS.get(key, f"unknown_gps_tag_{key}")
            gps_data[tag_name] = val
        if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data:
            lat = convert_to_degrees(gps_data['GPSLatitude'])
            lon = convert_to_degrees(gps_data['GPSLongitude'])
            if lat is None or lon is None:
                logger.error("Failed to convert latitude or longitude")
                return None
            lat_ref = gps_data.get('GPSLatitudeRef', 'N')
            lon_ref = gps_data.get('GPSLongitudeRef', 'E')
            if lat_ref not in {'N', 'S'} or lon_ref not in {'E', 'W'}:
                logger.warning(f"Invalid GPS reference: {lat_ref}, {lon_ref}")
            else:
                if lat_ref == 'S':
                    lat = -lat
                if lon_ref == 'W':
                    lon = -lon
            gps_data['Latitude'] = lat
            gps_data['Longitude'] = lon
        return gps_data
    except Exception as e:
        logger.error(f"Error extracting GPS info: {e}")
        return None
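
# Recursively coerce EXIF values (rationals, bytes, tuples, nested dicts) into
# JSON-serializable types, falling back to str() if a value still cannot be dumped.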
def make_serializable(value):
    try:
        if hasattr(value, 'numerator') and hasattr(value, 'denominator'):
            return float(value.numerator) / float(value.denominator)
        elif isinstance(value, (tuple, list)):
            return [make_serializable(item) for item in value]
        elif isinstance(value, dict):
            return {str(k): make_serializable(v) for k, v in value.items()}
        elif isinstance(value, bytes):
            return value.decode('utf-8', errors='replace')
        json.dumps(value)
        return value
    except Exception as e:
        logger.warning(f"Converting to string due to serialization failure: {e}")
        return str(value)
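
# Build a flat, JSON-serializable metadata dict for one image: basic PIL attributes,
# EXIF tags (GPS handled separately), plus file size and extension.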
def get_image_metadata(image_path):
    metadata = {"file_name": str(Path(image_path).absolute())}
    try:
        with Image.open(image_path) as image:
            metadata.update({
                "format": image.format or "unknown",
                "size": list(image.size),
                "mode": image.mode or "unknown"
            })
            exif_data = None
            try:
                exif_data = image._getexif()
            except AttributeError:
                metadata["exif_error"] = "No EXIF data available"
            except Exception as e:
                metadata["exif_error"] = f"EXIF extraction failed: {str(e)}"
            if exif_data and isinstance(exif_data, dict):
                for tag_id, value in exif_data.items():
                    try:
                        tag_name = ExifTags.TAGS.get(tag_id, f"tag_{tag_id}").lower()
                        if tag_name == "gpsinfo":
                            gps_info = extract_gps_info(value)
                            if gps_info:
                                metadata["gps_info"] = make_serializable(gps_info)
                        else:
                            metadata[tag_name] = make_serializable(value)
                    except Exception as e:
                        metadata[f"error_tag_{tag_id}"] = str(e)
        metadata["file_size"] = os.path.getsize(image_path)
        metadata["file_extension"] = Path(image_path).suffix.lower()
        json.dumps(metadata)
        return metadata
    except Exception as e:
        logger.error(f"Error processing {image_path}: {e}")
        return {"file_name": str(Path(image_path).absolute()), "error": str(e)}
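
# Gradio callback: extract metadata for every supported upload, write it to
# metadata.jsonl, and optionally push the dataset and the raw file to the Hub.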
def process_images(image_files):
    # gr.Files passes None when nothing is selected; return early instead of crashing.
    if not image_files:
        return "No images uploaded.", None
    metadata_list = []
    for image_file in image_files:
        if not image_file:
            continue
        # Depending on the Gradio version, uploads arrive as plain paths or tempfile wrappers.
        file_path = image_file if isinstance(image_file, str) else image_file.name
        if Path(file_path).suffix.lower() in SUPPORTED_EXTENSIONS:
            logger.info(f"Processing: {file_path}")
            metadata = get_image_metadata(file_path)
            if metadata:
                metadata_list.append(metadata)
    output_file = "metadata.jsonl"
    with open(output_file, 'w', encoding='utf-8') as f:
        for entry in metadata_list:
            f.write(json.dumps(entry, ensure_ascii=False) + '\n')

    # Upload to Hugging Face
    if HF_TOKEN:
        dataset = Dataset.from_dict({
            "images": [entry.get("file_name") for entry in metadata_list],
            "metadata": metadata_list
        })
        dataset.push_to_hub(f"{HF_USERNAME}/{DATASET_NAME}", token=HF_TOKEN)
        api = HfApi()
        api.upload_file(
            path_or_fileobj=output_file,
            path_in_repo="metadata.jsonl",
            repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
            repo_type="dataset",
            token=HF_TOKEN
        )
        return f"Processed {len(metadata_list)} images. Metadata saved to {output_file} and uploaded to {HF_USERNAME}/{DATASET_NAME}", output_file
    return f"Processed {len(metadata_list)} images. Metadata saved to {output_file}", output_file
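
# Multi-file upload in; status text and the generated JSONL file out.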
demo = gr.Interface(
    fn=process_images,
    inputs=gr.Files(label="Upload Images", file_types=["image"]),
    outputs=[gr.Textbox(label="Status"), gr.File(label="Download Metadata")],
    title="Geo-Metadata Extractor",
    description="Upload images to extract metadata (including GPS) and upload to Hugging Face Hub."
)
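
# Bind to all interfaces on port 7860, the port Hugging Face Spaces expects.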
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)