latterworks committed on
Commit
34561c5
·
verified ·
1 Parent(s): 603dd41

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +154 -0
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ from PIL import Image, ExifTags
4
+ import json
5
+ import os
6
+ import logging
7
+ from datasets import Dataset
8
+ from huggingface_hub import HfApi
9
+
10
# Root logging: INFO and above, with a timestamp and level in every record.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Hub destination. The token comes from the environment and is None when the
# HF_TOKEN variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
HF_USERNAME = "latterworks"
DATASET_NAME = "geo-metadata"

# Lower-case file suffixes that are accepted for metadata extraction.
SUPPORTED_EXTENSIONS = {
    '.jpg',
    '.jpeg',
    '.png',
    '.heic',
    '.tiff',
    '.bmp',
    '.webp',
}
17
+
18
def convert_to_degrees(value):
    """Turn a (degrees, minutes, seconds) triple into decimal degrees.

    Args:
        value: A tuple or list of exactly three items, each convertible
            with ``float()``.

    Returns:
        The decimal-degree value as a float, or ``None`` when the input is
        not a 3-item tuple/list, a component cannot be converted, or the
        result falls outside ``[-180, 180]``. Every failure is logged.
    """
    try:
        is_triplet = isinstance(value, (tuple, list)) and len(value) == 3
        if not is_triplet:
            raise ValueError("GPS value must be a tuple of 3 elements")

        whole, minutes, seconds = (float(part) for part in value)
        degrees = whole + (minutes / 60.0) + (seconds / 3600.0)

        # Written as a positive range test so that NaN (which fails every
        # comparison) is rejected as well.
        in_range = -180 <= degrees <= 180
        if in_range:
            return degrees
        raise ValueError("GPS degrees out of valid range")
    except Exception as e:
        logger.error(f"Failed to convert GPS coordinates: {e}")
        return None
30
+
31
def extract_gps_info(gps_info):
    """Translate a raw GPS tag mapping into named fields plus decimal coordinates.

    Args:
        gps_info: Mapping of numeric GPS tag ids to raw values.

    Returns:
        A dict keyed by tag name (``unknown_gps_tag_<id>`` for ids missing
        from ``ExifTags.GPSTAGS``). When both ``GPSLatitude`` and
        ``GPSLongitude`` are present, signed ``Latitude`` and ``Longitude``
        entries are added. Returns ``None`` if the input is not a dict, a
        coordinate cannot be converted, or any other error occurs.
    """
    if not isinstance(gps_info, dict):
        logger.warning("GPSInfo is not a dictionary, skipping")
        return None

    try:
        gps_data = {
            ExifTags.GPSTAGS.get(tag_id, f"unknown_gps_tag_{tag_id}"): raw
            for tag_id, raw in gps_info.items()
        }

        has_coordinates = 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data
        if not has_coordinates:
            return gps_data

        latitude = convert_to_degrees(gps_data['GPSLatitude'])
        longitude = convert_to_degrees(gps_data['GPSLongitude'])
        if latitude is None or longitude is None:
            logger.error("Failed to convert latitude or longitude")
            return None

        lat_ref = gps_data.get('GPSLatitudeRef', 'N')
        lon_ref = gps_data.get('GPSLongitudeRef', 'E')
        refs_valid = lat_ref in {'N', 'S'} and lon_ref in {'E', 'W'}
        if refs_valid:
            # Southern and western hemispheres are stored as negative values.
            if lat_ref == 'S':
                latitude = -latitude
            if lon_ref == 'W':
                longitude = -longitude
        else:
            # Unrecognised references: keep the unsigned values and warn.
            logger.warning(f"Invalid GPS reference: {lat_ref}, {lon_ref}")

        gps_data['Latitude'] = latitude
        gps_data['Longitude'] = longitude
        return gps_data
    except Exception as e:
        logger.error(f"Error extracting GPS info: {e}")
        return None
61
+
62
def make_serializable(value):
    """Recursively convert a metadata value into something ``json.dumps`` accepts.

    Conversion rules, applied in order:

    * ``int`` (including ``bool``) is returned unchanged.
    * Objects exposing ``numerator`` and ``denominator`` (rational-like
      values) become a float quotient.
    * Tuples and lists become lists of converted items.
    * Dicts get string keys and converted values.
    * ``bytes`` are decoded as UTF-8 with undecodable bytes replaced.
    * Anything else is returned as-is if ``json.dumps`` accepts it.

    Args:
        value: Arbitrary metadata value.

    Returns:
        A JSON-serialisable equivalent. If any step raises (for example a
        zero denominator or an unserialisable object), a warning is logged
        and ``str(value)`` is returned instead.
    """
    try:
        # Plain integers also expose numerator/denominator, so without this
        # guard every int would be coerced to a float by the rational
        # branch below. bool is an int subclass and is preserved too.
        if isinstance(value, int):
            return value
        if hasattr(value, 'numerator') and hasattr(value, 'denominator'):
            return float(value.numerator) / float(value.denominator)
        if isinstance(value, (tuple, list)):
            return [make_serializable(item) for item in value]
        if isinstance(value, dict):
            return {str(k): make_serializable(v) for k, v in value.items()}
        if isinstance(value, bytes):
            return value.decode('utf-8', errors='replace')
        # Probe only: raises TypeError/ValueError for unsupported objects.
        json.dumps(value)
        return value
    except Exception as e:
        logger.warning(f"Converting to string due to serialization failure: {e}")
        return str(value)
77
+
78
def get_image_metadata(image_path):
    """Collect basic properties, EXIF tags and file stats for one image.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        A dict that always contains ``file_name`` (absolute path). On
        success it also holds ``format``, ``size``, ``mode``, one entry per
        EXIF tag (lower-cased tag name, or ``tag_<id>`` when unknown),
        ``gps_info`` when GPS data could be extracted, ``file_size`` and
        ``file_extension``. Problems reading EXIF are recorded under
        ``exif_error``, and per-tag failures under ``error_tag_<id>``. If
        processing fails as a whole, the dict holds only ``file_name`` and
        ``error``.
    """
    absolute_name = str(Path(image_path).absolute())
    metadata = {"file_name": absolute_name}
    try:
        with Image.open(image_path) as image:
            metadata["format"] = image.format or "unknown"
            metadata["size"] = list(image.size)
            metadata["mode"] = image.mode or "unknown"

            raw_exif = None
            try:
                raw_exif = image._getexif()
            except AttributeError:
                # The opened image object offers no _getexif method.
                metadata["exif_error"] = "No EXIF data available"
            except Exception as exc:
                metadata["exif_error"] = f"EXIF extraction failed: {exc!s}"

            usable = bool(raw_exif) and isinstance(raw_exif, dict)
            exif_items = raw_exif.items() if usable else ()
            for tag_id, raw_value in exif_items:
                try:
                    label = ExifTags.TAGS.get(tag_id, f"tag_{tag_id}").lower()
                    if label != "gpsinfo":
                        metadata[label] = make_serializable(raw_value)
                        continue
                    # The GPS block gets its own decoding; it is dropped
                    # when nothing usable comes back.
                    gps = extract_gps_info(raw_value)
                    if gps:
                        metadata["gps_info"] = make_serializable(gps)
                except Exception as exc:
                    metadata[f"error_tag_{tag_id}"] = str(exc)

        metadata["file_size"] = os.path.getsize(image_path)
        metadata["file_extension"] = Path(image_path).suffix.lower()
        # Fail here, not later during export, if anything unserialisable
        # slipped through.
        json.dumps(metadata)
        return metadata
    except Exception as e:
        logger.error(f"Error processing {image_path}: {e}")
        return {"file_name": absolute_name, "error": str(e)}
113
+
114
def process_images(image_files):
    """Extract metadata from uploaded images, write it to JSONL and optionally upload it.

    Args:
        image_files: Iterable of uploads, or ``None``. Each item may be a
            path (``str`` / ``os.PathLike``) or an object whose ``.name``
            attribute is the file path. Falsy items and files whose suffix
            is not in ``SUPPORTED_EXTENSIONS`` are skipped.

    Returns:
        A ``(status_message, output_file)`` tuple, where ``output_file`` is
        the path of the JSONL file written to the working directory.

    The upload to the Hub is attempted only when ``HF_TOKEN`` is set and at
    least one image was processed. An upload failure is logged and reported
    in the status message instead of being raised, so the local JSONL file
    is still returned.
    """
    metadata_list = []
    # `or []` covers a None argument (nothing selected in the UI).
    for image_file in image_files or []:
        if not image_file:
            continue
        # Accept both plain paths and wrapper objects exposing `.name`.
        # NOTE(review): which shape Gradio delivers depends on its version
        # and the component's `type` setting; confirm against the deployed one.
        if isinstance(image_file, (str, os.PathLike)):
            image_path = os.fspath(image_file)
        else:
            image_path = image_file.name
        if Path(image_path).suffix.lower() not in SUPPORTED_EXTENSIONS:
            continue
        logger.info(f"Processing: {image_path}")
        metadata = get_image_metadata(image_path)
        if metadata:
            metadata_list.append(metadata)

    output_file = "metadata.jsonl"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(
            json.dumps(entry, ensure_ascii=False) + '\n' for entry in metadata_list
        )

    status = f"Processed {len(metadata_list)} images. Metadata saved to {output_file}"

    # Without a token there is nowhere to upload; with no records, pushing
    # would only replace the Hub dataset with an empty one.
    if not HF_TOKEN or not metadata_list:
        return status, output_file

    repo_id = f"{HF_USERNAME}/{DATASET_NAME}"
    try:
        dataset = Dataset.from_dict({
            "images": [entry.get("file_name") for entry in metadata_list],
            "metadata": metadata_list,
        })
        dataset.push_to_hub(repo_id, token=HF_TOKEN)
        HfApi().upload_file(
            path_or_fileobj=output_file,
            path_in_repo="metadata.jsonl",
            repo_id=repo_id,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except Exception as e:
        # The local file is already written; report the failure instead of
        # discarding the user's result.
        logger.exception("Upload to %s failed", repo_id)
        return f"{status}, but upload to {repo_id} failed: {e}", output_file

    return f"{status} and uploaded to {repo_id}", output_file
144
+
145
# UI components, created in the same order as they appear in the interface.
_image_upload = gr.Files(label="Upload Images", file_types=["image"])
_status_box = gr.Textbox(label="Status")
_metadata_download = gr.File(label="Download Metadata")

# Single-function interface: uploaded files in, status text and the JSONL
# file out.
demo = gr.Interface(
    fn=process_images,
    inputs=_image_upload,
    outputs=[_status_box, _metadata_download],
    title="Geo-Metadata Extractor",
    description="Upload images to extract metadata (including GPS) and upload to Hugging Face Hub.",
)

if __name__ == "__main__":
    # Listen on all interfaces on port 7860 when run as a script.
    demo.launch(server_name="0.0.0.0", server_port=7860)