kevinconka committed on
Commit
7fb47c4
·
1 Parent(s): b3f650d

Refactor decode_blob_data usage in flag_img_input to streamline blob handling and improve code clarity in app.py

Browse files
Files changed (2) hide show
  1. app.py +5 -105
  2. blob_utils.py +166 -0
app.py CHANGED
@@ -7,8 +7,6 @@ Any new model should implement the following functions:
7
 
8
  import os
9
  import glob
10
- import hashlib
11
- import struct
12
 
13
  # import spaces
14
  import gradio as gr
@@ -20,6 +18,7 @@ from utils import (
20
  FlaggedCounter,
21
  )
22
  from flagging import HuggingFaceDatasetSaver
 
23
 
24
  import install_private_repos # noqa: F401
25
  from seavision import load_model
@@ -61,113 +60,14 @@ def inference(image):
61
  return results.draw(image)
62
 
63
 
64
- def decode_blob_data(image_data):
65
- """
66
- Decode blob data from Gradio image component.
67
- Handles blob format and converts to proper image file format.
68
- """
69
- if not isinstance(image_data, dict):
70
- return image_data
71
-
72
- print(f"DEBUG: Original input - image: {image_data}")
73
-
74
- # Check if this is blob data - more comprehensive check
75
- is_blob = (
76
- 'path' in image_data and
77
- 'blob' in image_data['path'] and
78
- image_data.get('size') is None and
79
- image_data.get('orig_name') is None and
80
- image_data.get('mime_type') is None
81
- )
82
-
83
- if is_blob:
84
- print(f"DEBUG: Converting blob data: {image_data}")
85
- print("DEBUG: Detected blob format, converting...")
86
-
87
- blob_path = image_data['path']
88
- print(f"DEBUG: Blob path: {blob_path}")
89
-
90
- # Read the blob file
91
- with open(blob_path, 'rb') as f:
92
- blob_content = f.read()
93
-
94
- file_size = len(blob_content)
95
- print(f"DEBUG: File size: {file_size}")
96
-
97
- # Check file header to determine format
98
- if len(blob_content) >= 8:
99
- header = blob_content[:8].hex()
100
- print(f"DEBUG: File header: {header}")
101
-
102
- # PNG header: 89 50 4E 47 0D 0A 1A 0A
103
- if header.startswith('89504e470d0a1a0a'):
104
- extension = '.png'
105
- mime_type = 'image/png'
106
- # JPEG header: FF D8 FF
107
- elif header.startswith('ffd8ff'):
108
- extension = '.jpg'
109
- mime_type = 'image/jpeg'
110
- # GIF header: 47 49 46 38
111
- elif header.startswith('47494638'):
112
- extension = '.gif'
113
- mime_type = 'image/gif'
114
- else:
115
- # Default to PNG if we can't determine
116
- extension = '.png'
117
- mime_type = 'image/png'
118
- else:
119
- extension = '.png'
120
- mime_type = 'image/png'
121
-
122
- print(f"DEBUG: Detected extension: {extension}, MIME type: {mime_type}")
123
-
124
- # Generate a unique filename
125
- content_hash = hashlib.md5(blob_content).hexdigest()[:8]
126
- new_filename = f"flagged_image_{content_hash}{extension}"
127
- print(f"DEBUG: Generated filename: {new_filename}")
128
-
129
- # Create new path in the same directory
130
- import tempfile
131
- temp_dir = os.path.dirname(blob_path)
132
- new_path = os.path.join(temp_dir, new_filename)
133
- print(f"DEBUG: New path: {new_path}")
134
-
135
- # Write the content to the new file
136
- with open(new_path, 'wb') as f:
137
- f.write(blob_content)
138
-
139
- print(f"DEBUG: Successfully renamed blob to: {new_path}")
140
-
141
- # Update the image data
142
- converted_data = {
143
- 'path': new_path,
144
- 'url': image_data['url'].replace('blob', new_filename),
145
- 'size': file_size,
146
- 'orig_name': new_filename,
147
- 'mime_type': mime_type,
148
- 'is_stream': False,
149
- 'meta': image_data.get('meta', {})
150
- }
151
-
152
- print(f"DEBUG: Converted data: {converted_data}")
153
- return converted_data
154
- else:
155
- print("DEBUG: Not a blob, skipping conversion")
156
-
157
- print(f"DEBUG: Converted image: {image_data}")
158
- return image_data
159
-
160
-
161
  def flag_img_input(
162
  image: gr.Image, flag_option: str = "misdetection", username: str = "anonymous"
163
  ):
164
- """Wrapper for flagging"""
165
- print(f"{image=}, {flag_option=}, {username=}")
166
-
167
  # Decode blob data if necessary
168
- decoded_image = decode_blob_data(image)
169
-
170
- hf_writer.flag([decoded_image], flag_option=flag_option, username=username)
171
 
172
 
173
  # Flagging
 
7
 
8
  import os
9
  import glob
 
 
10
 
11
  # import spaces
12
  import gradio as gr
 
18
  FlaggedCounter,
19
  )
20
  from flagging import HuggingFaceDatasetSaver
21
+ from blob_utils import decode_blob_data, is_blob_data
22
 
23
  import install_private_repos # noqa: F401
24
  from seavision import load_model
 
60
  return results.draw(image)
61
 
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def flag_img_input(
64
  image: gr.Image, flag_option: str = "misdetection", username: str = "anonymous"
65
  ):
66
+ """Wrapper for flagging"""
 
 
67
  # Decode blob data if necessary
68
+ if is_blob_data(image):
69
+ image = decode_blob_data(image)
70
+ hf_writer.flag([image], flag_option=flag_option, username=username)
71
 
72
 
73
  # Flagging
blob_utils.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Blob conversion utilities for Gradio image components.
3
+ Handles conversion of blob data to proper image file formats.
4
+ """
5
+
6
+ import hashlib
7
+ import os
8
+ from typing import Dict, Any
9
+
10
+
11
class BlobConverter:
    """Convert Gradio "blob" image payloads into properly named image files.

    A payload counts as a blob when it is a dict whose ``path`` is a string
    containing ``blob`` and whose ``size``, ``orig_name`` and ``mime_type``
    are all unset. Conversion copies the blob's bytes into a sibling file
    whose name is derived from the content and whose extension is detected
    from the leading bytes.
    """

    # Leading-byte signatures mapped to (extension, MIME type).
    FORMAT_SIGNATURES = {
        b"\x89PNG\r\n\x1a\n": (".png", "image/png"),
        b"\xff\xd8\xff": (".jpg", "image/jpeg"),
        b"GIF87a": (".gif", "image/gif"),
        b"GIF89a": (".gif", "image/gif"),
    }

    # Format assumed when no signature matches the content.
    DEFAULT_FORMAT = (".png", "image/png")

    @classmethod
    def is_blob_data(cls, image_data: Any) -> bool:
        """
        Check if the image data represents a blob that needs conversion.

        Args:
            image_data: Candidate payload; anything that is not a dict is
                reported as "not a blob".

        Returns:
            True if the data is a blob that needs conversion
        """
        if not isinstance(image_data, dict):
            return False

        # A missing, None or non-string path cannot name a blob file; the
        # substring test below would raise TypeError on such values.
        path = image_data.get("path")
        if not isinstance(path, str) or "blob" not in path:
            return False

        # Payloads that already carry file metadata need no conversion.
        return all(
            image_data.get(key) is None
            for key in ("size", "orig_name", "mime_type")
        )

    @classmethod
    def detect_format(cls, content: bytes) -> tuple[str, str]:
        """
        Detect image format from file content.

        Args:
            content: Binary content of the file

        Returns:
            Tuple of (extension, mime_type); ``DEFAULT_FORMAT`` (PNG) when
            no known signature matches, including for empty content.
        """
        for signature, detected in cls.FORMAT_SIGNATURES.items():
            if content.startswith(signature):
                return detected

        return cls.DEFAULT_FORMAT

    @classmethod
    def generate_filename(cls, content: bytes, extension: str) -> str:
        """
        Generate a content-derived filename for the converted blob.

        The name embeds the first 8 hex digits of the content's MD5 digest,
        so identical content always maps to the same filename.

        Args:
            content: Binary content of the file
            extension: File extension to use (including the leading dot)

        Returns:
            Filename of the form ``flagged_image_<hash><extension>``
        """
        content_hash = hashlib.md5(content).hexdigest()[:8]
        return f"flagged_image_{content_hash}{extension}"

    @staticmethod
    def _rename_in_url(url: Any, filename: str) -> Any:
        """Swap the last ``blob`` occurrence in *url* for *filename*.

        Only the final occurrence is replaced so that an earlier "blob"
        (for instance in a host name) is left intact. Values that are not
        strings (such as ``None``) are returned unchanged.
        """
        if not isinstance(url, str):
            return url
        head, marker, tail = url.rpartition("blob")
        return f"{head}{filename}{tail}" if marker else url

    @classmethod
    def convert_blob(cls, image_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert blob data to proper image file format.

        Reads the blob file, detects its format, writes the same bytes to a
        new file in the blob's directory and returns a fresh metadata dict
        describing that file.

        Args:
            image_data: Original image data dictionary

        Returns:
            Updated image data with proper file information, or the very
            same ``image_data`` object when it is not a blob.

        Raises:
            OSError: If the blob cannot be read or the new file cannot be
                written.
        """
        if not cls.is_blob_data(image_data):
            return image_data

        print(f"DEBUG: Converting blob data: {image_data}")

        blob_path = image_data["path"]
        print(f"DEBUG: Blob path: {blob_path}")

        # Read blob content
        with open(blob_path, "rb") as f:
            content = f.read()

        file_size = len(content)
        print(f"DEBUG: File size: {file_size}")

        # Detect format
        extension, mime_type = cls.detect_format(content)
        print(f"DEBUG: Detected format: {extension}, MIME type: {mime_type}")

        # Generate filename and path; the new file lives next to the blob.
        filename = cls.generate_filename(content, extension)
        temp_dir = os.path.dirname(blob_path)
        new_path = os.path.join(temp_dir, filename)

        print(f"DEBUG: Generated filename: {filename}")
        print(f"DEBUG: New path: {new_path}")

        # Write converted file
        with open(new_path, "wb") as f:
            f.write(content)

        print(f"DEBUG: Successfully converted blob to: {new_path}")

        # Return updated image data. The URL is optional: a missing or None
        # URL is passed through as None instead of raising.
        converted_data = {
            "path": new_path,
            "url": cls._rename_in_url(image_data.get("url"), filename),
            "size": file_size,
            "orig_name": filename,
            "mime_type": mime_type,
            "is_stream": False,
            "meta": image_data.get("meta", {}),
        }

        print(f"DEBUG: Converted data: {converted_data}")
        return converted_data
132
+
133
+
134
def decode_blob_data(image_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Decode a Gradio image payload, converting it when it is a blob.

    Thin wrapper around ``BlobConverter.convert_blob`` that also prints
    debug messages before and after the conversion attempt.

    Args:
        image_data: Image data dictionary from Gradio

    Returns:
        Converted image data or original data if not a blob
    """
    print(f"DEBUG: Original input - image: {image_data}")

    converted = BlobConverter.convert_blob(image_data)

    # convert_blob hands back the very same object when nothing was
    # converted, so an identity check tells the two outcomes apart.
    outcome = (
        "DEBUG: Not a blob, skipping conversion"
        if converted is image_data
        else "DEBUG: Blob conversion completed"
    )
    print(outcome)

    return converted
154
+
155
+
156
def is_blob_data(image_data: Dict[str, Any]) -> bool:
    """
    Tell whether a Gradio image payload is a blob that needs conversion.

    Module-level shortcut that delegates to ``BlobConverter.is_blob_data``.

    Args:
        image_data: Dictionary containing image metadata

    Returns:
        True if the data is a blob that needs conversion
    """
    needs_conversion = BlobConverter.is_blob_data(image_data)
    return needs_conversion