Spaces:
Sleeping
Sleeping
File size: 10,201 Bytes
975f9c6 5234a64 0bb13f0 5234a64 0f29b7c 975f9c6 2b694be 5234a64 7c31f9a 0f29b7c 12c2109 0f29b7c 7c31f9a 2b694be 12c2109 0f29b7c 2b694be 7c31f9a 12c2109 7c31f9a 0f29b7c 2b694be fcdea18 12c2109 7c31f9a 12c2109 7c31f9a 2b694be 0f29b7c 12c2109 7c31f9a 12c2109 7c31f9a 12c2109 0f29b7c 2b694be 0f29b7c 2b694be 0f29b7c 2b694be 0f29b7c fcdea18 12c2109 0f29b7c fcdea18 12c2109 5234a64 12c2109 7c31f9a 5234a64 7c31f9a 2b694be 5234a64 7c31f9a 5234a64 7c31f9a 5234a64 2b694be fcdea18 975f9c6 5234a64 0f29b7c 12c2109 975f9c6 2b694be 12c2109 2b694be 975f9c6 8ccdb60 2b694be 7c31f9a 12c2109 2b694be 12c2109 7c31f9a 0f29b7c 2b694be 975f9c6 8ccdb60 5234a64 385a153 975f9c6 2154cf1 975f9c6 5234a64 975f9c6 2b694be 975f9c6 385a153 975f9c6 5234a64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
import easyocr
import numpy as np
import cv2
import re
import logging
# Set up logging for debugging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Initialize EasyOCR once at import time: English only, CPU inference.
# NOTE(review): easyocr.Reader may download model weights on first use —
# confirm the deployment environment has network access or a model cache.
easyocr_reader = easyocr.Reader(['en'], gpu=False)
def estimate_brightness(img):
    """Return the mean grayscale intensity of a BGR image.

    Used by the other preprocessing steps to decide whether the photo
    contains a brightly illuminated display, which drives threshold and
    confidence selection.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return np.mean(grayscale)
def detect_roi(img):
    """Crop the image down to the likely digital-display region.

    Thresholds bright pixels (stricter when the overall scene is bright),
    dilates heavily to merge the digit strokes into one blob, then picks
    the largest contour whose size and aspect ratio look like a display.
    Falls back to the full image when no candidate matches or on any error.
    """
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Brighter scenes get a stricter threshold so glare does not flood the mask.
        level = 230 if estimate_brightness(img) > 100 else 190
        _, mask = cv2.threshold(gray, level, 255, cv2.THRESH_BINARY)
        # Heavy dilation joins individual seven-segment strokes together.
        mask = cv2.dilate(mask, np.ones((9, 9), np.uint8), iterations=3)
        found, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        candidates = [c for c in found if cv2.contourArea(c) > 500]
        for cand in sorted(candidates, key=cv2.contourArea, reverse=True):
            x, y, w, h = cv2.boundingRect(cand)
            if not (1.5 <= w / h <= 4.0 and w > 50 and h > 30):
                continue
            # Pad the crop by 40px on each side, clamped to the image bounds.
            x, y = max(0, x - 40), max(0, y - 40)
            w = min(w + 80, img.shape[1] - x)
            h = min(h + 80, img.shape[0] - y)
            return img[y:y + h, x:x + w]
        return img
    except Exception as e:
        logging.error(f"ROI detection failed: {str(e)}")
        return img
def correct_seven_segment(text, bbox, img):
    """Disambiguate seven-segment "2" vs "6" misreads using pixel evidence.

    EasyOCR frequently confuses 2 and 6 on seven-segment displays. When the
    recognized text contains either digit, the bounding box is cropped from
    *img*, Otsu-thresholded, and the lit-pixel mass of the upper vs lower
    half is compared: a "6" carries extra mass in its lower closed loop,
    while a "2" is roughly balanced. All 2s or all 6s in *text* are swapped
    accordingly.

    Args:
        text: Raw OCR string for one detection.
        bbox: Four (x, y) corner points as returned by EasyOCR.
        img:  BGR image that *bbox* refers to (the ROI given to the reader).

    Returns:
        The possibly corrected text; the input unchanged for degenerate
        boxes or empty crops.
    """
    if "2" not in text and "6" not in text:
        return text
    # Take min/max over ALL four corners. The previous pairwise scheme
    # (min(x1, x4), max(x2, x3), ...) assumed a perfectly axis-aligned
    # point order and produced wrong crops for slightly rotated
    # quadrilateral boxes that EasyOCR can return.
    xs = [p[0] for p in bbox]
    ys = [p[1] for p in bbox]
    x_min, y_min = max(0, int(min(xs))), max(0, int(min(ys)))
    x_max = min(img.shape[1], int(max(xs)))
    y_max = min(img.shape[0], int(max(ys)))
    if x_max <= x_min or y_max <= y_min:
        return text
    digit_area = img[y_min:y_max, x_min:x_max]
    if digit_area.size == 0:
        return text
    gray = cv2.cvtColor(digit_area, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    h, w = thresh.shape
    upper_pixels = np.sum(thresh[:h // 2, :] == 255)
    lower_pixels = np.sum(thresh[h // 2:, :] == 255)
    # A "6" has markedly more lit pixels in the lower half (closed loop);
    # otherwise assume the balanced glyph was really a "2".
    if lower_pixels > upper_pixels * 1.5:
        text = text.replace("2", "6")
    else:
        text = text.replace("6", "2")
    return text
def enhance_image(img, mode="standard"):
    """Preprocess *img* for OCR using one of several enhancement recipes.

    Modes:
        "seven_segment" / "minimal": Otsu binarization only (currently
            identical recipes) — best for crisp LED/LCD digits.
        "raw": grayscale conversion only; no thresholding or resizing.
        "high_contrast" / "low_noise" / anything else ("standard"):
            bilateral denoise + CLAHE + adaptive threshold + morphological
            closing + brightness-adaptive sharpening + dynamic resize.

    Returns the processed single-channel image, or the unmodified input
    if any step raises.
    """
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # The simple modes return immediately, so everything below runs
        # only for the denoise/CLAHE pipeline. (The old repeated
        # `mode not in ["seven_segment", "minimal", "raw"]` guards were
        # always true past this point and have been removed.)
        if mode in ("seven_segment", "minimal"):
            _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            return thresh
        if mode == "raw":
            return gray

        # Per-mode denoise strength and CLAHE clip limit.
        if mode == "high_contrast":
            d, sigma, clip = 11, 100, 3.0
        elif mode == "low_noise":
            d, sigma, clip = 7, 50, 1.5
        else:  # "standard" and any unrecognized mode
            d, sigma, clip = 9, 75, 2.0
        denoised = cv2.bilateralFilter(gray, d=d, sigmaColor=sigma, sigmaSpace=sigma)
        clahe = cv2.createCLAHE(clipLimit=clip, tileGridSize=(8, 8))
        thresh = clahe.apply(denoised)

        thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY, 11, 2)
        kernel = np.ones((3, 3), np.uint8)
        morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)

        # Sharpen harder on dim images, where edges are softer.
        brightness = estimate_brightness(img)
        sharpen_strength = 3 if brightness > 100 else 5
        sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
        morphed = cv2.filter2D(morphed, -1, sharpen_kernel)

        # Upscale small crops (cap 2x) toward 800px for better OCR; images
        # already >= 300px on their long side are only ever downscaled.
        h, w = morphed.shape
        target_size = 800
        longest = max(h, w)
        scale_factor = min(target_size / longest, 2.0 if longest < 300 else 1.0)
        if scale_factor != 1.0:
            morphed = cv2.resize(morphed, None, fx=scale_factor, fy=scale_factor,
                                 interpolation=cv2.INTER_CUBIC if scale_factor > 1 else cv2.INTER_AREA)
        return morphed
    except Exception as e:
        logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
        return img
def _clean_ocr_text(text):
    """Normalize one raw OCR string into a candidate numeric string.

    Lowercases, fixes decimal-point confusions, strips unit suffixes, maps
    common seven-segment letter misreads to digits, and finally removes
    every character that is not a digit or '.'.
    """
    text = text.lower().strip()
    # Punctuation OCR confuses with the decimal point.
    text = text.replace(",", ".").replace(";", ".")
    # Strip unit suffixes BEFORE letter->digit mapping. The old order
    # mapped g->9 first, turning "25kg" into "25k9" and then "259" after
    # the "k" was removed — corrupting the reading.
    text = text.replace("kgs", "").replace("kg", "").replace("k", "")
    # Letters commonly misread for digits on LED/LCD displays. The text is
    # already lowercased, so the old uppercase replacements were dead code.
    text = text.translate(str.maketrans({
        "o": "0", "s": "5", "g": "9", "l": "1",
        "b": "8", "z": "2", "q": "9",
    }))
    return re.sub(r"[^\d\.]", "", text)


def _format_weight(text):
    """Strip leading zeros and trailing decimal zeros from a weight string."""
    if "." in text:
        int_part, dec_part = text.split(".")
        int_part = int_part.lstrip("0") or "0"
        dec_part = dec_part.rstrip("0")
        # Avoid a dangling "12." when the fraction was all zeros.
        return f"{int_part}.{dec_part}" if dec_part else int_part
    return text.lstrip("0") or "0"


def extract_weight_from_image(pil_img):
    """Read a weight value from a photo of a digital scale display.

    Runs EasyOCR over several preprocessed variants of the ROI-cropped
    image, cleans each candidate string, scores it by OCR confidence and
    plausibility, and returns the best formatted reading.

    Args:
        pil_img: PIL image (RGB) of the scale.

    Returns:
        Tuple of (weight_string, confidence_percent);
        ("Not detected", 0.0) when nothing passes the confidence
        threshold or on any error.
    """
    try:
        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        # Bright (illuminated) displays are easier to read, so demand a
        # higher OCR confidence before accepting a candidate.
        brightness = estimate_brightness(img)
        conf_threshold = 0.8 if brightness > 100 else 0.6
        roi_img = detect_roi(img)

        # Digit-only modes constrain the recognizer with an allowlist.
        digit_params = {'contrast_ths': 0.15, 'adjust_contrast': 0.7,
                        'text_threshold': 0.8, 'allowlist': '0123456789.'}
        general_params = {'contrast_ths': 0.1, 'adjust_contrast': 0.5,
                          'text_threshold': 0.7}
        images_to_process = [
            ("seven_segment", enhance_image(roi_img, mode="seven_segment"), digit_params),
            ("minimal", enhance_image(roi_img, mode="minimal"), digit_params),
            ("raw", enhance_image(roi_img, mode="raw"), digit_params),
            ("standard", enhance_image(roi_img, mode="standard"), general_params),
            ("high_contrast", enhance_image(roi_img, mode="high_contrast"), general_params),
            ("low_noise", enhance_image(roi_img, mode="low_noise"), general_params),
        ]

        best_weight = None
        best_conf = 0.0
        best_score = 0.0
        for mode, proc_img, ocr_params in images_to_process:
            results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
            for (bbox, text, conf) in results:
                text = correct_seven_segment(text, bbox, roi_img)
                text = _clean_ocr_text(text)
                # Accept 0–9999 with up to three decimal places.
                if not re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
                    continue
                try:
                    weight = float(text)
                except ValueError:
                    continue
                # Prefer plausible scale readings (0.1–500 kg) and favor
                # two-digit values, the most common on personal scales.
                range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
                digit_score = 1.5 if 10 <= weight < 100 else 1.0
                score = conf * range_score * digit_score
                if score > best_score and conf > conf_threshold:
                    best_weight, best_conf, best_score = text, conf, score

        if not best_weight:
            logging.info("No valid weight detected")
            return "Not detected", 0.0
        return _format_weight(best_weight), round(best_conf * 100, 2)
    except Exception as e:
        logging.error(f"Weight extraction failed: {str(e)}")
        return "Not detected", 0.0