File size: 2,425 Bytes
a481416
8b5815c
eff70bd
 
a481416
 
8b5815c
a481416
8b5815c
eff70bd
 
a481416
2bcb746
8b5815c
e8bd3b9
 
2bcb746
e8bd3b9
 
2bcb746
 
 
 
 
 
 
 
eff70bd
 
2bcb746
8b5815c
e8bd3b9
2bcb746
8b5815c
2bcb746
 
 
 
 
 
 
 
6a56695
2bcb746
 
eff70bd
a481416
2bcb746
eff70bd
 
8c50e18
e8bd3b9
eff70bd
 
a481416
eff70bd
 
 
 
 
2bcb746
 
eff70bd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import gradio as gr
from PIL import Image, ImageEnhance, ImageOps
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from datetime import datetime
import pytz
import re

# Load the OCR processor and model once at import time; detect_weight reuses
# these module-level objects on every call.
# NOTE(review): "stage1" TrOCR checkpoints appear to be published as
# pre-trained only; a fine-tuned variant may read digits better -- confirm
# against the model card before changing.
TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"
processor = TrOCRProcessor.from_pretrained(TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(TROCR_CHECKPOINT)

# Preprocess image to enhance OCR accuracy
def enhance_image(image):
    """Return a high-contrast, enlarged RGB copy of *image* for the OCR model.

    Steps, in order: grayscale conversion, inversion, contrast boost (x2.5),
    sharpness boost (x3.0), 3x enlargement in both dimensions, and conversion
    back to RGB.

    Args:
        image: PIL image to preprocess.

    Returns:
        A new RGB PIL image three times the width and height of the input.
    """
    grayscale = image.convert("L")
    inverted = ImageOps.invert(grayscale)

    # Exaggerate the difference between digits and background.
    contrasted = ImageEnhance.Contrast(inverted).enhance(2.5)
    sharpened = ImageEnhance.Sharpness(contrasted).enhance(3.0)

    # Enlarge so the digits cover more pixels.
    enlarged_size = (sharpened.width * 3, sharpened.height * 3)
    enlarged = sharpened.resize(enlarged_size)

    # The model pipeline is fed RGB input.
    return enlarged.convert("RGB")

# Extract accurate decimal weight
def detect_weight(image):
    """Read the weight shown in a scale photo and report it with an IST timestamp.

    Args:
        image: PIL image to read, or ``None`` (e.g. the form was submitted
            without a picture).

    Returns:
        A ``(message, image)`` tuple. ``message`` holds the detected weight
        and the capture time, or a string starting with ``"Error:"`` when the
        image is missing or processing fails. ``image`` is the unmodified
        input, returned so the UI can display it as the snapshot.
    """
    # Answer a missing image with a readable message instead of surfacing the
    # AttributeError text that preprocessing would otherwise produce.
    if image is None:
        return "Error: No image provided. Please upload or capture an image.", None

    # This function is the UI boundary, so any failure is reported as text
    # rather than raised.
    try:
        # Enhance image
        processed_image = enhance_image(image)

        # OCR using Hugging Face
        pixel_values = processor(images=processed_image, return_tensors="pt").pixel_values
        generated_ids = model.generate(
            pixel_values,
            max_length=64,
            num_beams=4,         # Beam search to improve precision
            early_stopping=True
        )
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()

        # First number in the OCR text, with an optional decimal part
        # (e.g. 52.75 or 18.89).
        match = re.search(r"(\d{1,4}(?:\.\d{1,4})?)", generated_text)
        # Only attach the unit when a number was actually found; otherwise
        # the message would read "Not detected kg".
        if match:
            weight_line = f"Weight: {match.group(1)} kg"
        else:
            weight_line = "Weight: Not detected"

        # Timestamp in IST
        ist = pytz.timezone('Asia/Kolkata')
        current_time = datetime.now(ist).strftime("%Y-%m-%d %H:%M:%S")

        return f"{weight_line}\nCaptured At: {current_time} (IST)", image
    except Exception as e:
        return f"Error: {str(e)}", image

# Gradio UI: one image in, a text summary plus the echoed snapshot out.
image_input = gr.Image(type="pil", label="Upload or Capture Image")
result_outputs = [
    gr.Textbox(label="Weight Info"),
    gr.Image(label="Snapshot"),
]

interface = gr.Interface(
    fn=detect_weight,
    inputs=image_input,
    outputs=result_outputs,
    title="⚖️ Auto Weight Detector (Decimal Accurate)",
    description="Detects full weight including decimals (e.g., 52.75 kg) from digital scale image using Hugging Face OCR.",
)

# Start the web app as soon as the module is executed.
interface.launch()