File size: 2,170 Bytes
a481416
8b5815c
eff70bd
 
a481416
 
8b5815c
a481416
8b5815c
eff70bd
 
a481416
8b5815c
 
e8bd3b9
 
 
 
 
 
 
 
 
 
 
8b5815c
 
 
eff70bd
 
8b5815c
e8bd3b9
 
8b5815c
eff70bd
 
6a56695
e8bd3b9
8b5815c
eff70bd
a481416
e8bd3b9
eff70bd
 
8c50e18
e8bd3b9
eff70bd
 
a481416
eff70bd
 
 
 
 
 
e8bd3b9
eff70bd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gradio as gr
from PIL import Image, ImageEnhance, ImageOps
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from datetime import datetime
import pytz
import re

# Load the TrOCR processor and model once at import time; both are built from
# the same checkpoint and are reused by detect_weight on every call.
# NOTE(review): the "stage1" checkpoints are described upstream as pre-trained
# only -- confirm this is the intended checkpoint for reading scale displays.
TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"
processor = TrOCRProcessor.from_pretrained(TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(TROCR_CHECKPOINT)

# Enhance image before OCR
def enhance_image(image):
    """Return a preprocessed copy of *image* intended to be easier to OCR.

    The pipeline is: grayscale -> invert -> contrast x2 -> sharpness x2 ->
    upscale to twice the width and height -> convert to RGB.

    Args:
        image: A PIL image (anything supporting ``convert``).

    Returns:
        A new RGB PIL image with doubled dimensions.
    """
    grayscale = image.convert("L")

    # The original author's rationale: light text on a dark background is
    # assumed to work better for the OCR model, hence the inversion.
    inverted = ImageOps.invert(grayscale)

    # Boost contrast first, then sharpness, each by a factor of 2.0.
    high_contrast = ImageEnhance.Contrast(inverted).enhance(2.0)
    sharpened = ImageEnhance.Sharpness(high_contrast).enhance(2.0)

    # Double both dimensions; no filter is passed, so Pillow's default
    # resampling is used.
    doubled_size = (sharpened.width * 2, sharpened.height * 2)
    upscaled = sharpened.resize(doubled_size)

    # The image is handed back as RGB for model compatibility.
    return upscaled.convert("RGB")

# Extract weight
def detect_weight(image):
    """Read a weight value from a scale image and report it with an IST timestamp.

    The image is preprocessed with ``enhance_image``, run through the
    module-level TrOCR ``processor``/``model``, and the first number in the
    decoded text (up to four integer digits, optionally followed by one or two
    decimals) is taken as the weight.

    Args:
        image: The PIL image supplied by the UI, or ``None`` when nothing was
            provided.

    Returns:
        A ``(message, image)`` tuple. ``message`` is one of:
          * ``"Weight: <n> kg\\nCaptured At: <timestamp> (IST)"`` on success,
          * ``"Weight: Not detected\\nCaptured At: <timestamp> (IST)"`` when the
            OCR text contains no number,
          * ``"Error: ..."`` when no image was given or processing failed.
        ``image`` is the untouched input, echoed back as the snapshot.
    """
    # Guard explicitly so a missing upload yields a readable message instead
    # of an AttributeError raised deep inside the preprocessing step.
    if image is None:
        return "Error: no image provided", None

    try:
        processed_image = enhance_image(image)

        # Run the OCR model on the preprocessed image.
        pixel_values = processor(images=processed_image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        # Take the first number in the OCR output as the weight.
        match = re.search(r"(\d{1,4}(?:\.\d{1,2})?)", generated_text)
        if match:
            weight_line = f"Weight: {match.group(1)} kg"
        else:
            # Do not append the unit when there is no value to attach it to.
            weight_line = "Weight: Not detected"

        # Timestamp the capture in Indian Standard Time.
        ist = pytz.timezone('Asia/Kolkata')
        current_time = datetime.now(ist).strftime("%Y-%m-%d %H:%M:%S")

        return f"{weight_line}\nCaptured At: {current_time} (IST)", image
    except Exception as e:
        # UI boundary: report any failure in the text output rather than
        # letting the exception propagate to the caller.
        return f"Error: {e}", image

# Gradio UI: one image input; a text summary and the echoed snapshot as outputs.
image_input = gr.Image(type="pil", label="Upload or Capture Image")
result_outputs = [
    gr.Textbox(label="Weight Info"),
    gr.Image(label="Snapshot"),
]

interface = gr.Interface(
    fn=detect_weight,
    inputs=image_input,
    outputs=result_outputs,
    title="⚖️ Auto Weight Detector (No Tesseract)",
    description="Detects weight from digital scale image using Hugging Face TrOCR. Shows weight and capture time (IST).",
)

# Start the app as soon as the module is executed.
interface.launch()