File size: 3,332 Bytes
dd2ba72
 
2405743
1f353b4
2405743
1f353b4
4735088
2405743
9efaee0
2405743
307c8f3
1dd705c
307c8f3
 
d85faf4
2405743
 
581d1f5
2405743
307c8f3
 
 
dd2ba72
e51c033
307c8f3
 
 
 
 
 
 
 
2405743
307c8f3
 
 
 
2405743
 
307c8f3
2405743
 
 
 
 
 
dd2ba72
2405743
e51c033
1f353b4
 
2405743
1f353b4
2405743
1f353b4
58a562d
07eed04
58a562d
2405743
07eed04
 
 
 
2405743
58a562d
2405743
 
 
07eed04
 
 
 
 
 
 
 
2405743
1f353b4
2405743
 
07eed04
2405743
1f353b4
 
 
07eed04
dd2ba72
07eed04
2405743
07eed04
 
 
307c8f3
dd2ba72
1f353b4
 
28eb4e5
1f353b4
f84d408
 
1f353b4
 
 
2405743
 
1f353b4
dd2ba72
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
import cv2
import easyocr
import numpy as np
import os
from PIL import Image
from ultralytics import YOLO
from datetime import datetime

# Load YOLO model
# The weights are read from a fixed absolute path when this module is imported;
# the resulting `model` is called once per frame by annotate_frame.
model = YOLO("/home/user/app/best.pt")

# Label map
# Maps the detector's integer class id to the name drawn on the image.
# Class ids missing from this map are displayed as "Unknown" by annotate_frame.
label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}

# EasyOCR Bengali
# A single reader is created at import time and reused for every crop.
reader = easyocr.Reader(['bn'])

def annotate_frame(frame):
    """Detect plates in *frame*, draw the results, and return an RGB image.

    The frame is resized to 640x640 before detection, so every box coordinate
    is expressed in that 640x640 space. Each detection gets a rectangle and a
    "<label>: <confidence>%" caption; the box region is then passed to the
    OCR reader and each recognised text line is drawn below the box.

    Args:
        frame: Image array accepted by ``cv2.resize``. The callers in this
            file pass frames read by OpenCV (``cv2.imread`` /
            ``cv2.VideoCapture``), and the result is converted with
            ``COLOR_BGR2RGB``, so BGR channel order is assumed.

    Returns:
        The annotated 640x640 image after BGR -> RGB conversion.
    """
    input_img = cv2.resize(frame, (640, 640))
    # OCR must read pixels in the same coordinate space as the boxes (the
    # resized image, not the caller's frame) and must not see the overlays
    # drawn below, so keep an untouched copy to crop from.
    ocr_source = input_img.copy()
    height, width = input_img.shape[:2]

    results = model(input_img)[0]
    detections = results.boxes.data.cpu().numpy()

    for det in detections:
        if len(det) < 6:
            continue

        x1, y1, x2, y2, conf, cls = det
        x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw box and label
        cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(input_img, f"{label}: {percent}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # OCR
        # Clamp to the image bounds: a negative coordinate would otherwise be
        # interpreted as a from-the-end index and select the wrong region.
        left, top = max(x1, 0), max(y1, 0)
        right, bottom = min(x2, width), min(y2, height)
        cropped = ocr_source[top:bottom, left:right]
        if cropped.size == 0:
            continue

        # Each OCR item is indexed as item[1] for the recognised text.
        # NOTE(review): the Hershey fonts are not expected to contain Bengali
        # glyphs, so this overlay may render as '?' characters - confirm, and
        # switch to a TrueType-based renderer if readable text is required.
        for i, item in enumerate(reader.readtext(cropped)):
            text = item[1].strip()
            cv2.putText(input_img, text, (x1, y2 + 20 + i*25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

    return cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)

def process_input(input_file):
    """Annotate an uploaded image or video and return the Gradio outputs.

    Files ending in .mp4/.avi/.mov are treated as video: every 5th frame is
    resized to 640x640, annotated, and written to "annotated_output.mp4".
    Anything else is read as a single image.

    Args:
        input_file: Either a filesystem path string (what
            ``gr.File(type="filepath")`` supplies) or an object exposing the
            path as ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A 4-tuple ``(video_path, pil_image, text, confidence_text)``. Exactly
        one of the first two is set on success; both are ``None`` on failure,
        with the reason in ``text``. The two text fields are fixed status
        strings, not the recognised plate text.
    """
    if input_file is None:
        return None, None, "No file uploaded", ""

    # A plain path string has no `.name`; fall back to the value itself so
    # both path strings and file-like wrappers are accepted.
    file_path = getattr(input_file, "name", input_file)
    ext = os.path.splitext(file_path)[-1].lower()

    if ext in ('.mp4', '.avi', '.mov'):
        cap = cv2.VideoCapture(file_path)
        if not cap.isOpened():
            return None, None, "Could not open video file", ""

        frame_skip = 5
        output_path = "annotated_output.mp4"
        out = None
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            # Some containers report 0 (or NaN); `not fps > 0` catches both.
            if not fps > 0:
                fps = 25.0
            # Only one frame in `frame_skip` is written, so scale the output
            # rate accordingly to keep the original playback duration.
            out_fps = fps / frame_skip

            out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                  out_fps, (640, 640))
            if not out.isOpened():
                return None, None, "Could not create output video", ""

            frame_id = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                keep = frame_id % frame_skip == 0
                frame_id += 1
                if not keep:
                    continue

                frame = cv2.resize(frame, (640, 640))
                annotated = annotate_frame(frame)
                # annotate_frame returns RGB; the writer expects BGR.
                out.write(cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
        finally:
            # Release both handles even if annotation raises mid-video.
            cap.release()
            if out is not None:
                out.release()

        return output_path, None, "Bangla text in video (see frames)", "OCR confidence displayed"

    frame = cv2.imread(file_path)
    if frame is None:
        return None, None, "Invalid image", ""

    frame = cv2.resize(frame, (640, 640))
    annotated = annotate_frame(frame)
    pil_img = Image.fromarray(annotated)
    return None, pil_img, "Bangla text in image", "OCR confidence in image"



# Build the web UI around process_input: one file upload in, four outputs out.
# The order of `outputs` matches the 4-tuple returned by process_input
# (video path, PIL image, text, confidence text); the unused media slot is
# returned as None.
interface = gr.Interface(
    fn=process_input,
    inputs=gr.File(type="filepath", label="Upload Image or Video"),
    outputs=[
        gr.Video(label="Output Video"),
        gr.Image(type="pil", label="Output Image"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)")
    ],
    title="YOLOv5 License Plate Detector (Bangla OCR)",
    description="Upload an image or video. Detects license plates and extracts Bangla text using EasyOCR."
)

# Launched unconditionally, so importing this module also starts the app.
interface.launch()