Spaces:

syedfaisalabrar
/

License_Classification

Sleeping

App Files Files Community

License_Classification / app.py

syedfaisalabrar

Update app.py

e8b709f verified 4 months ago

raw

history blame

5.3 kB

	import gradio as gr
	import torch
	import cv2
	import os
	import numpy as np
	from PIL import Image, ImageEnhance
	from ultralytics import YOLO
	from decord import VideoReader, cpu
	from torchvision.transforms.functional import InterpolationMode
	from transformers import AutoModel, AutoTokenizer
	from backPrompt import main as main_b
	from frontPrompt import main as main_f
	import sentencepiece as spm

	# Weights file for the YOLO detector used by detect_document.
	model_path = "best.pt"
	modelY = YOLO(model_path)
	# NOTE(review): this is assigned after `transformers` has already been
	# imported above, so it may not influence the library's own cache lookup;
	# the cache location is also passed explicitly via `cache_dir` below.
	os.environ["TRANSFORMERS_CACHE"] = "./.cache"
	cache_folder = "./.cache"
	# Hugging Face model identifier shared by the model and the tokenizer.
	path = "OpenGVLab/InternVL2_5-2B"
	# Load the Hugging Face model and tokenizer globally (downloaded only once)
	model = AutoModel.from_pretrained(
	path,
	cache_dir=cache_folder,
	# bfloat16 when CUDA is available, float32 otherwise.
	torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
	# load_in_8bit=True,
	low_cpu_mem_usage=True,
	# NOTE(review): flash attention is requested although the model is moved
	# to the CPU right after loading — confirm this is intended.
	use_flash_attn=True,
	trust_remote_code=True
	# Switch to evaluation mode and keep the model on the CPU.
	).eval().cpu()

	# Tokenizer for the same checkpoint; the slow (non-fast) implementation is
	# requested explicitly.
	tokenizer = AutoTokenizer.from_pretrained(
	path,
	cache_dir=cache_folder,
	trust_remote_code=True,
	use_fast=False
	)


	def preprocessing(image):
	    """Sharpen, add contrast to and slightly darken an image.

	    The image is neither resized nor cropped. Inputs that are not already
	    PIL images are converted through NumPy first; the result is always a
	    PIL image.
	    """
	    if isinstance(image, Image.Image):
	        enhanced = image
	    else:
	        enhanced = Image.fromarray(np.array(image))

	    # (enhancer class, factor) pairs, applied strictly in this order.
	    enhancement_steps = (
	        (ImageEnhance.Sharpness, 2.0),   # increase sharpness
	        (ImageEnhance.Contrast, 1.5),    # increase contrast
	        (ImageEnhance.Brightness, 0.8),  # reduce brightness
	    )
	    for enhancer_cls, factor in enhancement_steps:
	        enhanced = enhancer_cls(enhanced).enhance(factor)

	    return enhanced





	def imageRotation(image):
	    """Return ``image`` unchanged; no rotation is performed here."""
	    return image


	def detect_document(image):
	    """Detect the front and back of a document with YOLO and annotate them.

	    Args:
	        image: Any input accepted by ``ensure_numpy``.

	    Returns:
	        A tuple ``(annotated, labels, bounding_boxes)`` where

	        * ``annotated`` is a PIL image with a green rectangle and caption
	          drawn for every detection,
	        * ``labels`` is a list of ``"<class> <confidence>"`` strings,
	          followed by one ``"Missing: ..."`` entry when "front" and/or
	          "back" was not detected,
	        * ``bounding_boxes`` is a list of
	          ``(x1, y1, x2, y2, class_name, conf)`` tuples with integer pixel
	          coordinates and ``conf`` as a plain float.
	    """
	    # ensure_numpy hands back a fresh uint8 array, so the drawing below
	    # never touches the caller's image.
	    image = ensure_numpy(image)
	    # NOTE(review): Ultralytics documents NumPy inputs as BGR; if callers
	    # pass RGB arrays the detector sees swapped channels — confirm which
	    # order the weights expect before changing this.
	    results = modelY(image, conf=0.85)

	    detected_classes = set()
	    labels = []
	    bounding_boxes = []

	    for result in results:
	        for box in result.boxes:
	            x1, y1, x2, y2 = map(int, box.xyxy[0])
	            # Store a plain float rather than a tensor so downstream code
	            # can compare and serialise it freely.
	            conf = float(box.conf[0])
	            class_name = modelY.names[int(box.cls[0])]

	            detected_classes.add(class_name)
	            label = f"{class_name} {conf:.2f}"
	            labels.append(label)
	            bounding_boxes.append((x1, y1, x2, y2, class_name, conf))

	            # Draw bounding box
	            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
	            # Keep the caption inside the picture when the box touches the
	            # top edge.
	            text_y = max(y1 - 10, 15)
	            cv2.putText(image, label, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

	    possible_classes = {"front", "back"}
	    # Sorted so the message does not depend on set iteration order.
	    missing_classes = sorted(possible_classes - detected_classes)
	    if missing_classes:
	        labels.append(f"Missing: {', '.join(missing_classes)}")

	    return Image.fromarray(image), labels, bounding_boxes


	def crop_image(image, bounding_boxes):
	    """Crop one region per detected class out of the image.

	    Args:
	        image: Any input accepted by ``ensure_numpy``.
	        bounding_boxes: Iterable of ``(x1, y1, x2, y2, class_name, conf)``
	            tuples in pixel coordinates.

	    Returns:
	        A dict mapping class name to the cropped PIL image. When a class
	        was detected more than once, the crop with the highest confidence
	        is kept. Boxes that are empty after clamping are skipped.
	    """
	    image = ensure_numpy(image)  # Ensure image is NumPy format
	    height, width = image.shape[:2]
	    cropped_images = {}
	    best_conf = {}

	    for x1, y1, x2, y2, class_name, conf in bounding_boxes:
	        # Clamp both ends of each axis into the image; an unclamped
	        # negative end would be read as a "from the end" slice index.
	        x1, x2 = min(max(0, x1), width), min(max(0, x2), width)
	        y1, y2 = min(max(0, y1), height), min(max(0, y2), height)
	        cropped = image[y1:y2, x1:x2]
	        if cropped.size == 0:
	            continue

	        conf = float(conf)
	        # A later, weaker detection must not replace a stronger one.
	        if class_name not in best_conf or conf > best_conf[class_name]:
	            best_conf[class_name] = conf
	            cropped_images[class_name] = Image.fromarray(cropped)

	    return cropped_images


	def vision_ai_api(image, doc_type):
	    """Run the prompt routine that matches ``doc_type`` on ``image``.

	    Args:
	        image: Image handed through to the prompt routine unchanged.
	        doc_type: ``"front"`` or ``"back"``.

	    Returns:
	        Whatever the selected routine (``main_f`` for the front,
	        ``main_b`` for the back) returns.

	    Raises:
	        ValueError: If ``doc_type`` is neither ``"front"`` nor ``"back"``.
	    """
	    if doc_type == "front":
	        return main_f(image, model, tokenizer)
	    if doc_type == "back":
	        return main_b(image, model, tokenizer)
	    # Fail loudly instead of hitting an unbound local variable.
	    raise ValueError(
	        f"Unsupported doc_type {doc_type!r}; expected 'front' or 'back'"
	    )

	def ensure_numpy(image):
	    """Convert an image to a freshly allocated ``uint8`` NumPy array.

	    Args:
	        image: A ``torch.Tensor`` (channels-first ``(C, H, W)`` or
	            two-dimensional ``(H, W)``), a PIL image, a NumPy array, or
	            anything else ``np.asarray`` accepts.

	    Returns:
	        A new ``uint8`` array. Two-dimensional inputs are repeated along a
	        new last axis to give three channels. Values are cast, not
	        rescaled.
	    """
	    if isinstance(image, torch.Tensor):
	        # detach() lets tensors that track gradients be converted too.
	        array = image.detach().cpu().numpy()
	        if array.ndim != 2:
	            # Channels-first to channels-last.
	            array = array.transpose(1, 2, 0)
	    else:
	        # Covers PIL images, existing arrays and nested sequences alike.
	        array = np.asarray(image)

	    if array.ndim == 2:
	        # Convert grayscale to 3-channel image
	        array = np.stack([array] * 3, axis=-1)

	    # astype copies by default, so callers may draw on the result without
	    # modifying the input.
	    return array.astype(np.uint8)

	def predict(image):
	    """Pipeline: Preprocess -> Detect -> Crop -> Vision AI API.

	    Args:
	        image: The submitted image, or ``None`` when nothing was provided.

	    Returns:
	        A tuple ``(annotated_image, labels, results)``:

	        * no input: ``(None, [], {"error": "No image provided!"})``,
	        * nothing detected: the annotated image, the labels and
	          ``{"error": "No document detected!"}``,
	        * otherwise: the annotated image, the labels and a dict with the
	          keys ``"front"`` and ``"back"``; a side that was not detected
	          maps to ``None``.
	    """
	    if image is None:
	        # Submitting without an image would otherwise fail in preprocessing.
	        return None, [], {"error": "No image provided!"}

	    enhanced = preprocessing(image)  # Enhanced PIL image
	    source_array = ensure_numpy(enhanced)  # Convert to NumPy
	    detected_image, labels, bounding_boxes = detect_document(source_array)

	    if not bounding_boxes:
	        return detected_image, labels, {"error": "No document detected!"}

	    # Crops are taken from the clean array, not from the annotated image.
	    cropped_images = crop_image(source_array, bounding_boxes)

	    # Call Vision AI separately for front and back if detected
	    api_results = {
	        side: (
	            vision_ai_api(cropped_images[side], side)
	            if side in cropped_images
	            else None
	        )
	        for side in ("front", "back")
	    }

	    return detected_image, labels, api_results



	# Gradio UI: a single image input wired to `predict`; the three outputs
	# correspond, in order, to the values `predict` returns (annotated image,
	# labels, extraction results).
	iface = gr.Interface(
	fn=predict,
	inputs="image",
	outputs=["image", "text", "json"],
	title="License Field Detection (Front & Back Card)"
	)

	# Launch the interface as soon as this module is executed.
	iface.launch()