# Gradio app: Receipt Image Analyzer (OCR text extraction + fine-tuned classifier)
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from PIL import Image
import pytesseract # Install via `pip install pytesseract` and ensure Tesseract OCR is installed on your system
# Load your fine-tuned model and tokenizer
# NOTE(review): this downloads/loads at import time, so the module has a
# network/disk side effect on first run — intentional for a Gradio Space.
model_name = "quadranttechnologies/Receipt_Image_Analyzer"
# Sequence-classification head: the app maps OCR'd receipt text to a class id.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Tokenizer must match the checkpoint so vocab/ids line up with the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Function to preprocess image and extract text using OCR
def ocr_extract_text(image):
    """Run Tesseract OCR on a PIL image and return the recognized text.

    The image is first converted to grayscale ("L" mode), which tends to
    improve OCR accuracy on photographed receipts.
    """
    grayscale = image.convert("L")
    return pytesseract.image_to_string(grayscale)
# Define a function to analyze the receipt image
def analyze_receipt_image(receipt_image):
    """Extract text from a receipt image and classify it with the model.

    Args:
        receipt_image: PIL image supplied by the Gradio Image component,
            or None when the user submits without uploading an image.

    Returns:
        dict: on success, {"extracted_text": str, "predicted_class": int};
        on failure, {"error": str} describing the problem.
    """
    # Gradio passes None when no image was uploaded (or after Clear);
    # guard before OCR to avoid an AttributeError on .convert.
    if receipt_image is None:
        return {"error": "No image provided."}
    # Extract text from the image via Tesseract OCR.
    receipt_text = ocr_extract_text(receipt_image)
    if not receipt_text.strip():
        return {"error": "No text detected in the image."}
    # Tokenize and run the fine-tuned sequence classifier on the OCR text.
    inputs = tokenizer(receipt_text, return_tensors="pt", truncation=True, padding=True)
    outputs = model(**inputs)
    logits = outputs.logits
    # argmax over the class dimension gives the predicted label id.
    predicted_class = logits.argmax(-1).item()
    # Return the extracted text and predicted class as JSON-serializable dict.
    return {
        "extracted_text": receipt_text,
        "predicted_class": predicted_class
    }
# Create a Gradio interface
# Wire the analysis function into a simple Gradio UI: one image input,
# JSON output showing the OCR text and the predicted class.
interface = gr.Interface(
    fn=analyze_receipt_image,
    title="Receipt Image Analyzer",
    description="Upload an image of a receipt. The app extracts text and analyzes it using a fine-tuned LLM model.",
    inputs=gr.Image(type="pil"),  # deliver the upload as a PIL image
    outputs="json",               # render the returned dict as JSON
)
# Start the web server for the app.
interface.launch()