"""Gradio app that OCRs an uploaded receipt image and classifies the extracted text."""

import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from PIL import Image
import pytesseract  # Install via `pip install pytesseract` and ensure Tesseract OCR is installed on your system

# Load the fine-tuned sequence-classification model and its matching tokenizer.
# This runs once at import time; analyze_receipt_image reads the module-level
# `model` and `tokenizer` on every call instead of reloading them.
model_name = "quadranttechnologies/Receipt_Image_Analyzer"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Function to preprocess image and extract text using OCR
def ocr_extract_text(image):
    """Return the text Tesseract recognizes in *image*.

    The image is first converted to single-channel grayscale (mode ``"L"``)
    and then passed to ``pytesseract.image_to_string``; the OCR result is
    returned unmodified.
    """
    return pytesseract.image_to_string(image.convert("L"))

# Define a function to analyze the receipt image
def analyze_receipt_image(receipt_image):
    """OCR a receipt image and classify the extracted text.

    Args:
        receipt_image: The uploaded image (an object accepted by
            ``ocr_extract_text``), or ``None`` when nothing was uploaded.

    Returns:
        A dict with ``"extracted_text"`` (the raw OCR output) and
        ``"predicted_class"`` (the integer index of the highest logit) on
        success, or a dict with a single ``"error"`` message when no image
        was supplied or OCR found no text.
    """
    # Gradio passes None when the form is submitted without an image; bail
    # out early instead of failing inside the OCR step.
    if receipt_image is None:
        return {"error": "No image provided."}

    # Extract text from the image
    receipt_text = ocr_extract_text(receipt_image)
    if not receipt_text.strip():
        return {"error": "No text detected in the image."}

    # Imported locally so this function does not depend on a file-level
    # torch import; the "pt" tensors below already require torch.
    import torch

    # Use the fine-tuned model to analyze the extracted text
    inputs = tokenizer(receipt_text, return_tensors="pt", truncation=True, padding=True)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class = logits.argmax(-1).item()

    # Return the extracted text and predicted class as JSON
    return {
        "extracted_text": receipt_text,
        "predicted_class": predicted_class,
    }

# Create a Gradio interface that wires analyze_receipt_image to an image-upload form
interface = gr.Interface(
    fn=analyze_receipt_image,
    inputs=gr.Image(type="pil"),  # type="pil" so the callback gets a PIL image; ocr_extract_text calls .convert() on it
    outputs="json",  # The dict returned by the callback is displayed as JSON
    title="Receipt Image Analyzer",
    description="Upload an image of a receipt. The app extracts text and analyzes it using a fine-tuned LLM model.",
)

# Launch the Gradio app (executed at module level; there is no __main__ guard)
interface.launch()