|
import gradio as gr |
|
from PIL import Image |
|
import google.generativeai as genai |
|
import os |
|
from dotenv import load_dotenv |
|
|
|
|
|
# Read key/value pairs from a local .env file into the process environment
# before anything below looks up the API key.
load_dotenv()

# Hand the Google API key (None when the variable is unset) to the Gemini
# client library.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
|
|
|
|
|
def _guess_image_mime_type(path):
    """Return an ``image/*`` MIME type for *path*, or ``None`` if it has no extension.

    The standard ``mimetypes`` table is consulted first so that, for example,
    ``.jpg`` and ``.JPG`` both map to ``image/jpeg`` instead of ``image/jpg``.
    Extensions the table does not map to an image type fall back to
    ``image/<lower-cased extension>``.
    """
    import mimetypes  # local import keeps this block self-contained

    guessed, _ = mimetypes.guess_type(path)
    if guessed and guessed.startswith("image/"):
        return guessed

    # splitext only looks at the final path component, so a dot in a
    # directory name is never mistaken for a file extension.
    extension = os.path.splitext(path)[1].lstrip(".").lower()
    return f"image/{extension}" if extension else None


def get_gemini_response(uploaded_file_path, query):
    """Ask the Gemini model a question about an invoice image.

    Args:
        uploaded_file_path: Path of the uploaded image file on disk.
        query: The question to ask about the invoice.

    Returns:
        The text of the model's response. If the path is empty, does not
        point to a regular file, or has no file extension to derive a MIME
        type from, returns ``"Please upload a valid image."``. Any exception
        raised while reading the file or calling the model is returned as
        ``"Error: <message>"`` rather than propagated.
    """
    try:
        input_prompt = """
        You are an expert in understanding invoices. You will receive input images as invoices and
        you will have to answer questions based on the input image.
        """

        # isfile (not exists) so a directory path is rejected here instead of
        # failing later inside open().
        if not uploaded_file_path or not os.path.isfile(uploaded_file_path):
            return "Please upload a valid image."

        mime_type = _guess_image_mime_type(uploaded_file_path)
        if mime_type is None:
            return "Please upload a valid image."

        with open(uploaded_file_path, "rb") as f:
            image_data = f.read()

        image_part = {"mime_type": mime_type, "data": image_data}

        model = genai.GenerativeModel("gemini-1.5-flash")
        response = model.generate_content([input_prompt, image_part, query])
        return response.text
    except Exception as e:
        # The return value is shown directly in the UI, so report the failure
        # as text instead of raising.
        return f"Error: {e}"
|
|
|
|
|
|
|
# Build the UI: a title, a short description, the two inputs, the response
# box, and a button that sends the inputs to get_gemini_response.
with gr.Blocks() as invoice_extractor:
    gr.Markdown(value="# Invoice Extractor")
    gr.Markdown(
        value="""
        Upload an invoice image and ask specific questions about it.
        The system uses Google's Gemini model to extract and interpret the invoice details.
        """
    )

    # type="filepath" makes the component pass the image's path on disk
    # (not pixel data) to the click handler.
    image_input = gr.Image(
        label="Upload Invoice Image",
        type="filepath",
    )
    query_input = gr.Textbox(
        label="Enter your query about the invoice",
        placeholder="e.g., What is the total amount?",
    )
    output_response = gr.Textbox(
        label="Response",
        lines=5,
    )

    submit_btn = gr.Button(value="Process Invoice")

    # Clicking the button runs the handler with (image path, query) and
    # writes its return value into the response box.
    submit_btn.click(
        fn=get_gemini_response,
        inputs=[image_input, query_input],
        outputs=output_response,
    )

# Start the Gradio server for the interface defined above.
invoice_extractor.launch()
|
|