# app.py -- uploaded by pratikshahp
# Commit 2ce8106 (verified): "Create app.py"
# File size: 2.11 kB
import gradio as gr
from PIL import Image
import google.generativeai as genai
import os
from dotenv import load_dotenv
# Read variables from a .env file (via python-dotenv) before anything
# looks up the API key.
load_dotenv()

# Give the Gemini SDK its API key, taken from the GOOGLE_API_KEY
# environment variable (None when the variable is not set).
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
def _load_image(uploaded_file):
    """Return *uploaded_file* as a fully decoded ``PIL.Image.Image``.

    Accepts an already-decoded PIL image, a filesystem path, or a binary
    file object, i.e. anything ``Image.open`` can read.
    """
    if isinstance(uploaded_file, Image.Image):
        return uploaded_file
    # Decode eagerly so the image stays usable after the ``with`` block has
    # released the underlying file.
    with Image.open(uploaded_file) as image:
        image.load()
    return image


def get_gemini_response(input_prompt: str, uploaded_file, query: str, *,
                        model_name: str = "gemini-pro-vision") -> str:
    """Ask a Gemini vision model a question about an uploaded image.

    Args:
        input_prompt: Instruction text sent ahead of the image.
        uploaded_file: The image to analyse: a PIL image, a path to an image
            file, or a binary file object. ``None`` means nothing was
            uploaded.
        query: The user's question, sent after the image.
        model_name: Identifier of the Gemini model to call.
            NOTE(review): "gemini-pro-vision" is believed to have been
            retired by Google -- confirm and pass a current vision-capable
            model if requests fail with a "model not found" error.

    Returns:
        The model's text answer; ``"Please upload an image."`` when no image
        was supplied; or ``"Error: <details>"`` when loading the image or
        calling the model fails. Failures are returned as text rather than
        raised so the caller can display them directly.
    """
    if uploaded_file is None:
        return "Please upload an image."

    try:
        image = _load_image(uploaded_file)
        model = genai.GenerativeModel(model_name)
        response = model.generate_content([input_prompt, image, query])
        return response.text
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any failure
        # (unreadable image, network error, blocked response) should surface
        # as a readable message instead of a traceback.
        return f"Error: {e}"
# Default instruction text that frames the model as an invoice expert.
default_prompt = (
    "\n"
    "You are an expert in understanding invoices. You will receive input images as invoices and\n"
    "you will have to answer questions based on the input image.\n"
)
# Define Gradio interface: a prompt box, an image upload, a query box and a
# response box, wired to get_gemini_response by a single button.
with gr.Blocks() as invoice_extractor:
    gr.Markdown("# Invoice Extractor")
    gr.Markdown(
        "\n"
        "Upload an invoice image and ask specific questions about it.\n"
        "The system uses Google's Gemini model to extract and interpret the invoice details.\n"
    )

    input_prompt = gr.Textbox(label="Input Prompt", value=default_prompt, lines=3)
    # "file" is not one of the image formats accepted by current Gradio
    # releases ("numpy", "pil", "filepath"); "filepath" is the closest
    # equivalent and hands the upload over as a path to a temporary file.
    image_input = gr.Image(label="Upload Invoice Image", type="filepath")
    query_input = gr.Textbox(
        label="Enter your query about the invoice",
        placeholder="e.g., What is the total amount?",
    )
    output_response = gr.Textbox(label="Response", lines=5)

    # Button to process the image and query
    submit_btn = gr.Button("Process Invoice")

    # Event listeners must be registered inside the Blocks context.
    submit_btn.click(
        get_gemini_response,
        inputs=[input_prompt, image_input, query_input],
        outputs=output_response,
    )

# Launch the app
invoice_extractor.launch()