File size: 2,111 Bytes
2ce8106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import gradio as gr
from PIL import Image
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Read variables from a .env file (via python-dotenv) before they are looked up
load_dotenv()

# Pass the GOOGLE_API_KEY environment value (None when unset) to the Gemini client
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

def _to_image_part(uploaded_file):
    """Turn an uploaded image into the ``{"mime_type", "data"}`` dict sent to Gemini.

    Accepts a filesystem path, a binary file-like object, or an object with a
    PIL-style ``save(fp, format=...)`` method.

    Raises:
        TypeError: if ``uploaded_file`` is none of the supported forms.
        ValueError: if the image is empty or its MIME type cannot be determined.
        OSError: if a path cannot be opened or read.
    """
    # Imported locally because the file's top-level imports do not include them.
    import io
    import mimetypes

    if isinstance(uploaded_file, (str, os.PathLike)):
        with open(uploaded_file, "rb") as fh:
            data = fh.read()
        mime_type = mimetypes.guess_type(os.fspath(uploaded_file))[0]
    elif hasattr(uploaded_file, "read"):
        data = uploaded_file.read()
        # Some upload wrappers carry the MIME type in ``.type``; plain file
        # objects only offer a ``.name`` whose extension can be used instead.
        mime_type = getattr(uploaded_file, "type", None)
        if not mime_type:
            mime_type = mimetypes.guess_type(str(getattr(uploaded_file, "name", "")))[0]
    elif hasattr(uploaded_file, "save"):
        # In-memory image (e.g. PIL): serialise to PNG so raw bytes can be sent.
        buffer = io.BytesIO()
        uploaded_file.save(buffer, format="PNG")
        data = buffer.getvalue()
        mime_type = "image/png"
    else:
        raise TypeError(
            f"Unsupported image input of type {type(uploaded_file).__name__}"
        )

    if not data:
        raise ValueError("The uploaded image is empty.")
    if not mime_type:
        raise ValueError("Could not determine the image type of the upload.")
    return {"mime_type": mime_type, "data": data}


# Function to process the image and get response from Gemini model
def get_gemini_response(input_prompt, uploaded_file, query, *, model_name="gemini-pro-vision"):
    """Ask a Gemini vision model a question about an uploaded invoice image.

    Args:
        input_prompt: Instruction text placed ahead of the image in the request.
        uploaded_file: The uploaded image: a path (``str`` / ``os.PathLike``),
            a binary file-like object with ``read()``, or an object with a
            PIL-style ``save()`` method. ``None`` means nothing was uploaded.
        query: The user's question about the image.
        model_name: Model identifier handed to ``genai.GenerativeModel``.
            NOTE(review): "gemini-pro-vision" appears to have been deprecated
            by Google - confirm it is still served before relying on the default.

    Returns:
        The model's text answer; ``"Please upload an image."`` when
        ``uploaded_file`` is ``None``; otherwise ``"Error: <message>"`` for any
        failure while loading the image or calling the model.
    """
    if uploaded_file is None:
        return "Please upload an image."

    # This is the UI boundary: every failure is reported back as text rather
    # than raised, so the response box always shows something meaningful.
    try:
        image_part = _to_image_part(uploaded_file)

        # Load the Gemini model and get the response
        model = genai.GenerativeModel(model_name)
        response = model.generate_content([input_prompt, image_part, query])
        return response.text
    except Exception as e:
        return f"Error: {e}"

# Default instruction text for the model.
# Spelled out piece by piece so the leading/trailing newlines and the trailing
# space at the end of the first sentence line are visible in the source.
default_prompt = (
    "\n"
    "You are an expert in understanding invoices. You will receive input images as invoices and \n"
    "you will have to answer questions based on the input image.\n"
)

# Define Gradio interface: prompt box, image upload, query box, response box,
# and a button that sends the three inputs to get_gemini_response.
with gr.Blocks() as invoice_extractor:
    gr.Markdown("# Invoice Extractor")
    gr.Markdown(
        """
        Upload an invoice image and ask specific questions about it. 
        The system uses Google's Gemini model to extract and interpret the invoice details.
        """
    )
    
    input_prompt = gr.Textbox(label="Input Prompt", value=default_prompt, lines=3)
    # "filepath" passes the handler a path to the uploaded image. The previous
    # value "file" is not among the types gr.Image accepts in current Gradio
    # releases ("numpy", "pil", "filepath").
    image_input = gr.Image(label="Upload Invoice Image", type="filepath")
    query_input = gr.Textbox(label="Enter your query about the invoice", placeholder="e.g., What is the total amount?")
    output_response = gr.Textbox(label="Response", lines=5)

    # Button to process the image and query
    submit_btn = gr.Button("Process Invoice")

    # On click, call the handler with (prompt, image, query) and show its
    # return value in the response box.
    submit_btn.click(
        get_gemini_response, 
        inputs=[input_prompt, image_input, query_input],
        outputs=output_response
    )

# Launch the app
invoice_extractor.launch()