"""Gradio app that answers questions about an uploaded invoice image via Google Gemini."""

import os

import google.generativeai as genai
import gradio as gr
from dotenv import load_dotenv
from PIL import Image

# Load environment variables
load_dotenv()

# Configure the API key for Google Gemini
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Model used for image + text requests. The default matches the original
# hard-coded value; set GEMINI_MODEL to switch models without editing the code.
# NOTE(review): confirm "gemini-pro-vision" is still served for your API key.
MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-pro-vision")


def _to_image_part(uploaded_file):
    """Convert the uploaded image into a content part for ``generate_content``.

    Accepts a PIL image (what ``gr.Image(type="pil")`` delivers), a filesystem
    path, or a file-like object exposing ``read()`` and ``type`` (the shape the
    original implementation expected).
    """
    if isinstance(uploaded_file, Image.Image):
        return uploaded_file
    if isinstance(uploaded_file, (str, os.PathLike)):
        # copy() forces the pixel data into memory so the file can be closed.
        with Image.open(uploaded_file) as img:
            return img.copy()
    # File-like upload: send the raw bytes together with their MIME type.
    return {"mime_type": uploaded_file.type, "data": uploaded_file.read()}


# Function to process the image and get response from Gemini model
def get_gemini_response(input_prompt, uploaded_file, query):
    """Ask the Gemini model a question about an invoice image.

    Args:
        input_prompt: Instruction text that frames the task for the model.
        uploaded_file: The invoice image (PIL image, path, or file-like
            object), or ``None`` when nothing was uploaded.
        query: The user's question about the invoice.

    Returns:
        The model's text answer, ``"Please upload an image."`` when no image
        was supplied, or ``"Error: ..."`` describing any failure.
    """
    if uploaded_file is None:
        return "Please upload an image."

    try:
        image_part = _to_image_part(uploaded_file)

        # Skip blank text parts instead of sending empty strings to the API.
        contents = []
        if input_prompt and input_prompt.strip():
            contents.append(input_prompt)
        contents.append(image_part)
        if query and query.strip():
            contents.append(query)

        # Load the Gemini model and get the response
        model = genai.GenerativeModel(MODEL_NAME)
        response = model.generate_content(contents)
        return response.text
    except Exception as e:
        # UI boundary: show the failure in the response box rather than
        # letting the exception escape the click handler.
        return f"Error: {e}"


# Define input prompt
default_prompt = (
    " You are an expert in understanding invoices. "
    "You will receive input images as invoices and "
    "you will have to answer questions based on the input image. "
)

# Define Gradio interface
with gr.Blocks() as invoice_extractor:
    gr.Markdown("# Invoice Extractor")
    gr.Markdown(
        " Upload an invoice image and ask specific questions about it. "
        "The system uses Google's Gemini model to extract and interpret "
        "the invoice details. "
    )

    input_prompt = gr.Textbox(label="Input Prompt", value=default_prompt, lines=3)
    # type="pil" makes Gradio pass a PIL.Image (or None) to the handler.
    image_input = gr.Image(label="Upload Invoice Image", type="pil")
    query_input = gr.Textbox(
        label="Enter your query about the invoice",
        placeholder="e.g., What is the total amount?",
    )
    output_response = gr.Textbox(label="Response", lines=5)

    # Button to process the image and query
    submit_btn = gr.Button("Process Invoice")

    # Set the button to call the processing function
    submit_btn.click(
        get_gemini_response,
        inputs=[input_prompt, image_input, query_input],
        outputs=output_response,
    )

# Launch the app only when run as a script, so importing this module
# (e.g. to reuse get_gemini_response) does not start a web server.
if __name__ == "__main__":
    invoice_extractor.launch()