"""Streamlit app that answers questions about an uploaded invoice image.

The user uploads an invoice image and types a question; both are sent,
together with a fixed instruction prompt, to a Gemini vision model and the
model's text reply is rendered on the page.
"""

import os

import google.generativeai as genai
import streamlit as st
from dotenv import load_dotenv
from PIL import Image

# Load all environment variables from .env before reading the API key.
load_dotenv()

genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))

# Gemini vision model shared by every request in this session.
# NOTE(review): confirm 'gemini-pro-vision' is still served by the API.
model = genai.GenerativeModel('gemini-pro-vision')


def get_gemini_response(input, image, prompt) -> str:
    """Send the prompt parts to the Gemini model and return its text reply.

    Args:
        input: First text part passed to the model.
        image: Sequence of image parts; only the first element is sent.
        prompt: Second text part passed to the model, after the image.

    Returns:
        The ``text`` attribute of the model response.
    """
    # The parameter name ``input`` shadows the builtin; it is kept so that
    # existing keyword callers continue to work.
    response = model.generate_content([input, image[0], prompt])
    return response.text


def input_image_details(uploaded_file) -> list:
    """Return the uploaded image as a one-element list of image parts.

    Args:
        uploaded_file: Object returned by ``st.file_uploader``, or ``None``.

    Returns:
        A list holding one dict with the file's ``mime_type`` and raw
        ``data`` bytes.

    Raises:
        FileNotFoundError: If no file has been uploaded.
    """
    if uploaded_file is None:
        raise FileNotFoundError('No file uploaded')

    # Read the whole upload into memory as bytes.
    bytes_data = uploaded_file.getvalue()
    return [
        {
            "mime_type": uploaded_file.type,  # MIME type reported by the upload
            "data": bytes_data,
        }
    ]


# Initialize the Streamlit app.
st.set_page_config(page_title='MultiLanguage Invoice Extractor')
st.header("MultiLanguage Invoice Extractor using Gemini Vision PRO ")

# Named ``user_question`` rather than ``input`` to avoid shadowing the builtin
# at module level; the widget key is unchanged.
user_question = st.text_input('Input Prompt: ', key='input')
uploaded_file = st.file_uploader(
    'Choose an image of the Invoice',
    type=["jpg", 'jpeg', 'png'],
)

if uploaded_file is not None:
    # Preview the uploaded invoice so the user can confirm the right file.
    image = Image.open(uploaded_file)
    # NOTE(review): check whether the installed Streamlit version still
    # accepts ``use_column_width``.
    st.image(image, caption='Uploaded Image', use_column_width=True)

submit = st.button("Tell me about the invoice")

# Fixed instruction sent ahead of the image on every request.
input_prompt = (
    " you are a expert in understaning invoices. "
    "we will upload an image as invoice and you will have to answer "
    "any questions based on the uploaded invoice image "
)

# Runs only when the submit button was clicked on this rerun.
if submit:
    try:
        image_data = input_image_details(uploaded_file)
    except FileNotFoundError:
        # Show a friendly message instead of a traceback when the button is
        # pressed before any image has been uploaded.
        st.warning("Please upload an invoice image before submitting.")
    else:
        # The instruction prompt goes first, then the image, then the
        # user's question.
        response = get_gemini_response(input_prompt, image_data, user_question)
        st.subheader("The Response is ")
        st.write(response)