import PIL.Image
import gradio as gr
import base64
import os
import google.generativeai as genai
from dotenv import load_dotenv
# Load variables from a local .env file (if any) before reading the key.
load_dotenv()

# Set Google API key
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# Backward-compatible alias: the constant was originally spelled with a
# lowercase "e"; keep that name bound in case anything still refers to it.
GOOGLe_API_KEY = GOOGLE_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)

# Create the Model
# txt_model answers text-only prompts; vis_model answers text + image prompts.
# NOTE(review): confirm both model IDs are still served by the API.
txt_model = genai.GenerativeModel("gemini-pro")
vis_model = genai.GenerativeModel("gemini-pro-vision")
# Image to Base 64 Converter
# NOTE: a function with this same name is defined again further down in this
# file; once the module has finished loading, that later definition is the one
# bound to the name.
def image_to_base64(image_path):
    """Return the bytes of the file at ``image_path`` as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        str: The base64 encoding of the file contents, decoded as UTF-8.
    """
    with open(image_path, "rb") as source:
        return base64.b64encode(source.read()).decode("utf-8")
# Function that takes User Inputs and displays it on ChatUI
def query_message(history, txt, img):
    """Append the user's turn (text and optional image) to the chat history.

    Args:
        history: List of ``(user_message, bot_message)`` tuples; it is
            extended in place.
        txt: Text entered by the user.
        img: Path of the uploaded image file, or a falsy value when the user
            supplied no image.

    Returns:
        The same ``history`` list with one ``(user_message, None)`` entry
        appended.
    """
    if not img:
        history += [(txt, None)]
        return history

    # Embed the image in the message itself as a Markdown image backed by a
    # data URL, so the chat shows the picture next to the text. Previously the
    # data URL was built but never used, so the image was silently dropped.
    # The local is deliberately not named ``base64`` to avoid shadowing the
    # imported module.
    # NOTE(review): the MIME type is hard-coded to image/jpeg regardless of
    # the actual file type.
    encoded_image = image_to_base64(img)
    data_url = f"data:image/jpeg;base64,{encoded_image}"
    history += [(f"{txt} ![]({data_url})", None)]
    return history
# Function that takes User Inputs, generates Response and displays on Chat UI
def llm_response(history, text, img):
    """Generate the model's reply and append it to the chat history.

    Uses ``txt_model`` when no image is supplied, otherwise sends the text
    together with the opened image to ``vis_model``.

    Args:
        history: List of ``(user_message, bot_message)`` tuples; it is
            extended in place with a ``(None, reply_text)`` entry.
        text: The user's prompt.
        img: Path of the uploaded image file, or a falsy value for a
            text-only request.

    Returns:
        tuple: ``(history, gr.update(value=""))``. The second item presumably
        clears the text input; confirm against the event wiring.
    """
    if not img:
        response = txt_model.generate_content(text)
    else:
        # Open the image in a context manager so the underlying file handle
        # is released once the request has been made, instead of leaking one
        # handle per call.
        with PIL.Image.open(img) as image:
            response = vis_model.generate_content([text, image])

    history += [(None, response.text)]
    return history, gr.update(value="")
def image_to_base64(image_path):
    """
    Load an image file from disk and encode its bytes as base64 text.

    Args:
        image_path (str): Location of the image file to read.

    Returns:
        str: The file contents, base64-encoded and decoded as UTF-8.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
# Encode the logo image into base64.
# This runs when the module is loaded. The path is relative, so it is resolved
# against the current working directory; open() raises FileNotFoundError if
# "pixelpk_logo.png" is not there.
logo_base64 = image_to_base64("pixelpk_logo.png")
markdown_content = f"""
Multimodal chatbot is designed to chat with text and images.
""" css = """ h1 { text-align: center; display:block; } """ # Interface Code with gr.Blocks(gr.themes.Monochrome(), css = css) as app: # Display introductory markdown content gr.Markdown(f"