# Vision-bot / app.py
# Uploaded by simran0608 ("Upload 2 files", commit 79541e0, verified), 3.71 kB.
import os
import base64
from io import BytesIO
from PIL import Image
import streamlit as st
from langchain.memory import ConversationSummaryBufferMemory
from langchain_google_genai import ChatGoogleGenerativeAI
from datetime import datetime
from langchain_core.messages import HumanMessage
st.title("Vision Bot")

# The Gemini API key must be supplied through the GOOGLE_API_KEY environment
# variable (for example a deployment secret or a shell export). It is
# deliberately NOT stored in this file: a key committed to source control is
# readable by anyone with access to the repository and must be treated as
# leaked and revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    st.error(
        "GOOGLE_API_KEY is not set. Add it to the environment "
        "(or your deployment's secrets) and reload the app."
    )
    # Halt this script run so nothing below tries to call the model without
    # credentials.
    st.stop()

# Chat model shared by the rest of the script; it picks the API key up from
# the environment variable checked above.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    max_tokens=4000,
)
# Folder that uploaded images are written to before being sent to the model.
IMAGE_SAVE_FOLDER = "./uploaded_images"
# exist_ok=True makes the creation idempotent and removes the window between
# an explicit existence check and the makedirs call, in which another session
# could create the folder and make makedirs raise FileExistsError.
os.makedirs(IMAGE_SAVE_FOLDER, exist_ok=True)
# Custom CSS injected into the page: reverses the flex direction and
# right-aligns the text of one Streamlit-generated element class.
# NOTE(review): presumably this targets the user's chat bubble; the class name
# is auto-generated by Streamlit, so verify it still matches after upgrades.
_CHAT_ALIGNMENT_CSS = """
<style>
.st-emotion-cache-janbn0 {
flex-direction: row-reverse;
text-align: right;
}
</style>
"""
# unsafe_allow_html is required for the raw <style> tag to be emitted as HTML.
st.markdown(_CHAT_ALIGNMENT_CSS, unsafe_allow_html=True)
# Seed the per-session state on the first run of a session. Each entry maps a
# session key to a zero-argument factory, so a default is only built when its
# key is actually missing. Order matters: "rag_memory" reads the "llm" entry,
# which therefore has to be seeded before it.
_session_defaults = {
    "messages": list,
    "llm": lambda: llm,
    "rag_memory": lambda: ConversationSummaryBufferMemory(
        llm=st.session_state.llm,
        max_token_limit=5000,
    ),
    "current_image": lambda: None,
    "last_displayed_image": lambda: None,
}
for _state_key, _make_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
# Container that the chat transcript is rendered into further down; it is
# created before the uploader widget.
container = st.container()

# Image upload widget (JPEG and PNG only).
uploaded_image = st.file_uploader(
    "Upload an image",
    type=["jpg", "jpeg", "png"],
    key="image_uploader",
)

# A file counts as new when one is present and it differs from the image
# remembered in the session from an earlier run.
is_new_upload = bool(uploaded_image) and uploaded_image != st.session_state.current_image
if is_new_upload:
    st.session_state.current_image = uploaded_image
    st.image(uploaded_image, caption="Newly Uploaded Image")
    # Record a system entry so the transcript shows where the image changed.
    upload_notice = {
        "role": "system",
        "content": f"New image uploaded: {uploaded_image.name}",
        "image": uploaded_image,
    }
    st.session_state.messages.append(upload_notice)
# Replay the stored transcript into the chat container. System entries that
# carry an uploaded image show the image first, followed by their text.
for entry in st.session_state.messages:
    role = entry["role"]
    with container.chat_message(role):
        carries_image = role == "system" and "image" in entry
        if carries_image:
            st.image(entry["image"])
        st.write(entry["content"])
# Take prompt: when the user submits a query, echo it, send it (together with
# the current image, if any) to the model, and record both sides of the
# exchange in the session.
if prompt := st.chat_input("Enter your query here..."):
    with container.chat_message("user"):
        st.write(prompt)
    # Save user input in session state
    st.session_state.messages.append({"role": "user", "content": prompt})

    current_image = st.session_state.current_image
    if current_image:
        # The file name comes from the client and is untrusted: keep only its
        # final path component so the saved file cannot land outside
        # IMAGE_SAVE_FOLDER.
        safe_name = os.path.basename(current_image.name)
        current_date = datetime.now().strftime("%Y%m%d")
        image_name = f"{current_date}_{safe_name}"
        image_path = os.path.join(IMAGE_SAVE_FOLDER, image_name)

        # Save uploaded image to disk (Pillow picks the output format from the
        # file extension).
        image = Image.open(current_image)
        image.save(image_path)

        # Encode image in base64
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode()

        # Label the data URL with the format that was actually written. The
        # uploader only accepts jpg/jpeg/png, so anything that is not ".png"
        # was saved as JPEG.
        extension = os.path.splitext(image_name)[1].lower()
        mime_type = "image/png" if extension == ".png" else "image/jpeg"

        # Send image and text to the model
        chat = HumanMessage(
            content=[
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64,{encoded_string}"},
                },
            ]
        )
    else:
        # Send only text to the model if no image is uploaded
        chat = HumanMessage(content=prompt)

    # Get AI response
    # NOTE(review): only the current message is sent; the context stored in
    # rag_memory below is not read back into this call anywhere in this
    # script, so the model does not see earlier turns. Confirm whether that
    # is intended.
    ai_msg = llm.invoke([chat]).content
    with container.chat_message("assistant"):
        st.write(ai_msg)

    # Save the conversation context in memory
    st.session_state.rag_memory.save_context({'input': prompt}, {'output': ai_msg})
    # Append the assistant's message to the session state
    st.session_state.messages.append({"role": "assistant", "content": ai_msg})