import os
import base64
from PIL import Image
import streamlit as st
from langchain.memory import ConversationSummaryBufferMemory
from langchain_google_genai import ChatGoogleGenerativeAI
from datetime import datetime
from langchain_core.messages import HumanMessage
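
# Launch with: streamlit run <path-to-this-file>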

# NOTE: avoid hardcoding real API keys in source; prefer an environment variable or st.secrets
os.environ["GOOGLE_API_KEY"] = "AIzaSyAc0VslmJlmiTFx7GB8QPYEHUZ5nZb5_Nk"
st.title("Vision Bot")

# Gemini model used for both text-only and image + text prompts.
# Note: some langchain_google_genai versions expect max_output_tokens rather than max_tokens.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    max_tokens=4000
)

IMAGE_SAVE_FOLDER = "./uploaded_images"
os.makedirs(IMAGE_SAVE_FOLDER, exist_ok=True)

# Right-align the user's chat bubbles; the emotion class name below is generated by the
# installed Streamlit version and may change after an upgrade.
st.markdown(
    """
<style>
    .st-emotion-cache-janbn0 {
        flex-direction: row-reverse;
        text-align: right;
    }
</style>
""",
    unsafe_allow_html=True,
)

# Initialize session states
if "messages" not in st.session_state:
    st.session_state.messages = []
if "llm" not in st.session_state:
    st.session_state.llm = llm
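# ConversationSummaryBufferMemory keeps a rolling summary of older turns plus the most recent
# messages, trimmed to roughly max_token_limit tokens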
if "rag_memory" not in st.session_state:
    st.session_state.rag_memory = ConversationSummaryBufferMemory(llm=st.session_state.llm, max_token_limit=5000)
if "current_image" not in st.session_state:
    st.session_state.current_image = None
if "last_displayed_image" not in st.session_state:
    st.session_state.last_displayed_image = None
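# Note: last_displayed_image is initialized here but not referenced again in this script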

container = st.container()

# Upload image
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"], key="image_uploader")

# Check if a new image is uploaded
if uploaded_image and uploaded_image != st.session_state.current_image:
    st.session_state.current_image = uploaded_image
    st.image(uploaded_image, caption="Newly Uploaded Image")
    
    # Add a system message to mark the new image in the conversation
    st.session_state.messages.append({
        "role": "system", 
        "content": f"New image uploaded: {uploaded_image.name}",
        "image": uploaded_image
    })
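# Keeping the UploadedFile in the message dict lets the history loop below re-render the image on each rerun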

# Display messages
for message in st.session_state.messages:
    with container.chat_message(message["role"]):
        if message["role"] == "system" and "image" in message:
            st.image(message["image"])
        st.write(message["content"])

# Take prompt
if prompt := st.chat_input("Enter your query here..."):
    with container.chat_message("user"):
        st.write(prompt)

    # Save user input in session state
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Prepend the summarized conversation so the model sees earlier turns, not just this prompt
    history = st.session_state.rag_memory.load_memory_variables({}).get("history", "")
    model_prompt = f"Conversation so far:\n{history}\n\nCurrent message: {prompt}" if history else prompt

    if st.session_state.current_image:
        # Save the uploaded image to disk with a date-stamped filename
        st.session_state.current_image.seek(0)  # rewind in case the buffer was already read
        image = Image.open(st.session_state.current_image)
        current_date = datetime.now().strftime("%Y%m%d")
        image_name = f"{current_date}_{st.session_state.current_image.name}"
        image_path = os.path.join(IMAGE_SAVE_FOLDER, image_name)
        image.save(image_path)

        # Encode the saved image in base64 for the data URL
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode()

        # Use the file's actual MIME type (e.g. image/png) instead of assuming JPEG
        mime_type = st.session_state.current_image.type or "image/jpeg"

        # Send image and text to the model
        chat = HumanMessage(
            content=[
                {"type": "text", "text": model_prompt},
                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{encoded_string}"}},
            ]
        )
    else:
        # Send only text to the model if no image is uploaded
        chat = HumanMessage(content=model_prompt)

    # Get AI response
    ai_msg = st.session_state.llm.invoke([chat]).content
    with container.chat_message("assistant"): 
        st.write(ai_msg)

    # Save the conversation context in memory
    st.session_state.rag_memory.save_context({'input': prompt}, {'output': ai_msg})
    
    # Append the assistant's message to the session state
    st.session_state.messages.append({"role": "assistant", "content": ai_msg})