import io
import os

import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI


# Define SPEAKER_TYPES to distinguish between user and bot roles
SPEAKER_TYPES = {
    "USER": "user",
    "BOT": "bot"
}

# Define the initial prompt to show when the app starts
initial_prompt = {
    'role': SPEAKER_TYPES["BOT"],
    'content': "Hello! I am your Gemini Pro RAG chatbot. You can ask me questions after uploading a PDF."
}


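# The Google API key is read from the environment rather than hard-coded.
# Assumed .env layout (adjust the variable name if your setup differs):
#
#   GOOGLE_API_KEY=your-api-key-here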
# --- RAG chatbot configuration ---
load_dotenv()  # load GOOGLE_API_KEY from the environment / a local .env file

text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ". ", " ", ""],
    chunk_size=2000,
    chunk_overlap=200
)
embeddings_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
path_db = "VectorDB"  # local directory where Chroma persists the index
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=os.getenv("GOOGLE_API_KEY"),  # never commit a hard-coded key
)

# --- Streamlit app starts here ---
# Set up the Streamlit app configuration
st.set_page_config(
    page_title="Gemini Pro RAG App",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Initialize session state for chat history and vectorstore (PDF context)
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = [initial_prompt]
if 'vectorstore' not in st.session_state:
    st.session_state.vectorstore = None

# Function to clear chat history
def clear_chat_history():
    st.session_state.chat_history = [initial_prompt]

# Extract plain text from an uploaded PDF
def extract_text_from_pdf(pdf_bytes):
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text() or ""  # extract_text() can return None
    return text

# Split the text into chunks and index them in a persistent Chroma vectorstore
def initialize_vector_index(text):
    docs = [Document(page_content=text)]  # split_documents expects Document objects, not dicts
    splits = text_splitter.split_documents(docs)
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings_model, persist_directory=path_db)
    return vectorstore

# Sidebar configuration
with st.sidebar:
    st.title('🔍 Gemini RAG Chatbot')
    st.write('This chatbot uses the Gemini Pro API with RAG capabilities.')
    st.button('Clear Chat History', on_click=clear_chat_history, type='primary')
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"], help="Upload your PDF file here to start the analysis.")
    if uploaded_file is not None:
        # Streamlit reruns this script on every interaction, so only rebuild
        # the index when a new file is uploaded.
        if st.session_state.get("indexed_file") != uploaded_file.name:
            with st.spinner("Indexing PDF..."):
                text = extract_text_from_pdf(uploaded_file.read())
                st.session_state.vectorstore = initialize_vector_index(text)
                st.session_state.indexed_file = uploaded_file.name
        st.success("PDF File Uploaded Successfully!")

# Main interface
st.header('Gemini Pro RAG Chatbot')
st.subheader('Upload a PDF and ask questions about its content!')

# Display the welcome prompt if chat history is only the initial prompt
if len(st.session_state.chat_history) == 1:
    with st.chat_message(SPEAKER_TYPES["BOT"], avatar="🔍"):  # dict access, not attribute access
        st.write(initial_prompt['content'])

# Get user input
prompt = st.chat_input("Ask a question about the PDF content:", key="user_input")

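# Helpers for the RAG chain below. format_docs is the standard LCEL pattern for
# collapsing retrieved documents into a single context string; the prompt
# wording here is an assumption, so adapt it to your needs.
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_prompt = ChatPromptTemplate.from_template(
    "Answer the question using only the context below.\n\n"
    "Context:\n{context}\n\n"
    "Question: {question}"
)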
# Build and run the RAG chain: retrieve relevant chunks, format them into the
# prompt as context, and parse the model's reply to a plain string.
def get_rag_response(question):
    retriever = st.session_state.vectorstore.as_retriever()  # retriever over the indexed PDF
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | rag_prompt  # the question must flow into a prompt template, not be piped as the chain step itself
        | llm
        | StrOutputParser()
    )
    return rag_chain.invoke(question)

# Handle the user prompt and generate a response
if prompt:
    if st.session_state.vectorstore is None:
        st.warning("Please upload a PDF first so there is context to answer from.")
        st.stop()

    # Add the user prompt to the chat history
    st.session_state.chat_history.append({'role': SPEAKER_TYPES["USER"], 'content': prompt})

    # Display chat messages from the chat history (skipping the initial greeting)
    for message in st.session_state.chat_history[1:]:
        with st.chat_message(message["role"], avatar="👀" if message['role'] == SPEAKER_TYPES["USER"] else "🔍"):
            st.write(message["content"])

    # Get the response using the RAG chain
    with st.spinner(text='Generating response...'):
        response_text = get_rag_response(prompt)
        st.session_state.chat_history.append({'role': SPEAKER_TYPES["BOT"], 'content': response_text})

    # Display the bot response
    with st.chat_message(SPEAKER_TYPES["BOT"], avatar="🔍"):
        st.write(response_text)

# Add footer for additional information or credits
st.markdown("""
<hr>
<div style="text-align: center;">
    <small>Powered by Gemini Pro API | Developed by Christian Thomas BADOLO</small>
</div>
""", unsafe_allow_html=True)