import streamlit as st
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from dotenv import load_dotenv
import PyPDF2
import os
import io
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# Define SPEAKER_TYPES to distinguish between user and bot roles
SPEAKER_TYPES = {
"USER": "user",
"BOT": "bot"
}
# Define the initial prompt to show when the app starts
initial_prompt = {
'role': SPEAKER_TYPES["BOT"],
'content': "Hello! I am your Gemini Pro RAG chatbot. You can ask me questions after uploading a PDF."
}
# --- Your RAG chatbot logic ---
source_data_folder = "MyData"
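# Split extracted PDF text into ~2,000-character chunks with a 200-character
# overlap so that context is preserved across chunk boundaries.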
text_splitter = RecursiveCharacterTextSplitter(
separators=["\n\n", "\n", ". ", " ", ""],
chunk_size=2000,
chunk_overlap=200
)
embeddings_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
path_db = "/content/VectorDB"
load_dotenv()  # Load the API key from a local .env file instead of hard-coding it
# GOOGLE_API_KEY is the variable name the .env file is assumed to provide
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=os.getenv("GOOGLE_API_KEY"))
# --- Streamlit app starts here ---
# Set up the Streamlit app configuration
st.set_page_config(
page_title="Gemini Pro RAG App",
    page_icon="📚",
layout="wide",
initial_sidebar_state="expanded",
)
# Initialize session state for chat history and vectorstore (PDF context)
if 'chat_history' not in st.session_state:
st.session_state.chat_history = [initial_prompt]
if 'vectorstore' not in st.session_state:
st.session_state.vectorstore = None
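# Note: st.session_state persists across Streamlit reruns, so the chat history
# and the uploaded PDF's vector index survive each user interaction.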
# Function to clear chat history
def clear_chat_history():
st.session_state.chat_history = [initial_prompt]
# Extract text from PDF
def extract_text_from_pdf(pdf_bytes):
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for image-only pages
        text += page.extract_text() or ""
    return text
# Initialize the vector store from the extracted PDF text
def initialize_vector_index(text):
    # create_documents wraps the raw string in Document objects before splitting
    splits = text_splitter.create_documents([text])
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings_model, persist_directory=path_db)
    return vectorstore
# Sidebar configuration
with st.sidebar:
    st.title('📚 Gemini RAG Chatbot')
st.write('This chatbot uses the Gemini Pro API with RAG capabilities.')
st.button('Clear Chat History', on_click=clear_chat_history, type='primary')
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"], help="Upload your PDF file here to start the analysis.")
if uploaded_file is not None:
st.success("PDF File Uploaded Successfully!")
text = extract_text_from_pdf(uploaded_file.read())
vectorstore = initialize_vector_index(text)
st.session_state.vectorstore = vectorstore
# Main interface
st.header('Gemini Pro RAG Chatbot')
st.subheader('Upload a PDF and ask questions about its content!')
# Display the welcome prompt if chat history is only the initial prompt
if len(st.session_state.chat_history) == 1:
    with st.chat_message(SPEAKER_TYPES["BOT"], avatar="🤖"):
        st.write(initial_prompt['content'])
# Get user input
prompt = st.chat_input("Ask a question about the PDF content:", key="user_input")
# Helper to merge retrieved documents into a single context string
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Function to get a response from the RAG chain
def get_rag_response(question):
    retriever = st.session_state.vectorstore.as_retriever()  # Use the stored vectorstore retriever
    # Simple grounding prompt (the exact wording here is an assumption)
    rag_prompt = PromptTemplate.from_template(
        "Answer the question using only the context below.\n\n"
        "Context:\n{context}\n\nQuestion: {question}\n\nAnswer:"
    )
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | rag_prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain.invoke(question)
# Handle the user prompt and generate response
if prompt:
    # Add the user prompt to chat history
    st.session_state.chat_history.append({'role': SPEAKER_TYPES["USER"], 'content': prompt})
    # Display chat messages from the chat history (skipping the initial prompt)
    for message in st.session_state.chat_history[1:]:
        with st.chat_message(message["role"], avatar="👤" if message['role'] == SPEAKER_TYPES["USER"] else "🤖"):
            st.write(message["content"])
    if st.session_state.vectorstore is None:
        st.warning("Please upload a PDF file before asking questions.")
    else:
        # Get the response using the RAG chain
        with st.spinner(text='Generating response...'):
            response_text = get_rag_response(prompt)
        st.session_state.chat_history.append({'role': SPEAKER_TYPES["BOT"], 'content': response_text})
        # Display the bot response
        with st.chat_message(SPEAKER_TYPES["BOT"], avatar="🤖"):
            st.write(response_text)
# Add footer for additional information or credits
st.markdown("""
<hr>
<div style="text-align: center;">
<small>Powered by Gemini Pro API | Developed by Christian Thomas BADOLO</small>
</div>
""", unsafe_allow_html=True)