"""Streamlit app: chat with uploaded PDF files using OpenAI GPT-3.5 Turbo and LangChain."""
import os | |
import tempfile | |
import streamlit as st | |
from langchain.chains import ConversationalRetrievalChain | |
from langchain.memory import ConversationBufferMemory | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_community.chat_message_histories import StreamlitChatMessageHistory | |
from langchain_community.document_loaders import PyPDFLoader | |
from langchain_community.vectorstores import FAISS | |
from langchain_openai import ChatOpenAI, OpenAIEmbeddings | |
from streamlit_extras.add_vertical_space import add_vertical_space | |
def load_retriever(pdf_files):
    """Build a FAISS-backed retriever over the uploaded PDF files.

    Args:
        pdf_files: iterable of Streamlit ``UploadedFile`` objects; each must
            provide ``.name`` and ``.getvalue()``.

    Returns:
        A retriever using similarity-score-threshold search
        (score_threshold=0.5, k=5) over the embedded document chunks.
    """
    docs = []
    # PyPDFLoader reads from disk, so copy each upload into a temp dir.
    # Using the context manager guarantees the directory (and the copied
    # PDFs) is removed even if loading raises — the original leaked it
    # until garbage collection / interpreter exit.
    with tempfile.TemporaryDirectory() as temp_dir:
        for pdf_file in pdf_files:
            temp_pdf_file_path = os.path.join(temp_dir, pdf_file.name)
            with open(temp_pdf_file_path, "wb") as f:
                f.write(pdf_file.getvalue())
            loader = PyPDFLoader(temp_pdf_file_path)
            docs.extend(loader.load())

    # Token-based splitting keeps chunks within the embedding model's budget.
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1500, chunk_overlap=200
    )
    chunks = text_splitter.split_documents(docs)

    # Embed the chunks (network call to OpenAI) and index them in FAISS.
    embeddings = OpenAIEmbeddings()
    vector_db = FAISS.from_documents(chunks, embeddings)
    retriever = vector_db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.5, "k": 5},
    )
    return retriever
def main():
    """Run the Streamlit chat UI.

    Collects the OpenAI API key and PDF uploads from the sidebar, builds a
    retriever over the documents (cached in session state so PDFs are not
    re-embedded on every rerun), and answers chat questions with a
    ConversationalRetrievalChain whose history lives in Streamlit state.
    """
    st.set_page_config(
        page_title="Talk to PDF using GPT 3.5",
        page_icon="π°",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    st.header("Talk to PDF files π°", divider="rainbow")
    st.subheader(
        "Enjoy :red[talking] with :green[PDF] files using :sunglasses: OpenAI GPT 3.5 Turbo"
    )

    st.sidebar.title("Talk to PDF π°")
    st.sidebar.markdown(
        "[Checkout the repository](https://github.com/ThivaV/chat_with_pdf_using_gpt)"
    )
    st.sidebar.markdown(
        """
        ### This is a LLM powered chatbot, built using:
        * [Streamlit](https://streamlit.io)
        * [LangChain](https://python.langchain.com/)
        * [OpenAI](https://platform.openai.com/docs/models)
        ___
        """
    )
    add_vertical_space(2)

    # Halt the script until the user supplies an API key.
    openai_key = st.sidebar.text_input(label="Enter the OpenAI key π", type="password")
    if not openai_key:
        st.info("π :red[Please enter the OpenAI key] β")
        st.stop()

    # Expose the key to libraries that read it from the environment.
    os.environ["OPENAI_API_KEY"] = openai_key
    add_vertical_space(1)

    upload_pdf_files = st.sidebar.file_uploader(
        "Upload a pdf files π€", type="pdf", accept_multiple_files=True
    )
    if not upload_pdf_files:
        st.info("π :red[Please upload pdf files] β")
        st.stop()

    # Streamlit reruns the whole script on every interaction, so without
    # caching every chat message would re-read, re-chunk and re-embed the
    # PDFs (a paid OpenAI call each time). Cache the retriever in session
    # state and rebuild only when the set of uploaded files changes.
    upload_key = tuple(sorted(f.name for f in upload_pdf_files))
    if st.session_state.get("retriever_key") != upload_key:
        st.session_state["retriever"] = load_retriever(upload_pdf_files)
        st.session_state["retriever_key"] = upload_key
    retriever = st.session_state["retriever"]

    # Chat history persisted in Streamlit session state.
    chat_history = StreamlitChatMessageHistory()

    # init chat history memory
    memory = ConversationBufferMemory(
        memory_key="chat_history", chat_memory=chat_history, return_messages=True
    )

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=openai_key,
        temperature=0,
        streaming=True,
    )

    chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=retriever, memory=memory, verbose=False
    )

    # Re-draw the stored chat history in the chat window after each rerun.
    for message in chat_history.messages:
        st.chat_message(message.type).write(message.content)

    if prompt := st.chat_input("Ask questions"):
        with st.chat_message("human"):
            st.markdown(prompt)
        response = chain.run(prompt)
        with st.chat_message("ai"):
            st.write(response)
if __name__ == "__main__":
    # Streamlit executes this script top-to-bottom on every rerun;
    # main() builds the UI and handles one interaction cycle.
    main()