Update app.py
app.py CHANGED
@@ -2,7 +2,7 @@ import os
 import streamlit as st
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.vectorstores import Chroma
-from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chat_models import ChatOpenAI
 from langchain.chains import ConversationalRetrievalChain, ConversationChain
 from langchain.memory import ConversationBufferMemory
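A side note on these imports: the `langchain.embeddings.openai`, `langchain.vectorstores`, and `langchain.chat_models` paths are the pre-0.1 LangChain layout. On newer releases they emit deprecation warnings and live in split packages; a sketch of the equivalents, assuming the split packages are installed (not part of this diff):

```python
# Equivalent imports on LangChain >= 0.1 (assumes the langchain-openai,
# langchain-community, and langchain-text-splitters packages are installed).
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
```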
@@ -44,15 +44,12 @@ def load_texts_from_papers(papers):
         file_path = save_uploaded_file(paper)
         loader = PyPDFLoader(file_path)
         documents = loader.load()
-
-        # Using RecursiveCharacterTextSplitter with proper encoding handling
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=1000,
             chunk_overlap=200,
             length_function=len,
             is_separator_regex=False,
         )
-
         texts = text_splitter.split_documents(documents)
         all_texts.extend(texts)
         os.remove(file_path)
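The dropped comment claimed "proper encoding handling", but the splitter only chunks text; encoding is handled upstream by PyPDFLoader. What these parameters do control: chunks of at most 1000 characters (measured by `len`) with 200 characters of overlap, so text spanning a chunk boundary appears in both chunks. A standalone sketch of the same configuration (sample text is illustrative):

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # max characters per chunk, measured by len()
    chunk_overlap=200,  # adjacent chunks share up to 200 characters
    length_function=len,
    is_separator_regex=False,
)

# split_text takes a raw string; split_documents (used above) applies
# the same splitting to each Document's page_content.
chunks = splitter.split_text("A sentence about retrieval. " * 200)
print(len(chunks), max(len(c) for c in chunks))
```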
@@ -61,8 +58,8 @@ def load_texts_from_papers(papers):
     return all_texts

 @st.cache_resource
-def initialize_vectorstore():
-    embedding = OpenAIEmbeddings(openai_api_key=
+def initialize_vectorstore(api_key):  # Added api_key parameter
+    embedding = OpenAIEmbeddings(openai_api_key=api_key)
     vectorstore = Chroma(embedding_function=embedding, persist_directory="db")
     return vectorstore

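Worth noting about this change: `@st.cache_resource` keys the cache on the function's arguments, so adding `api_key` as a parameter means Streamlit builds and caches one vectorstore per distinct key, and editing the key in the sidebar triggers a fresh call instead of silently reusing a client built with the old key. A sketch of that behavior (the key strings are placeholders):

```python
# st.cache_resource hashes the arguments; same key -> same cached object.
vs_a1 = initialize_vectorstore("sk-aaa")  # computed, then cached
vs_a2 = initialize_vectorstore("sk-aaa")  # cache hit: vs_a2 is vs_a1
vs_b = initialize_vectorstore("sk-bbb")   # new argument: computed again
```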
@@ -71,9 +68,6 @@ def main():

     # Get API key from sidebar
     api_key = create_sidebar()
-
-    if api_key:
-        st.session_state.api_key = api_key

     st.title("Chat with PDF")
     papers = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
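`create_sidebar()` is called here but defined outside the shown hunks, so its body is not in this diff. A hypothetical sketch of what it presumably does (the widget label and masking are assumptions, not from the source):

```python
import streamlit as st

def create_sidebar():
    # Hypothetical reconstruction; the real body is not part of this diff.
    # Assumes the key is entered in a masked sidebar text field.
    with st.sidebar:
        return st.text_input("OpenAI API key", type="password")
```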
@@ -86,13 +80,13 @@ def main():
         return

     try:
-        vectorstore = initialize_vectorstore()
+        vectorstore = initialize_vectorstore(api_key)
         texts = load_texts_from_papers(papers) if papers else []

         if texts:
             vectorstore.add_documents(texts)
             qa_chain = ConversationalRetrievalChain.from_llm(
-                ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
+                ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=api_key),  # Added api_key here
                 vectorstore.as_retriever(),
                 memory=ConversationBufferMemory(
                     memory_key="chat_history",
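For context, a chain built this way is typically invoked with a `question` key, and the attached `ConversationBufferMemory` supplies `chat_history` automatically on follow-up turns. A usage sketch (the question string is illustrative):

```python
# With memory attached, the chain reads and updates chat_history itself.
result = qa_chain({"question": "What problem does this paper address?"})
print(result["answer"])
```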
@@ -103,7 +97,7 @@ def main():
         else:
             memory = ConversationBufferMemory(memory_key="chat_history")
             qa_chain = ConversationChain(
-                llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
+                llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=api_key),  # Added api_key here
                 memory=memory
             )
