Waseem771 committed on
Commit
935976c
·
verified ·
1 Parent(s): 20b6d1f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from PyPDF2 import PdfReader
4
+ import docx
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.llms import OpenAI
7
+ from dotenv import load_dotenv
8
+ from langchain.embeddings import HuggingFaceEmbeddings
9
+ from langchain.text_splitter import CharacterTextSplitter
10
+ from langchain.vectorstores import FAISS
11
+ from langchain.chains import ConversationalRetrievalChain
12
+ from langchain.memory import ConversationBufferMemory
13
+ from streamlit_chat import message
14
+ from langchain.callbacks import get_openai_callback
15
+
16
# Load environment variables (python-dotenv), then read the OpenAI API key
# from the process environment. The value is None when the variable is unset;
# main() checks for that before building the conversation chain.
load_dotenv()
openapi_key = os.environ.get("OPENAI_API_KEY")
19
+
20
def main():
    """Render the DocumentGPT page and drive the upload -> index -> chat flow.

    The sidebar offers a PDF/DOCX uploader and a "Process" button. Pressing
    the button extracts text from the uploads, splits it into chunks, builds
    a vector store, and stores a conversational retrieval chain in
    ``st.session_state.conversation``. Once processing has completed, a chat
    input is shown and each question is passed to ``handle_user_input``.
    """
    st.set_page_config(page_title="Chat with your file")
    st.header("DocumentGPT")

    # Create each session slot once so later reads never hit a missing key.
    for slot in ("conversation", "chat_history", "processComplete"):
        if slot not in st.session_state:
            st.session_state[slot] = None

    with st.sidebar:
        uploaded_files = st.file_uploader(
            "Upload your file",
            type=['pdf', 'docx'],
            accept_multiple_files=True,
        )
        process = st.button("Process")

    if process:
        if not openapi_key:
            st.info("Please add your OpenAI API key to continue.")
            st.stop()
        # Without this guard an empty upload list flows through as an empty
        # chunk list, and building the vector store fails with an opaque error.
        if not uploaded_files:
            st.warning("Please upload at least one PDF or DOCX file before processing.")
            st.stop()

        files_text = get_files_text(uploaded_files)
        # Files with no extractable text would likewise yield zero chunks.
        if not files_text.strip():
            st.warning("No text could be extracted from the uploaded files.")
            st.stop()
        st.write("File loaded...")

        text_chunks = get_text_chunks(files_text)
        st.write("File chunks created...")

        vectorstore = get_vectorstore(text_chunks)
        st.write("Vector Store Created...")

        st.session_state.conversation = get_conversation_chain(vectorstore, openapi_key)
        st.session_state.processComplete = True

    if st.session_state.processComplete:
        user_question = st.chat_input("Ask a question about your files.")
        if user_question:
            handle_user_input(user_question)
52
+
53
def get_files_text(uploaded_files):
    """Return the concatenated text of every supported uploaded file.

    Args:
        uploaded_files: Iterable of uploaded file objects; each must expose a
            ``name`` attribute and be readable by the matching extractor.

    Returns:
        A single string with the text of all ``.pdf`` and ``.docx`` files, in
        upload order. Files with any other extension contribute nothing.
    """
    pieces = []
    for uploaded_file in uploaded_files:
        # Compare case-insensitively so "REPORT.PDF" / "Notes.DOCX" are not
        # silently skipped.
        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
        if file_extension == ".pdf":
            pieces.append(get_pdf_text(uploaded_file))
        elif file_extension == ".docx":
            pieces.append(get_docx_text(uploaded_file))
    return "".join(pieces)
62
+
63
def get_pdf_text(pdf):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf: A path or binary file-like object accepted by ``PdfReader``.

    Returns:
        The extracted text as one string; pages that yield no text add
        nothing.
    """
    pdf_reader = PdfReader(pdf)
    # extract_text() may give None/empty for pages without a text layer
    # (e.g. scanned images); coalesce so the concatenation cannot raise.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
69
+
70
def get_docx_text(file):
    """Return the text of a .docx document, one paragraph per line.

    Args:
        file: A path or binary file-like object accepted by ``docx.Document``.

    Returns:
        The paragraph texts joined with newlines.
    """
    doc = docx.Document(file)
    # Join with "\n" rather than " ": get_text_chunks splits on "\n", so a
    # space-joined document would have no split points and end up as one
    # oversized chunk.
    return '\n'.join(para.text for para in doc.paragraphs)
73
+
74
def get_text_chunks(text):
    """Split ``text`` into chunks for embedding.

    The splitter breaks on newlines, targets chunks of 900 characters
    (measured with ``len``) and overlaps neighbouring chunks by 100.

    Args:
        text: The full document text.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 900,
        "chunk_overlap": 100,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)
82
+
83
def get_vectorstore(text_chunks):
    """Embed ``text_chunks`` and index them in a FAISS vector store.

    Args:
        text_chunks: The text chunks to embed and index.

    Returns:
        The FAISS store built from the chunks using the
        "all-MiniLM-L6-v2" HuggingFace embedding model.
    """
    return FAISS.from_texts(
        text_chunks,
        HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
    )
86
+
87
def get_conversation_chain(vectorstore, openapi_key):
    """Build the conversational retrieval chain used to answer questions.

    Args:
        vectorstore: Vector store whose ``as_retriever()`` supplies context.
        openapi_key: OpenAI API key passed to the chat model.

    Returns:
        A ``ConversationalRetrievalChain`` combining a gpt-3.5-turbo chat
        model (temperature 0), the store's retriever, and a buffer memory
        stored under the ``chat_history`` key.
    """
    chat_model = ChatOpenAI(
        openai_api_key=openapi_key,
        model_name='gpt-3.5-turbo',
        temperature=0,
    )
    history = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vectorstore.as_retriever(),
        memory=history,
    )
    return chain
95
+
96
def handle_user_input(user_question):
    """Send a question to the conversation chain and render the chat history.

    The chain stored in ``st.session_state.conversation`` is invoked with the
    question; the ``chat_history`` it returns is saved to session state and
    every entry is drawn with ``streamlit_chat.message``.

    Args:
        user_question: The text the user typed into the chat input.
    """
    # The callback context is kept around the call as in the original; its
    # value was never read, so it is no longer bound to a name.
    with get_openai_callback():
        response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    with st.container():
        # The loop variable must not be called `message`: that would make the
        # name local to this function, shadow the imported renderer, and fail
        # with "object is not callable" on the first entry.
        # Even positions are drawn as the user — presumably the history
        # alternates user/assistant turns; confirm against the memory class.
        for i, chat_message in enumerate(st.session_state.chat_history):
            message(chat_message.content, is_user=(i % 2 == 0), key=str(i))
105
+
106
# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()