araeyn committed on
Commit a500b2e
1 Parent(s): ac0cadc

Update app.py

Files changed (1)
  1. app.py +113 -110
app.py CHANGED
@@ -47,116 +47,119 @@ async def echo(websocket):
 async def main():
     async with serve(echo, "0.0.0.0", 7860):
         await asyncio.Future()
-
-if not os.path.isdir('database'):
-    os.system("unzip database.zip")
-
-loader = DirectoryLoader('./database', glob="./*.txt", loader_cls=TextLoader)
-
-documents = loader.load()
-
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-splits = text_splitter.split_documents(documents)
-
-print()
-print("-------")
-print("TextSplitter, DirectoryLoader")
-print("-------")
-
-persist_directory = 'db'
-
-# embedding = HuggingFaceInferenceAPIEmbeddings(api_key=os.environ["HUGGINGFACE_API_KEY"], model=)
-model_name = "BAAI/bge-large-en"
-model_kwargs = {'device': 'cpu'}
-encode_kwargs = {'normalize_embeddings': True}
-embedding = HuggingFaceBgeEmbeddings(
-    model_name=model_name,
-    model_kwargs=model_kwargs,
-    encode_kwargs=encode_kwargs,
-    show_progress=True,
-)
-
-print()
-print("-------")
-print("Embeddings")
-print("-------")
-
-vectorstore = Chroma.from_documents(documents=splits, embedding=embedding)
-
-def format_docs(docs):
-    return "\n\n".join(doc.page_content for doc in docs)
-
-retriever = vectorstore.as_retriever()
-
-prompt = hub.pull("rlm/rag-prompt")
-llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1")
-rag_chain = (
-    {"context": retriever | format_docs, "question": RunnablePassthrough()}
-    | prompt
-    | llm
-    | StrOutputParser()
-)
-
-print()
-print("-------")
-print("Retriever, Prompt, LLM, Rag_Chain")
-print("-------")
-
-### Contextualize question ###
-contextualize_q_system_prompt = """Given a chat history and the latest user question \
-which might reference context in the chat history, formulate a standalone question \
-which can be understood without the chat history. Do NOT answer the question, \
-just reformulate it if needed and otherwise return it as is."""
-contextualize_q_prompt = ChatPromptTemplate.from_messages(
-    [
-        ("system", contextualize_q_system_prompt),
-        MessagesPlaceholder("chat_history"),
-        ("human", "{input}"),
-    ]
-)
-history_aware_retriever = create_history_aware_retriever(
-    llm, retriever, contextualize_q_prompt
-)
-
-
-### Answer question ###
-qa_system_prompt = """You are an assistant for question-answering tasks. \
-Use the following pieces of retrieved context to answer the question. \
-If you don't know the answer, just say that you don't know. \
-Use three sentences maximum and keep the answer concise.\
-
-{context}"""
-qa_prompt = ChatPromptTemplate.from_messages(
-    [
-        ("system", qa_system_prompt),
-        MessagesPlaceholder("chat_history"),
-        ("human", "{input}"),
-    ]
-)
-question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
-
-rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
-
-
-### Statefully manage chat history ###
-store = {}
-
-
-def get_session_history(session_id: str) -> BaseChatMessageHistory:
-    if session_id not in store:
-        store[session_id] = ChatMessageHistory()
-    return store[session_id]
-
-
-conversational_rag_chain = RunnableWithMessageHistory(
-    rag_chain,
-    get_session_history,
-    input_messages_key="input",
-    history_messages_key="chat_history",
-    output_messages_key="answer",
-)
-
-asyncio.run(main())
+def g():
+    if not os.path.isdir('database'):
+        os.system("unzip database.zip")
+
+    loader = DirectoryLoader('./database', glob="./*.txt", loader_cls=TextLoader)
+
+    documents = loader.load()
+
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    splits = text_splitter.split_documents(documents)
+
+    print()
+    print("-------")
+    print("TextSplitter, DirectoryLoader")
+    print("-------")
+
+    persist_directory = 'db'
+
+    # embedding = HuggingFaceInferenceAPIEmbeddings(api_key=os.environ["HUGGINGFACE_API_KEY"], model=)
+    model_name = "BAAI/bge-large-en"
+    model_kwargs = {'device': 'cpu'}
+    encode_kwargs = {'normalize_embeddings': True}
+    embedding = HuggingFaceBgeEmbeddings(
+        model_name=model_name,
+        model_kwargs=model_kwargs,
+        encode_kwargs=encode_kwargs,
+        show_progress=True,
+    )
+
+    print()
+    print("-------")
+    print("Embeddings")
+    print("-------")
+
+    vectorstore = Chroma.from_documents(documents=splits, embedding=embedding)
+
+    def format_docs(docs):
+        return "\n\n".join(doc.page_content for doc in docs)
+
+    retriever = vectorstore.as_retriever()
+
+    prompt = hub.pull("rlm/rag-prompt")
+    llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1")
+    rag_chain = (
+        {"context": retriever | format_docs, "question": RunnablePassthrough()}
+        | prompt
+        | llm
+        | StrOutputParser()
+    )
+
+    print()
+    print("-------")
+    print("Retriever, Prompt, LLM, Rag_Chain")
+    print("-------")
+
+    ### Contextualize question ###
+    contextualize_q_system_prompt = """Given a chat history and the latest user question \
+which might reference context in the chat history, formulate a standalone question \
+which can be understood without the chat history. Do NOT answer the question, \
+just reformulate it if needed and otherwise return it as is."""
+    contextualize_q_prompt = ChatPromptTemplate.from_messages(
+        [
+            ("system", contextualize_q_system_prompt),
+            MessagesPlaceholder("chat_history"),
+            ("human", "{input}"),
+        ]
+    )
+    history_aware_retriever = create_history_aware_retriever(
+        llm, retriever, contextualize_q_prompt
+    )
+
+
+    ### Answer question ###
+    qa_system_prompt = """You are an assistant for question-answering tasks. \
+Use the following pieces of retrieved context to answer the question. \
+If you don't know the answer, just say that you don't know. \
+Use three sentences maximum and keep the answer concise.\
+
+{context}"""
+    qa_prompt = ChatPromptTemplate.from_messages(
+        [
+            ("system", qa_system_prompt),
+            MessagesPlaceholder("chat_history"),
+            ("human", "{input}"),
+        ]
+    )
+    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
+
+    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
+
+
+    ### Statefully manage chat history ###
+    store = {}
+
+
+    def get_session_history(session_id: str) -> BaseChatMessageHistory:
+        if session_id not in store:
+            store[session_id] = ChatMessageHistory()
+        return store[session_id]
+
+
+    conversational_rag_chain = RunnableWithMessageHistory(
+        rag_chain,
+        get_session_history,
+        input_messages_key="input",
+        history_messages_key="chat_history",
+        output_messages_key="answer",
+    )
+
+def f():
+    asyncio.run(main())
+Process(f).start()
+Process(g).start()
 """
 websocket
 streamlit app ~> backend
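
Note on the change: the code that previously ran at module level (and was blocked behind, or blocking, asyncio.run(main())) is now wrapped in g(), the websocket event loop is wrapped in f(), and both are launched with Process(...).start(), presumably multiprocessing.Process imported elsewhere in app.py (the hunk does not show the imports). A minimal runnable sketch of that launch pattern, with the function bodies replaced by stand-ins that are not from the commit:

    import asyncio
    from multiprocessing import Process

    async def main():
        # stand-in for the `serve(echo, "0.0.0.0", 7860)` websocket loop
        await asyncio.sleep(1)

    def f():
        # the blocking event loop gets its own process
        asyncio.run(main())

    def g():
        # stand-in for the document-loading / embedding / RAG setup
        print("building vector store...")

    if __name__ == "__main__":
        # the __main__ guard matters for multiprocessing on spawn-based
        # platforms; both processes then run without blocking each other
        Process(f).start()
        Process(g).start()

The upshot is that the websocket server starts serving immediately while the slow index build proceeds in parallel, which is what the old single-process layout could not do.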