Update app.py
app.py
CHANGED
@@ -37,43 +37,30 @@ HF_TOKEN = os.environ["HF_TOKEN"]
 3. Load HuggingFace Embeddings (remember to use the URL we set above)
 4. Index Files if they do not exist, otherwise load the vectorstore
 """
-document_loader = TextLoader("./data/paul_graham_essays.txt")
-documents = document_loader.load()
-
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
-split_documents = text_splitter.split_documents(documents)
 
+vectorstore_path = "./data/vectorstore"
+index_file = os.path.join(vectorstore_path, "index.faiss")
 hf_embeddings = HuggingFaceEndpointEmbeddings(
     model=HF_EMBED_ENDPOINT,
     task="feature-extraction",
     huggingfacehub_api_token=HF_TOKEN,
 )
 
-if os.path.exists("./data/vectorstore"):
-    vectorstore = FAISS.load_local(
-        "./data/vectorstore",
-        hf_embeddings,
-        allow_dangerous_deserialization=True
-    )
-    hf_retriever = vectorstore.as_retriever()
-    print("Loaded Vectorstore")
-else:
-    print("Indexing Files")
-    os.makedirs("./data/vectorstore", exist_ok=True)
-    for i in range(0, len(split_documents), 32):
-        if i == 0:
-            vectorstore = FAISS.from_documents(split_documents[i:i+32], hf_embeddings)
-            continue
-        vectorstore.add_documents(split_documents[i:i+32])
-    vectorstore.save_local("./data/vectorstore")
-
+vectorstore = FAISS.load_local(
+    vectorstore_path,
+    hf_embeddings,
+    allow_dangerous_deserialization=True
+)
 hf_retriever = vectorstore.as_retriever()
+print("Loaded Vectorstore")
 
 # -- AUGMENTED -- #
 """
 1. Define a String Template
 2. Create a Prompt Template from the String Template
 """
+### 1. DEFINE STRING TEMPLATE
 RAG_PROMPT_TEMPLATE = """\
 <|start_header_id|>system<|end_header_id|>
 You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>
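One thing worth flagging in this hunk: the new code computes `index_file` but never checks it, so `FAISS.load_local` will raise if the vectorstore directory was not committed with the Space. A minimal sketch of a guarded version, assuming the `split_documents` pipeline this commit removes is kept around for the fallback (names as in app.py):

```python
import os
from langchain_community.vectorstores import FAISS

vectorstore_path = "./data/vectorstore"
index_file = os.path.join(vectorstore_path, "index.faiss")

if os.path.exists(index_file):
    # allow_dangerous_deserialization is needed because FAISS persists a pickle
    # (index.pkl) next to index.faiss.
    vectorstore = FAISS.load_local(
        vectorstore_path,
        hf_embeddings,
        allow_dangerous_deserialization=True,
    )
    print("Loaded Vectorstore")
else:
    # Fallback: rebuild the index from the split documents (removed above)
    # and save it so the next boot takes the fast path.
    print("Indexing Files")
    os.makedirs(vectorstore_path, exist_ok=True)
    vectorstore = FAISS.from_documents(split_documents, hf_embeddings)
    vectorstore.save_local(vectorstore_path)

hf_retriever = vectorstore.as_retriever()
```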
@@ -88,19 +75,21 @@ Context:
 <|start_header_id|>assistant<|end_header_id|>
 """
 
+### 2. CREATE PROMPT TEMPLATE
 rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
 
 # -- GENERATION -- #
 """
 1. Create a HuggingFaceEndpoint for the LLM
 """
+### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
 hf_llm = HuggingFaceEndpoint(
     endpoint_url=HF_LLM_ENDPOINT,
     max_new_tokens=512,
     top_k=10,
     top_p=0.95,
-    temperature=0.
-    repetition_penalty=1.
+    temperature=0.1,
+    repetition_penalty=1.0,
     huggingfacehub_api_token=HF_TOKEN,
 )
 
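On the sampling parameters: TGI-backed endpoints typically reject `temperature=0`, so a small positive value like `0.1` keeps generation near-greedy, and `repetition_penalty=1.0` is the neutral setting (no penalty applied). A quick smoke test, with a made-up prompt:

```python
# Illustrative only: verifies the endpoint accepts the sampling parameters above.
response = hf_llm.invoke("In one sentence, what is a retriever?")
print(response)
```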
@@ -126,6 +115,7 @@ async def start_chat():
 The user session is a dictionary that is unique to each user session, and is stored in the memory of the server.
 """
 
+### BUILD LCEL RAG CHAIN THAT ONLY RETURNS TEXT
 lcel_rag_chain = (
     {"context": itemgetter("query") | hf_retriever, "query": itemgetter("query")}
     | rag_prompt | hf_llm
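The dict at the head of the chain runs both branches on the same input: `itemgetter("query") | hf_retriever` pipes the query string into the retriever to produce `context`, while the second `itemgetter("query")` passes the question through untouched; `rag_prompt` then fills both slots and `hf_llm` returns plain text. A sketch of a direct invocation (the question is made up):

```python
# The chain takes {"query": ...} and returns a string, since HuggingFaceEndpoint
# is an LLM (text in, text out) rather than a chat model.
answer = lcel_rag_chain.invoke({"query": "What does the essay say about determination?"})
print(answer)
```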
@@ -146,7 +136,7 @@ async def main(message: cl.Message):
 
     msg = cl.Message(content="")
 
-    for chunk in lcel_rag_chain.stream(
+    async for chunk in lcel_rag_chain.astream(
         {"query": message.content},
         config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
     ):
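Only the loop head appears in this hunk; for completeness, a sketch of the usual Chainlit streaming pattern around it (the session lookup and the loop body are assumptions, not part of this diff):

```python
@cl.on_message
async def main(message: cl.Message):
    # Assumed: the chain was stashed in the user session during start_chat.
    lcel_rag_chain = cl.user_session.get("lcel_rag_chain")

    msg = cl.Message(content="")

    # astream yields text chunks as the endpoint generates them, so tokens can
    # be forwarded to the UI without blocking the event loop.
    async for chunk in lcel_rag_chain.astream(
        {"query": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)

    await msg.send()
```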