aaromosshf2424
committed on
Commit
·
75d1e33
1
Parent(s):
58cc93f
update to app one more time
Browse files
app.py
CHANGED
@@ -42,12 +42,9 @@ HF_TOKEN = os.environ["HF_TOKEN"]
|
|
42 |
document_loader = TextLoader("./data/paul_graham_essays.txt")
|
43 |
documents = document_loader.load()
|
44 |
|
45 |
-
### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
|
46 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
|
47 |
split_documents = text_splitter.split_documents(documents)
|
48 |
|
49 |
-
### 3. LOAD HUGGINGFACE EMBEDDINGS
|
50 |
-
|
51 |
hf_embeddings = HuggingFaceEndpointEmbeddings(
|
52 |
model=HF_EMBED_ENDPOINT,
|
53 |
task="feature-extraction",
|
@@ -79,7 +76,6 @@ hf_retriever = vectorstore.as_retriever()
|
|
79 |
1. Define a String Template
|
80 |
2. Create a Prompt Template from the String Template
|
81 |
"""
|
82 |
-
### 1. DEFINE STRING TEMPLATE
|
83 |
RAG_PROMPT_TEMPLATE = """\
|
84 |
<|start_header_id|>system<|end_header_id|>
|
85 |
You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>
|
@@ -100,16 +96,14 @@ rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
|
|
100 |
"""
|
101 |
1. Create a HuggingFaceEndpoint for the LLM
|
102 |
"""
|
103 |
-
### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
|
104 |
hf_llm = HuggingFaceEndpoint(
|
105 |
endpoint_url=HF_LLM_ENDPOINT,
|
106 |
max_new_tokens=512,
|
107 |
top_k=10,
|
108 |
top_p=0.95,
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
huggingfacehub_api_token=HF_TOKEN
|
113 |
)
|
114 |
|
115 |
@cl.author_rename
|
@@ -134,10 +128,9 @@ async def start_chat():
|
|
134 |
The user session is a dictionary that is unique to each user session, and is stored in the memory of the server.
|
135 |
"""
|
136 |
|
137 |
-
### BUILD LCEL RAG CHAIN THAT ONLY RETURNS TEXT
|
138 |
lcel_rag_chain = (
|
139 |
{"context": itemgetter("query") | hf_retriever, "query": itemgetter("query")}
|
140 |
-
| rag_prompt | hf_llm
|
141 |
)
|
142 |
|
143 |
cl.user_session.set("lcel_rag_chain", lcel_rag_chain)
|
@@ -155,7 +148,7 @@ async def main(message: cl.Message):
|
|
155 |
|
156 |
msg = cl.Message(content="")
|
157 |
|
158 |
-
|
159 |
{"query": message.content},
|
160 |
config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
|
161 |
):
|
|
|
42 |
document_loader = TextLoader("./data/paul_graham_essays.txt")
|
43 |
documents = document_loader.load()
|
44 |
|
|
|
45 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
|
46 |
split_documents = text_splitter.split_documents(documents)
|
47 |
|
|
|
|
|
48 |
hf_embeddings = HuggingFaceEndpointEmbeddings(
|
49 |
model=HF_EMBED_ENDPOINT,
|
50 |
task="feature-extraction",
|
|
|
76 |
1. Define a String Template
|
77 |
2. Create a Prompt Template from the String Template
|
78 |
"""
|
|
|
79 |
RAG_PROMPT_TEMPLATE = """\
|
80 |
<|start_header_id|>system<|end_header_id|>
|
81 |
You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>
|
|
|
96 |
"""
|
97 |
1. Create a HuggingFaceEndpoint for the LLM
|
98 |
"""
|
|
|
99 |
hf_llm = HuggingFaceEndpoint(
|
100 |
endpoint_url=HF_LLM_ENDPOINT,
|
101 |
max_new_tokens=512,
|
102 |
top_k=10,
|
103 |
top_p=0.95,
|
104 |
+
temperature=0.3,
|
105 |
+
repetition_penalty=1.15,
|
106 |
+
huggingfacehub_api_token=HF_TOKEN,
|
|
|
107 |
)
|
108 |
|
109 |
@cl.author_rename
|
|
|
128 |
The user session is a dictionary that is unique to each user session, and is stored in the memory of the server.
|
129 |
"""
|
130 |
|
|
|
131 |
lcel_rag_chain = (
|
132 |
{"context": itemgetter("query") | hf_retriever, "query": itemgetter("query")}
|
133 |
+
| rag_prompt | hf_llm
|
134 |
)
|
135 |
|
136 |
cl.user_session.set("lcel_rag_chain", lcel_rag_chain)
|
|
|
148 |
|
149 |
msg = cl.Message(content="")
|
150 |
|
151 |
+
for chunk in await cl.make_async(lcel_rag_chain.stream)(
|
152 |
{"query": message.content},
|
153 |
config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
|
154 |
):
|