Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -138,13 +138,74 @@ async def chat(query,history,sources,reports,subtype,year):
|
|
138 |
search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6, "k": 3, "filter":filter})
|
139 |
|
140 |
context_retrieved = retriever.invoke(question)
|
|
|
|
|
141 |
|
142 |
def format_docs(docs):
|
143 |
-
return "
|
144 |
|
145 |
context_retrieved_formatted = format_docs(context_retrieved)
|
146 |
context_retrieved_lst.append(context_retrieved_formatted)
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
yield history,docs_html
|
150 |
#process_pdf()
|
|
|
138 |
search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6, "k": 3, "filter":filter})
|
139 |
|
140 |
context_retrieved = retriever.invoke(question)
|
141 |
+
for doc in context_retrieved:
|
142 |
+
print(doc.metadata)
|
143 |
|
144 |
def format_docs(docs):
|
145 |
+
return "|".join(doc.page_content for doc in docs)
|
146 |
|
147 |
context_retrieved_formatted = format_docs(context_retrieved)
|
148 |
context_retrieved_lst.append(context_retrieved_formatted)
|
149 |
+
|
150 |
+
##-------------------Prompt---------------------------------------------------------------
|
151 |
+
SYSTEM_PROMPT = """
|
152 |
+
You are AuditQ&A, an AI Assistant created by Auditors and Data Scientist. You are given a question and extracted passages of the consolidated/departmental/thematic focus audit reports. Provide a clear and structured answer based on the passages/context provided and the guidelines.
|
153 |
+
Guidelines:
|
154 |
+
- If the passages have useful facts or numbers, use them in your answer.
|
155 |
+
- Documents are separated by "|"
|
156 |
+
- When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
|
157 |
+
- Do not use the sentence 'Doc i says ...' to say where information came from.
|
158 |
+
- If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
|
159 |
+
- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
|
160 |
+
- If it makes sense, use bullet points and lists to make your answers easier to understand.
|
161 |
+
- You do not need to use every passage. Only use the ones that help answer the question.
|
162 |
+
- If the documents do not have the information needed to answer the question, just say you do not have enough information.
|
163 |
+
"""
|
164 |
+
|
165 |
+
USER_PROMPT = """Passages:
|
166 |
+
{context}
|
167 |
+
-----------------------
|
168 |
+
Question: {question} - Explained to audit expert
|
169 |
+
Answer in english with the passages citations:
|
170 |
+
""".format(context = context_retrieved_lst, question=query)
|
171 |
+
|
172 |
+
messages = [
|
173 |
+
SystemMessage(content=SYSTEM_PROMPT),
|
174 |
+
HumanMessage(
|
175 |
+
content=USER_PROMPT
|
176 |
+
),]
|
177 |
+
|
178 |
+
###-----------------getting inference endpoints------------------------------
|
179 |
+
llm_qa = HuggingFaceEndpoint(
|
180 |
+
endpoint_url="https://nhe9phsr2zhs0e36.eu-west-1.aws.endpoints.huggingface.cloud",
|
181 |
+
max_new_tokens=512,
|
182 |
+
top_k=10,
|
183 |
+
top_p=0.95,
|
184 |
+
typical_p=0.95,
|
185 |
+
temperature=0.01,
|
186 |
+
repetition_penalty=1.03,)
|
187 |
+
|
188 |
+
# create rag chain
|
189 |
+
chat_model = ChatHuggingFace(llm=llm_qa)
|
190 |
+
chain = chat_model | StrOutputParser()
|
191 |
+
|
192 |
+
###-------------------------- get answers ---------------------------------------
|
193 |
+
answer_lst = []
|
194 |
+
for question, context in zip(question_lst , context_retrieved_lst):
|
195 |
+
answer = chain.invoke(messages)
|
196 |
+
answer_lst.append(answer)
|
197 |
+
docs_html = []
|
198 |
+
for i, d in enumerate(context_retrieved, 1):
|
199 |
+
docs_html.append(make_html_source(d, i))
|
200 |
+
docs_html = "".join(docs_html)
|
201 |
+
|
202 |
+
previous_answer = history[-1][1]
|
203 |
+
previous_answer = previous_answer if previous_answer is not None else ""
|
204 |
+
answer_yet = previous_answer + answer_lst[0]
|
205 |
+
answer_yet = parse_output_llm_with_sources(answer_yet)
|
206 |
+
history[-1] = (query,answer_yet)
|
207 |
+
|
208 |
+
history = [tuple(x) for x in history]
|
209 |
|
210 |
yield history,docs_html
|
211 |
#process_pdf()
|