Dreamer (#16)
* added qa dreamer
* Added CoRE features
---------
Co-authored-by: Sai <[email protected]>
- .gitignore +2 -1
- app.py +101 -40
- prompts.py +145 -2
- qa_dreamer.py +62 -0
.gitignore
CHANGED
@@ -1,3 +1,4 @@
 *.DS_Store
 .streamlit/secrets.toml
-config.py
+config.py
+.history
app.py
CHANGED
@@ -6,9 +6,10 @@ import os
 import json
 import requests
 from llm_reasoner import LLMReasoner
-from prompts import templates
+from prompts import templates, get_examples
 from typing import Any
 from string import Template
+from qa_dreamer import get_questions
 
 def safe_parse_json(model_answer):
     """.."""
@@ -49,7 +50,7 @@ def check_password():
             del st.session_state["username"]
             return
 
-
+        # If authentication fails
         st.session_state["password_correct"] = False
 
     # Return True if the username + password is validated.
@@ -67,6 +68,7 @@ def select_models():
 
     retriever_options = ["Choose one...", "BM25 Retriever", "Off-the-shelf Retriever", "Finetuned Retriever", "No Retriever"]
    reasoner_options = ["Choose one...", "Claude Sonnet", "GPT-4o", "o3-mini"]
+    dreamer_options = ["None", "CoRE", "CoRE-Contrastive", "QA-Decomposition"]
 
    #selectboxes
 
@@ -76,6 +78,12 @@ def select_models():
            key="retriever"
        )
 
+        dreamer = st.selectbox(
+            "Select the DREAMER",
+            dreamer_options,
+            key="dreamer"
+        )
+
        reasoner = st.selectbox(
            "Select the Reasoner Model",
            reasoner_options,
@@ -88,14 +96,14 @@ def select_models():
        # Check that both selections are not the placeholder.
        if retriever == "Choose one..." or reasoner == "Choose one...":
            st.info("Please select both a retriever and a reasoner.")
-            return None, None
+            return None, None, None
        else:
            # Store the valid selections in session state
-            st.session_state["selected_models"] = (retriever, reasoner)
-            return retriever, reasoner
+            st.session_state["selected_models"] = (retriever, dreamer, reasoner)
+            return retriever, dreamer, reasoner
    else:
        st.info("Click 'Next' once you have made your selections.")
-        return None, None
+        return None, None, None
 
 def retriever(query: str, selected_retriever: str):
     """Simulate a 'retriever' step, searching for relevant information."""
@@ -144,14 +152,6 @@ def retriever(query: str, selected_retriever: str):
 
     corpus = '\n\n'.join(topk_documents)
 
-    if "last_expander_id" not in st.session_state:
-        st.session_state.last_expander_id = 0
-
-    # Function to create a unique key for each expander
-    def get_expander_key():
-        st.session_state.last_expander_id += 1
-        return f"expander_{st.session_state.last_expander_id}"
-
     print(f"Topk documents: {topk_documents}")
 
     for idx, document in enumerate(topk_documents):
@@ -197,26 +197,90 @@ def retriever(query: str, selected_retriever: str):
     # You could return retrieved info here.
     return corpus
 
-def reasoner(query: str, documents: list[str], selected_reasoner: str, llm_client: Any):
+def display_to_chat(placeholder, message):
+    """Simulate a stream of messages in the chat."""
+    text = ""
+    import re
+    for chunk in re.split(r'(\s+)', message):
+        text += chunk + " "
+        time.sleep(0.05)
+        # Add a blinking cursor to simulate typing
+        placeholder.markdown(text + "▌")
+    placeholder.markdown(text)
+
+def reasoner(query: str, documents: list[str], selected_dreamer: str, selected_reasoner: str, llm_client: Any):
     """Simulate a 'reasoner' step, thinking about how to answer."""
     with st.chat_message("assistant"):
         placeholder = st.empty()
         text = ""
+        message = ""
+
+        # find the appropriate template
+        if selected_dreamer == 'None':
+            if not documents or len(documents) == 0:
+                prompt_template = Template(templates["no_evidence"])
+                prompt = prompt_template.substitute(claim=query)
+                print(prompt)
+            else:
+                prompt_template = Template(templates["with_evidence"])
+                prompt = prompt_template.substitute(claim=query, corpus_text=documents)
+        elif selected_dreamer == 'CoRE' or selected_dreamer == 'CoRE-Contrastive':
+            conditions = [""]
+            prompt_template = Template(templates["generate_condition"])
+            prompt = prompt_template.substitute(claim=query)
+            prompt = get_examples() + prompt + " answer: "
+            print(prompt)
+
+            message += f"Using {selected_dreamer} to analyze and verify the claim in detail..."
+            display_to_chat(placeholder, message)
+            placeholder = st.empty()
+
+            llm_response = llm_client.run_inference(prompt)
+            print(llm_response)
+            conditions = llm_response.split('\n\n')
+            print(conditions)
+
+            condition = conditions[0] if selected_dreamer == 'CoRE' else conditions[1]
+
+            message = "To reason about the claim, CoRE is using the condition: " + condition + "\n\n\n\n"
+
+            if not documents or len(documents) == 0:
+                prompt_template = Template(templates["with_condition"])
+                prompt = prompt_template.substitute(claim=query, condition=condition)
+            else:
+                prompt_template = Template(templates["with_evidence_condition"])
+                prompt = prompt_template.substitute(claim=query, corpus_text=documents, condition=condition)
+        elif selected_dreamer == 'QA-Decomposition':
+            message += "Decomposing into atomic questions...\n"
+            display_to_chat(placeholder, message)
+            placeholder = st.empty()
+            questions = get_questions(query)
+            message = questions + "\n\n\n\n"
+
+            message += "Now using the question decomposition to reason and verify the claim...\n\n"
+
+            if not documents or len(documents) == 0:
+                prompt_template = Template(templates["no_evidence_questions"])
+                prompt = prompt_template.substitute(claim=query, questions=questions)
+                print(prompt)
+            else:
+                prompt_template = Template(templates["with_evidence_questions"])
+                prompt = prompt_template.substitute(claim=query, corpus_text=documents, questions=questions)
+
+        else:
+            if not documents or len(documents) == 0:
+                prompt_template = Template(templates["no_evidence"])
+                prompt = prompt_template.substitute(claim=query)
+            else:
+                prompt_template = Template(templates["with_evidence"])
+                prompt = prompt_template.substitute(claim=query, corpus_text=documents)
 
         if selected_reasoner == "Claude Sonnet":
-            message
+            message += "Using Claude Sonnet to reason and verify the claim..."
         elif selected_reasoner == "GPT-4o":
-            message
+            message += "Using GPT-4o to analyze and verify the claim in detail..."
         elif selected_reasoner == "o3-mini":
-            message
-
-        if not documents or len(documents) == 0:
-            prompt_template = Template(templates["no_evidence"])
-            prompt = prompt_template.substitute(claim=query)
-            print(prompt)
-        else:
-            prompt_template = Template(templates["with_evidence"])
-            prompt = prompt_template.substitute(claim=query, corpus_text=documents)
+            message += "Using o3-mini to quickly analyze the claim..."
 
         print(prompt)
 
@@ -230,12 +294,7 @@ def reasoner(query: str, documents: list[str], selected_reasoner: str, llm_client: Any):
             print(f"Error with parsing the returned {answer_dict}")
             decision, reasoning = "", ""
 
-
-            text += chunk + " "
-            time.sleep(0.05)
-            # Add a blinking cursor to simulate typing
-            placeholder.markdown(text + "▌")
-        placeholder.markdown(text)
+        display_to_chat(placeholder, message)
         # You could return reasoning info here.
         return reasoning, decision
 
@@ -247,25 +306,27 @@ def main():
         st.stop()
 
     if "selected_models" not in st.session_state:
-        selected_retriever, selected_reasoner = select_models()
+        selected_retriever, selected_dreamer, selected_reasoner = select_models()
         # If valid selections are returned, store them and reset the change flag.
         if selected_retriever is not None and selected_reasoner is not None:
-            st.session_state.selected_models = (selected_retriever, selected_reasoner)
+            st.session_state.selected_models = (selected_retriever, selected_dreamer, selected_reasoner)
             st.rerun()
         else:
             st.stop() # Halt further execution until valid selections are made.
     else:
-        selected_retriever, selected_reasoner = st.session_state.selected_models
+        selected_retriever, selected_dreamer, selected_reasoner = st.session_state.selected_models
 
     # START OF AGENTIC DEMO
 
     column1, column2 = st.columns(2)
     column1.caption(f"Retriever Selected: {selected_retriever}")
+    column1.caption(f"Dreamer Selected: {selected_dreamer}")
     column2.caption(f"Reasoner Selected: {selected_reasoner}")
 
     if st.button("Change Selection", key="change_selection_btn"):
         st.session_state.pop("selected_models", None)
         st.session_state.pop("retriever", None)
+        st.session_state.pop("dreamer", None)
         st.session_state.pop("reasoner", None)
         st.session_state.messages = [{"role": "assistant", "content": "Let's start verifying the claims here! 👇"}]
         st.rerun()
@@ -340,7 +401,7 @@ def main():
     if "auto_submit" in st.session_state and st.session_state.auto_submit:
         prompt = st.session_state.auto_submit_text
 
-        display_message = prompt + " \n" + " \n" + f"Retriever: {selected_retriever}, Reasoner: {selected_reasoner}"
+        display_message = prompt + " \n" + " \n" + f"Retriever: {selected_retriever}, Dreamer: {selected_dreamer}, Reasoner: {selected_reasoner}"
         st.session_state.messages.append({"role": "user", "content": prompt})
         st.session_state.messages.append({"role": "summary", "content": display_message})
 
@@ -349,7 +410,7 @@ def main():
             st.markdown(display_message)
 
         retrieved_documents = retriever(prompt, selected_retriever)
-        reasoning, decision = reasoner(prompt, retrieved_documents, selected_reasoner, llm_client)
+        reasoning, decision = reasoner(prompt, retrieved_documents, selected_dreamer, selected_reasoner, llm_client)
 
         # Display assistant response in chat message container
         with st.chat_message("assistant"):
@@ -360,7 +421,7 @@ def main():
             elif decision.lower() == 'contradict':
                 assistant_response = f'The claim is INCORRECT because {reasoning}'
             else:
-                assistant_response =
+                assistant_response = 'Sorry, the query failed due to an issue with connecting to the LLM service.'
 
             # Simulate stream of response with milliseconds delay
             for chunk in assistant_response.split():
@@ -381,7 +442,7 @@ def main():
     prompt = st.chat_input("Type your claim here")
     if prompt:
         # Add user message to chat history
-        display_message = prompt + " \n"+ " \n"+ f"Retriever: {selected_retriever}, Reasoner: {selected_reasoner}"
+        display_message = prompt + " \n"+ " \n"+ f"Retriever: {selected_retriever}, Dreamer: {selected_dreamer}, Reasoner: {selected_reasoner}"
         st.session_state.messages.append({"role": "user", "content": prompt})
         st.session_state.messages.append({"role": "summary", "content": display_message})
         # Display user message in chat message container
@@ -389,7 +450,7 @@ def main():
             st.markdown(display_message)
 
         retrieved_documents = retriever(prompt, selected_retriever)
-        reasoning, decision = reasoner(prompt, retrieved_documents, selected_reasoner, llm_client)
+        reasoning, decision = reasoner(prompt, retrieved_documents, selected_dreamer, selected_reasoner, llm_client)
 
         # Display assistant response in chat message container
         with st.chat_message("assistant"):
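For reference, the new display_to_chat helper added above can be exercised on its own. Below is a minimal standalone sketch of the same streaming pattern, assuming a Streamlit runtime (run with `streamlit run sketch.py`); the assistant message is an invented stand-in, not text from the commit:

import re
import time

import streamlit as st

def display_to_chat(placeholder, message):
    """Stream `message` into `placeholder` chunk by chunk."""
    text = ""
    # Split on whitespace but keep the separators, so spacing is preserved.
    for chunk in re.split(r'(\s+)', message):
        text += chunk + " "
        time.sleep(0.05)
        placeholder.markdown(text + "▌")  # blinking cursor while "typing"
    placeholder.markdown(text)  # final render without the cursor

with st.chat_message("assistant"):
    display_to_chat(st.empty(), "Using CoRE to analyze and verify the claim in detail...")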
prompts.py
CHANGED
@@ -1,3 +1,11 @@
+def get_examples():
+    examples = [('A: Dan got a Sega Genesis.\n\nGenerate a pair of constrastive conditions which can help infer if the context in A is true.\nMake sure the first condition in the pair is supported by the context in A and the second contrastive condition is not supported by the context in A.\nThe conditions should not be stated or paraphrased from the context in A and you are allowed to use your internal knowledge.\nThe conditions should be stative expressions.\nDo not describe activities.\nMake sure the conditions in a pair are constrastive.\nDo not give an explanation.', "Rob was a child in the early 200's. \n\n Rob was a child in the late 80's or early 90's."), ("A: Kelsey's dog rested on the couch.\n\nGenerate a pair of constrastive conditions which can help infer if the context in A is true.\nMake sure the first condition in the pair is supported by the context in A and the second contrastive condition is not supported by the context in A.\nThe conditions should not be stated or paraphrased from the context in A and you are allowed to use your internal knowledge.\nThe conditions should be stative expressions.\nDo not describe activities.\nMake sure the conditions in a pair are constrastive.\nDo not give an explanation.", 'The couch cover was not durable. \n\n The couch cover was durable.')]
+    ret_str = ""
+    for prompt, value in examples:
+        ret_str += "example: " + prompt + " answer: " + str(value) + " "
+    return ret_str
+
+
 templates = {
     "no_evidence": (
         "You are an AI model tasked with verifying claims related to medical and health topics using zero-shot learning. "
@@ -45,5 +53,140 @@ templates = {
         ' "decision": "SUPPORT or CONTRADICT"\n'
         "}\n\n"
         "Now, please evaluate the claim above."
-    )
-}
+    ),
+
+    "with_evidence_questions": """\
+You are an AI model tasked with verifying claims related to medical and health topics using zero-shot learning. Your job is to analyze a given claim along with provided supporting evidence (i.e. corpus articles) and decide whether the available evidence and your general medical knowledge would likely SUPPORT or CONTRADICT the claim. You are also given some questions that can help you analyze the claim and evidence.
+
+Claim to evaluate:
+<claim>
+$claim
+</claim>
+
+Additional evidence provided:
+<corpus_text>
+$corpus_text
+</corpus_text>
+
+Questions to consider:
+<questions>
+$questions
+</questions>
+
+Guidelines:
+1. Evaluate the claim's plausibility based on general medical knowledge.
+2. Consider the quality and relevance of the provided evidence.
+3. Analyze the context and determine if the evidence supports or contradicts the claim.
+4. Assess any biases or limitations in the evidence.
+5. Use the provided questions to guide your analysis.
+
+After your analysis, output your response strictly as a JSON object with exactly two keys: "reasoning" and "decision". Do not include any additional commentary or keys.
+
+Example output format:
+{
+ "reasoning": "Your brief explanation here (one or two sentences).",
+ "decision": "SUPPORT or CONTRADICT"
+}
+
+Now, please evaluate the claim above.
+""",
+
+    "no_evidence_questions": """\
+You are an AI model tasked with verifying claims related to medical and health topics using zero-shot learning. Your job is to analyze a given claim and decide whether the available evidence and your general medical knowledge would likely SUPPORT or CONTRADICT the claim. You are also given some questions that can help you analyze the claim and evidence.
+
+Claim to evaluate:
+<claim>
+$claim
+</claim>
+
+Questions to consider:
+<questions>
+$questions
+</questions>
+
+Guidelines:
+1. Evaluate the claim's plausibility based on general medical knowledge.
+2. Consider the specificity and credibility of any numbers or percentages.
+3. Analyze the context and scope of the claim.
+4. Assess any potential biases or limitations.
+5. Use the provided questions to guide your analysis.
+
+After your analysis, output your response strictly as a JSON object with exactly two keys: "reasoning" and "decision". Do not include any additional commentary or keys.
+
+Example output format:
+{
+ "reasoning": "Your brief explanation here (one or two sentences).",
+ "decision": "SUPPORT or CONTRADICT"
+}
+
+Now, please evaluate the claim above.
+""",
+
+    "with_condition": (
+        "You are an AI model tasked with verifying claims related to medical and health topics using zero-shot learning. "
+        "Your job is to analyze a given claim and decide whether the known condition, available evidence and your general medical knowledge would likely SUPPORT or CONTRADICT the claim.\n\n"
+        "Claim to evaluate:\n"
+        "<claim>\n"
+        "$claim\n"
+        "</claim>\n\n"
+        "Known Condition:\n"
+        "<condition>\n"
+        "$condition\n"
+        "</condition>\n\n"
+        "Guidelines:\n"
+        "1. Evaluate the claim's plausibility based on general medical knowledge.\n"
+        "2. Consider the specificity and credibility of any numbers or percentages.\n"
+        "3. Analyze the context and scope of the claim.\n"
+        "4. Examine the provided condition and use it as additional help for assessing the claim's plausibility.\n"
+        "5. Assess any potential biases or limitations.\n\n"
+        "After your analysis, output your response strictly as a JSON object with exactly two keys: \"reasoning\" and \"decision\". "
+        "Do not include any additional commentary or keys.\n\n"
+        "Example output format:\n"
+        "{\n"
+        ' "reasoning": "Your brief explanation here (one or two sentences).",\n'
+        ' "decision": "SUPPORT or CONTRADICT"\n'
+        "}\n\n"
+        "Now, please evaluate the claim above."
+    ),
+    "with_evidence_condition": (
+        "You are an AI model tasked with verifying claims related to medical and health topics using zero-shot learning. "
+        "Your job is to analyze a given claim along with provided supporting evidence (i.e. corpus articles) and decide whether "
+        "the known condition, available evidence and your general medical knowledge would likely SUPPORT or CONTRADICT the claim.\n\n"
+        "Claim to evaluate:\n"
+        "<claim>\n"
+        "$claim\n"
+        "</claim>\n\n"
+        "Additional evidence provided:\n"
+        "<corpus_text>\n"
+        "$corpus_text\n"
+        "</corpus_text>\n\n"
+        "Known Condition:\n"
+        "<condition>\n"
+        "$condition\n"
+        "</condition>\n\n"
+        "Guidelines:\n"
+        "1. Evaluate the claim's plausibility based on general medical knowledge.\n"
+        "2. Consider the quality and relevance of the provided evidence.\n"
+        "3. Analyze the context and determine if the evidence supports or contradicts the claim.\n"
+        "4. Assess any biases or limitations in the evidence.\n"
+        "5. Examine the provided condition and use it as additional help for assessing the claim's plausibility.\n\n"
+        "After your analysis, output your response strictly as a JSON object with exactly two keys: \"reasoning\" and \"decision\". "
+        "Do not include any additional commentary or keys.\n\n"
+        "Example output format:\n"
+        "{\n"
+        ' "reasoning": "Your brief explanation here (one or two sentences).",\n'
+        ' "decision": "SUPPORT or CONTRADICT"\n'
+        "}\n\n"
+        "Now, please evaluate the claim above."
+    ),
+    "generate_condition": (
+        "A: $claim\n\n"
+        "Generate a pair of constrastive conditions which can help infer if the context in A is true.\n"
+        "Make sure the first condition in the pair is supported by the context in A and the second contrastive condition is not supported by the context in A.\n"
+        "The conditions should not be stated or paraphrased from the context in A and you are allowed to use your internal knowledge.\n"
+        "The conditions should be stative expressions.\n"
+        "Do not describe activities.\n"
+        "Make sure the conditions in a pair are constrastive.\n"
+        "Do not give an explanation."
+    )
+}
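To see how app.py consumes the new entries, here is a hypothetical fill of the "with_condition" template via string.Template, mirroring the substitution calls in the diff above; the claim and condition strings are invented for illustration:

from string import Template

from prompts import templates

prompt = Template(templates["with_condition"]).substitute(
    claim="Vitamin C prevents the common cold.",  # invented example claim
    condition="Vitamin C intake does not change cold incidence.",  # invented condition
)
print(prompt)

Note that string.Template treats only $-placeholders as special, so the literal braces in the JSON example block pass through unchanged.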
qa_dreamer.py
ADDED
@@ -0,0 +1,62 @@
+from openai import OpenAI
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_random_exponential,
+)
+
+
+PQA_QUESTION2QUESTION_PROMPT = """\
+Given a question, decompose it into multiple atomic questions.
+
+### Instructions:
+- The questions MUST be atomic, i.e., they MUST be answerable by only a single piece of information.
+- The questions MUST be standalone, i.e., they MUST NOT reference any other question or the given question.
+- The questions can be both open-ended or yes/no questions.
+- The questions should be decomposed only from the main question.
+- Each question should be on a new line and start with `**** `.
+
+### Input:
+{text}
+
+### Output:
+"""
+
+qa_client = OpenAI(
+    base_url="http://130.85.37.21:4774/v1",
+    api_key="EMPTY"
+)
+
+@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(7))
+def completion_with_backoff(client, **kwargs):
+    return client.chat.completions.create(**kwargs)
+
+def generation_to_questions(generated_text, header, numbered=True):
+    try:
+        lines = generated_text.split("\n")
+        lines = [line.strip() for line in lines]
+        lines = [line for line in lines if line.startswith(header)]
+        lines = [line.replace(header, "").strip() for line in lines]
+    except:
+        lines = []
+        print("Error in processing generated text")
+    return lines
+
+def get_questions(claim):
+    prompt = PQA_QUESTION2QUESTION_PROMPT.format(text=claim)
+    print(f"Question Generation Prompt: {prompt}")
+    response = completion_with_backoff(
+        client=qa_client,
+        model="dipta007/Llama-3.1-8B-Instruct-finetuned-pqa",
+        messages=[
+            {"role": "user", "content": prompt},
+        ],
+        max_tokens=2048,
+        top_p=1.0,
+        temperature=0.0,
+    )
+    print(f"Questions: {response.choices[0].message}")
+    generation = response.choices[0].message.content
+    questions = generation_to_questions(generation, "****")
+    questions = [f"- {q}" for q in questions]
+    return "\n".join(questions)
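A hypothetical call to the new module, assuming the OpenAI-compatible endpoint configured above is reachable; the claim is an invented example:

from qa_dreamer import get_questions

# The finetuned model emits one "**** "-prefixed question per line, which
# get_questions reformats as "- " bullets.
print(get_questions("Aspirin reduces the risk of heart attack in adults."))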
|