Spaces:
Sleeping
Sleeping
File size: 15,711 Bytes
0703e71 68c11f0 e504a30 68c11f0 d232ed1 5b6755d 901a87e 6efea88 d232ed1 0703e71 5b6755d 60ffe71 210b40c 090dd00 210b40c 090dd00 210b40c 090dd00 210b40c f21f2ed 31afd70 fbccd18 e200600 fbccd18 31afd70 e71a07e f15b0bc f21f2ed 27e1849 91ad5b7 27e1849 c73afd0 7cf55c5 91ad5b7 27e1849 91ad5b7 27e1849 7cf55c5 91ad5b7 f21f2ed 91ad5b7 f21f2ed 0a402b2 f21f2ed 0a402b2 0108e87 f21f2ed 0bb5671 1230f78 6d1bb98 ed9ab49 8de2cbd 0edf770 ed9ab49 6d1bb98 1418604 ec09f15 1418604 a1f7b63 1418604 a1f7b63 37528ea e899803 1418604 6b786ce eb7b56f 1aa6e35 090dd00 bc3ac0f 5b6755d 7f45e1c 76c4bfe 8672bbc 2cdd4cb 9bd0a5d 1861169 87d4d46 d232ed1 e504a30 901a87e e504a30 901a87e e504a30 6efea88 7144bca 6efea88 f41fff5 e504a30 34e4cb8 6fe41d2 49a1729 e504a30 901a87e 6efea88 31afd70 34e4cb8 e504a30 82e1308 e504a30 f41fff5 31afd70 32e8378 f41fff5 50d1738 f2c7470 f21f2ed f2c7470 c43c3ad 2ca5664 b9132f9 0b27a4e 1230f78 034edb9 4a543db 9ecbe8e 4cb73b2 2ca5664 d5478e2 19b3d90 db1bb98 ed9ab49 0bb5671 2d01a69 f41fff5 1418604 e504a30 32e8378 bd26917 1418604 d232ed1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 |
import os
import re
import io
from typing import List, Tuple, Union
from pathlib import Path
import gradio as gr
import openai
import pymupdf
from docx import Document
# API credentials come from the environment; each is None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
LEPTON_API_TOKEN = os.getenv("LEPTON_API_TOKEN")
# Prompt template for the hallucination check. It is filled in with
# `str.format`, which substitutes {question}, {document} and {answer}; the
# doubled braces on the last line are escapes that render as a literal
# JSON object skeleton.
PROMPT = """
Given the following QUESTION, DOCUMENT and ANSWER you must analyze the provided answer and determine whether it is faithful to the contents of the DOCUMENT. The ANSWER must not offer new information beyond the context provided in the DOCUMENT. The ANSWER also must not contradict information provided in the DOCUMENT. Output your final verdict by strictly following this format: "PASS" if the answer is faithful to the DOCUMENT and "FAIL" if the answer is not faithful to the DOCUMENT. Show your reasoning.
--
QUESTION (THIS DOES NOT COUNT AS BACKGROUND INFORMATION):
{question}
--
DOCUMENT:
{document}
--
ANSWER:
{answer}
--
Your output should be in JSON FORMAT with the keys "REASONING" and "SCORE":
{{"REASONING": <your reasoning as bullet points>, "SCORE": <your final score>}}
"""
# Canned demo inputs. Each entry is a dict with four keys:
#   "emoji" and "question" form the label of an example button;
#   "question", "document" and "answer" are the values loaded into the
#   form when that example is selected.
EXAMPLES = [
{
"emoji": "🎬",
"question": "Who directed the film that Amanda Brooks starred in in 2007?",
"document": '''Amanda Brooks (born June 22, 1981) is an actress who starred in the 2007 film "D-War" and in the 2006 film "Cut Off".D-War (Korean: 디워, released in North America as Dragon Wars: D-War), is a 2007 South Korean action-adventure fantasy film written and directed by Shim Hyung-rae, and starring Jason Behr, Amanda Brooks, Robert Forster, and Elizabeth Peña.''',
"answer": "The film that Amanda Brooks starred in was directed by Steven Spielberg."
},
{
"emoji": "🎾",
"question": "Who lost to Petrova in the Australian Open?",
"document": "Coming off their road win over the Redskins, the Chiefs went home, donned their Dallas Texans throwbacks, and played a Week 7 AFL Legacy game with the San Diego Chargers. Kansas City would find themselves trailing in the first quarter as Chargers quarterback Philip Rivers completed a 3-yard touchdown pass to wide receiver Malcom Floyd, followed by a 10-yard touchdown pass to wide receiver Vincent Jackson. San Diego would add onto their lead in the second quarter with a 20-yard and a 39-yard field goal from kicker Nate Kaeding. The Chiefs would get onto the board in the third quarter with quarterback Matt Cassel completing a 7-yard touchdown pass to wide receiver Dwayne Bowe, but the Chargers kept their momentum going with Rivers finding running back Darren Sproles on a 58-yard touchdown pass. In the fourth quarter, San Diego sealed the win with Kaeding's 19-yard field goal and fullback Jacob Hester recovering a blocked punt in the end zone for a touchdown. With the loss, Kansas City went into their bye week at 1-6. Larry Johnson was suspended for two weeks after he made offensive comments about Todd Haley and made offensive comments about homosexuals on Twitter and in public.",
"answer": "['Maria Sharapova', 'Venus Williams', 'Svetlana Kuznetsova']"
},
{
"emoji": "⛪️",
"question": "Whose Apology was rejected by Charles V?",
"document": "In the course of the Lutheran Reformation numerous Imperial States had adopted the new confession, against the opposition of the ruling Catholic House of Habsburg, who recognised these conversions as a quest for increasing autonomy to the detriment of the central Imperial authority. At the 1521 Diet of Worms Emperor Charles V had Martin Luther banned and the proliferation of his writings prohibited, which in 1529 provoked the Protestation at Speyer by several Lutheran estates. The tensions culminated to an open conflict over the Lutheran Augsburg Confession of 1530, the Apology of which, written by Philipp Melanchthon, was rejected by the Emperor. In turn several Lutheran states led by Elector John Frederick I of Saxony and Landgrave Philip I of Hesse met at the town of Schmalkalden, where they established the Schmalkaldic League in 1531. In 1544 Charles V returned to Germany from the Italian War after he had signed the Treaty of Crépy and began to forge alliances not only with Pope Paul III but also with Lutheran princes, foremost with Duke Maurice of Saxony, the Albertine cousin of Saxon Elector John Frederick I. In view of the Emperor's preparations for battle, the Schmalkaldic leaders on 4 July 1546 gathered at Ichtershausen and agreed that a preventive strike would be advisable, as long as Charles V had not concentrated a significant amount of mercenaries. The Emperor gathered an army of around 52,000 men for his campaign, that was to start on the Danube.",
"answer": "Martin Luther"
},
{
"emoji": "🪖",
"question": "What battle started first: Battle of Vienna or Battle of Mohács?",
"document": "The Siege of Vienna in 1529 was the first attempt by the Ottoman Empire, led by Suleiman the Magnificent, to capture the city of Vienna, Austria. The siege signalled the pinnacle of the Ottoman Empire's power and the maximum extent of Ottoman expansion in central Europe. Thereafter, 150 years of bitter military tension and reciprocal attacks ensued, culminating in the Battle of Vienna of 1683, which marked the start of the 15-year-long Great Turkish War. The inability of the Ottomans to capture Vienna in 1529 turned the tide against almost a century of conquest throughout eastern and central Europe. The Ottoman Empire had previously annexed Central Hungary and established a vassal state in Transylvania in the wake of the Battle of Mohács. According to Arnold J. Toynbee, 'The failure of the first brought to a standstill the tide of Ottoman conquest which had been flooding up the Danube Valley for a century past.' There is speculation by some historians that Suleiman's main objective in 1529 was actually to assert Ottoman control over the whole of Hungary, the western part of which was under Habsburg control. The decision to attack Vienna after such a long interval in Suleiman's European campaign is viewed as an opportunistic manoeuvre after his decisive victory in Hungary. Other scholars theorise that the suppression of Hungary simply marked the prologue to a later, premeditated invasion of Europe.",
"answer": "Battle of Vienna"
},
{
"emoji": "🏰",
"question": "What American businessman and guided the character Epick Mickey?",
"document": "The character was regained by The Walt Disney Company in 2006 under the guidance of Bob Iger.Robert Allen Iger ( ; born February 10, 1951) is an American businessman who is chairman and chief executive officer (CEO) of The Walt Disney Company.",
"answer": "Walt Disney himself guided Epic Mickey."
}
]
# Load the logo markup once at import time so it can be inlined into the page
# header. The path is relative, so it is resolved against the process's
# current working directory.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding (assumes the SVG is UTF-8 — TODO confirm).
with open('logo.svg', 'r', encoding='utf-8') as file:
    logo_svg = file.read()
# Markdown/HTML banner rendered at the top of the page. This is an f-string:
# the only substitution is {logo_svg}, which inlines the logo markup read
# from disk at import time.
HEADER = f"""
# Patronus Lynx Demo
<table bgcolor="#1E2432" cellspacing="0" cellpadding="0" width="450">
<tr>
<td style="text-align: center;">
<a href="https://www.patronus.ai/blog/lynx-state-of-the-art-open-source-hallucination-detection-model">
<img src="https://postimage.me/images/2024/07/30/lynx.png" width="150">
<h1 style="color: white; font-size: 1.5rem; font-weight: 600;">
Lynx 8B v1.1 & Lynx 70B
</h1>
</a>
</td>
</tr>
<tr style="height:50px;">
<td style="text-align: center;">
<a href="https://www.patronus.ai">
<svg>{logo_svg}</svg>
</a>
</td>
</tr>
</table>
<table bgcolor="#1E2432" cellspacing="0" cellpadding="0" width="450">
<tr style="height:30px;">
<td style="text-align: center;">
<a href="https://huggingface.co/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct">
<img src="https://img.shields.io/badge/%F0%9F%A4%97%20Model_Card-Huggingface-orange" height="20">
</a>
</td>
<td style="text-align: center;">
<a href="https://github.com/patronus-ai/Lynx-hallucination-detection">
<img src="https://postimage.me/images/2024/03/04/GitHub_Logo_White.png" width="100" height="20">
</a>
</td>
<td style="text-align: center; color: white;">
<a href="https://arxiv.org/abs/2407.08488">
<img src="https://img.shields.io/badge/arXiv-2407.08488-b31b1b.svg" height="20">
</a>
</td>
</tr>
</table>
**Patronus Lynx** is a state-of-the-art open-source model for hallucination detection.
**Getting Started**: Provide a question and document or context given to your model in addition to the answer given by the model and then click submit. The output panel will indicate whether the response is a hallucination (Fail) or if it is faithful to the given document or context (Pass) through the score Pass or Fail and provide reasoning behind the score.
"""
# Markdown heading rendered just above the example buttons.
EXAMPLES_HEADER = """
# Try it Yourself!
"""
# Extensions offered by the upload button. Only list types that the upload
# handler can actually extract text from: legacy ".doc" was previously listed
# but has no extraction branch, so such uploads silently produced empty text.
UPLOADABLE_FILE_TYPES = [".pdf", ".txt", ".docx"]
# Custom stylesheet handed to gr.Blocks. The single rule targets components
# created with elem_classes="example-button" (the example/template buttons).
css = """
.example-button {
width: fit-content;
font-size: 1rem;
font-weight: 400 !important;
padding: .5rem 1rem;
}
"""
def update_client_base_url(model_name):
    """Map a model display name to the base URL of its Lepton endpoint.

    Returns None when `model_name` is not one of the two known names.
    """
    known_endpoints = (
        ("Patronus Lynx 8B v1.1", "https://yb15a7dy-patronus-lynx-8b-v1-1.tin.lepton.run/api/v1/"),
        ("Patronus Lynx 70B", "https://yb15a7dy-lynx-70b.tin.lepton.run/api/v1/"),
    )
    for display_name, endpoint in known_endpoints:
        if model_name == display_name:
            return endpoint
    return None
def parse_patronus_lynx_response(
    response: str,
) -> Tuple[bool, Union[List[str], None]]:
    """
    Pull the verdict and the reasoning trace out of a raw Lynx completion.

    Returns a tuple of:
    - hallucination flag: False only when an explicit "SCORE" of PASS is
      found; True for FAIL and whenever no score can be located.
    - reasoning: the contents of the "REASONING" [...] array split on the
      quote-comma-quote separators between items, or None if no such array
      is present. The first and last items keep their outer quote character.
    """
    verdict = re.search(r'"SCORE":\s*"?\b(PASS|FAIL)\b"?', response)
    trace = re.search(r'"REASONING":\s*\[(.*?)\]', response, re.DOTALL)

    # Anything other than an explicit PASS counts as hallucinated.
    hallucination = not (verdict is not None and verdict.group(1) == "PASS")

    reasoning = None
    if trace is not None:
        reasoning = re.split(r"['\"],\s*['\"]", trace.group(1))

    return hallucination, reasoning
def model_call(question, document, answer, client_base_url):
    """Ask the selected Lynx endpoint whether `answer` is faithful to `document`.

    Args:
        question: The question that was posed to the model under test.
        document: The context the answer must be grounded in.
        answer: The answer to evaluate.
        client_base_url: Base URL of the OpenAI-compatible endpoint to call.

    Returns:
        A `(reasoning_text, score_label)` pair of strings. Both are empty
        when any of the three text inputs is empty.
    """
    # Nothing to evaluate: return before building a client or calling the
    # API, so an empty form never fails on a missing token or endpoint.
    if not (question and document and answer):
        return "", ""

    client = openai.OpenAI(
        base_url=client_base_url,
        api_key=LEPTON_API_TOKEN
    )
    print("CLIENT AND CLIENT BASE URL", client, client_base_url)

    prompt = PROMPT.format(question=question, document=document, answer=answer)
    print("ENTIRE NEW_FORMAT", prompt)

    # NOTE(review): no max_tokens is passed; confirm the endpoint's default
    # is large enough for the full reasoning trace.
    response = client.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        temperature=0.0
    )
    print("RESPONSE FROM CLIENT:", response)

    raw_text = response.choices[0].text or ""
    hallucination, reasoning = parse_patronus_lynx_response(raw_text)
    score = "🔴 FAIL 🔴" if hallucination else "🟢 PASS 🟢"

    if reasoning is None:
        # The parser found no REASONING array. Joining None would raise a
        # TypeError, so show the raw completion instead of crashing.
        return raw_text.strip(), score

    # Drop the first and last character of the joined text (presumably the
    # quote characters that bracket the reasoning list contents).
    combined_reasoning = " ".join(reasoning)[1:-1]
    return combined_reasoning, score
def get_filetype(filename):
    """Return the lower-cased extension of `filename`, without the dot.

    The extension is the text after the last ".". It is lower-cased so that
    callers comparing against "pdf"/"txt"/"docx" also match "REPORT.PDF".
    A name that contains no dot has no extension and yields "".
    """
    _, dot, extension = filename.rpartition(".")
    if not dot:
        return ""
    return extension.lower()
def extract_text_pymupdf(file):
    """Open `file` with pymupdf and return the text of all pages, in order.

    The text of each page is concatenated with no separator added.
    """
    with pymupdf.open(file) as opened_doc:
        return "".join(page.get_text() for page in opened_doc)
def extract_text_python_docx(file):
    """Return the paragraph text of a .docx file, one paragraph per line.

    Leading and trailing whitespace of the combined text is stripped.
    """
    paragraph_texts = [paragraph.text for paragraph in Document(file).paragraphs]
    return "\n".join(paragraph_texts).strip()
def upload_file(filepath):
    """Extract text from an uploaded file and return the matching UI updates.

    Args:
        filepath: Path of the uploaded file, or None when nothing was uploaded.

    Returns:
        A 4-item list: the upload button, the file-info group, the file-name
        markdown, and the text to place in the document box. With a file the
        upload button is hidden and the group is shown; with None the button
        is shown and the group hidden.
    """
    if filepath is None:
        return [gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES), gr.Group(visible=False), gr.Markdown(""), ""]

    name = Path(filepath).name
    print("FILEPATH & file name", filepath, name)
    print("FILEPATH type & file name type", type(filepath), type(name))

    # Compare case-insensitively so "REPORT.PDF" is handled like "report.pdf".
    filetype = get_filetype(name).lower()
    extracted_file_text = ""
    status = f"**Uploaded file:** {name}"
    if filetype in ("pdf", "txt"):
        extracted_file_text = extract_text_pymupdf(filepath)
    elif filetype == "docx":
        extracted_file_text = extract_text_python_docx(filepath)
    else:
        # No extractor for this type: say so instead of presenting an empty
        # document as if the upload had succeeded.
        status = f"**Unsupported file type:** {name}"

    return [gr.UploadButton(visible=False), gr.Group(visible=True), gr.Markdown(status), extracted_file_text]
def reset_buttons():
    """Return UI updates that put the upload controls back in their initial state.

    The list holds, in order: a visible upload button, a hidden file-info
    group, an empty file-name markdown, and an empty textbox value.
    """
    upload_button = gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES)
    hidden_group = gr.Group(visible=False)
    blank_file_name = gr.Markdown("")
    blank_document = gr.Textbox(value="")
    return [upload_button, hidden_group, blank_file_name, blank_document]
def select_template(template):
    """Unpack an example dict into its (question, document, answer) values."""
    question_text = template["question"]
    document_text = template["document"]
    answer_text = template["answer"]
    return question_text, document_text, answer_text
# def download_file():
# return [gr.UploadButton(visible=True), gr.DownloadButton(visible=False)]
# Build the Gradio page and wire up its events, then start the app.
# NOTE(review): the source had lost its indentation; the nesting of the
# layout containers below is reconstructed from statement order — confirm
# against the rendered layout.
with gr.Blocks(
    css=css,
    theme=gr.themes.Default(
        spacing_size="sm",
        font=[gr.themes.GoogleFont("Plus Jakarta Sans"), "Arial", "sans-serif"],
        primary_hue="indigo",
        secondary_hue="purple",
    ),
) as demo:
    # Holds the endpoint URL for the currently selected model.
    base_url_state = gr.State(update_client_base_url("Patronus Lynx 8B v1.1"))
    gr.Markdown(HEADER)
    model_dropdown = gr.Dropdown(choices=["Patronus Lynx 8B v1.1", "Patronus Lynx 70B"], value="Patronus Lynx 8B v1.1", label="Model", interactive=True)

    with gr.Row():
        # Left column: the three inputs plus upload/clear/submit controls.
        with gr.Column(scale=1):
            with gr.Row():
                question = gr.Textbox(label="Question")
            with gr.Row():
                document = gr.Textbox(label="Document", scale=9)
                u = gr.UploadButton("Upload", visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES, scale=1)
                file_group = gr.Group(elem_classes="fixed-height-button", visible=False)
                with file_group:
                    file_name = gr.Markdown("")
                    c = gr.ClearButton([u, file_name])
                # d = gr.DownloadButton("Download the file", visible=False, scale=1)
            with gr.Row():
                answer = gr.Textbox(label="Answer")
            with gr.Row():
                clear_btn = gr.ClearButton([question, document, answer])
                submit_button = gr.Button("Submit", variant="primary")
        # Right column: model output.
        with gr.Column(scale=1):
            reasoning = gr.Textbox(label="Reasoning")
            score = gr.Textbox(label="Score (FAIL if Hallucinated, PASS if not)")

    gr.Markdown(" ")
    gr.Markdown(EXAMPLES_HEADER)
    with gr.Row():
        with gr.Column():
            # One button per canned example; clicking it fills the form.
            for example in EXAMPLES:
                template_btn = gr.Button(f"{example['emoji']} {example['question']}", elem_classes="example-button")
                template_btn.click(
                    fn=select_template,
                    inputs=[gr.State(example)],
                    outputs=[question, document, answer]
                )

    model_dropdown.change(fn=update_client_base_url, inputs=[model_dropdown], outputs=[base_url_state])
    u.upload(upload_file, u, [u, file_group, file_name, document])
    c.click(reset_buttons, None, [u, file_group, file_name, document])
    # d.click(download_file, None, [u, d])

    # The submit button and pressing Enter in any input box all run the
    # same evaluation with the same inputs and outputs.
    evaluation_inputs = [question, document, answer, base_url_state]
    evaluation_outputs = [reasoning, score]
    submit_button.click(fn=model_call, inputs=evaluation_inputs, outputs=evaluation_outputs)
    for input_box in (question, document, answer):
        input_box.submit(fn=model_call, inputs=evaluation_inputs, outputs=evaluation_outputs)

demo.launch()
|