File size: 17,093 Bytes
0703e71
68c11f0
e504a30
68c11f0
 
d232ed1
5b6755d
901a87e
6efea88
d232ed1
0703e71
5b6755d
60ffe71
a1f7b63
 
 
 
7be224f
 
 
 
5b6755d
210b40c
 
 
 
 
090dd00
210b40c
 
 
090dd00
210b40c
 
 
090dd00
210b40c
 
 
 
 
 
f21f2ed
31afd70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b6505c
 
 
 
 
 
034edb9
 
6b5935b
034edb9
5b6505c
 
f21f2ed
 
 
 
 
 
 
 
 
 
 
 
 
 
a73aede
f21f2ed
 
a73aede
f21f2ed
 
 
 
 
 
0a402b2
f21f2ed
0a402b2
0108e87
f21f2ed
 
1230f78
 
1418604
ec09f15
1418604
a1f7b63
1418604
a1f7b63
37528ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e899803
1418604
 
 
 
 
6b786ce
eb7b56f
1aa6e35
090dd00
bc3ac0f
5b6755d
 
7f45e1c
 
76c4bfe
8672bbc
2cdd4cb
 
1861169
87d4d46
d232ed1
e504a30
 
 
901a87e
 
e504a30
901a87e
 
e504a30
 
6efea88
7144bca
6efea88
 
 
 
 
f41fff5
e504a30
34e4cb8
 
6fe41d2
49a1729
e504a30
 
901a87e
 
6efea88
 
31afd70
34e4cb8
e504a30
32e8378
82e1308
e504a30
f41fff5
31afd70
 
 
32e8378
 
f41fff5
6b786ce
 
 
 
 
 
 
 
 
bd26917
5b6505c
f2c7470
f21f2ed
6b786ce
f2c7470
c43c3ad
 
f575d58
0b27a4e
b9132f9
0b27a4e
1230f78
034edb9
4a543db
9ecbe8e
 
4cb73b2
f575d58
32e8378
d5478e2
 
 
31afd70
 
 
 
 
 
 
 
 
c43c3ad
 
2cdd4cb
f41fff5
1418604
e504a30
 
32e8378
bd26917
1418604
 
 
 
d232ed1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
import os
import re
import io
from typing import List, Tuple, Union
from pathlib import Path
import gradio as gr
import openai
import pymupdf
from docx import Document

# Credentials are pulled from the process environment; each is None when the
# corresponding variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
LEPTON_API_TOKEN = os.getenv("LEPTON_API_TOKEN")

# client=openai.OpenAI(
#     base_url="https://yb15a7dy-patronus-lynx-8b-v1-1.tin.lepton.run/api/v1/",
#     api_key=LEPTON_API_TOKEN
# )
# client=openai.OpenAI(
#     base_url="https://yb15a7dy-lynx-70b.tin.lepton.run/api/v1/",
#     api_key=LEPTON_API_TOKEN
# )

# Prompt template sent to the model. It is filled in with str.format(), so
# {question}, {document} and {answer} are placeholders, while the doubled
# braces on the last line render as literal "{" / "}" in the final prompt.
PROMPT = """
Given the following QUESTION, DOCUMENT and ANSWER you must analyze the provided answer and determine whether it is faithful to the contents of the DOCUMENT. The ANSWER must not offer new information beyond the context provided in the DOCUMENT. The ANSWER also must not contradict information provided in the DOCUMENT. Output your final verdict by strictly following this format: "PASS" if the answer is faithful to the DOCUMENT and "FAIL" if the answer is not faithful to the DOCUMENT. Show your reasoning.

--
QUESTION (THIS DOES NOT COUNT AS BACKGROUND INFORMATION):
{question}

--
DOCUMENT:
{document}

--
ANSWER:
{answer}

--

Your output should be in JSON FORMAT with the keys "REASONING" and "SCORE":
{{"REASONING": <your reasoning as bullet points>, "SCORE": <your final score>}}
"""

# Canned question/document/answer triples offered as one-click templates in
# the UI. Each entry carries an "emoji" (used in the button label) plus the
# three text fields that get copied into the input boxes.
EXAMPLES = [
    {
        "emoji": "🏈",
        "question": "Which player had the longest touchdown play of the first half?",
        "document": "The Browns dropped their 13th consecutive season-opening game with a 21-18 loss to the Steelers. The scoring began early in the first quarter when Pittsburgh's Tyler Matakevich blocked a Britton Colquitt punt, knocking the ball into the end zone where it was recovered by Anthony Chickillo for a touchdown. The Browns were able to tie the score by the end of the first quarter, however, as rookie quarterback DeShone Kizer orchestrated a 12-play drive that ended when he scored on a 1-yard touchdown run. The game stayed at 7-7 through most of the second quarter, until the Steelers quarterback Ben Roethlisberger connected with tight end Jesse James on a 4-yard touchdown with 45 seconds left in the first half. The 7-play, 91-yard scoring drive was highlighted by a 50-yard reception by Antonio Brown on a tipped ball. After a Zane Gonzalez field goal brought the Browns within 14-10 early in the third quarter, the Steelers drove down the field again. Roethlisberger threw a second touchdown pass to James to put them up by 11, 21-10. The drive was boosted by a 41-yard pass interference penalty on Browns cornerback Jamar Taylor on a deep pass intended for Brown. The score remained 21-10 until under four minutes remained in the game, when Kizer was able to throw his first career touchdown pass to Corey Coleman. A two-point conversion run by Isaiah Crowell brought the Browns to within a field goal, 21-18. The Browns did not get a chance to score again, however, as the Steelers were able to run out the clock after a long pass from Roethlisberger to Brown. Browns head coach Hue Jackson challenged the ruling; however, the catch call on the field was upheld. The 13th-straight season-opening loss extended an NFL record for the Browns, who also fell to 1-18 in season openers since returning to the league in 1999. The team also lost their 12th straight game against a divisional opponent. The team started 0-1.",
        "answer": "['Antonio Brown']"
    },
    {
        "emoji": "🎾",
        "question": "Who lost to Petrova in the Australian Open?",
        "document": "Coming off their road win over the Redskins, the Chiefs went home, donned their Dallas Texans throwbacks, and played a Week 7 AFL Legacy game with the San Diego Chargers. Kansas City would find themselves trailing in the first quarter as Chargers quarterback Philip Rivers completed a 3-yard touchdown pass to wide receiver Malcom Floyd, followed by a 10-yard touchdown pass to wide receiver Vincent Jackson. San Diego would add onto their lead in the second quarter with a 20-yard and a 39-yard field goal from kicker Nate Kaeding. The Chiefs would get onto the board in the third quarter with quarterback Matt Cassel completing a 7-yard touchdown pass to wide receiver Dwayne Bowe, but the Chargers kept their momentum going with Rivers finding running back Darren Sproles on a 58-yard touchdown pass. In the fourth quarter, San Diego sealed the win with Kaeding's 19-yard field goal and fullback Jacob Hester recovering a blocked punt in the end zone for a touchdown. With the loss, Kansas City went into their bye week at 1-6. Larry Johnson was suspended for two weeks after he made offensive comments about Todd Haley and made offensive comments about homosexuals on Twitter and in public.",
        "answer": "['Maria Sharapova', 'Venus Williams', 'Svetlana Kuznetsova']"
    },
    {
        "emoji": "⛪️",
        "question": "Whose Apology was rejected by Charles V?",
        "document": "In the course of the Lutheran Reformation numerous Imperial States had adopted the new confession, against the opposition of the ruling Catholic House of Habsburg, who recognised these conversions as a quest for increasing autonomy to the detriment of the central Imperial authority. At the 1521 Diet of Worms Emperor Charles V had Martin Luther banned and the proliferation of his writings prohibited, which in 1529 provoked the Protestation at Speyer by several Lutheran estates. The tensions culminated to an open conflict over the Lutheran Augsburg Confession of 1530, the Apology of which, written by Philipp Melanchthon, was rejected by the Emperor. In turn several Lutheran states led by Elector John Frederick I of Saxony and Landgrave Philip I of Hesse met at the town of Schmalkalden, where they established the Schmalkaldic League in 1531. In 1544 Charles V returned to Germany from the Italian War after he had signed the Treaty of Crépy and began to forge alliances not only with Pope Paul III but also with Lutheran princes, foremost with Duke Maurice of Saxony, the Albertine cousin of Saxon Elector John Frederick I. In view of the Emperor's preparations for battle, the Schmalkaldic leaders on 4 July 1546 gathered at Ichtershausen and agreed that a preventive strike would be advisable, as long as Charles V had not concentrated a significant amount of mercenaries. The Emperor gathered an army of around 52,000 men for his campaign, that was to start on the Danube.",
        "answer": "Martin Luther"
    },
    {
        "emoji": "🪖",
        "question": "What battle started first: Battle of Vienna or Battle of Mohács?",
        "document": "The Siege of Vienna in 1529 was the first attempt by the Ottoman Empire, led by Suleiman the Magnificent, to capture the city of Vienna, Austria. The siege signalled the pinnacle of the Ottoman Empire's power and the maximum extent of Ottoman expansion in central Europe. Thereafter, 150 years of bitter military tension and reciprocal attacks ensued, culminating in the Battle of Vienna of 1683, which marked the start of the 15-year-long Great Turkish War. The inability of the Ottomans to capture Vienna in 1529 turned the tide against almost a century of conquest throughout eastern and central Europe. The Ottoman Empire had previously annexed Central Hungary and established a vassal state in Transylvania in the wake of the Battle of Mohács. According to Arnold J. Toynbee, 'The failure of the first brought to a standstill the tide of Ottoman conquest which had been flooding up the Danube Valley for a century past.' There is speculation by some historians that Suleiman's main objective in 1529 was actually to assert Ottoman control over the whole of Hungary, the western part of which was under Habsburg control. The decision to attack Vienna after such a long interval in Suleiman's European campaign is viewed as an opportunistic manoeuvre after his decisive victory in Hungary. Other scholars theorise that the suppression of Hungary simply marked the prologue to a later, premeditated invasion of Europe.",
        "answer": "Battle of Vienna"
    },
    {
        "emoji": "🏰",
        "question": "What American businessman and guided the character Epick Mickey?",
        "document": "The character was regained by The Walt Disney Company in 2006 under the guidance of Bob Iger.Robert Allen Iger ( ; born February 10, 1951) is an American businessman who is chairman and chief executive officer (CEO) of The Walt Disney Company.",
        "answer": "Walt Disney himself guided Epic Mickey."
    }
]

# Custom stylesheet handed to gr.Blocks(css=...): loads the Plus Jakarta Sans
# web font and defines the .fixed-height-button class, which is attached to
# the uploaded-file group via elem_classes.
css = """
@import url('https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;700&display=swap');

body, .gradio-container {
    font-family: 'Plus Jakarta Sans', sans-serif !important;
}

.fixed-height-button {
    height: 150px;
}
"""

# Markdown/HTML banner rendered at the top of the page via gr.Markdown(HEADER):
# logo, links to the model card / GitHub repo / arXiv paper, and a short
# "getting started" blurb for users.
HEADER = """
# Patronus Lynx Demo
<table bgcolor="#1E2432" cellspacing="0" cellpadding="0"  width="450">
<tr style="height:50px;">
<td style="text-align: center;">
<a href="https://www.patronus.ai">
<img src="https://cdn.prod.website-files.com/64e655d42d3be60f582d0472/64ede352897bcddbe2d41207_patronusai_final_logo.svg" width="200" height="40" />
</a>
</td>
</tr>
</table>
<table bgcolor="#1E2432" cellspacing="0" cellpadding="0"  width="450">
<tr style="height:30px;">
<td style="text-align: center;">
<a href="https://huggingface.co/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Model_Card-Huggingface-orange" height="20"></a>
</td>
<td style="text-align: center;">
<a href="https://github.com/patronus-ai/Lynx-hallucination-detection"><img src="https://postimage.me/images/2024/03/04/GitHub_Logo_White.png" width="100" height="20"></a>
</td>
<td style="text-align: center; color: white;">
<a href="https://arxiv.org/abs/2407.08488"><img src="https://img.shields.io/badge/arXiv-2407.08488-b31b1b.svg" height="20"></a>
</td>
</tr>
</table>

**Patronus Lynx** is a state-of-the-art open-source model for hallucination detection.

**Getting Started**: Provide a question and document or context given to your model in addition to the answer given by the model and then click submit. The output panel will indicate whether the response is a hallucination (Fail) or if it is faithful to the given document or context (Pass) through the score Pass or Fail and provide reasoning behind the score.
"""

# File extensions the upload button accepts (passed as file_types to
# gr.UploadButton).
# NOTE(review): ".doc" is accepted here, but upload_file() only extracts text
# for pdf/txt/docx, so a legacy .doc upload ends up with an empty document
# field — confirm whether ".doc" should be supported or removed.
UPLOADABLE_FILE_TYPES = [".pdf", ".txt", ".docx", ".doc"]

def update_client_base_url(model_name):
    """
    Map a model name from the dropdown to its inference endpoint URL.

    Returns None when the name matches neither known model.
    """
    endpoints = (
        ("Patronus Lynx 8B v1.1", "https://yb15a7dy-patronus-lynx-8b-v1-1.tin.lepton.run/api/v1/"),
        ("Patronus Lynx 70B", "https://yb15a7dy-lynx-70b.tin.lepton.run/api/v1/"),
    )
    for label, url in endpoints:
        if model_name == label:
            return url
    return None

def parse_patronus_lynx_response(
    response: str,
) -> Tuple[bool, Union[List[str], None]]:
    """
    Pull the verdict and the reasoning list out of a raw Patronus Lynx reply.

    Returns a ``(hallucination, reasoning)`` pair:
    - ``hallucination`` is False only when an explicit ``"SCORE": PASS`` is
      found; a FAIL score or a missing score both yield True.
    - ``reasoning`` is the text between the brackets of ``"REASONING": [...]``
      split on quote-comma-quote separators, or None when no such list is
      present. The outermost quote characters are left on the first and last
      items.
    """
    verdict_match = re.search(r'"SCORE":\s*"?\b(PASS|FAIL)\b"?', response)
    trace_match = re.search(r'"REASONING":\s*\[(.*?)\]', response, re.DOTALL)

    # Anything other than an explicit PASS is treated as a hallucination.
    passed = verdict_match is not None and verdict_match.group(1) == "PASS"

    if trace_match is None:
        return (not passed), None

    steps = re.split(r"['\"],\s*['\"]", trace_match.group(1))
    return (not passed), steps

def model_call(question, document, answer, client_base_url):
    """
    Ask the selected Lynx endpoint whether ``answer`` is faithful to ``document``.

    Args:
        question: The question that was asked.
        document: The context the answer is supposed to be grounded in.
        answer: The answer to evaluate.
        client_base_url: Base URL of the OpenAI-compatible endpoint to query.

    Returns:
        A ``(reasoning, score)`` tuple of strings. ``score`` is "PASS" or
        "FAIL"; ``reasoning`` is the model's reasoning items joined with
        spaces, or "" when the reply contained no parseable reasoning list.
        Both are "" when any of the three inputs is empty.
    """
    # Nothing to evaluate until all three fields are filled in. Checking this
    # first avoids building a client (which can itself raise when no API key
    # is configured) for a request that is never sent.
    if not question or not document or not answer:
        return "", ""

    client = openai.OpenAI(
        base_url=client_base_url,
        api_key=LEPTON_API_TOKEN
    )
    print("CLIENT AND CLIENT BASE URL", client, client_base_url)
    NEW_FORMAT = PROMPT.format(question=question, document=document, answer=answer)
    print("ENTIRE NEW_FORMAT", NEW_FORMAT)
    # NOTE(review): the model name is an OpenAI one although the request goes
    # to the endpoint in client_base_url — presumably the deployment ignores
    # or maps it; confirm.
    response = client.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=NEW_FORMAT,
        temperature=0.0
    )
    print("RESPONSE FROM CLIENT:", response)
    hallucination, reasoning = parse_patronus_lynx_response(response.choices[0].text)
    score = "FAIL" if hallucination else "PASS"
    # The parser returns None when the reply has no REASONING list; joining
    # None would raise TypeError, so fall back to an empty string. The [1:-1]
    # slice drops the outer quote characters the parser leaves on the first
    # and last reasoning items.
    combined_reasoning = " ".join(reasoning)[1:-1] if reasoning else ""
    return combined_reasoning, score

def get_filetype(filename):
    """
    Return the lower-cased extension of ``filename`` without the leading dot.

    Only the text after the last dot is considered ("a.tar.gz" -> "gz").
    The result is lower-cased so that "REPORT.PDF" is recognised the same way
    as "report.pdf". A name containing no dot has no extension and yields "".
    """
    _, dot, extension = filename.rpartition(".")
    if not dot:
        # No dot at all: previously the whole name was returned as if it were
        # the extension, which could misclassify a file literally named "pdf".
        return ""
    return extension.lower()

def extract_text_pymupdf(file):
    """Open ``file`` with PyMuPDF and return the text of all pages concatenated in order."""
    with pymupdf.open(file) as opened_doc:
        page_texts = [page.get_text() for page in opened_doc]
    return "".join(page_texts)

def extract_text_python_docx(file):
    """
    Read a .docx file with python-docx and return its paragraph texts joined
    by newlines, with leading and trailing whitespace stripped.
    """
    paragraphs = Document(file).paragraphs
    joined = "\n".join(paragraph.text for paragraph in paragraphs)
    return joined.strip()

def upload_file(filepath):
    """
    Handle an upload: extract the file's text and swap the upload button for
    the "uploaded file" group.

    Returns a 4-item list of updates for, in order, the upload button, the
    file group, the file-name markdown and the document text. When
    ``filepath`` is None the upload button is shown again and the text is "".
    File types other than pdf/txt/docx produce an empty text.
    """
    if filepath is None:
        return [gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES), gr.Group(visible=False), gr.Markdown(""), ""]

    name = Path(filepath).name
    print("FILEPATH & file name", filepath, name)
    print("FILEPATH type & file name type", type(filepath), type(name))

    # Pick the extractor from the file extension.
    filetype = get_filetype(name)
    if filetype in ("pdf", "txt"):
        text = extract_text_pymupdf(filepath)
    elif filetype == "docx":
        text = extract_text_python_docx(filepath)
    else:
        text = ""

    return [gr.UploadButton(visible=False), gr.Group(visible=True), gr.Markdown(f"**Uploaded file:** {name}"), text]
def reset_buttons():
    """
    Return updates that restore the pre-upload state: upload button visible,
    file group hidden, file-name markdown emptied and the text box cleared.
    """
    upload_button = gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES)
    hidden_group = gr.Group(visible=False)
    blank_name = gr.Markdown("")
    blank_text = gr.Textbox(value="")
    return [upload_button, hidden_group, blank_name, blank_text]

def select_template(template):
    """Return the (question, document, answer) values of an example template dict."""
    return tuple(template[field] for field in ("question", "document", "answer"))

# def download_file():
#     return [gr.UploadButton(visible=True), gr.DownloadButton(visible=False)]

# inputs = [
#     gr.Textbox(label="Question"),
#     gr.Textbox(label="Document"),
#     gr.Textbox(label="Answer")
# ]
# outputs = [
#     gr.Textbox(label="Reasoning"),
#     gr.Textbox(label="Score")
# ]

# Build the Gradio UI: inputs (question / document / answer, with optional
# file upload for the document) in the left column, model outputs (reasoning
# and score) in the right column, then wire up the event handlers.
with gr.Blocks(css=css) as demo:
    # Holds the endpoint URL of the currently selected model; starts on the
    # 8B endpoint to match the dropdown's default value.
    base_url_state = gr.State(update_client_base_url("Patronus Lynx 8B v1.1"))
    gr.Markdown(HEADER)
    # gr.Interface(fn=model_call, inputs=inputs, outputs=outputs)
    model_dropdown = gr.Dropdown(choices=["Patronus Lynx 8B v1.1", "Patronus Lynx 70B"], value="Patronus Lynx 8B v1.1", label="Model", interactive=True)
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                question = gr.Textbox(label="Question")
            with gr.Row():
                document = gr.Textbox(label="Document", scale=9)
                # Upload button and the "uploaded file" group occupy the same
                # spot; upload_file / reset_buttons toggle which one is visible.
                u = gr.UploadButton("Upload", visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES, scale=1)
                file_group = gr.Group(elem_classes="fixed-height-button", visible=False)
                with file_group:
                    file_name = gr.Markdown("")
                    c = gr.ClearButton([u, file_name])
                    # d = gr.DownloadButton("Download the file", visible=False, scale=1)
            with gr.Row():
                answer = gr.Textbox(label="Answer")
            with gr.Row():
                clear_btn = gr.ClearButton([question, document, answer])
                submit_button = gr.Button("Submit")
            with gr.Row():
                gr.Markdown("### Example Templates")
                # One button per canned example. Each button gets its own
                # gr.State wrapping that example dict, so the click handler
                # receives the right template. (idx is currently unused.)
                for idx, example in enumerate(EXAMPLES):
                    template_btn = gr.Button(f"{example['emoji']} {example['question']}")
                    template_btn.click(
                        fn=select_template,
                        inputs=[gr.State(example)],
                        outputs=[question, document, answer]
                    )
        with gr.Column(scale=1):
            reasoning = gr.Textbox(label="Reasoning")
            score = gr.Textbox(label="Score (FAIL if Hallucinated, PASS if not)")

    # Changing the model swaps the endpoint URL stored in base_url_state.
    model_dropdown.change(fn=update_client_base_url, inputs=[model_dropdown], outputs=[base_url_state])
    # Uploading a file fills the document box with its extracted text;
    # the clear button inside the file group undoes that.
    u.upload(upload_file, u, [u, file_group, file_name, document])
    c.click(reset_buttons, None, [u, file_group, file_name, document])
    # d.click(download_file, None, [u, d])

    # The Submit button and pressing Enter in any of the three input boxes
    # all run the same model call.
    submit_button.click(fn=model_call, inputs=[question, document, answer, base_url_state], outputs=[reasoning, score])
    question.submit(fn=model_call, inputs=[question, document, answer, base_url_state], outputs=[reasoning, score])
    document.submit(fn=model_call, inputs=[question, document, answer, base_url_state], outputs=[reasoning, score])
    answer.submit(fn=model_call, inputs=[question, document, answer, base_url_state], outputs=[reasoning, score])
# Starts the app as a module-level side effect (there is no __main__ guard).
demo.launch()