tanveeshsingh committed
Commit: e052565
Parent(s): dd8c845

Add add to ds func

Files changed:
- app.py (+35 -1)
- requirements.txt (+1 -1)
app.py CHANGED
@@ -81,13 +81,42 @@ Your output should be in JSON FORMAT with the keys "REASONING" and "SCORE":
     lynx_time = round(time.time() - start_time, 2)  # Calculate time taken for Lynx
     return results, lynx_time
 # Function to judge reliability based on the selected input format
+
+
+async def add_to_dataset(category,document,question,answer,claim,conv_prefix,lynx_output,veritas_output):
+    conv_prefix = convert_to_message_array(conv_prefix)
+    dataset = load_dataset("collinear-ai/veritas-demo-dataset")
+    new_row = {
+        'style':category,
+        'document':document,
+        'question':question,
+        'answer':answer,
+        'claim':claim,
+        'conv_prefix':conv_prefix[:-1],
+        'response':conv_prefix[-1],
+        'lynx_output':lynx_output,
+        'veritas_output':veritas_output,
+    }
+    train_dataset = dataset['train']
+
+    df = train_dataset.to_pandas()
+    df2 = pd.DataFrame([new_row])
+    df = pd.concat([df, df2],ignore_index=True)
+
+    new_train_dataset = Dataset.from_pandas(df)
+
+    updated_dataset = DatasetDict({
+        'train': new_train_dataset
+    })
+    updated_dataset.push_to_hub("collinear-ai/veritas-demo-dataset",token=os.getenv("HF_TOKEN"))
+
 async def judge_reliability(input_style, document, conversation, claim, question, answer):
     start_time = time.time()
     if input_style == "Dialog":
         print(conversation)
         conversation = convert_to_message_array(conversation=conversation)
         print(conversation)
-        outputs= await collinear.judge.veritas.conversation(document,conversation[:-1],conversation[-1])
+        outputs= await collinear.judge.veritas.conversation('72267aea-e1c7-4f38-8eb8-f5e3c2abc279',document,conversation[:-1],conversation[-1])
     elif input_style == "NLI":
         outputs = await collinear.judge.veritas.natural_language_inference(document,claim)
     elif input_style == "QA format":
@@ -214,8 +243,13 @@ assistant:Yes, it is about $38Bn.""")
         fn=lynx,
         inputs=[input_style_dropdown,document_input,question_input,answer_input],
         outputs=[lynx_output, lynx_time_output]
+    ).then(
+        fn=add_to_dataset,
+        inputs=[input_style_dropdown,document_input,question_input,answer_input,claim_input,conversation_input,lynx_output,result_output],
+        outputs=[]
     )
 
+
 # Launch the demo
 if __name__ == "__main__":
     demo.launch()
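For context, the new add_to_dataset helper follows the usual load → append → push_to_hub pattern for Hugging Face datasets. It relies on load_dataset, Dataset, DatasetDict, pandas (as pd), and os being imported elsewhere in app.py; those imports are not visible in this hunk. A minimal standalone sketch of the same pattern (append_row_to_hub is a hypothetical name used only for illustration; the repo name and HF_TOKEN environment variable are taken from the diff):

import os

import pandas as pd
from datasets import Dataset, DatasetDict, load_dataset

REPO_ID = "collinear-ai/veritas-demo-dataset"  # dataset repo used in the diff


def append_row_to_hub(new_row: dict) -> None:
    """Append one row to the train split and push it back to the Hub."""
    dataset = load_dataset(REPO_ID)                    # existing DatasetDict
    df = dataset["train"].to_pandas()                  # train split as a DataFrame
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    updated = DatasetDict({"train": Dataset.from_pandas(df)})
    updated.push_to_hub(REPO_ID, token=os.getenv("HF_TOKEN"))  # needs write access to the repo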
requirements.txt CHANGED
@@ -1,4 +1,4 @@
 jinja2
 sentencepiece
 gradio
-collinear==0.1.
+collinear==0.1.25
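The requirements pin is bumped to collinear==0.1.25, presumably to match the updated veritas.conversation call in app.py, which now passes an identifier string as its first argument. The other app.py change chains the new logging step onto the existing click event: Gradio event listeners return a dependency object whose .then() runs a follow-up function once the first one has finished, so add_to_dataset only fires after the judge outputs are populated. A minimal self-contained sketch of that wiring, with placeholder functions (judge_stub and log_stub stand in for the Space's judge and add_to_dataset functions):

import gradio as gr


def judge_stub(text: str) -> str:
    # Stand-in for the judging call; returns a fake score string.
    return f"score for: {text!r}"


def log_stub(text: str, score: str) -> None:
    # Stand-in for add_to_dataset: side effect only, no UI outputs.
    print("logging", text, score)


with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Judge output")
    btn = gr.Button("Judge")
    # .then() runs log_stub only after judge_stub has completed and `out` is filled.
    btn.click(fn=judge_stub, inputs=[inp], outputs=[out]).then(
        fn=log_stub, inputs=[inp, out], outputs=[]
    )

if __name__ == "__main__":
    demo.launch()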