"""Gradio demo for the Collinear reliability judge.

Loads a sequence-classification model, renders a document plus a JSON
conversation into a single prompt, and shows the softmax of the model's
logits in a text box.
"""

import json

import gradio as gr
import torch
from jinja2 import Template
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the judge. Fall back to CPU so the demo still starts on machines
# without a CUDA device instead of failing at import time.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model_name = "collinear-ai/collinear-reliability-judge-v5"
model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Prompt template fed to the judge.
# NOTE(review): the line breaks below were reconstructed from a
# whitespace-collapsed source; confirm they match the prompt layout the
# model expects.
template = Template(
    """
# Document:
{{ document }}

# Conversation:
{% for message in conversation %}
{{ message.role }}: {{ message.content }}
{% endfor %}
"""
)


def judge_reliability(document: str, conversation: str) -> str:
    """Score a conversation against a reference document.

    Args:
        document: Reference text rendered under the "# Document:" heading.
        conversation: JSON string encoding a list of messages, each with
            ``role`` and ``content`` entries.

    Returns:
        A string of the form ``"Reliability Score: <tensor>"`` where the
        tensor is the softmax over the model's output logits.

    Raises:
        json.JSONDecodeError: If ``conversation`` is not valid JSON.
    """
    messages = json.loads(conversation)

    # The template variable is named ``document``; passing it under any other
    # keyword renders an empty document section (Jinja2 treats undefined
    # variables as empty strings), so the judge would never see the document.
    text = template.render(document=document, conversation=messages)
    print(text)  # echo the rendered prompt to stdout for debugging

    encoded = tokenizer([text], padding=True, return_tensors="pt")
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
        outputs = torch.softmax(logits, dim=1)

    return f"Reliability Score: {outputs}"


demo = gr.Interface(
    fn=judge_reliability,
    inputs=[
        gr.Textbox(
            label="Document",
            lines=5,
            value="Chris Voss, was born in Iowa, USA. He is the best negotiator in the world.",
        ),
        gr.Textbox(
            label="Conversation",
            lines=5,
            value='[{"role": "user", "content": "Where are you born?"}, {"role": "assistant", "content": "I am born in Iowa"}]',
        ),
    ],
    outputs=gr.Textbox(label="Results"),
    title="Collinear Reliability Judge",
    description="Enter a document and conversation (json formatted) to judge reliability. Note: this judges if the last assistant turn is faithful according to the given document ",
    theme="default",
)

if __name__ == "__main__":
    demo.launch()