"""Gradio demo that labels a piece of text as human-written or machine-generated.

The script loads a sequence-classification checkpoint and its tokenizer from
``model_dir``, exposes :func:`detect` through a ``gr.Interface``, and launches
the web UI when the file is executed as a script.
"""

import os

import gradio as gr
import torch
import transformers
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

# NOTE(review): `preprocess` is imported but never called in this file --
# confirm whether the input text is supposed to be passed through it first.
from utils import preprocess

device = 'cpu'
model_dir = "nealcly/detection-longformer"

# Inputs with fewer whitespace-separated words than this are rejected.
_MIN_WORDS = 30

# Upper bound on the number of tokens fed to the model, so that very long
# inputs are truncated instead of being passed through unbounded.
# NOTE(review): 4096 is assumed to be the checkpoint's maximum sequence
# length -- confirm against the model configuration.
_MAX_INPUT_TOKENS = 4096

# load the Longformer detector
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir).to(device)


def detect(input_text, th=-3.08583984375):
    """Classify ``input_text`` as machine-generated or human-written.

    Args:
        input_text: The text to analyse. ``None`` is treated like an empty
            string.
        th: Decision threshold. The score is the negated first logit of the
            first (only) batch element; a score strictly below ``th`` yields
            "machine-generated", anything else "human-written".

    Returns:
        ``"machine-generated"`` or ``"human-written"``, or an explanatory
        message when the input has fewer than 30 whitespace-separated words.
    """
    if len((input_text or "").split()) < _MIN_WORDS:
        return 'It is not reliable to detect text with less than 30 words.'

    label2decisions = {
        0: "machine-generated",
        1: "human-written",
    }

    # Truncate so that overly long inputs cannot exceed the token cap.
    tokenize_input = tokenizer(
        input_text,
        truncation=True,
        max_length=_MAX_INPUT_TOKENS,
    )
    tensor_input = torch.tensor([tokenize_input["input_ids"]]).to(device)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(tensor_input)

    score = -outputs.logits[0][0].item()
    decision = 0 if score < th else 1
    return label2decisions[decision]


# The text below is kept verbatim (including its leading and trailing space);
# adjacent literals are concatenated only to keep the source lines short.
description_e = (
    " This is a demo on Github project 🏃 "
    "[Deepfake Text Detection in the Wild]"
    "(https://github.com/yafuly/DeepfakeTextDetect). "
    "🎯 Input the text to be detected, and click ''submit''' to get the "
    "detection result, either human-written or machine-generated. "
    "⌛️ It takes about 6~ seconds to generate segment results. "
    "🏠 Check out our [Model Card 🏃]"
    "(https://huggingface.co/nealcly/detection-longformer) "
)

# `description` takes the Markdown source as a plain string; the interface
# renders it itself, so no gr.Markdown component is constructed here.
iface = gr.Interface(
    fn=detect,
    inputs="text",
    outputs="text",
    description=description_e,
)

if __name__ == "__main__":
    # Start the web server only when run as a script, so the module can be
    # imported (e.g. to reuse `detect`) without launching the UI.
    iface.launch()