# Spaces: Running
import nltk | |
# Fetch the Punkt sentence-tokenizer data that `sent_tokenize` relies on.
# Older NLTK releases load the 'punkt' package, while recent ones (3.9 and
# later) look for 'punkt_tab' instead, so both are requested to keep the app
# working on either side of that change.
for _punkt_resource in ('punkt', 'punkt_tab'):
    nltk.download(_punkt_resource)
import pandas as pd | |
import gradio as gr | |
from nltk import sent_tokenize | |
from transformers import pipeline | |
from gradio.themes.utils import red, green | |
# Text-classification pipeline shared by every prediction; it is built once,
# when the module is loaded, and then called per sentence.
detector = pipeline(
    model='SJTU-CL/RoBERTa-large-ArguGPT-sent',
    task='text-classification',
)
# Highlight colour for each 10%-wide probability bucket: the labels run from
# '0%' to '100%' in steps of ten, with green shades for the buckets below
# 50% and red shades from 50% upwards.
color_map = {
    f'{step * 10}%': shade
    for step, shade in enumerate((
        green.c400, green.c300, green.c200, green.c100, green.c50,
        red.c50, red.c100, red.c200, red.c300, red.c400, red.c500,
    ))
}
def _bucket_label(prob):
    '''Return the 10%-wide bucket label ('0%' ... '100%') for a probability.'''
    # The label is the lower edge of the bucket: the first upper bound that
    # the probability stays below decides it. Anything that reaches 1 falls
    # through to '100%'.
    upper_bounds = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
    for idx, upper in enumerate(upper_bounds):
        if prob < upper:
            return f'{idx * 10}%'
    return '100%'


def predict_doc(doc):
    '''
    Score every sentence of `doc` and summarise who probably wrote the essay.

    The text is split with `sent_tokenize`; each sentence is passed to
    `predict_one_sent`, whose result is treated as the probability that the
    sentence was machine-generated.

    Returns a 4-tuple:
      * a summary string for the whole essay,
      * a list of (sentence, bucket label) pairs, the label being one of
        '0%', '10%', ..., '100%',
      * a DataFrame with the columns 'sentence', 'label' and 'score'
        (score rounded to 4 decimals),
      * the path of the CSV file the DataFrame was written to.
    '''
    # A missing input is handled like an empty one.
    sents = sent_tokenize(doc or '')
    probs = [predict_one_sent(sent) for sent in sents]

    df = pd.DataFrame({
        'sentence': sents,
        'label': ['Human' if prob <= 0.5 else 'Machine' for prob in probs],
        'score': [round(prob, 4) for prob in probs],
    })
    res = [(sent, _bucket_label(prob)) for sent, prob in zip(sents, probs)]

    # NOTE: the CSV always goes to the same relative path, so every call
    # overwrites the file left by the previous one.
    df.to_csv('result.csv')

    if df.empty:
        # The mean of an empty column is NaN, and `NaN <= 0.5` is False, so
        # without this guard an empty input would be reported as 'Machine'
        # with a probability of nan.
        sum_str = 'No sentences were found in the input, so no prediction was made.'
        return sum_str, res, df, 'result.csv'

    overall_score = df.score.mean()
    overall_label = 'Human' if overall_score <= 0.5 else 'Machine'
    sum_str = f'The essay is probably written by {overall_label}. The probability of being generated by AI is {overall_score}'
    return sum_str, res, df, 'result.csv'
def predict_one_sent(sent):
    '''
    Return the probability that `sent` was machine-generated.

    The first result returned by the detector carries a label and that
    label's score; the score is converted to the probability of LABEL_1:
    LABEL_1, 0.66 -> 0.66
    LABEL_0, 0.66 -> 0.34
    '''
    # Ask the tokenizer to truncate over-long input: text without sentence
    # breaks reaches this function as one huge "sentence" and could otherwise
    # exceed the length the model accepts.
    top = detector(sent, truncation=True)[0]
    score = top['score']
    return 1 - score if top['label'] == 'LABEL_0' else score
# Gradio front end: an essay textbox and a button feed `predict_doc`, whose
# four return values fill the highlighted text, the summary, the table and
# the downloadable CSV.
# NOTE(review): the nesting of the rows/column below is reconstructed, since
# the original indentation was not available -- confirm the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(
                label='Essay input',
                info='Please enter the essay in the textbox',
                lines=5,
            )
            btn = gr.Button('Predict who writes this essay!')

        # Per-sentence result, coloured by probability bucket via color_map.
        sent_res = gr.Highlight(label='Labeled Result').style(color_map=color_map)

    with gr.Row():
        summary = gr.Text(label='Result summary')
        csv_f = gr.File(label='CSV file storing data with all sentences.')

    tab = gr.DataFrame(label='Table with Probability Score', max_rows=100)

    # The output order must match the order of predict_doc's return tuple.
    btn.click(
        predict_doc,
        inputs=[text_in],
        outputs=[summary, sent_res, tab, csv_f],
        api_name='predict_doc',
    )

demo.launch()