Spaces:
Running
Running
File size: 2,740 Bytes
98cc895 f555c09 9c53030 7d8479e 9c53030 98cc895 9c53030 0bbc8ff 98cc895 9c53030 0bbc8ff 9c53030 0bbc8ff 9c53030 0bbc8ff 98cc895 9c53030 0bbc8ff 9c53030 98cc895 9c53030 98cc895 9c53030 0bbc8ff ee5fabd 0bbc8ff ee5fabd 0bbc8ff 9c53030 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import nltk
nltk.download('punkt')
import pandas as pd
import gradio as gr
from nltk import sent_tokenize
from transformers import pipeline
from gradio.themes.utils.colors import red, green
# Sentence-level text classifier, created once at module level and reused by
# predict_one_sent for every sentence.
detector = pipeline(
    task='text-classification',
    model='SJTU-CL/RoBERTa-large-ArguGPT-sent',
)

# Highlight colour per probability bucket used by the HighlightedText output:
# the keys are the bucket labels produced in predict_doc, the values run from
# the strongest green (lowest bucket) to the strongest red (highest bucket).
color_map = dict(zip(
    ('0%', '10%', '20%', '30%', '40%', '50%', '60%', '70%', '80%', '90%', '100%'),
    (
        green.c400, green.c300, green.c200, green.c100, green.c50,
        red.c50, red.c100, red.c200, red.c300, red.c400, red.c500,
    ),
))
def predict_doc(doc):
    """Score every sentence of an essay and summarise the result.

    The essay is split with ``sent_tokenize`` and each sentence is scored by
    ``predict_one_sent``; the score is treated as the probability that the
    sentence was generated by AI.

    Args:
        doc: Essay text. ``None``, empty and whitespace-only input is accepted
            and yields an empty result instead of a NaN-based verdict.

    Returns:
        A 4-tuple ``(summary, highlights, table, csv_path)``:
            summary: one-line verdict for the whole essay.
            highlights: list of ``(sentence, bucket)`` pairs, where bucket is
                one of '0%', '10%', ..., '100%'.
            table: DataFrame with the columns sentence, label and score.
            csv_path: 'result.csv' (the table written to disk), or ``None``
                when there was no sentence to score.
    """
    columns = ['sentence', 'label', 'score']

    # Guard before tokenising: without it an empty essay produced an empty
    # frame whose mean is NaN, and NaN <= 0.5 is False, so the essay was
    # reported as "Machine" with probability nan.
    sents = sent_tokenize(doc) if doc and doc.strip() else []
    if not sents:
        return (
            'No sentences found. Please enter an essay.',
            [],
            pd.DataFrame(columns=columns),
            None,
        )

    # Lower bounds of the buckets 10%, 20%, ..., 100%. Counting how many
    # bounds a probability has reached gives the bucket index (0..10).
    thresholds = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

    data = {name: [] for name in columns}
    res = []
    for sent in sents:
        prob = predict_one_sent(sent)
        data['sentence'].append(sent)
        data['label'].append('Human' if prob <= 0.5 else 'Machine')
        data['score'].append(round(prob, 4))
        bucket = sum(prob >= bound for bound in thresholds)
        res.append((sent, f'{bucket * 10}%'))

    df = pd.DataFrame(data)
    # NOTE(review): the path is fixed, so every call rewrites the same file;
    # concurrent requests would overwrite each other's CSV. Consider a
    # per-request temporary file if that matters for the deployment.
    df.to_csv('result.csv')

    mean_score = float(df.score.mean())
    # Decide on the unrounded mean; round only for display so the message
    # does not show binary floating-point noise.
    overall_label = 'Human' if mean_score <= 0.5 else 'Machine'
    overall_score = round(mean_score, 4)
    sum_str = f'The essay is probably written by {overall_label}. The probability of being generated by AI is {overall_score}'
    return sum_str, res, df, 'result.csv'
def predict_one_sent(sent):
    '''
    Return the classifier score of one sentence expressed for LABEL_1.

    The detector reports a label together with the score of that label; the
    score is flipped when the reported label is LABEL_0, so the returned value
    always refers to the same class:
    LABEL_1, 0.66 -> 0.66
    LABEL_0, 0.66 -> 0.34

    Args:
        sent: the sentence text to classify.

    Returns:
        A float in [0, 1]; callers in this file treat it as the probability
        that the sentence was generated by AI.
    '''
    # truncation=True is forwarded to the tokenizer so that an over-long
    # "sentence" (e.g. a paragraph without punctuation) is cut to the model's
    # maximum input length instead of crashing inference.
    top = detector(sent, truncation=True)[0]
    score = top['score']
    return 1 - score if top['label'] == 'LABEL_0' else score
# Gradio UI: essay input and trigger button, highlighted per-sentence result,
# overall summary, downloadable CSV and the per-sentence score table.
# NOTE(review): the source lost its indentation; the nesting below (input
# column and highlighted result sharing the first row) is reconstructed from
# statement order -- confirm against the deployed layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(label='Essay input', info='Please enter the essay in the textbox', lines=5)
            btn = gr.Button('Predict who writes this essay!')

        sent_res = gr.HighlightedText(
            label='Labeled Result',
            color_map=color_map,
        )

    with gr.Row():
        summary = gr.Text(label='Result summary')
        csv_f = gr.File(label='CSV file storing data with all sentences.')

    tab = gr.Dataframe(
        label='Table with Probability Score',
        row_count=100,
    )

    # One click fills all four outputs, in the order predict_doc returns them.
    btn.click(
        predict_doc,
        inputs=[text_in],
        outputs=[summary, sent_res, tab, csv_f],
        api_name='predict_doc',
    )

demo.launch()
|