import gradio as gr
import numpy as np
import pandas as pd
import re
import shap
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TextClassificationPipeline,
)
tokenizer = AutoTokenizer.from_pretrained("chinhon/fake_tweet_detect")
model = AutoModelForSequenceClassification.from_pretrained("chinhon/fake_tweet_detect")
tweet_detector = TextClassificationPipeline(model=model, tokenizer=tokenizer)
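# Note: the pipeline returns a list of dicts such as
# [{"label": "LABEL_1", "score": 0.98}] (the score shown here is illustrative);
# tweet_detect below maps the label to a human-readable verdict.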
# tweak the extent of text cleaning as you wish
def clean_text(text):
    text = re.sub(r"http\S+", "", text)  # remove URLs
    text = re.sub(r"\n", " ", text)  # replace newlines with spaces
    text = re.sub(r"\'t", " not", text)  # expand 't to " not" (e.g. can't -> can not)
    text = re.sub(r"(@.*?)[\s]", " ", text)  # remove @name mentions
    text = re.sub(r"$\d+\W+|\b\d+\b|\W+\d+$", " ", text)  # remove digits
    text = re.sub(r"[^\w\s\#]", "", text)  # remove special characters except hashtags
    text = text.strip(" ")
    text = re.sub(" +", " ", text).strip()  # collapse multiple spaces into one
    return text
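# Quick illustrative check of clean_text (the sample tweet below is made up;
# the expected result is approximate, assuming the regex steps above):
# should print roughly "Can not wait Check this #breaking"
print(clean_text("Can't wait! Check this @user http://t.co/xyz #breaking 2021"))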
def tweet_detect(text):
    data = [clean_text(text)]
    prediction = tweet_detector(data)
    pred_label = [x.get("label") for x in prediction]
    # the fine-tuned model outputs LABEL_1 for troll tweets and LABEL_0 for genuine ones
    if pred_label == ["LABEL_1"]:
        return "Fake Tweet"
    elif pred_label == ["LABEL_0"]:
        return "Real Tweet"
# Define Gradio interface
gradio_ui = gr.Interface(
    fn=tweet_detect,
    title="Detect Fake Tweets",
    description="Enter a tweet and see if the transformer model can identify whether it was written by state-backed trolls. DISCLAIMER: While the model was fine-tuned on 100k real and troll tweets, and achieved high accuracy in my tests, its performance drops significantly against the day-to-day barrage of content on Twitter. As such, this app is intended as an example for understanding the limits of AI/ML in highly complex problems like fake media detection, and not as a final arbiter of whether someone's tweet is real or not.",
    inputs=gr.inputs.Textbox(lines=10, label="Paste tweet text here [English Only]"),
    outputs=gr.outputs.Label(type="auto", label="Prediction"),
    interpretation="shap",
    enable_queue=True,
)
gradio_ui.launch()