"""Gradio demo: classify a tweet as real or fake with a fine-tuned model.

The script loads the ``chinhon/fake_tweet_detect`` sequence-classification
model, cleans the submitted text with a handful of regex substitutions,
runs it through a ``TextClassificationPipeline`` and shows the result in a
Gradio ``Label`` output.
"""

import re

import gradio as gr
# NOTE(review): numpy, pandas and shap are not referenced directly in this
# script. shap is presumably required by ``interpretation="shap"`` below —
# confirm before pruning any of these imports.
import numpy as np
import pandas as pd
import shap
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TextClassificationPipeline,
)

# Single source of truth for the checkpoint used by tokenizer and model.
MODEL_NAME = "chinhon/fake_tweet_detect"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
tweet_detector = TextClassificationPipeline(model=model, tokenizer=tokenizer)

# Raw pipeline label -> text shown to the user.
_LABEL_NAMES = {
    "LABEL_1": "Fake Tweet",
    "LABEL_0": "Real Tweet",
}


# tweak the extent of text cleaning as you wish
def clean_text(text):
    """Return *text* normalised for the classifier.

    Steps, in order: drop ``http...`` URLs, turn newlines into spaces,
    rewrite ``'t`` as `` not``, remove ``@name`` mentions that are followed
    by whitespace, remove digit runs, drop every character that is not a
    word character, whitespace or ``#``, then collapse runs of spaces and
    trim the ends.

    NOTE(review): the patterns are kept byte-identical on purpose; the
    model was presumably trained on text cleaned the same way — confirm
    before changing any of them.
    """
    text = re.sub(r"http\S+", "", text)  # remove URLs
    text = re.sub(r"\n", " ", text)  # newlines -> spaces
    text = re.sub(r"\'t", " not", text)  # Change 't to 'not'
    # Remove @name. Only matches when whitespace follows, so a mention at
    # the very end of the text keeps its name (the "@" is stripped below).
    text = re.sub(r"(@.*?)[\s]", " ", text)
    # Remove digits. NOTE(review): the first alternative begins with the
    # end-of-string anchor "$" and therefore can never match; the other two
    # alternatives do the actual work.
    text = re.sub(r"$\d+\W+|\b\d+\b|\W+\d+$", " ", text)
    text = re.sub(r"[^\w\s\#]", "", text)  # remove special characters except hashtags
    # Collapse multiple spaces into one and trim surrounding whitespace.
    return re.sub(" +", " ", text).strip()


def tweet_detect(text):
    """Classify *text* and return a human-readable verdict.

    Returns ``"Fake Tweet"`` for ``LABEL_1`` and ``"Real Tweet"`` for
    ``LABEL_0``. Any other label yields an explicit ``"Unknown label: ..."``
    string instead of silently returning ``None``.
    """
    cleaned = clean_text(text)
    # truncation=True keeps over-long pasted text within the model's maximum
    # sequence length; inputs that already fit are unaffected.
    prediction = tweet_detector([cleaned], truncation=True)
    raw_label = prediction[0].get("label")
    return _LABEL_NAMES.get(raw_label, f"Unknown label: {raw_label}")


# Define Gradio interface
gradio_ui = gr.Interface(
    fn=tweet_detect,
    title="Detect Fake Tweets",
    # One logical string, split with implicit concatenation for readability.
    description=(
        "Enter a tweet and see if a Distilbert model can identify if it was "
        "written by state-backed trolls. DISCLAIMER: While the model was "
        "fine tuned on 100k real and troll tweets, and achieved high "
        "accuracy in my tests, its performance drops significantly against "
        "the day-to-day barrage of content on Twitter. "
        "As such, this app is intended as an example for understanding the "
        "limits of AI/ML in highly complex problems like fake media "
        "detection, and not as a final arbiter of whether someone's tweet "
        "is real or not."
    ),
    inputs=gr.inputs.Textbox(lines=10, label="Paste tweet text here [English Only]"),
    outputs=gr.outputs.Label(type="auto", label="Prediction"),
    interpretation="shap",
    article="Details of the fine tuning and tests are in this Medium post: https://bit.ly/3tueP36",
)

gradio_ui.launch(enable_queue=True)