|
|
|
import os |
|
import uuid |
|
import pandas as pd |
|
import numpy as np |
|
from scipy.special import softmax |
|
import gradio as gr |
|
|
|
import torch |
|
from transformers import AutoTokenizer |
|
from transformers import AutoConfig |
|
from transformers import AutoModelForSequenceClassification |
|
from transformers import TFAutoModelForSequenceClassification |
|
from transformers import IntervalStrategy |
|
from transformers import TrainingArguments |
|
from transformers import EarlyStoppingCallback |
|
from transformers import pipeline |
|
from transformers import TrainingArguments |
|
from transformers import Trainer |
|
from torch import nn |
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face Hub id of the fine-tuned sequence-classification checkpoint.
model_path = "slickdata/finetuned-Sentiment-classfication-ROBERTA-model"

# The tokenizer comes from the base 'roberta-base' checkpoint, not model_path.
# NOTE(review): presumably the fine-tuned repo ships no tokenizer files and
# reuses the base vocabulary unchanged -- confirm before switching this.
tokenizer = AutoTokenizer.from_pretrained('roberta-base')

# Configuration of the fine-tuned checkpoint (not referenced by the code
# visible in this part of the file).
config = AutoConfig.from_pretrained(model_path)

# Classification model used by sentiment_analysis().
model = AutoModelForSequenceClassification.from_pretrained(model_path)
|
|
|
|
|
def preprocess(text: str) -> str:
    """Mask user mentions and links in *text*.

    The text is split on single spaces. Each token that starts with ``@``
    and is longer than one character becomes ``'@user'``; each token that
    starts with ``http`` becomes ``'http'``. All other tokens (including a
    lone ``@`` and the empty tokens produced by consecutive spaces) are kept
    as they are, so the spacing of the input is preserved.

    Args:
        text: Raw input text.

    Returns:
        The text with mentions and links replaced by placeholders.
    """
    masked_tokens = []
    # Split on a literal space (not arbitrary whitespace) so that joining
    # with " " reproduces the original spacing exactly.
    for token in text.split(" "):
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        elif token.startswith('http'):
            token = 'http'
        masked_tokens.append(token)
    return " ".join(masked_tokens)
|
|
|
|
|
def sentiment_analysis(text):
    """Classify *text* and return the name of the highest-scoring sentiment.

    The text is passed through ``preprocess``, tokenized, and run through
    the module-level ``model``; the logits are turned into probabilities
    with softmax and the label with the largest probability is returned.

    Args:
        text: Input text (e.g. a tweet).

    Returns:
        One of ``'Negative'``, ``'Neutral'`` or ``'Positive'``.
    """
    text = preprocess(text)

    # truncation=True caps the input at the tokenizer's maximum length so
    # that very long text does not overflow the model's position embeddings.
    encoded_input = tokenizer(text, return_tensors='pt', truncation=True)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(**encoded_input)

    # Logits of the single input in the batch -> class probabilities.
    scores_ = softmax(output.logits[0].numpy())

    # NOTE(review): assumes the checkpoint's class indices 0/1/2 correspond
    # to Negative/Neutral/Positive -- confirm against the model's id2label.
    labels = ['Negative', 'Neutral', 'Positive']
    scores = {label: float(score) for label, score in zip(labels, scores_)}

    return max(scores, key=scores.get)
|
|
|
|
|
# Gradio UI: a single text box in, the predicted sentiment label out.
demo = gr.Interface(
    fn=sentiment_analysis,
    inputs=gr.Textbox(placeholder="Write your tweet here..."),
    outputs="label",
    examples=[["This is wonderful!"]],
)

# Start the server only when this file is executed directly, so importing
# the module (e.g. to reuse sentiment_analysis or demo) does not block on
# a running web server.
if __name__ == "__main__":
    demo.launch(debug=True)