File size: 2,741 Bytes
6d27cd7
 
 
 
 
aca2b0a
eb7cf2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
087fa98
 
 
eb7cf2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
# Best-effort installation of the third-party packages imported below.
import subprocess
import sys

# Invoke pip through the interpreter that is executing this script so the
# packages are installed into the same environment that performs the imports
# below; a bare `pip` found on PATH may belong to a different Python install.
for _package in ('torch', 'transformers', 'scipy', 'gradio', 'numpy'):
    _result = subprocess.run([sys.executable, '-m', 'pip', 'install', _package])
    if _result.returncode != 0:
        # Do not abort: the package may already be importable (for example
        # when the install failed only because the network is unavailable).
        print(
            f"warning: 'pip install {_package}' exited with status "
            f"{_result.returncode}",
            file=sys.stderr,
        )
import numpy as np
from scipy.special import softmax
import gradio as gr
from transformers import (
    AutoTokenizer,
    AutoConfig, 
    AutoModelForSequenceClassification,
    TFAutoModelForSequenceClassification)
# Identifier of the fine-tuned checkpoint on the Hugging Face model hub.
# The tokenizer, configuration and model below are all loaded from it.
model_path = "Winnie-Kay/Finetuned_bert_model"

# Tokenizer matching the checkpoint; sentiment_analysis() uses it to encode
# the input text.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Configuration of the checkpoint.
# NOTE(review): `config` is not referenced anywhere else in the visible part
# of this file -- confirm whether it is still needed.
config = AutoConfig.from_pretrained(model_path)

# Sequence-classification model (PyTorch variant) loaded once at import time
# and reused for every request handled by sentiment_analysis().
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Normalise raw text before it is handed to the tokenizer
def preprocess(text):
    """Mask user mentions and links in *text*.

    The text is split on single spaces. Every token that starts with '@' and
    is longer than one character becomes '@user'; every token that starts
    with 'http' becomes 'http'. All other tokens are kept unchanged, and the
    tokens are joined back together with single spaces.
    """
    def _mask(token):
        # A lone '@' is not a mention, so it is left as it is.
        if len(token) > 1 and token.startswith('@'):
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))

# Define a function to perform sentiment analysis on the input text
def sentiment_analysis(text):
    """Return sentiment probabilities for *text*.

    The text is normalised with preprocess(), tokenised, run through the
    module-level sequence-classification model, and the resulting logits are
    converted to probabilities with softmax.

    Args:
        text: The raw input string (for example a tweet).

    Returns:
        A dict mapping 'Negative', 'Neutral' and 'Positive' to a float
        probability.
    """
    # Imported here so the function brings its own dependency into scope;
    # torch is already required by the PyTorch model used below.
    import torch

    # Preprocess the input text
    text = preprocess(text)

    # Tokenize the input text. truncation=True cuts over-long input down to
    # the tokenizer's maximum length instead of feeding the model a sequence
    # longer than it can handle.
    encoded_input = tokenizer(text, return_tensors='pt', truncation=True)

    # Inference only: skip autograd bookkeeping while running the model.
    with torch.no_grad():
        output = model(**encoded_input)

    # First element of the model output holds the logits; take the row for
    # the single input sequence.
    scores_ = output[0][0].numpy()

    # Apply softmax to obtain a probability distribution over the labels
    scores_ = softmax(scores_)

    # NOTE(review): assumes the model's class indices 0, 1, 2 correspond to
    # Negative, Neutral, Positive in this order -- confirm against the
    # fine-tuning setup.
    labels = ['Negative', 'Neutral', 'Positive']
    return {label: float(score) for label, score in zip(labels, scores_)}

# Build the Gradio interface that exposes sentiment_analysis() as a web form.
# NOTE(review): the set of keyword arguments accepted by gr.Interface depends
# on the installed Gradio version, which is not pinned in this file.
demo = gr.Interface(
    fn=sentiment_analysis, # Called with the textbox content; returns a label -> score dict
    inputs=gr.Textbox(placeholder="Write your tweet here..."), # Text input field
    outputs="label", # Label output component fed by the dict returned from fn
    interpretation="default", # NOTE(review): presumably unsupported in newer Gradio releases -- confirm
    examples=[["Have Fun with it...will be updated soon!"]],# Example input(s) to display on the interface
    image=gr.Image("https://www.reputationx.com/hubfs/what-is-sentiment-analysis-cover.jpg"), # NOTE(review): `image` does not look like a documented gr.Interface parameter -- verify it has any effect
    css= "body {background-color: black}" # Custom CSS: black page background
)

# Launch the Gradio interface (runs at import time; there is no __main__ guard)
demo.launch()