Spaces:

mainakhf
/

sentiment_analysis

Sleeping

File size: 1,493 Bytes

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
from transformers import BertTokenizer, TFBertForSequenceClassification, TextClassificationPipeline
import tensorflow as tf

# Download NLTK resources (one-time step).
# word_tokenize needs the Punkt sentence tokenizer data. Recent NLTK releases
# look it up under 'punkt_tab' instead of 'punkt', so fetch both to keep
# preprocess_text() working regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Lookup sets used by preprocess_text() to filter tokens.
stop_words = set(stopwords.words('english'))
punctuations = set(string.punctuation)

# Function to preprocess text
def preprocess_text(text):
    """Return a lowercased, filtered, space-joined version of *text*.

    The input is coerced to ``str``, lowercased and split with NLTK's
    ``word_tokenize``. Tokens that appear in the module-level ``stop_words``
    or ``punctuations`` sets are dropped; the remaining tokens are joined
    back together with single spaces.
    """
    lowered = str(text).lower()

    kept = []
    for word in word_tokenize(lowered):
        # Skip anything that is a stopword or a punctuation character.
        if word in stop_words or word in punctuations:
            continue
        kept.append(word)

    return ' '.join(kept)

# Checkpoint identifier shared by the tokenizer and the model so the two
# cannot drift apart.
_BERT_CHECKPOINT = 'mainakhf/bert-base-uncased-sentiment-analysis'

# Load tokenizer and model from the same checkpoint.
bert_tokenizer = BertTokenizer.from_pretrained(_BERT_CHECKPOINT)
bert_model = TFBertForSequenceClassification.from_pretrained(_BERT_CHECKPOINT)


def Get_sentiment(Review, Tokenizer=bert_tokenizer, Model=bert_model):
    """Predict a "Negative"/"Positive" label for one or more reviews.

    Args:
        Review: A single review, or a list of reviews. Any value that is
            not a ``list`` is treated as one review and wrapped in a list.
        Tokenizer: Tokenizer passed to the classification pipeline.
            Defaults to the module-level ``bert_tokenizer``.
        Model: Sequence-classification model passed to the pipeline.
            Defaults to the module-level ``bert_model``.

    Returns:
        A list of label strings, one per review, in input order. A single
        review therefore yields a one-element list; an empty input list
        yields an empty list.

    Note:
        ``Model.config.id2label`` is overwritten in place so that the
        pipeline reports the "Negative"/"Positive" label names.
    """
    # Convert Review to a list if it's not already a list
    if not isinstance(Review, list):
        Review = [Review]
    # Nothing to classify: avoid running the pipeline on an empty batch.
    if not Review:
        return []

    # Use the caller-supplied model and tokenizer rather than the globals,
    # so the Tokenizer/Model arguments actually take effect.
    Model.config.id2label = {0: "Negative", 1: "Positive"}
    pipe = TextClassificationPipeline(model=Model, tokenizer=Tokenizer)

    # Return every prediction, not just the first, so list inputs get one
    # label per review.
    return [prediction['label'] for prediction in pipe(Review)]