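# Gradio demo: classify the emotion of a short text into one of six classes
# (sadness, joy, love, anger, fear, surprise) using a small bidirectional LSTM
# trained at startup on the dair-ai/emotion dataset.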
import gradio as gr
from datasets import load_dataset
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import tensorflow as tf
import random
import spaces

maxlen = 50  # maximum sequence length (in tokens) for padding/truncation



def get_sequences(tokenizer, tweets):
    # Convert raw texts to integer sequences, then pad/truncate to maxlen.
    sequences = tokenizer.texts_to_sequences(tweets)
    return pad_sequences(sequences, truncating='post', padding='post', maxlen=maxlen)

def get_label(idx):
    # Map dair-ai/emotion integer labels to their string names.
    return ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'][idx]
  
def get_tweet(data):
    # Extract parallel lists of texts and string labels from a dataset split.
    tweets = [x['text'] for x in data]
    labels = [get_label(x['label']) for x in data]
    return tweets, labels

def get_train_val_test_sets():
    # Download the dair-ai/emotion dataset and return its three splits.
    dataset = load_dataset("dair-ai/emotion")
    return dataset['train'], dataset['validation'], dataset['test']


# Prepare the training data: fit the tokenizer on the training tweets and
# build the label <-> index mappings used for training and inference.
train, val, test = get_train_val_test_sets()
tweets, labels = get_tweet(train)
tokenizer = Tokenizer(num_words=10000, oov_token='<UNK>')
tokenizer.fit_on_texts(tweets)
padded_train_seq = get_sequences(tokenizer, tweets)
classes = sorted(set(labels))  # sorted so the class order is deterministic across runs
class_to_index = {c: i for i, c in enumerate(classes)}
index_to_class = {i: c for c, i in class_to_index.items()}

def names_to_ids(labels):
    # Convert a list of string labels to a NumPy array of class indices.
    return np.array([class_to_index[x] for x in labels])

train_labels = names_to_ids(labels)

@spaces.GPU(duration=125)  # Hugging Face ZeroGPU: request a GPU for up to 125 s
def load_model():
    # Build, compile, and train the bidirectional-LSTM emotion classifier.
    model = tf.keras.models.Sequential([
        # 16-dim embeddings over the 10k-word vocabulary, length-maxlen inputs.
        tf.keras.layers.Embedding(10000, 16, input_length=maxlen),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20, return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20)),
        tf.keras.layers.Dense(6, activation='softmax')  # one probability per emotion
    ])

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    model.summary()  # summary() prints directly; wrapping it in print() just prints None

    # Prepare the validation split for monitoring during training.
    val_tweets, val_labels = get_tweet(val)
    val_seq = get_sequences(tokenizer, val_tweets)
    val_labels = names_to_ids(val_labels)
    model.fit(
        padded_train_seq, train_labels,
        validation_data=(val_seq, val_labels),
        epochs=8,
        callbacks=[
            # Stop early if validation accuracy fails to improve for 2 epochs.
            tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)
        ]
    )
    
    # Optional: evaluate on the held-out test split and spot-check one example.
    # test_tweets, test_labels = get_tweet(test)
    # test_seq = get_sequences(tokenizer, test_tweets)
    # test_labels = names_to_ids(test_labels)
    # _ = model.evaluate(test_seq, test_labels)
    # i = random.randint(0, len(test_labels) - 1)
    # print('Sentence:', test_tweets[i])
    # print('Emotion:', index_to_class[test_labels[i]])
    # p = model.predict(np.expand_dims(test_seq[i], axis=0))[0]
    # pred_class = index_to_class[int(np.argmax(p))]
    # print('Predicted Emotion:', pred_class)
    # For softmax outputs, take the argmax rather than thresholding at 0.5:
    # preds = np.argmax(model.predict(test_seq), axis=1)
    # print(preds)

    return model
    
model = load_model()  # train once at startup, before the app serves requests

def predict(tweet):
    # Tokenize and pad the input tweet, then return the most likely emotion.
    seq = get_sequences(tokenizer, [tweet])  # already batched: shape (1, maxlen)
    p = model.predict(seq)[0]
    return index_to_class[int(np.argmax(p))]



# Minimal Gradio UI: free-text input in, predicted emotion label out.
demo = gr.Interface(fn=predict, inputs="text", outputs="text")
demo.launch()
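# Illustrative interaction (not a recorded result): submitting a sentence such
# as "i feel so alone" in the textbox returns one of the six labels above,
# e.g. it might return 'sadness'.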