# Gradio Space: classify the emotion expressed in a tweet with a small
# bidirectional-LSTM model trained at startup on the dair-ai/emotion dataset.
import gradio as gr
from datasets import load_dataset
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import tensorflow as tf
import random
import spaces

# Fixed sequence length: tokenized tweets are padded or truncated to this size.
maxlen = 50
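# Tokenize a batch of tweets and pad/truncate every sequence to `maxlen` tokens.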
def get_sequences(tokenizer, tweets):
    sequences = tokenizer.texts_to_sequences(tweets)
    padded = pad_sequences(sequences, truncating='post', padding='post', maxlen=maxlen)
    return padded
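# Map the dataset's integer labels to emotion names (dair-ai/emotion label order).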
def get_label(idx):
    if idx == 0: return 'sadness'
    elif idx == 1: return 'joy'
    elif idx == 2: return 'love'
    elif idx == 3: return 'anger'
    elif idx == 4: return 'fear'
    else: return 'surprise'
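# Split a dataset split into raw tweet texts and their emotion-name labels.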
def get_tweet(data):
    tweets = [x['text'] for x in data]
    labels = [get_label(x['label']) for x in data]
    return tweets, labels
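# Download the dair-ai/emotion dataset and return its train/validation/test splits.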
def get_train_val_test_sets():
    dataset = load_dataset("dair-ai/emotion")
    train = dataset['train']
    val = dataset['validation']
    test = dataset['test']
    return train, val, test
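# Prepare the training data once at import time: fit the tokenizer on the
# training tweets and build the label <-> index lookup tables used below.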
train, val, test = get_train_val_test_sets()
tweets, labels = get_tweet(train)

tokenizer = Tokenizer(num_words=10000, oov_token='<UNK>')
tokenizer.fit_on_texts(tweets)
padded_train_seq = get_sequences(tokenizer, tweets)

# Set iteration order is arbitrary, but the same mapping is used for both
# training and inference within this process, so the labels stay consistent.
classes = set(labels)
class_to_index = dict((c, i) for i, c in enumerate(classes))
index_to_class = dict((v, k) for k, v in class_to_index.items())
names_to_ids = lambda labels: np.array([class_to_index.get(x) for x in labels])
train_labels = names_to_ids(labels)
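# On ZeroGPU Spaces, @spaces.GPU requests GPU hardware for the duration of this
# call (here up to 125 seconds), which covers building and training the model.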
@spaces.GPU(duration=125)
def load_model():
    # Embedding -> stacked bidirectional LSTMs -> softmax over the six emotions.
    model = tf.keras.models.Sequential([
        tf.keras.layers.Embedding(10000, 16, input_length=maxlen),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20, return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20)),
        tf.keras.layers.Dense(6, activation='softmax')
    ])

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    model.summary()

    # Prepare the validation split the same way as the training data.
    val_tweets, val_labels = get_tweet(val)
    val_seq = get_sequences(tokenizer, val_tweets)
    val_labels = names_to_ids(val_labels)

    # Train with early stopping on validation accuracy.
    h = model.fit(
        padded_train_seq, train_labels, validation_data=(val_seq, val_labels),
        epochs=8,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)
        ]
    )

    return model
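# Train once at startup; the Gradio handler below reuses the fitted model,
# tokenizer, and label mappings.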
model = load_model()
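# Gradio callback: tokenize the input text and return the predicted emotion name.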
def predict(tweet):
    print("ENTERED", tweet)
    seq = get_sequences(tokenizer, [tweet])
    print(seq)
    # The model expects a batch dimension, so wrap the single sequence in a batch of one.
    p = model.predict(np.expand_dims(seq[0], axis=0))[0]
    print(p)
    return index_to_class[int(np.argmax(p))]
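# Minimal text-in / text-out interface.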
demo = gr.Interface(fn=predict, inputs="text", outputs="text")
demo.launch()