"""Gradio demo: train a BiLSTM emotion classifier on dair-ai/emotion tweets."""

import random

import gradio as gr
import numpy as np
import spaces
import tensorflow as tf
from datasets import load_dataset
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Maximum token length; shorter sequences are post-padded, longer ones truncated.
maxlen = 50

# Fixed label order matching the dair-ai/emotion integer label ids (0..5).
# The original code built the class mappings from set(labels), whose iteration
# order changes between runs under hash randomization, so the index<->name
# mapping was not reproducible across restarts. A fixed list makes the model's
# output index i always mean EMOTIONS[i].
EMOTIONS = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']


def get_sequences(tokenizer, tweets):
    """Tokenize a list of tweet strings and pad/truncate to `maxlen`.

    Returns a 2-D numpy array of shape (len(tweets), maxlen).
    """
    sequences = tokenizer.texts_to_sequences(tweets)
    return pad_sequences(sequences, truncating='post', padding='post',
                         maxlen=maxlen)


def get_label(idx):
    """Map a dataset label id to its emotion name.

    Ids outside 0-4 fall back to 'surprise', matching the original
    if/elif chain's final `else` branch.
    """
    if 0 <= idx < len(EMOTIONS):
        return EMOTIONS[idx]
    return 'surprise'


def get_tweet(data):
    """Return parallel lists (texts, emotion-name labels) from a dataset split."""
    tweets = [x['text'] for x in data]
    labels = [get_label(x['label']) for x in data]
    return tweets, labels


def get_train_val_test_sets():
    """Load the dair-ai/emotion splits (downloads from the Hub on first call)."""
    dataset = load_dataset("dair-ai/emotion")
    return dataset['train'], dataset['validation'], dataset['test']


train, val, test = get_train_val_test_sets()
tweets, labels = get_tweet(train)

# NOTE(review): oov_token='' maps out-of-vocabulary words to the empty string;
# a sentinel such as '<OOV>' is more conventional, but it is kept as-is to
# preserve the exact vocabulary indices the rest of the script depends on.
tokenizer = Tokenizer(num_words=10000, oov_token='')
tokenizer.fit_on_texts(tweets)
padded_train_seq = get_sequences(tokenizer, tweets)

# Deterministic name <-> index mappings (see EMOTIONS note above).
class_to_index = {c: i for i, c in enumerate(EMOTIONS)}
index_to_class = {i: c for c, i in class_to_index.items()}


def names_to_ids(labels):
    """Convert a list of emotion names to a numpy array of class indices."""
    return np.array([class_to_index.get(x) for x in labels])


train_labels = names_to_ids(labels)


@spaces.GPU(duration=125)
def load_model():
    """Build, compile, and train the BiLSTM classifier; return the fitted model.

    Trains on the module-level training data for up to 8 epochs with early
    stopping (patience 2) on validation accuracy.
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.Embedding(10000, 16, input_length=maxlen),
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(20, return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20)),
        tf.keras.layers.Dense(6, activation='softmax'),
    ])
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    print(model.summary())

    val_tweets, val_labels = get_tweet(val)
    val_seq = get_sequences(tokenizer, val_tweets)
    val_labels = names_to_ids(val_labels)

    model.fit(
        padded_train_seq,
        train_labels,
        validation_data=(val_seq, val_labels),
        epochs=8,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                             patience=2),
        ],
    )
    return model


model = load_model()


def predict(tweet):
    """Predict the emotion name for a single tweet string."""
    seq = get_sequences(tokenizer, [tweet])
    p = model.predict(np.expand_dims(seq[0], axis=0))[0]
    return index_to_class[int(np.argmax(p))]


demo = gr.Interface(fn=predict, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()