# Gradio Space: classify the emotion expressed in a tweet with a small
# bidirectional-LSTM model trained at startup on the dair-ai/emotion dataset.
import gradio as gr
from datasets import load_dataset
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import tensorflow as tf
import random
import spaces

# Fixed sequence length: tokenized tweets are padded or truncated to this size.
maxlen = 50
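# Tokenize a batch of tweets and pad/truncate every sequence to `maxlen` tokens.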
def get_sequences(tokenizer, tweets):
    sequences = tokenizer.texts_to_sequences(tweets)
    padded = pad_sequences(sequences, truncating='post', padding='post', maxlen=maxlen)
    return padded
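# Map the dataset's integer labels to emotion names (dair-ai/emotion label order).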
def get_label(idx):
    if idx == 0: return 'sadness'
    elif idx == 1: return 'joy'
    elif idx == 2: return 'love'
    elif idx == 3: return 'anger'
    elif idx == 4: return 'fear'
    else: return 'surprise'
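# Split a dataset split into raw tweet texts and their emotion-name labels.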
def get_tweet(data):
    tweets = [x['text'] for x in data]
    labels = [get_label(x['label']) for x in data]
    return tweets, labels
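# Download the dair-ai/emotion dataset and return its train/validation/test splits.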
def get_train_val_test_sets():
    dataset = load_dataset("dair-ai/emotion")
    train = dataset['train']
    val = dataset['validation']
    test = dataset['test']
    return train, val, test
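# Prepare the training data once at import time: fit the tokenizer on the
# training tweets and build the label <-> index lookup tables used below.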
train, val, test = get_train_val_test_sets()
tweets, labels = get_tweet(train)

tokenizer = Tokenizer(num_words=10000, oov_token='<UNK>')
tokenizer.fit_on_texts(tweets)
padded_train_seq = get_sequences(tokenizer, tweets)

# Set iteration order is arbitrary, but the same mapping is used for both
# training and inference within this process, so the labels stay consistent.
classes = set(labels)
class_to_index = dict((c, i) for i, c in enumerate(classes))
index_to_class = dict((v, k) for k, v in class_to_index.items())
names_to_ids = lambda labels: np.array([class_to_index.get(x) for x in labels])
train_labels = names_to_ids(labels)
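# On ZeroGPU Spaces, @spaces.GPU requests GPU hardware for the duration of this
# call (here up to 125 seconds), which covers building and training the model.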
@spaces.GPU(duration=125)
def load_model():
    # Embedding -> stacked bidirectional LSTMs -> softmax over the six emotions.
    model = tf.keras.models.Sequential([
        tf.keras.layers.Embedding(10000, 16, input_length=maxlen),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20, return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20)),
        tf.keras.layers.Dense(6, activation='softmax')
    ])

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    model.summary()

    # Prepare the validation split the same way as the training data.
    val_tweets, val_labels = get_tweet(val)
    val_seq = get_sequences(tokenizer, val_tweets)
    val_labels = names_to_ids(val_labels)

    # Train with early stopping on validation accuracy.
    h = model.fit(
        padded_train_seq, train_labels, validation_data=(val_seq, val_labels),
        epochs=8,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)
        ]
    )

    return model
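# Train once at startup; the Gradio handler below reuses the fitted model,
# tokenizer, and label mappings.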
model = load_model()
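# Gradio callback: tokenize the input text and return the predicted emotion name.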
def predict(tweet):
    print("ENTERED", tweet)
    seq = get_sequences(tokenizer, [tweet])
    print(seq)
    # The model expects a batch dimension, so wrap the single sequence in a batch of one.
    p = model.predict(np.expand_dims(seq[0], axis=0))[0]
    print(p)
    return index_to_class[int(np.argmax(p))]
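# Minimal text-in / text-out interface.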
demo = gr.Interface(fn=predict, inputs="text", outputs="text")
demo.launch()