AhmadT198 committed on
Commit
a1cd320
·
1 Parent(s): c41f014
Files changed (1) hide show
  1. app.py +104 -0
app.py CHANGED
@@ -1,4 +1,108 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def greet(name):
4
  return "Hello " + name + "!!"
 
1
  import gradio as gr
2
+ from datasets import load_dataset
3
+ from tensorflow.keras.preprocessing.text import Tokenizer
4
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
5
+ import numpy as np
6
+ import tensorflow as tf
7
+ import random
8
+ import spaces
9
+
10
+ maxlen = 50
11
+
12
+
13
+
14
def get_sequences(tokenizer, tweets):
    """Convert texts to token-id sequences of a fixed length.

    Each text is encoded with ``tokenizer.texts_to_sequences`` and the result
    is padded or truncated at the end ('post') to the module-level ``maxlen``.
    """
    return pad_sequences(
        tokenizer.texts_to_sequences(tweets),
        maxlen=maxlen,
        padding='post',
        truncating='post',
    )
18
+
19
def get_label(idx):
    """Return the emotion name for a numeric label.

    Values equal to 0 through 4 map to 'sadness', 'joy', 'love', 'anger' and
    'fear' respectively; every other value maps to 'surprise'.
    """
    ordered_names = ('sadness', 'joy', 'love', 'anger', 'fear')
    # Compare in ascending order, exactly like an if/elif chain would.
    for position, emotion in enumerate(ordered_names):
        if idx == position:
            return emotion
    return 'surprise'
26
+
27
def get_tweet(data):
    """Split records into parallel lists of texts and emotion names.

    Each record is indexed by 'text' and 'label'; the numeric label is
    converted to its name with ``get_label``.

    Returns:
        A ``(tweets, labels)`` tuple of two lists.
    """
    texts = [record['text'] for record in data]
    emotion_names = [get_label(record['label']) for record in data]
    return texts, emotion_names
31
+
32
def load_dataset():
    """Load the "dair-ai/emotion" dataset and return its three splits.

    Returns:
        A ``(train, validation, test)`` tuple taken from the loaded dataset.
    """
    # This function rebinds the module-level name ``load_dataset`` that was
    # imported from ``datasets``.  Calling that name here would recurse into
    # this zero-argument function and raise TypeError, so the library loader
    # is imported under a distinct local alias instead.
    from datasets import load_dataset as hf_load_dataset

    dataset = hf_load_dataset("dair-ai/emotion")
    return dataset['train'], dataset['validation'], dataset['test']
38
+
39
+
40
# Module-level setup: load the splits and prepare the training inputs once.
train, val, test = load_dataset()
tweets, labels = get_tweet(train)

# num_words matches the Embedding input size (10000) used by load_model();
# out-of-vocabulary words are represented by the '<UNK>' token.
tokenizer = Tokenizer(num_words=10000, oov_token='<UNK>')
tokenizer.fit_on_texts(tweets)
padded_train_seq = get_sequences(tokenizer, tweets)

classes = set(labels)
# Enumerate the *sorted* names so each class receives the same integer id on
# every run; iterating a bare set of strings has no guaranteed order.
class_to_index = {name: idx for idx, name in enumerate(sorted(classes))}
index_to_class = {idx: name for name, idx in class_to_index.items()}


def names_to_ids(labels):
    """Return a NumPy array with the class id of each label name.

    Names missing from ``class_to_index`` map to ``None``.
    """
    return np.array([class_to_index.get(x) for x in labels])


train_labels = names_to_ids(labels)
50
+
51
@spaces.GPU
def load_model():
    """Build, train, and evaluate the emotion classifier, then return it.

    The network is an Embedding layer followed by two bidirectional LSTM
    layers and a 6-unit softmax output.  It is fitted on the module-level
    ``padded_train_seq`` / ``train_labels`` with the validation split passed
    as ``validation_data``, then evaluated on the test split.  One randomly
    chosen test example with its prediction, and the thresholded predictions
    for the whole test split, are printed.

    Returns:
        The trained Keras model.
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.Embedding(10000, 16, input_length=maxlen),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20, return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20)),
        tf.keras.layers.Dense(6, activation='softmax'),
    ])

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit an extra "None" line.
    model.summary()

    val_tweets, val_labels = get_tweet(val)
    val_seq = get_sequences(tokenizer, val_tweets)
    val_labels = names_to_ids(val_labels)
    # The History object returned by fit() is not used, so it is not kept.
    model.fit(
        padded_train_seq,
        train_labels,
        validation_data=(val_seq, val_labels),
        epochs=20,
        callbacks=[
            # Stop once val_accuracy has gone 2 epochs without improving.
            tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2),
        ],
    )

    test_tweets, test_labels = get_tweet(test)
    test_seq = get_sequences(tokenizer, test_tweets)
    test_labels = names_to_ids(test_labels)
    model.evaluate(test_seq, test_labels)

    # Spot-check a single randomly chosen test example.
    i = random.randint(0, len(test_labels) - 1)
    print('Sentence:', test_tweets[i])
    print('Emotion:', index_to_class[test_labels[i]])
    p = model.predict(np.expand_dims(test_seq[i], axis=0))[0]
    # index_to_class is keyed by plain ints, so convert the argmax directly.
    pred_class = index_to_class[int(np.argmax(p))]

    print('Predicted Emotion:', pred_class)

    preds = (model.predict(test_seq) > 0.5).astype("int32")
    print(preds)

    return model
96
+
97
# Train once at import time; predict() below reads this module-level model.
model = load_model()


@spaces.GPU
def predict(tweet):
    """Return the predicted emotion name for a single tweet.

    The tweet is encoded with the module-level ``tokenizer``, passed through
    ``model`` as a batch of one, and the highest-scoring class index is
    translated back to its name via ``index_to_class``.
    """
    encoded = get_sequences(tokenizer, [tweet])
    single_batch = np.expand_dims(encoded[0], axis=0)
    scores = model.predict(single_batch)[0]
    best_index = np.argmax(scores).astype('uint8')
    return index_to_class[best_index]
104
+
105
+
106
 
107
  def greet(name):
108
  return "Hello " + name + "!!"