App Init
app.py
CHANGED
@@ -1,4 +1,108 @@
 import gradio as gr
+from datasets import load_dataset
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import numpy as np
+import tensorflow as tf
+import random
+import spaces
+
+maxlen = 50
+
+
+
+def get_sequences(tokenizer, tweets):
+    sequences = tokenizer.texts_to_sequences(tweets)
+    padded = pad_sequences(sequences, truncating='post', padding='post', maxlen=maxlen)
+    return padded
+
+def get_label(idx):
+    if idx == 0: return 'sadness'
+    elif idx == 1: return 'joy'
+    elif idx == 2: return 'love'
+    elif idx == 3: return 'anger'
+    elif idx == 4: return 'fear'
+    else: return 'surprise'
+
+def get_tweet(data):
+    tweets = [x['text'] for x in data]
+    labels = [get_label(x['label']) for x in data]
+    return tweets, labels
+
+def get_emotion_dataset():
+    dataset = load_dataset("dair-ai/emotion")
+    train = dataset['train']
+    val = dataset['validation']
+    test = dataset['test']
+    return train, val, test
+
+
+train, val, test = get_emotion_dataset()
+tweets, labels = get_tweet(train)
+tokenizer = Tokenizer(num_words=10000, oov_token='<UNK>')
+tokenizer.fit_on_texts(tweets)
+padded_train_seq = get_sequences(tokenizer, tweets)
+classes = set(labels)
+class_to_index = dict((c, i) for i, c in enumerate(classes))
+index_to_class = dict((v, k) for k, v in class_to_index.items())
+names_to_ids = lambda labels: np.array([class_to_index.get(x) for x in labels])
+train_labels = names_to_ids(labels)
+
+@spaces.GPU
+def load_model():
+
+
+    model = tf.keras.models.Sequential([
+        tf.keras.layers.Embedding(10000, 16, input_length=maxlen),
+        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20, return_sequences=True)),
+        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20)),
+        tf.keras.layers.Dense(6, activation='softmax')
+    ])
+
+    model.compile(
+        loss='sparse_categorical_crossentropy',
+        optimizer='adam',
+        metrics=['accuracy']
+    )
+    model.summary()
+    val_tweets, val_labels = get_tweet(val)
+    val_seq = get_sequences(tokenizer, val_tweets)
+    val_labels = names_to_ids(val_labels)
+    h = model.fit(
+        padded_train_seq, train_labels, validation_data=(val_seq, val_labels),
+        epochs=20,
+        callbacks=[
+            tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)
+        ]
+    )
+
+    test_tweets, test_labels = get_tweet(test)
+    test_seq = get_sequences(tokenizer, test_tweets)
+    test_labels = names_to_ids(test_labels)
+
+    _ = model.evaluate(test_seq, test_labels)
+    i = random.randint(0, len(test_labels) - 1)
+    print('Sentence:', test_tweets[i])
+    print('Emotion:', index_to_class[test_labels[i]])
+    p = model.predict(np.expand_dims(test_seq[i], axis=0))[0]
+    pred_class = index_to_class[int(np.argmax(p))]
+
+    print('Predicted Emotion:', pred_class)
+
+    preds = np.argmax(model.predict(test_seq), axis=1)
+    print(preds)
+
+    return model
+
+model = load_model()
+
+@spaces.GPU
+def predict(tweet):
+    seq = get_sequences(tokenizer, [tweet])
+    p = model.predict(np.expand_dims(seq[0], axis=0))[0]
+    return index_to_class[int(np.argmax(p))]
+
+
 
 def greet(name):
     return "Hello " + name + "!!"
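The hunk ends at the retained greet stub and does not show how the new predict function is exposed in the Space's UI. A minimal sketch of the wiring one would expect further down in app.py, assuming a plain gr.Interface is used; the demo name, component choices, and launch call below are illustrative assumptions, not part of this commit:

# Hypothetical wiring (not in this diff): expose predict() through a Gradio Interface.
demo = gr.Interface(
    fn=predict,                                  # the @spaces.GPU-decorated predict() added above
    inputs=gr.Textbox(label="Tweet"),            # raw tweet text
    outputs=gr.Label(label="Predicted emotion"), # one of the six emotion class names
    title="Tweet Emotion Classifier",
)

if __name__ == "__main__":
    demo.launch()

Because predict returns a plain class-name string, a gr.Textbox output would work equally well; gr.Label is used here only to render the class name prominently.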