Voldemort committed on
Commit
20d0ef5
·
1 Parent(s): 4b73d8f

Upload app.py

Files changed (1)
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
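+ # Sarcasm-detection demo: train a small Keras text classifier on news headlines
+ # and run it on a few sample sentences.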
+ import tensorflow as tf
+ import numpy as np
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from tensorflow.keras.models import Sequential
+
+ import json
+
+
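+ # test_div is the fraction of examples used for training; the remaining values
+ # configure the tokenizer vocabulary, embedding size, and sequence padding.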
+ test_div = 0.75
+
+ vocab_size = 10000
+ embedding_dim = 16
+ max_length = 100
+ trunc_type = 'post'
+ padding_type = 'post'
+ oov_tok = "<OOV>"
+
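+ # Example sentences (defined here but not used in the training pipeline below).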
+ sentences = [
+     'Wow this AI is astonishing',
+     'This is the worst AI',
+     'This is the best AI',
+     'I am the best AI',
+     'It is very astonishing that we can train a model on any data we have',
+ ]
+
+
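+ # Load the sarcasm headlines dataset and collect each field into its own list.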
+ headlines = []
+ is_sarcastic = []
+ article_link = []
+
+ with open('Sarcasm_Headlines_Dataset.json', 'r') as f:
+     data = json.load(f)
+
+
+ for i in data:
+     headlines.append(i['headline'])
+     is_sarcastic.append(i['is_sarcastic'])
+     article_link.append(i['article_link'])
+
+
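+ # Split headlines and labels into a training portion and a held-out test portion.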
+ train_data = headlines[:int(len(headlines) * test_div)]
+ train_result = is_sarcastic[:int(len(is_sarcastic) * test_div)]
+ test_data = headlines[int(len(headlines) * test_div):]
+ test_result = is_sarcastic[int(len(is_sarcastic) * test_div):]
+
+
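+ # Fit the tokenizer on the training headlines only, reserving an out-of-vocabulary token.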
+ tokenizer = Tokenizer(num_words=10000, oov_token=oov_tok)
+ tokenizer.fit_on_texts(train_data)
+
+ word_index = tokenizer.word_index
+
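+ # Convert headlines to integer sequences and pad/truncate them to a fixed length.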
+ train_sequences = tokenizer.texts_to_sequences(train_data)
+ test_sequences = tokenizer.texts_to_sequences(test_data)
+ train_padded = pad_sequences(
+     train_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
+ test_padded = pad_sequences(
+     test_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
+
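+ # Convert inputs and labels to NumPy arrays before passing them to model.fit.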
+ training_padded = np.array(train_padded)
+ training_labels = np.array(train_result)
+ testing_padded = np.array(test_padded)
+ testing_labels = np.array(test_result)
+
+
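+ # Binary classifier: embed each token, average the embeddings over the sequence,
+ # then pass them through a small dense layer and a sigmoid output.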
+ model = Sequential([
+     tf.keras.layers.Embedding(
+         vocab_size, embedding_dim, input_length=max_length),
+     tf.keras.layers.GlobalAveragePooling1D(),
+     tf.keras.layers.Dense(24, activation='relu'),
+     tf.keras.layers.Dense(1, activation='sigmoid')
+ ])
+ model.compile(loss='binary_crossentropy',
+               optimizer='adam', metrics=['accuracy'])
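+ # Train for 30 epochs, validating on the held-out headlines after each epoch.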
+
+ model.summary()
+ num_epochs = 30
+ history = model.fit(training_padded, training_labels, epochs=num_epochs,
+                     validation_data=(testing_padded, testing_labels), verbose=2)
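+ # Score a few unseen headlines; outputs near 1 mean the model predicts sarcasm.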
+
+ sentence = ["granny starting to fear spiders in the garden might be real",
+             "game of thrones season finale showing this sunday night",
+             "Central Valley Coalition Suing the EPA Over Clean Air Failures"]
+ sequences = tokenizer.texts_to_sequences(sentence)
+ padded = pad_sequences(sequences, maxlen=max_length,
+                        padding=padding_type, truncating=trunc_type)
+ print(model.predict(padded))