KarthikaRajagopal committed on
Commit 4b87470 · verified · 1 Parent(s): e45a747

Upload fake_news_model.py

Files changed (1)
  1. fake_news_model.py +64 -0
fake_news_model.py ADDED
@@ -0,0 +1,64 @@
+ import nltk
+ import tensorflow as tf
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Embedding, Dense
+ from tensorflow.keras.layers import LSTM, Activation, SpatialDropout1D
+ from tensorflow.keras.preprocessing.text import one_hot
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from sklearn.model_selection import train_test_split
+ from nltk.corpus import stopwords
+ from nltk.stem import WordNetLemmatizer
+ import matplotlib.pyplot as plt
+ import pandas as pd
+ import numpy as np
+ import re
+
+ # The stopword list and WordNet data are not bundled with nltk; fetch them once.
+ nltk.download("stopwords", quiet=True)
+ nltk.download("wordnet", quiet=True)
+
+ # Load the training split and keep only complete rows.
+ train_dir = "data/train.csv"
+ df = pd.read_csv(train_dir)
+
+ df = df.dropna()
+ df = df.reset_index()
+ X = df.drop(labels=['label', 'id'], axis=1)
+ y = df['label']
+
+ xdata = X.copy()
+ xdata.reset_index(inplace=True)
+
+ # Clean each headline: keep letters only, lowercase, drop stopwords, lemmatize.
+ lemmatizer = WordNetLemmatizer()
+ stop_words = set(stopwords.words('english'))
+
+ xtitle = []
+ for i in range(len(xdata)):
+     sent = re.sub('[^a-zA-Z]', ' ', xdata['title'][i])
+     sent = sent.lower().split()
+     sent = [lemmatizer.lemmatize(word) for word in sent if word not in stop_words]
+     sent = " ".join(sent)
+     xtitle.append(sent)
+
+ # Hyperparameters for the vocabulary, embedding, and training loop.
+ vocab_size = 5000
+ embedding_feature_len = 30
+ max_sent_len = 20
+ batch_size = 32
+ epochs = 10
+
+ # Hash each cleaned title into integer word indices, then pad/truncate
+ # every sequence to the same fixed length.
+ one_hot_representation = [one_hot(words, vocab_size) for words in xtitle]
+ padded_sequences = pad_sequences(one_hot_representation, truncating="post",
+                                  padding="post", maxlen=max_sent_len)
+
+ X = np.array(padded_sequences)
+ y = np.array(y)
+
+ x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+ # Embedding + LSTM binary classifier over the padded title sequences.
+ model = Sequential()
+ model.add(Embedding(vocab_size, embedding_feature_len, input_length=max_sent_len))
+ model.add(SpatialDropout1D(rate=0.2))
+ model.add(LSTM(units=128))
+ model.add(Dense(units=1))
+ model.add(Activation("sigmoid"))
+ model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
+ model.summary()
+
+ hist = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
+                  validation_data=(x_test, y_test))
+
+ # Sequential.predict_classes was removed in newer TensorFlow releases;
+ # threshold the sigmoid output at 0.5 instead.
+ y_pred = (model.predict(x_test) > 0.5).astype("int32")
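
A quick way to score the held-out predictions (not part of the committed file; a minimal sketch that reuses y_test and the thresholded y_pred produced by the script above):

from sklearn.metrics import accuracy_score, classification_report

# y_test and y_pred come from the script above; both hold 0/1 class labels.
print("Test accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))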