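"""Train a TF-IDF + SVR story point estimator on the TAWOS ALOY issue dataset.

The script reads the issues CSV, concatenates title and description into a
"context" column, removes English stopwords, vectorizes the text with TF-IDF,
fits a support vector regressor on the story points, and saves both artifacts
with joblib.
"""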
import os

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import svm
import pandas as pd
import nltk
from nltk.corpus import stopwords
from joblib import dump

# Load the data
df = pd.read_csv("dataset/ALOY_deep-se.csv")

# Drop the first 5 rows
df = df.iloc[5:]

# Build the context column = title + description
# (fill missing fields and join with a space so words do not run together)
df["context"] = df["title"].fillna("") + " " + df["description"].fillna("")

# Preprocessing: remove English stopwords from the context text
# (lowercase first so capitalized stopwords are also caught; TfidfVectorizer
# lowercases the remaining tokens later anyway)
nltk.download('stopwords')
stop = set(stopwords.words('english'))
df['context'] = df['context'].apply(lambda x: ' '.join(word for word in x.lower().split() if word not in stop))

# Feature extraction: TF-IDF over the context column
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["context"])
y = df["storypoint"]

# Model: support vector regression on the TF-IDF features
model = svm.SVR()
model.fit(X, y)

# Persist the fitted vectorizer and model
os.makedirs("model", exist_ok=True)
dump(vectorizer, "model/vectorizer_tfidf.pkl")
dump(model, "model/model_tawos_aloy_tfidfsvm.pkl")
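
# Usage sketch (illustrative, not part of the training pipeline): reload the
# saved artifacts and estimate story points for a new issue. The issue text
# below is a made-up example; for consistency, the same stopword removal as
# above could be applied to it first.
from joblib import load

loaded_vectorizer = load("model/vectorizer_tfidf.pkl")
loaded_model = load("model/model_tawos_aloy_tfidfsvm.pkl")

X_new = loaded_vectorizer.transform(["add oauth support to the login page"])
print(loaded_model.predict(X_new))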