Spaces:
Sleeping
Sleeping
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn import svm | |
import pandas as pd | |
import nltk | |
from nltk.corpus import stopwords | |
from joblib import dump | |
import os

# Training script: fits a TF-IDF vectorizer and an SVR regressor that predicts
# the "storypoint" column from the issue text, then persists both with joblib.

# Load the data.
df = pd.read_csv("dataset/ALOY_deep-se.csv")

# Drop the first 5 rows. `.copy()` gives an independent frame so the column
# assignments below do not write into a slice of the original DataFrame
# (which is what triggers pandas' SettingWithCopyWarning).
df = df.iloc[5:].copy()

# Build the "context" column = title + description.
# - Missing values are replaced with "" first: NaN would otherwise propagate
#   through the concatenation and make `.split()` below fail on a float.
# - A space separator keeps the last word of the title from fusing with the
#   first word of the description into a single bogus token.
# NOTE(review): any inference code must build its input text the same way.
df["context"] = (
    df["title"].fillna("").astype(str)
    + " "
    + df["description"].fillna("").astype(str)
)

# Pre-processing: remove English stop words.
nltk.download('stopwords')
stop = stopwords.words('english')
# Set membership is O(1); the list would be scanned once per token.
stop_set = frozenset(stop)
# Compare case-insensitively: the NLTK list is lowercase and TfidfVectorizer
# lowercases text by default, so a capitalized stop word such as "The" would
# otherwise slip through the filter and end up in the vocabulary as "the".
df["context"] = df["context"].apply(
    lambda text: " ".join(
        word for word in text.split() if word.lower() not in stop_set
    )
)

# Feature extraction.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["context"])
y = df["storypoint"]

# Model.
model = svm.SVR()
model.fit(X, y)

# Persist the fitted vectorizer and model. Create the output directory first
# so a missing "model/" folder does not discard the finished training run.
os.makedirs("model", exist_ok=True)
dump(vectorizer, "model/vectorizer_tfidf.pkl")
dump(model, "model/model_tawos_aloy_tfidfsvm.pkl")