Spaces:
Sleeping
Sleeping
ultima versao
Browse files- app.py +47 -8
- dataset/ALOY_deep-se.csv +0 -0
- gerar_modelos/gerar_modelos_mbr.py +0 -22
- gerar_modelos/gerar_modelos_neosp.py +0 -45
- gerar_modelos/gerar_modelos_tfidflinear.py +0 -36
- gerar_modelos/gerar_modelos_tfidfsvm.py +0 -36
- model/model_tawos_aloy_mbr.pkl +0 -3
- model/model_tawos_aloy_neosp.pkl +0 -3
- model/model_tawos_aloy_tfidflinear.pkl +0 -3
- model/model_tawos_aloy_tfidfsvm.pkl +0 -3
- model/vectorizer_tfidflinear.pkl +0 -3
- model/vectorizer_tfidfsvm.pkl +0 -3
app.py
CHANGED
@@ -25,16 +25,32 @@ descricao4 = """During the compile process Alloy will attempt to remove files fr
|
|
25 |
titulo5 = """Resolve suboptimal compression from uglify-js v2 update"""
|
26 |
descricao5 = """The v2 update of uglify-js in Alloy, specifically version 2.2.5, has some suboptimal compressions, which are causing the optimizer.js test spec to fail in certain cases. Specifically the issues are around booleans and cascading of variables in assignments. These issues have been logged with the Uglifyjs2 project in the following links: * https://github.com/mishoo/UglifyJS2/issues/137 * https://github.com/mishoo/UglifyJS2/issues/138 When these issues are resolved and distributed in an npm release, we need to revisit these compressions and testing to ensure that the fixes are in place, and that new uglify-js version has no regressions that impact alloy."""
|
27 |
|
|
|
|
|
|
|
|
|
28 |
def calcula_MbR(titulo, descricao):
|
29 |
context = titulo + descricao
|
30 |
d = {"context": [context]}
|
31 |
df = pd.DataFrame(data=d, columns=["context"])
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
story_points_MbR = model.predict(df["context"])
|
34 |
return story_points_MbR
|
35 |
|
36 |
def calcula_neosp(titulo, descricao):
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
context = titulo + descricao
|
39 |
d = {"context": [context]}
|
40 |
df = pd.DataFrame(data=d, columns=["context"])
|
@@ -62,21 +78,45 @@ def calcula_neosp(titulo, descricao):
|
|
62 |
return story_points
|
63 |
|
64 |
def calculaTFIDFSVM(titulo, descricao):
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
context = titulo + descricao
|
67 |
d = {"context": [context]}
|
68 |
df = pd.DataFrame(data=d, columns=["context"])
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
X = vectorizer.transform(df["context"])
|
71 |
story_points = model.predict(X)
|
72 |
return story_points
|
73 |
|
74 |
def calculaTFIDFLinear(titulo, descricao):
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
context = titulo + descricao
|
77 |
d = {"context": [context]}
|
78 |
df = pd.DataFrame(data=d, columns=["context"])
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
X = vectorizer.transform(df["context"])
|
81 |
story_points = model.predict(X)
|
82 |
return story_points
|
@@ -92,7 +132,6 @@ demo = gr.Interface(fn=calcula,
|
|
92 |
gr.Textbox(label="Story Points Estimado TFIDF-SVR"),
|
93 |
gr.Textbox(label="Story Points Estimado TFIDF-Linear")],
|
94 |
title="Agile Task Story Point Estimator",
|
95 |
-
examples=[[titulo1, descricao1], [titulo2, descricao2], [titulo3, descricao3], [titulo4, descricao4], [titulo5, descricao5]]
|
96 |
-
)
|
97 |
|
98 |
demo.launch()
|
|
|
25 |
titulo5 = """Resolve suboptimal compression from uglify-js v2 update"""
|
26 |
descricao5 = """The v2 update of uglify-js in Alloy, specifically version 2.2.5, has some suboptimal compressions, which are causing the optimizer.js test spec to fail in certain cases. Specifically the issues are around booleans and cascading of variables in assignments. These issues have been logged with the Uglifyjs2 project in the following links: * https://github.com/mishoo/UglifyJS2/issues/137 * https://github.com/mishoo/UglifyJS2/issues/138 When these issues are resolved and distributed in an npm release, we need to revisit these compressions and testing to ensure that the fixes are in place, and that new uglify-js version has no regressions that impact alloy."""
|
27 |
|
28 |
+
|
29 |
+
from huggingface_hub import hf_hub_download
|
30 |
+
import joblib
|
31 |
+
|
32 |
def calcula_MbR(titulo, descricao):
    """Estimate story points with the "MbR" model.

    Args:
        titulo: Issue title text. ``None`` is treated as an empty string.
        descricao: Issue description text. ``None`` is treated as an empty
            string.

    Returns:
        Whatever ``model.predict`` returns for the single concatenated
        "context" entry (title immediately followed by description).
    """
    # Load the model once and memoise it on the function object, so that
    # subsequent requests do not deserialize the artifact again.
    # NOTE(review): joblib.load unpickles the downloaded file; only point this
    # at a repository you trust.
    model = getattr(calcula_MbR, "_model", None)
    if model is None:
        model = joblib.load(
            hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_mbr.joblib")
        )
        calcula_MbR._model = model

    # Title and description are joined without a separator, exactly as the
    # original implementation did.
    context = (titulo or "") + (descricao or "")
    df = pd.DataFrame(data={"context": [context]}, columns=["context"])

    story_points_MbR = model.predict(df["context"])
    return story_points_MbR
|
45 |
|
46 |
def calcula_neosp(titulo, descricao):
|
47 |
+
|
48 |
+
model = joblib.load(
|
49 |
+
hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_neosp.joblib")
|
50 |
+
)
|
51 |
+
|
52 |
+
# model = load("model/model_tawos_aloy_neosp.pkl")
|
53 |
+
|
54 |
context = titulo + descricao
|
55 |
d = {"context": [context]}
|
56 |
df = pd.DataFrame(data=d, columns=["context"])
|
|
|
78 |
return story_points
|
79 |
|
80 |
def calculaTFIDFSVM(titulo, descricao):
    """Estimate story points with the TF-IDF + SVM model.

    Args:
        titulo: Issue title text. ``None`` is treated as an empty string.
        descricao: Issue description text. ``None`` is treated as an empty
            string.

    Returns:
        Whatever ``model.predict`` returns for the vectorized single-entry
        "context" (title immediately followed by description).
    """
    # Load model and vectorizer once and memoise them on the function object,
    # so that subsequent requests do not deserialize both artifacts again.
    # NOTE(review): joblib.load unpickles the downloaded files; only point
    # this at a repository you trust.
    artifacts = getattr(calculaTFIDFSVM, "_artifacts", None)
    if artifacts is None:
        repo_id = "giseldo/model_effort_tawos"
        artifacts = (
            joblib.load(hf_hub_download(repo_id, "model_tawos_aloy_tfidfsvm.joblib")),
            joblib.load(hf_hub_download(repo_id, "vectorizer_tfidfsvm.joblib")),
        )
        calculaTFIDFSVM._artifacts = artifacts
    model, vectorizer = artifacts

    # Title and description are joined without a separator, exactly as the
    # original implementation did.
    # NOTE(review): the training script removed English stop words before
    # fitting the vectorizer; no such filtering happens here - confirm whether
    # inference should mirror it.
    context = (titulo or "") + (descricao or "")
    df = pd.DataFrame(data={"context": [context]}, columns=["context"])

    X = vectorizer.transform(df["context"])
    story_points = model.predict(X)
    return story_points
|
101 |
|
102 |
def calculaTFIDFLinear(titulo, descricao):
    """Estimate story points with the TF-IDF + linear model.

    Args:
        titulo: Issue title text. ``None`` is treated as an empty string.
        descricao: Issue description text. ``None`` is treated as an empty
            string.

    Returns:
        Whatever ``model.predict`` returns for the vectorized single-entry
        "context" (title immediately followed by description).
    """
    # Load model and vectorizer once and memoise them on the function object,
    # so that subsequent requests do not deserialize both artifacts again.
    # NOTE(review): joblib.load unpickles the downloaded files; only point
    # this at a repository you trust.
    artifacts = getattr(calculaTFIDFLinear, "_artifacts", None)
    if artifacts is None:
        repo_id = "giseldo/model_effort_tawos"
        artifacts = (
            joblib.load(hf_hub_download(repo_id, "model_tawos_aloy_tfidflinear.joblib")),
            joblib.load(hf_hub_download(repo_id, "vectorizer_tfidflinear.joblib")),
        )
        calculaTFIDFLinear._artifacts = artifacts
    model, vectorizer = artifacts

    # Title and description are joined without a separator, exactly as the
    # original implementation did.
    # NOTE(review): the training script removed English stop words before
    # fitting the vectorizer; no such filtering happens here - confirm whether
    # inference should mirror it.
    context = (titulo or "") + (descricao or "")
    df = pd.DataFrame(data={"context": [context]}, columns=["context"])

    X = vectorizer.transform(df["context"])
    story_points = model.predict(X)
    return story_points
|
|
|
132 |
gr.Textbox(label="Story Points Estimado TFIDF-SVR"),
|
133 |
gr.Textbox(label="Story Points Estimado TFIDF-Linear")],
|
134 |
title="Agile Task Story Point Estimator",
|
135 |
+
examples=[[titulo1, descricao1], [titulo2, descricao2], [titulo3, descricao3], [titulo4, descricao4], [titulo5, descricao5]])
|
|
|
136 |
|
137 |
demo.launch()
|
dataset/ALOY_deep-se.csv
DELETED
The diff for this file is too large to render.
See raw diff
|
|
gerar_modelos/gerar_modelos_mbr.py
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
from sklearn.dummy import DummyRegressor
|
2 |
-
import pandas as pd
|
3 |
-
from nltk.corpus import stopwords
|
4 |
-
from joblib import dump
|
5 |
-
|
6 |
-
# Load the data set.
df = pd.read_csv("dataset/ALOY_deep-se.csv")

# Discard the first 5 rows.
df = df.iloc[5:]

# Build the context column = title + description.
df["context"] = df["title"] + df["description"]

# The regressor is fitted on the row index as a stand-in feature; the target
# is the story point column.
y = df["storypoint"]
X = y.index

# Baseline regressor configured with the "mean" strategy.
model = DummyRegressor(strategy="mean").fit(X, y)

dump(model, "model/model_tawos_aloy_mbr.pkl")
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gerar_modelos/gerar_modelos_neosp.py
DELETED
@@ -1,45 +0,0 @@
|
|
1 |
-
from textblob import TextBlob
|
2 |
-
import textstat
|
3 |
-
from sklearn import svm
|
4 |
-
import pandas as pd
|
5 |
-
import nltk
|
6 |
-
from nltk.corpus import stopwords
|
7 |
-
from joblib import dump
|
8 |
-
|
9 |
-
# Load the data set.
df = pd.read_csv("dataset/ALOY_deep-se.csv")

# Discard the first 5 rows.
df = df.iloc[5:df.shape[0]]

# Build the context column = title + description.
df["context"] = df["title"] + df["description"]

# Pre-processing: drop English stop words. A set gives constant-time
# membership checks instead of scanning the stop-word list for every token.
nltk.download('stopwords')
stop = set(stopwords.words('english'))
df['context'] = df['context'].apply(lambda x: ' '.join(word for word in x.split() if word not in stop))

# Feature extraction: one column per textstat metric. Keeping name and metric
# together means the extraction and the feature selection below cannot drift.
readability_features = [
    ("gunning_fog", textstat.gunning_fog),
    ("flesch_reading_ease", textstat.flesch_reading_ease),
    ("flesch_kincaid_grade", textstat.flesch_kincaid_grade),
    ("smog_index", textstat.smog_index),
    ("coleman_liau_index", textstat.coleman_liau_index),
    ("automated_readability_index", textstat.automated_readability_index),
    ("dale_chall_readability_score", textstat.dale_chall_readability_score),
    ("difficult_words", textstat.difficult_words),
    ("linsear_write_formula", textstat.linsear_write_formula),
]
for column, metric in readability_features:
    df[column] = df['context'].apply(metric)

# Sentiment features: analyse each text once and read both values from it.
sentiment = df["context"].apply(lambda x: TextBlob(x).sentiment)
df["polarity"] = sentiment.apply(lambda s: s.polarity)
df["subjectivity"] = sentiment.apply(lambda s: s.subjectivity)

# Same columns, in the same order, as the original feature matrix.
feature_columns = [column for column, _ in readability_features] + ["polarity", "subjectivity"]
X = df[feature_columns]
y = df["storypoint"]

# SVR model.
model = svm.SVR()
model.fit(X, y)

dump(model, "model/model_tawos_aloy_neosp.pkl")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gerar_modelos/gerar_modelos_tfidflinear.py
DELETED
@@ -1,36 +0,0 @@
|
|
1 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
2 |
-
from sklearn.linear_model import LinearRegression
|
3 |
-
import pandas as pd
|
4 |
-
import nltk
|
5 |
-
from nltk.corpus import stopwords
|
6 |
-
from joblib import dump
|
7 |
-
|
8 |
-
# Load the data set.
df = pd.read_csv("dataset/ALOY_deep-se.csv")

# Discard the first 5 rows.
df = df.iloc[5:df.shape[0]]

# Build the context column = title + description.
df["context"] = df["title"] + df["description"]

# Pre-processing: drop English stop words. A set gives constant-time
# membership checks instead of scanning the stop-word list for every token.
nltk.download('stopwords')
stop = set(stopwords.words('english'))
df['context'] = df['context'].apply(lambda x: ' '.join(word for word in x.split() if word not in stop))

# Feature extraction: TF-IDF over the filtered context text.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["context"])
y = df["storypoint"]

# Model: ordinary linear regression on the TF-IDF features.
model = LinearRegression()
model.fit(X, y)

# Persist both artifacts; inference needs the fitted vectorizer as well.
dump(vectorizer, "model/vectorizer_tfidflinear.pkl")
dump(model, "model/model_tawos_aloy_tfidflinear.pkl")
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gerar_modelos/gerar_modelos_tfidfsvm.py
DELETED
@@ -1,36 +0,0 @@
|
|
1 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
2 |
-
from sklearn import svm
|
3 |
-
import pandas as pd
|
4 |
-
import nltk
|
5 |
-
from nltk.corpus import stopwords
|
6 |
-
from joblib import dump
|
7 |
-
|
8 |
-
# Load the data set.
df = pd.read_csv("dataset/ALOY_deep-se.csv")

# Discard the first 5 rows.
df = df.iloc[5:df.shape[0]]

# Build the context column = title + description.
df["context"] = df["title"] + df["description"]

# Pre-processing: drop English stop words. A set gives constant-time
# membership checks instead of scanning the stop-word list for every token.
nltk.download('stopwords')
stop = set(stopwords.words('english'))
df['context'] = df['context'].apply(lambda x: ' '.join(word for word in x.split() if word not in stop))

# Feature extraction: TF-IDF over the filtered context text.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["context"])
y = df["storypoint"]

# Model: support vector regression on the TF-IDF features.
model = svm.SVR()
model.fit(X, y)

# Persist both artifacts; inference needs the fitted vectorizer as well.
dump(vectorizer, "model/vectorizer_tfidfsvm.pkl")
dump(model, "model/model_tawos_aloy_tfidfsvm.pkl")
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model/model_tawos_aloy_mbr.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:caf24da2287096a50a79a061a89a97b3754e97a73e761b347209441e4f4a8a5d
|
3 |
-
size 383
|
|
|
|
|
|
|
|
model/model_tawos_aloy_neosp.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:af24bfeaf71ddd9722eba995ddc99afe0c8b106785dac50818218a3d9d963d83
|
3 |
-
size 22883
|
|
|
|
|
|
|
|
model/model_tawos_aloy_tfidflinear.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:303f877f464839fa0f901c430ac518c5faccad753e8c59dd9c6e80a613c70bb6
|
3 |
-
size 28056
|
|
|
|
|
|
|
|
model/model_tawos_aloy_tfidfsvm.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a56d344541d9f8ac65460dd19e9702f8aaafcea26579957f9460ef0b3d0abebf
|
3 |
-
size 141035
|
|
|
|
|
|
|
|
model/vectorizer_tfidflinear.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:81af13f0ea1e088c522690d9d75968ad1a9307443066e6cc1af99cb2c22f109f
|
3 |
-
size 102859
|
|
|
|
|
|
|
|
model/vectorizer_tfidfsvm.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:81af13f0ea1e088c522690d9d75968ad1a9307443066e6cc1af99cb2c22f109f
|
3 |
-
size 102859
|
|
|
|
|
|
|
|