Commit 09b169e by giseldo · Parent: f34d18f

latest version

app.py CHANGED
@@ -25,16 +25,32 @@ descricao4 = """During the compile process Alloy will attempt to remove files fr
 titulo5 = """Resolve suboptimal compression from uglify-js v2 update"""
 descricao5 = """The v2 update of uglify-js in Alloy, specifically version 2.2.5, has some suboptimal compressions, which are causing the optimizer.js test spec to fail in certain cases. Specifically the issues are around booleans and cascading of variables in assignments. These issues have been logged with the Uglifyjs2 project in the following links: * https://github.com/mishoo/UglifyJS2/issues/137 * https://github.com/mishoo/UglifyJS2/issues/138 When these issues are resolved and distributed in an npm release, we need to revisit these compressions and testing to ensure that the fixes are in place, and that new uglify-js version has no regressions that impact alloy."""
 
+
+from huggingface_hub import hf_hub_download
+import joblib
+
 def calcula_MbR(titulo, descricao):
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
-    model = load("model/model_tawos_aloy_mbr.pkl")
+
+    model = joblib.load(
+        hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_mbr.joblib")
+    )
+
+    # model = load("model/model_tawos_aloy_mbr.pkl")
+
     story_points_MbR = model.predict(df["context"])
     return story_points_MbR
 
 def calcula_neosp(titulo, descricao):
-    model = load("model/model_tawos_aloy_neosp.pkl")
+
+    model = joblib.load(
+        hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_neosp.joblib")
+    )
+
+    # model = load("model/model_tawos_aloy_neosp.pkl")
+
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
@@ -62,21 +78,45 @@ def calcula_neosp(titulo, descricao):
     return story_points
 
 def calculaTFIDFSVM(titulo, descricao):
-    model = load("model/model_tawos_aloy_tfidfsvm.pkl")
+
+    model = joblib.load(
+        hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_tfidfsvm.joblib")
+    )
+
+    # model = load("model/model_tawos_aloy_tfidfsvm.pkl")
+
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
-    vectorizer = load("model/vectorizer_tfidfsvm.pkl")
+
+    vectorizer = joblib.load(
+        hf_hub_download("giseldo/model_effort_tawos", "vectorizer_tfidfsvm.joblib")
+    )
+
+    # vectorizer = load("model/vectorizer_tfidfsvm.pkl")
+
     X = vectorizer.transform(df["context"])
     story_points = model.predict(X)
     return story_points
 
 def calculaTFIDFLinear(titulo, descricao):
-    model = load("model/model_tawos_aloy_tfidflinear.pkl")
+
+    model = joblib.load(
+        hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_tfidflinear.joblib")
+    )
+
+    # model = load("model/model_tawos_aloy_tfidflinear.pkl")
+
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
-    vectorizer = load("model/vectorizer_tfidflinear.pkl")
+
+    vectorizer = joblib.load(
+        hf_hub_download("giseldo/model_effort_tawos", "vectorizer_tfidflinear.joblib")
+    )
+
+    # vectorizer = load("model/vectorizer_tfidflinear.pkl")
+
     X = vectorizer.transform(df["context"])
     story_points = model.predict(X)
     return story_points
@@ -92,7 +132,6 @@ demo = gr.Interface(fn=calcula,
                     gr.Textbox(label="Story Points Estimado TFIDF-SVR"),
                     gr.Textbox(label="Story Points Estimado TFIDF-Linear")],
                     title="Agile Task Story Point Estimator",
-                    examples=[[titulo1, descricao1], [titulo2, descricao2], [titulo3, descricao3], [titulo4, descricao4], [titulo5, descricao5]]
-                    )
+                    examples=[[titulo1, descricao1], [titulo2, descricao2], [titulo3, descricao3], [titulo4, descricao4], [titulo5, descricao5]])
 
 demo.launch()
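
For reference only (not part of the commit), a minimal sketch of the loading path the new app.py relies on: hf_hub_download fetches a file from the giseldo/model_effort_tawos model repo on the Hugging Face Hub, caches it locally, and returns the local path, which joblib.load then deserializes. The repo id and filename are taken from the diff above; the sample issue text, and the assumption that the MbR artifact is the mean-strategy DummyRegressor from the deleted training script, are illustrative.

import joblib
import pandas as pd
from huggingface_hub import hf_hub_download

# Download (or reuse the cached copy of) the serialized estimator and load it.
local_path = hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_mbr.joblib")
model = joblib.load(local_path)

# Same input shape app.py builds: a one-row DataFrame with a "context" column.
df = pd.DataFrame({"context": ["Resolve suboptimal compression from uglify-js v2 update"]})
print(model.predict(df["context"]))  # a mean-strategy DummyRegressor ignores the text and returns the training mean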
dataset/ALOY_deep-se.csv DELETED
The diff for this file is too large to render.
 
gerar_modelos/gerar_modelos_mbr.py DELETED
@@ -1,22 +0,0 @@
-from sklearn.dummy import DummyRegressor
-import pandas as pd
-from nltk.corpus import stopwords
-from joblib import dump
-
-# load the data
-df = pd.read_csv("dataset/ALOY_deep-se.csv")
-
-# drop the first 5 rows
-df = df.iloc[5:df.shape[0]]
-
-# create the context column = title + description
-df["context"] = df["title"] + df["description"]
-
-X = df["storypoint"].index
-y = df["storypoint"]
-
-model = DummyRegressor(strategy="mean")
-model.fit(X, y)
-
-dump(model, "model/model_tawos_aloy_mbr.pkl")
-
gerar_modelos/gerar_modelos_neosp.py DELETED
@@ -1,45 +0,0 @@
-from textblob import TextBlob
-import textstat
-from sklearn import svm
-import pandas as pd
-import nltk
-from nltk.corpus import stopwords
-from joblib import dump
-
-# load the data
-df = pd.read_csv("dataset/ALOY_deep-se.csv")
-
-# drop the first 5 rows
-df = df.iloc[5:df.shape[0]]
-
-# create the context column = title + description
-df["context"] = df["title"] + df["description"]
-
-# preprocessing
-nltk.download('stopwords')
-stop = stopwords.words('english')
-df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
-
-# feature extraction
-df["gunning_fog"] = df['context'].apply(textstat.gunning_fog)
-df["flesch_reading_ease"] = df['context'].apply(textstat.flesch_reading_ease)
-df["flesch_kincaid_grade"] = df['context'].apply(textstat.flesch_kincaid_grade)
-df["smog_index"] = df['context'].apply(textstat.smog_index)
-df["coleman_liau_index"] = df['context'].apply(textstat.coleman_liau_index)
-df["automated_readability_index"] = df['context'].apply(textstat.automated_readability_index)
-df["dale_chall_readability_score"] = df['context'].apply(textstat.dale_chall_readability_score)
-df["difficult_words"] = df['context'].apply(textstat.difficult_words)
-df["linsear_write_formula"] = df['context'].apply(textstat.linsear_write_formula)
-df["polarity"] = df["context"].apply(lambda x: TextBlob(x).sentiment.polarity)
-df["subjectivity"] = df["context"].apply(lambda x: TextBlob(x).sentiment.subjectivity)
-
-X = df[["gunning_fog", "flesch_reading_ease", "flesch_kincaid_grade", "smog_index", "coleman_liau_index",
-        "automated_readability_index", "dale_chall_readability_score", "difficult_words", "linsear_write_formula",
-        "polarity", "subjectivity"]]
-y = df["storypoint"]
-
-# SVR model
-model = svm.SVR()
-model.fit(X, y)
-
-dump(model, "model/model_tawos_aloy_neosp.pkl")
gerar_modelos/gerar_modelos_tfidflinear.py DELETED
@@ -1,36 +0,0 @@
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.linear_model import LinearRegression
-import pandas as pd
-import nltk
-from nltk.corpus import stopwords
-from joblib import dump
-
-# load the data
-df = pd.read_csv("dataset/ALOY_deep-se.csv")
-
-# drop the first 5 rows
-df = df.iloc[5:df.shape[0]]
-
-# create the context column = title + description
-df["context"] = df["title"] + df["description"]
-
-# preprocessing
-nltk.download('stopwords')
-stop = stopwords.words('english')
-df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
-
-# feature extraction
-vectorizer = TfidfVectorizer()
-X = vectorizer.fit_transform(df["context"])
-y = df["storypoint"]
-
-# model
-model = LinearRegression()
-model.fit(X, y)
-
-dump(vectorizer, "model/vectorizer_tfidflinear.pkl")
-dump(model, "model/model_tawos_aloy_tfidflinear.pkl")
-
-
-
-
gerar_modelos/gerar_modelos_tfidfsvm.py DELETED
@@ -1,36 +0,0 @@
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn import svm
-import pandas as pd
-import nltk
-from nltk.corpus import stopwords
-from joblib import dump
-
-# load the data
-df = pd.read_csv("dataset/ALOY_deep-se.csv")
-
-# drop the first 5 rows
-df = df.iloc[5:df.shape[0]]
-
-# create the context column = title + description
-df["context"] = df["title"] + df["description"]
-
-# preprocessing
-nltk.download('stopwords')
-stop = stopwords.words('english')
-df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
-
-# feature extraction
-vectorizer = TfidfVectorizer()
-X = vectorizer.fit_transform(df["context"])
-y = df["storypoint"]
-
-# model
-model = svm.SVR()
-model.fit(X, y)
-
-dump(vectorizer, "model/vectorizer_tfidfsvm.pkl")
-dump(model, "model/model_tawos_aloy_tfidfsvm.pkl")
-
-
-
-
model/model_tawos_aloy_mbr.pkl DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:caf24da2287096a50a79a061a89a97b3754e97a73e761b347209441e4f4a8a5d
-size 383

model/model_tawos_aloy_neosp.pkl DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:af24bfeaf71ddd9722eba995ddc99afe0c8b106785dac50818218a3d9d963d83
-size 22883

model/model_tawos_aloy_tfidflinear.pkl DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:303f877f464839fa0f901c430ac518c5faccad753e8c59dd9c6e80a613c70bb6
-size 28056

model/model_tawos_aloy_tfidfsvm.pkl DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a56d344541d9f8ac65460dd19e9702f8aaafcea26579957f9460ef0b3d0abebf
-size 141035

model/vectorizer_tfidflinear.pkl DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:81af13f0ea1e088c522690d9d75968ad1a9307443066e6cc1af99cb2c22f109f
-size 102859

model/vectorizer_tfidfsvm.pkl DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:81af13f0ea1e088c522690d9d75968ad1a9307443066e6cc1af99cb2c22f109f
-size 102859
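
The model/*.pkl artifacts deleted above were produced by the removed gerar_modelos scripts and are superseded by the .joblib files that app.py now downloads from the giseldo/model_effort_tawos Hub repo. A hedged sketch of how one such artifact could be regenerated and uploaded; the training step mirrors the deleted gerar_modelos_mbr.py, the joblib filename and repo id come from app.py, and the upload call, the local dataset copy, and the write token are assumptions, not part of this commit.

import pandas as pd
from joblib import dump
from sklearn.dummy import DummyRegressor
from huggingface_hub import upload_file

# Retrain the mean baseline as in the deleted gerar_modelos_mbr.py
# (assumes a local copy of the dataset, which this commit removes from the Space).
df = pd.read_csv("dataset/ALOY_deep-se.csv")
df = df.iloc[5:df.shape[0]]                      # drop the first 5 rows
df["context"] = df["title"] + df["description"]  # context = title + description
model = DummyRegressor(strategy="mean")
model.fit(df["storypoint"].index, df["storypoint"])

# Serialize with joblib and push it to the Hub repo that app.py downloads from.
dump(model, "model_tawos_aloy_mbr.joblib")
upload_file(
    path_or_fileobj="model_tawos_aloy_mbr.joblib",
    path_in_repo="model_tawos_aloy_mbr.joblib",
    repo_id="giseldo/model_effort_tawos",
)  # requires a write token, e.g. via `huggingface-cli login`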