Spaces:

giseldo
/

story_point_estimator

Sleeping

App Files Files Community

giseldo committed on Aug 31, 2023

Commit

09b169e

1 Parent(s): f34d18f

ultima versao

Browse files

Files changed (12) hide show

app.py +47 -8
dataset/ALOY_deep-se.csv +0 -0
gerar_modelos/gerar_modelos_mbr.py +0 -22
gerar_modelos/gerar_modelos_neosp.py +0 -45
gerar_modelos/gerar_modelos_tfidflinear.py +0 -36
gerar_modelos/gerar_modelos_tfidfsvm.py +0 -36
model/model_tawos_aloy_mbr.pkl +0 -3
model/model_tawos_aloy_neosp.pkl +0 -3
model/model_tawos_aloy_tfidflinear.pkl +0 -3
model/model_tawos_aloy_tfidfsvm.pkl +0 -3
model/vectorizer_tfidflinear.pkl +0 -3
model/vectorizer_tfidfsvm.pkl +0 -3

app.py CHANGED Viewed

@@ -25,16 +25,32 @@ descricao4 = """During the compile process Alloy will attempt to remove files fr
 titulo5 = """Resolve suboptimal compression from uglify-js v2 update"""
 descricao5 = """The v2 update of uglify-js in Alloy, specifically version 2.2.5, has some suboptimal compressions, which are causing the optimizer.js test spec to fail in certain cases. Specifically the issues are around booleans and cascading of variables in assignments. These issues have been logged with the Uglifyjs2 project in the following links:    * https://github.com/mishoo/UglifyJS2/issues/137  * https://github.com/mishoo/UglifyJS2/issues/138    When these issues are resolved and distributed in an npm release, we need to revisit these compressions and testing to ensure that the fixes are in place, and that new uglify-js version has no regressions that impact alloy."""
 def calcula_MbR(titulo, descricao):
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
-    model = load("model/model_tawos_aloy_mbr.pkl")
     story_points_MbR = model.predict(df["context"])
     return story_points_MbR
 def calcula_neosp(titulo, descricao):
-    model = load("model/model_tawos_aloy_neosp.pkl")
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
@@ -62,21 +78,45 @@ def calcula_neosp(titulo, descricao):
     return story_points
 def calculaTFIDFSVM(titulo, descricao):
-    model = load("model/model_tawos_aloy_tfidfsvm.pkl")
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
-    vectorizer = load("model/vectorizer_tfidfsvm.pkl")
     X = vectorizer.transform(df["context"])
     story_points = model.predict(X)
     return story_points
 def calculaTFIDFLinear(titulo, descricao):
-    model = load("model/model_tawos_aloy_tfidflinear.pkl")
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
-    vectorizer = load("model/vectorizer_tfidflinear.pkl")
     X = vectorizer.transform(df["context"])
     story_points = model.predict(X)
     return story_points
@@ -92,7 +132,6 @@ demo = gr.Interface(fn=calcula,
                              gr.Textbox(label="Story Points Estimado TFIDF-SVR"),
                              gr.Textbox(label="Story Points Estimado TFIDF-Linear")],
                     title="Agile Task Story Point Estimator",
-                    examples=[[titulo1, descricao1], [titulo2, descricao2], [titulo3, descricao3], [titulo4, descricao4], [titulo5, descricao5]]
-                    )
 demo.launch()

 titulo5 = """Resolve suboptimal compression from uglify-js v2 update"""
 descricao5 = """The v2 update of uglify-js in Alloy, specifically version 2.2.5, has some suboptimal compressions, which are causing the optimizer.js test spec to fail in certain cases. Specifically the issues are around booleans and cascading of variables in assignments. These issues have been logged with the Uglifyjs2 project in the following links:    * https://github.com/mishoo/UglifyJS2/issues/137  * https://github.com/mishoo/UglifyJS2/issues/138    When these issues are resolved and distributed in an npm release, we need to revisit these compressions and testing to ensure that the fixes are in place, and that new uglify-js version has no regressions that impact alloy."""
+from huggingface_hub import hf_hub_download
+import joblib
 def calcula_MbR(titulo, descricao):
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
+    model = joblib.load(
+	    hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_mbr.joblib")
+    )
+    #model = load("model/model_tawos_aloy_mbr.pkl")
     story_points_MbR = model.predict(df["context"])
     return story_points_MbR
 def calcula_neosp(titulo, descricao):
+    model = joblib.load(
+	    hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_neosp.joblib")
+    )
+    # model = load("model/model_tawos_aloy_neosp.pkl")
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
     return story_points
 def calculaTFIDFSVM(titulo, descricao):
+    model = joblib.load(
+	    hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_tfidfsvm.joblib")
+    )
+    # model = load("model/model_tawos_aloy_tfidfsvm.pkl")
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
+    vectorizer = joblib.load(
+	    hf_hub_download("giseldo/model_effort_tawos", "vectorizer_tfidfsvm.joblib")
+    )
+    # vectorizer = load("model/vectorizer_tfidfsvm.pkl")
     X = vectorizer.transform(df["context"])
     story_points = model.predict(X)
     return story_points
 def calculaTFIDFLinear(titulo, descricao):
+    model = joblib.load(
+	    hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_tfidflinear.joblib")
+    )
+    # model = load("model/model_tawos_aloy_tfidflinear.pkl")
     context = titulo + descricao
     d = {"context": [context]}
     df = pd.DataFrame(data=d, columns=["context"])
+    vectorizer = joblib.load(
+	    hf_hub_download("giseldo/model_effort_tawos", "vectorizer_tfidflinear.joblib")
+    )
+    # vectorizer = load("model/vectorizer_tfidflinear.pkl")
     X = vectorizer.transform(df["context"])
     story_points = model.predict(X)
     return story_points
                              gr.Textbox(label="Story Points Estimado TFIDF-SVR"),
                              gr.Textbox(label="Story Points Estimado TFIDF-Linear")],
                     title="Agile Task Story Point Estimator",
+                    examples=[[titulo1, descricao1], [titulo2, descricao2], [titulo3, descricao3], [titulo4, descricao4], [titulo5, descricao5]])
 demo.launch()

dataset/ALOY_deep-se.csv DELETED Viewed

The diff for this file is too large to render. See raw diff

gerar_modelos/gerar_modelos_mbr.py DELETED Viewed

@@ -1,22 +0,0 @@
-from sklearn.dummy import DummyRegressor
-import pandas as pd
-from nltk.corpus import stopwords
-from joblib import dump
-# carregando os dados
-df = pd.read_csv("dataset/ALOY_deep-se.csv")
-# Tirando os 5 Primeiros
-df = df.iloc[5:df.shape[0]]
-# criando a coluna contexto = titulo + descricao
-df["context"] = df["title"] + df["description"]
-X = df["storypoint"].index
-y = df["storypoint"]
-model = DummyRegressor(strategy="mean")
-model.fit(X, y)
-dump(model, "model/model_tawos_aloy_mbr.pkl")

gerar_modelos/gerar_modelos_neosp.py DELETED Viewed

@@ -1,45 +0,0 @@
-from textblob import TextBlob
-import textstat
-from sklearn import svm
-import pandas as pd
-import nltk
-from nltk.corpus import stopwords
-from joblib import dump
-# carregando os dados
-df = pd.read_csv("dataset/ALOY_deep-se.csv")
-# Tirando os 5 Primeiros
-df = df.iloc[5:df.shape[0]]
-# criando a coluna contexto = titulo + descricao
-df["context"] = df["title"] + df["description"]
-# pré-processamento
-nltk.download('stopwords')
-stop = stopwords.words('english')
-df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
-# Extração de features
-df["gunning_fog"] = df['context'].apply(textstat.gunning_fog)
-df["flesch_reading_ease"] = df['context'].apply(textstat.flesch_reading_ease)
-df["flesch_kincaid_grade"] = df['context'].apply(textstat.flesch_kincaid_grade)
-df["smog_index"] = df['context'].apply(textstat.smog_index)
-df["coleman_liau_index"] = df['context'].apply(textstat.coleman_liau_index)
-df["automated_readability_index"] = df['context'].apply(textstat.automated_readability_index)
-df["dale_chall_readability_score"] = df['context'].apply(textstat.dale_chall_readability_score)
-df["difficult_words"] = df['context'].apply(textstat.difficult_words)
-df["linsear_write_formula"] = df['context'].apply(textstat.linsear_write_formula)
-df["polarity"] = df["context"].apply(lambda x: TextBlob(x).sentiment.polarity)
-df["subjectivity"] = df["context"].apply(lambda x: TextBlob(x).sentiment.subjectivity)
-X = df[["gunning_fog", "flesch_reading_ease", "flesch_kincaid_grade", "smog_index", "coleman_liau_index",
-        "automated_readability_index", "dale_chall_readability_score", "difficult_words", "linsear_write_formula",
-        "polarity", "subjectivity"]]
-y = df["storypoint"]
-# modelo SVR
-model = svm.SVR()
-model.fit(X, y)
-dump(model, "model/model_tawos_aloy_neosp.pkl")

gerar_modelos/gerar_modelos_tfidflinear.py DELETED Viewed

@@ -1,36 +0,0 @@
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.linear_model import LinearRegression
-import pandas as pd
-import nltk
-from nltk.corpus import stopwords
-from joblib import dump
-# Carregando os dados
-df = pd.read_csv("dataset/ALOY_deep-se.csv")
-# Tirando os 5 Primeiros
-df = df.iloc[5:df.shape[0]]
-# Criando a coluna contexto = titulo + descricao
-df["context"] = df["title"] + df["description"]
-# Pré-processamento
-nltk.download('stopwords')
-stop = stopwords.words('english')
-df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
-# Extração de features
-vectorizer = TfidfVectorizer()
-X = vectorizer.fit_transform(df["context"])
-y = df["storypoint"]
-# Modelos
-model = LinearRegression()
-model.fit(X, y)
-dump(vectorizer, "model/vectorizer_tfidflinear.pkl")
-dump(model, "model/model_tawos_aloy_tfidflinear.pkl")

gerar_modelos/gerar_modelos_tfidfsvm.py DELETED Viewed

@@ -1,36 +0,0 @@
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn import svm
-import pandas as pd
-import nltk
-from nltk.corpus import stopwords
-from joblib import dump
-# Carregando os dados
-df = pd.read_csv("dataset/ALOY_deep-se.csv")
-# Tirando os 5 Primeiros
-df = df.iloc[5:df.shape[0]]
-# Criando a coluna contexto = titulo + descricao
-df["context"] = df["title"] + df["description"]
-# Pré-processamento
-nltk.download('stopwords')
-stop = stopwords.words('english')
-df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
-# Extração de features
-vectorizer = TfidfVectorizer()
-X = vectorizer.fit_transform(df["context"])
-y = df["storypoint"]
-# Modelos
-model = svm.SVR()
-model.fit(X, y)
-dump(vectorizer, "model/vectorizer_tfidfsvm.pkl")
-dump(model, "model/model_tawos_aloy_tfidfsvm.pkl")

model/model_tawos_aloy_mbr.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:caf24da2287096a50a79a061a89a97b3754e97a73e761b347209441e4f4a8a5d
-size 383

model/model_tawos_aloy_neosp.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:af24bfeaf71ddd9722eba995ddc99afe0c8b106785dac50818218a3d9d963d83
-size 22883

model/model_tawos_aloy_tfidflinear.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:303f877f464839fa0f901c430ac518c5faccad753e8c59dd9c6e80a613c70bb6
-size 28056

model/model_tawos_aloy_tfidfsvm.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a56d344541d9f8ac65460dd19e9702f8aaafcea26579957f9460ef0b3d0abebf
-size 141035

model/vectorizer_tfidflinear.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:81af13f0ea1e088c522690d9d75968ad1a9307443066e6cc1af99cb2c22f109f
-size 102859

model/vectorizer_tfidfsvm.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:81af13f0ea1e088c522690d9d75968ad1a9307443066e6cc1af99cb2c22f109f
-size 102859