giseldo committed on
Commit
a6e7f6b
·
1 Parent(s): a599e86

ultima versao

Browse files
Files changed (5) hide show
  1. __pycache__/util.cpython-310.pyc +0 -0
  2. app.py +105 -57
  3. flagged/log.csv +2 -0
  4. requirements.txt +1 -0
  5. util.py +60 -0
__pycache__/util.cpython-310.pyc ADDED
Binary file (2.54 kB). View file
 
app.py CHANGED
@@ -4,6 +4,13 @@ from textblob import TextBlob
4
  import textstat
5
  from huggingface_hub import hf_hub_download
6
  from joblib import load
 
 
 
 
 
 
 
7
 
8
  titulo1 = """CLONE - Studio Dashboard: "default" and "Default Project" does not give clear information about Alloy and Project unless description is read."""
9
  descricao1 = """Steps To Reproduce: 1. On dashboard on studio 3.0, navigate to Develop tab. 2. Notice "default" and "Default Project" & "two-tabbed" and "Tabbed Application" names. Actual: User does not get clear information from names that one is alloy project and another one is Titanium project unless he reads the description below. Expected: Naming convention or icon corresponding must suggest type"""
@@ -20,104 +27,144 @@ descricao4 = """During the compile process Alloy will attempt to remove files fr
20
  titulo5 = """Resolve suboptimal compression from uglify-js v2 update"""
21
  descricao5 = """The v2 update of uglify-js in Alloy, specifically version 2.2.5, has some suboptimal compressions, which are causing the optimizer.js test spec to fail in certain cases. Specifically the issues are around booleans and cascading of variables in assignments. These issues have been logged with the Uglifyjs2 project in the following links: * https://github.com/mishoo/UglifyJS2/issues/137 * https://github.com/mishoo/UglifyJS2/issues/138 When these issues are resolved and distributed in an npm release, we need to revisit these compressions and testing to ensure that the fixes are in place, and that new uglify-js version has no regressions that impact alloy."""
22
 
23
- def calcula_MbR(titulo, descricao):
24
  context = titulo + descricao
25
- d = {"context": [context]}
26
- df = pd.DataFrame(data=d, columns=["context"])
27
- model = load(hf_hub_download("model_effort_tawos", "model_tawos_aloy_mbr.joblib"))
28
- story_points_MbR = model.predict(df["context"])
29
  return story_points_MbR
30
 
31
- def calcula_Median(titulo, descricao):
32
  context = titulo + descricao
33
- d = {"context": [context]}
34
- df = pd.DataFrame(data=d, columns=["context"])
35
- model = load(hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_median.joblib"))
36
- story_points_MbR = model.predict(df["context"])
37
  return story_points_MbR
38
 
39
- def calcula_NEOSP_SVR(titulo, descricao):
40
- model = load(hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_neosp_svr.joblib"))
 
 
41
  context = titulo + descricao
42
  d = {"context": [context]}
43
  df = pd.DataFrame(data=d, columns=["context"])
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # features de legibilidade
46
- df["gunning_fog"] = df['context'].apply(textstat.gunning_fog)#
47
- df["flesch_reading_ease"] = df['context'].apply(textstat.flesch_reading_ease)#
48
- df["flesch_kincaid_grade"] = df['context'].apply(textstat.flesch_kincaid_grade)#
49
- df["smog_index"] = df['context'].apply(textstat.smog_index)
50
- df["coleman_liau_index"] = df['context'].apply(textstat.coleman_liau_index)#
51
- df["automated_readability_index"] = df['context'].apply(textstat.automated_readability_index) #
52
- df["dale_chall_readability_score"] = df['context'].apply(textstat.dale_chall_readability_score)#
53
- df["difficult_words"] = df['context'].apply(textstat.difficult_words)
54
- df["linsear_write_formula"] = df['context'].apply(textstat.linsear_write_formula)#
55
 
56
  # feature de sentimento
57
- df["polarity"] = df["context"].apply(lambda x: TextBlob(x).sentiment.polarity)
58
- df["subjectivity"] = df["context"].apply(lambda x: TextBlob(x).sentiment.subjectivity)
59
 
60
- X = df[["gunning_fog", "flesch_reading_ease", "flesch_kincaid_grade", "smog_index", "coleman_liau_index",
61
- "automated_readability_index", "dale_chall_readability_score", "difficult_words", "linsear_write_formula",
62
- "polarity", "subjectivity"]]
63
 
64
  story_points = model.predict(X)
65
  return story_points
66
 
67
- def calcula_NEOSP_Linear(titulo, descricao):
68
- model = load(hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_neosp_linear.joblib"))
 
69
  context = titulo + descricao
70
  d = {"context": [context]}
71
  df = pd.DataFrame(data=d, columns=["context"])
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  # features de legibilidade
74
- df["gunning_fog"] = df['context'].apply(textstat.gunning_fog)#
75
- df["flesch_reading_ease"] = df['context'].apply(textstat.flesch_reading_ease)#
76
- df["flesch_kincaid_grade"] = df['context'].apply(textstat.flesch_kincaid_grade)#
77
- df["smog_index"] = df['context'].apply(textstat.smog_index)
78
- df["coleman_liau_index"] = df['context'].apply(textstat.coleman_liau_index)#
79
- df["automated_readability_index"] = df['context'].apply(textstat.automated_readability_index) #
80
- df["dale_chall_readability_score"] = df['context'].apply(textstat.dale_chall_readability_score)#
81
- df["difficult_words"] = df['context'].apply(textstat.difficult_words)
82
- df["linsear_write_formula"] = df['context'].apply(textstat.linsear_write_formula)#
83
 
84
  # feature de sentimento
85
- df["polarity"] = df["context"].apply(lambda x: TextBlob(x).sentiment.polarity)
86
- df["subjectivity"] = df["context"].apply(lambda x: TextBlob(x).sentiment.subjectivity)
87
 
88
- X = df[["gunning_fog", "flesch_reading_ease", "flesch_kincaid_grade", "smog_index", "coleman_liau_index",
89
- "automated_readability_index", "dale_chall_readability_score", "difficult_words", "linsear_write_formula",
90
- "polarity", "subjectivity"]]
91
 
92
  story_points = model.predict(X)
93
  return story_points
94
 
95
- def calcula_TFIDF_SVR(titulo, descricao):
96
- model = load(hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_tfidf_svr.joblib"))
97
  context = titulo + descricao
98
- d = {"context": [context]}
99
- df = pd.DataFrame(data=d, columns=["context"])
100
- vectorizer = load(hf_hub_download("giseldo/model_effort_tawos", "vectorizer_tfidf.joblib"))
101
- X = vectorizer.transform(df["context"])
 
 
102
  story_points = model.predict(X)
103
  return story_points
104
 
105
- def calcula_TFIDF_Linear(titulo, descricao):
106
- model = load(hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_tfidf_linear.joblib"))
107
  context = titulo + descricao
108
- d = {"context": [context]}
109
- df = pd.DataFrame(data=d, columns=["context"])
110
- vectorizer = load(hf_hub_download("giseldo/model_effort_tawos", "vectorizer_tfidf.joblib"))
111
- X = vectorizer.transform(df["context"])
 
 
112
  story_points = model.predict(X)
113
  return story_points
114
 
115
- def calcula(titulo, descricao):
116
- return calcula_MbR(titulo, descricao), calcula_Median(titulo, descricao), calcula_NEOSP_SVR(titulo, descricao), calcula_NEOSP_Linear(titulo, descricao), calcula_TFIDF_SVR(titulo, descricao), calcula_TFIDF_Linear(titulo, descricao)
117
 
118
  demo = gr.Interface(fn=calcula,
119
  inputs=[gr.Textbox(placeholder="Título", label="Título"),
120
- gr.Textbox(lines=10, placeholder="Descrição", label="Descrição")],
 
121
  outputs=[gr.Textbox(label="Story Points Estimado Média"),
122
  gr.Textbox(label="Story Points Estimado Mediana"),
123
  gr.Textbox(label="Story Points Estimado NEOSP-SVR"),
@@ -125,6 +172,7 @@ demo = gr.Interface(fn=calcula,
125
  gr.Textbox(label="Story Points Estimado TFIDF-SVR"),
126
  gr.Textbox(label="Story Points Estimado TFIDF-Linear")],
127
  title="Agile Task Story Point Estimator",
 
128
  examples=[[titulo1, descricao1], [titulo2, descricao2], [titulo3, descricao3], [titulo4, descricao4], [titulo5, descricao5]])
129
 
130
  demo.launch()
 
4
  import textstat
5
  from huggingface_hub import hf_hub_download
6
  from joblib import load
7
+ from util import escape_tags_and_content, escape_tags, escape_strings, escape_links, escape_hex_character_codes, escape_punctuation_boundaries, escape_odd_spaces
8
+
9
+ import nltk
10
+ import nltk
11
+ from nltk.corpus import stopwords
12
+
13
+ nltk.download('stopwords')
14
 
15
  titulo1 = """CLONE - Studio Dashboard: "default" and "Default Project" does not give clear information about Alloy and Project unless description is read."""
16
  descricao1 = """Steps To Reproduce: 1. On dashboard on studio 3.0, navigate to Develop tab. 2. Notice "default" and "Default Project" & "two-tabbed" and "Tabbed Application" names. Actual: User does not get clear information from names that one is alloy project and another one is Titanium project unless he reads the description below. Expected: Naming convention or icon corresponding must suggest type"""
 
27
  titulo5 = """Resolve suboptimal compression from uglify-js v2 update"""
28
  descricao5 = """The v2 update of uglify-js in Alloy, specifically version 2.2.5, has some suboptimal compressions, which are causing the optimizer.js test spec to fail in certain cases. Specifically the issues are around booleans and cascading of variables in assignments. These issues have been logged with the Uglifyjs2 project in the following links: * https://github.com/mishoo/UglifyJS2/issues/137 * https://github.com/mishoo/UglifyJS2/issues/138 When these issues are resolved and distributed in an npm release, we need to revisit these compressions and testing to ensure that the fixes are in place, and that new uglify-js version has no regressions that impact alloy."""
29
 
30
def calcula_MbR(titulo, descricao, nome_projeto):
    """Estimate story points with the pre-trained "MbR" model.

    The title and description are concatenated (without a separator) and
    wrapped in a one-row DataFrame column named ``context_``; that column is
    handed to the model's ``predict``. The UI shows this value under the
    "Média" (mean) label.

    Args:
        titulo: Issue title text.
        descricao: Issue description text.
        nome_projeto: Project name chosen in the UI. Not used here; the model
            file loaded is always the "aloy" one.

    Returns:
        The result of ``model.predict`` for the single input row.
    """
    text = titulo + descricao
    frame = pd.DataFrame(data={"context_": [text]}, columns=["context_"])

    # hf_hub_download returns a local path to the (cached) model file.
    model_path = hf_hub_download(
        "giseldo/model_effort_tawos",
        "model_tawos_aloy_mbr.joblib",
        force_download=False,
    )
    estimator = load(model_path)
    return estimator.predict(frame["context_"])
37
 
38
def calcula_Median(titulo, descricao, nome_projeto):
    """Estimate story points with the pre-trained "median" model.

    The title and description are concatenated (without a separator) and
    wrapped in a one-row DataFrame column named ``context_``; that column is
    handed to the model's ``predict``.

    Args:
        titulo: Issue title text.
        descricao: Issue description text.
        nome_projeto: Project name chosen in the UI. Not used here; the model
            file loaded is always the "aloy" one.

    Returns:
        The result of ``model.predict`` for the single input row.
    """
    text = titulo + descricao
    frame = pd.DataFrame(data={"context_": [text]}, columns=["context_"])

    # hf_hub_download returns a local path to the (cached) model file.
    model_path = hf_hub_download(
        "giseldo/model_effort_tawos",
        "model_tawos_aloy_median.joblib",
        force_download=False,
    )
    estimator = load(model_path)
    story_points_median = estimator.predict(frame["context_"])
    return story_points_median
45
 
46
def calcula_NEOSP_SVR(titulo, descricao, nome_projeto):
    """Estimate story points with the pre-trained NEOSP SVR model.

    Pipeline: concatenate title and description, clean the text with the
    ``util`` escape helpers, drop English stop words, compute nine textstat
    readability metrics plus TextBlob polarity/subjectivity, and feed those
    eleven columns to the model.

    Args:
        titulo: Issue title text.
        descricao: Issue description text.
        nome_projeto: Project name chosen in the UI. Not used here; the model
            file loaded is always the "aloy" one.

    Returns:
        The result of ``model.predict`` for the single feature row.
    """
    model = load(hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_neosp_svr.joblib", force_download=False))

    # Single-row frame holding the concatenated text.
    context = titulo + descricao
    df = pd.DataFrame(data={"context": [context]}, columns=["context"])

    # Pre-processing: the cleaning helpers are applied in this fixed order.
    cleaning_steps = (
        escape_tags_and_content,
        escape_tags,
        escape_strings,
        escape_links,
        escape_hex_character_codes,
        escape_punctuation_boundaries,
        escape_odd_spaces,
    )
    for clean in cleaning_steps:
        df["context"] = df["context"].apply(clean)

    # Remove stop words (case-sensitive match). A set gives O(1) membership
    # tests instead of scanning the whole stop-word list for every token.
    stop = set(stopwords.words('english'))
    df['context'] = df['context'].apply(
        lambda x: ' '.join(word for word in x.split() if word not in stop)
    )

    # Columns carry a trailing underscore; without the rename the NEOSP
    # feature extraction runs into a problem (per the original author's note).
    df = df.rename(columns={"context": "context_"})

    # Readability features.
    readability_metrics = (
        ("gunning_fog_", textstat.gunning_fog),
        ("flesch_reading_ease_", textstat.flesch_reading_ease),
        ("flesch_kincaid_grade_", textstat.flesch_kincaid_grade),
        ("smog_index_", textstat.smog_index),
        ("coleman_liau_index_", textstat.coleman_liau_index),
        ("automated_readability_index_", textstat.automated_readability_index),
        ("dale_chall_readability_score_", textstat.dale_chall_readability_score),
        ("difficult_words_", textstat.difficult_words),
        ("linsear_write_formula_", textstat.linsear_write_formula),
    )
    for column, metric in readability_metrics:
        df[column] = df['context_'].apply(metric)

    # Sentiment features.
    df["polarity_"] = df["context_"].apply(lambda x: TextBlob(x).sentiment.polarity)
    df["subjectivity_"] = df["context_"].apply(lambda x: TextBlob(x).sentiment.subjectivity)

    X = df[["gunning_fog_", "flesch_reading_ease_", "flesch_kincaid_grade_", "smog_index_", "coleman_liau_index_",
            "automated_readability_index_", "dale_chall_readability_score_", "difficult_words_", "linsear_write_formula_",
            "polarity_", "subjectivity_"]]

    story_points = model.predict(X)
    return story_points
91
 
92
def calcula_NEOSP_Linear(titulo, descricao, nome_projeto):
    """Estimate story points with the pre-trained NEOSP linear model.

    Pipeline: concatenate title and description, clean the text with the
    ``util`` escape helpers, drop English stop words, compute nine textstat
    readability metrics plus TextBlob polarity/subjectivity, and feed those
    eleven columns to the model.

    Args:
        titulo: Issue title text.
        descricao: Issue description text.
        nome_projeto: Project name chosen in the UI. Not used here; the model
            file loaded is always the "aloy" one.

    Returns:
        The result of ``model.predict`` for the single feature row.
    """
    model = load(hf_hub_download("giseldo/model_effort_tawos", "model_tawos_aloy_neosp_linear.joblib", force_download=False))

    # Single-row frame holding the concatenated text.
    context = titulo + descricao
    df = pd.DataFrame(data={"context": [context]}, columns=["context"])

    # Pre-processing: the cleaning helpers are applied in this fixed order.
    cleaning_steps = (
        escape_tags_and_content,
        escape_tags,
        escape_strings,
        escape_links,
        escape_hex_character_codes,
        escape_punctuation_boundaries,
        escape_odd_spaces,
    )
    for clean in cleaning_steps:
        df["context"] = df["context"].apply(clean)

    # Remove stop words (case-sensitive match). A set gives O(1) membership
    # tests instead of scanning the whole stop-word list for every token.
    stop = set(stopwords.words('english'))
    df['context'] = df['context'].apply(
        lambda x: ' '.join(word for word in x.split() if word not in stop)
    )

    # Columns carry a trailing underscore; without the rename the NEOSP
    # feature extraction runs into a problem (per the original author's note).
    df = df.rename(columns={"context": "context_"})

    # Readability features.
    readability_metrics = (
        ("gunning_fog_", textstat.gunning_fog),
        ("flesch_reading_ease_", textstat.flesch_reading_ease),
        ("flesch_kincaid_grade_", textstat.flesch_kincaid_grade),
        ("smog_index_", textstat.smog_index),
        ("coleman_liau_index_", textstat.coleman_liau_index),
        ("automated_readability_index_", textstat.automated_readability_index),
        ("dale_chall_readability_score_", textstat.dale_chall_readability_score),
        ("difficult_words_", textstat.difficult_words),
        ("linsear_write_formula_", textstat.linsear_write_formula),
    )
    for column, metric in readability_metrics:
        df[column] = df['context_'].apply(metric)

    # Sentiment features.
    df["polarity_"] = df["context_"].apply(lambda x: TextBlob(x).sentiment.polarity)
    df["subjectivity_"] = df["context_"].apply(lambda x: TextBlob(x).sentiment.subjectivity)

    X = df[["gunning_fog_", "flesch_reading_ease_", "flesch_kincaid_grade_", "smog_index_", "coleman_liau_index_",
            "automated_readability_index_", "dale_chall_readability_score_", "difficult_words_", "linsear_write_formula_",
            "polarity_", "subjectivity_"]]

    story_points = model.predict(X)
    return story_points
136
 
137
def calcula_TFIDF_SVR(titulo, descricao, nome_projeto):
    """Estimate story points with the pre-trained TF-IDF SVR model.

    The concatenated title and description are transformed by the stored
    vectorizer; the resulting matrix is densified into a DataFrame whose
    columns are the vectorizer's feature names and passed to the model.

    Args:
        titulo: Issue title text.
        descricao: Issue description text.
        nome_projeto: Project name chosen in the UI. Not used here; the model
            file loaded is always the "aloy" one.

    Returns:
        The result of ``model.predict`` for the single input row.
    """
    repo_id = "giseldo/model_effort_tawos"
    estimator = load(hf_hub_download(repo_id, "model_tawos_aloy_tfidf_svr.joblib", force_download=False))

    text = titulo + descricao
    frame = pd.DataFrame(data={"context_": [text]}, columns=["context_"])

    tfidf = load(hf_hub_download(repo_id, "vectorizer_tfidf.joblib", force_download=False))
    sparse_features = tfidf.transform(frame["context_"])

    # Named columns so the model sees the vectorizer's feature names.
    features = pd.DataFrame(
        data=sparse_features.toarray(),
        columns=tfidf.get_feature_names_out(),
    )
    return estimator.predict(features)
148
 
149
def calcula_TFIDF_Linear(titulo, descricao, nome_projeto):
    """Estimate story points with the pre-trained TF-IDF linear model.

    The concatenated title and description are transformed by the stored
    vectorizer; the resulting matrix is densified into a DataFrame whose
    columns are the vectorizer's feature names and passed to the model.

    Args:
        titulo: Issue title text.
        descricao: Issue description text.
        nome_projeto: Project name chosen in the UI. Not used here; the model
            file loaded is always the "aloy" one.

    Returns:
        The result of ``model.predict`` for the single input row.
    """
    repo_id = "giseldo/model_effort_tawos"
    estimator = load(hf_hub_download(repo_id, "model_tawos_aloy_tfidf_linear.joblib", force_download=False))

    text = titulo + descricao
    frame = pd.DataFrame(data={"context_": [text]}, columns=["context_"])

    tfidf = load(hf_hub_download(repo_id, "vectorizer_tfidf.joblib", force_download=False))
    sparse_features = tfidf.transform(frame["context_"])

    # Named columns so the model sees the vectorizer's feature names.
    features = pd.DataFrame(
        data=sparse_features.toarray(),
        columns=tfidf.get_feature_names_out(),
    )
    return estimator.predict(features)
160
 
161
def calcula(titulo, descricao, nome_projeto):
    """Run every estimator on the same input and return their predictions.

    Args:
        titulo: Issue title text.
        descricao: Issue description text.
        nome_projeto: Project name chosen in the UI; forwarded to each estimator.

    Returns:
        A 6-tuple of predictions in the order MbR, Median, NEOSP-SVR,
        NEOSP-Linear, TFIDF-SVR, TFIDF-Linear.
    """
    # Order matters: results are matched positionally to the UI outputs.
    estimators = (
        calcula_MbR,
        calcula_Median,
        calcula_NEOSP_SVR,
        calcula_NEOSP_Linear,
        calcula_TFIDF_SVR,
        calcula_TFIDF_Linear,
    )
    return tuple(estimate(titulo, descricao, nome_projeto) for estimate in estimators)
163
 
164
  demo = gr.Interface(fn=calcula,
165
  inputs=[gr.Textbox(placeholder="Título", label="Título"),
166
+ gr.Textbox(lines=10, placeholder="Descrição", label="Descrição"),
167
+ gr.Dropdown(["ALOY", "XD", "TIMOB"], label="Projeto", value= "ALOY", interactive= False)], # info="Nome do projeto!"
168
  outputs=[gr.Textbox(label="Story Points Estimado Média"),
169
  gr.Textbox(label="Story Points Estimado Mediana"),
170
  gr.Textbox(label="Story Points Estimado NEOSP-SVR"),
 
172
  gr.Textbox(label="Story Points Estimado TFIDF-SVR"),
173
  gr.Textbox(label="Story Points Estimado TFIDF-Linear")],
174
  title="Agile Task Story Point Estimator",
175
+ #interpretation="default",
176
  examples=[[titulo1, descricao1], [titulo2, descricao2], [titulo3, descricao3], [titulo4, descricao4], [titulo5, descricao5]])
177
 
178
  demo.launch()
flagged/log.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Título,Descrição,Projeto,Story Points Estimado Média,Story Points Estimado Mediana,Story Points Estimado NEOSP-SVR,Story Points Estimado NEOSP-Linear,Story Points Estimado TFIDF-SVR,Story Points Estimado TFIDF-Linear,flag,username,timestamp
2
+ "CLONE - Studio Dashboard: ""default"" and ""Default Project"" does not give clear information about Alloy and Project unless description is read.","Steps To Reproduce: 1. On dashboard on studio 3.0, navigate to Develop tab. 2. Notice ""default"" and ""Default Project"" & ""two-tabbed"" and ""Tabbed Application"" names. Actual: User does not get clear information from names that one is alloy project and another one is Titanium project unless he reads the description below. Expected: Naming convention or icon corresponding must suggest type",ALOY,[3.70539419],[3.],[3.15496615],[3.84946518],[3.75963544],[6.99007244],,,2023-08-31 23:02:15.199700
requirements.txt CHANGED
@@ -2,3 +2,4 @@ scikit-learn
2
  gradio
3
  textblob
4
  textstat
 
 
2
  gradio
3
  textblob
4
  textstat
5
+ nltk
util.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from string import punctuation
3
+
4
def escape_tags_and_content(text):
    """Remove ``{code}`` and ``{noformat}`` blocks together with their content.

    These blocks hold text that is not natural language (e.g. code snippets).
    Opening tags may carry attributes (``{code:java}``); content may span
    several lines.

    Args:
        text: Input string.

    Returns:
        ``text`` with every matching block replaced by the empty string.
    """
    no_text_tags = ("code", "noformat")
    for tag in no_text_tags:
        # Raw string: "\{" in a normal literal is an invalid escape sequence.
        # DOTALL lets the block content include newlines.
        block_pattern = re.compile(r"\{%s(.*?)\}(.*?)\{%s\}" % (tag, tag), re.DOTALL)
        text = block_pattern.sub("", text)

    return text
13
+
14
def escape_tags(text):
    """Remove ``{color}``, ``{quote}``, ``{anchor}`` and ``{panel}`` tags, keeping their content.

    Both opening tags (optionally with attributes, e.g. ``{color:red}``) and
    closing tags are deleted; the text between them is left untouched.

    Args:
        text: Input string.

    Returns:
        ``text`` without the listed markup tags.
    """
    escape_tag_names = ("color", "quote", "anchor", "panel")
    for tag in escape_tag_names:
        # Raw string: "\{" in a normal literal is an invalid escape sequence.
        text = re.sub(r"\{%s(.*?)\}" % tag, "", text)

    return text
22
+
23
def escape_strings(text):
    """Replace escaped control sequences, quotes and heading markers with a space.

    The targets are *literal* two-character sequences (a backslash followed by
    ``r``, ``n``, ``t``, ``f`` or ``v``), the double-quote character, a double
    backslash, and the heading prefixes ``h1. `` .. ``h6. ``. Real newline or
    tab characters are not touched.

    Args:
        text: Input string.

    Returns:
        ``text`` with each target replaced by a single space.
    """
    # Replacements run one after another in this order, so a space produced
    # by an earlier step can complete a later target.
    targets = (
        r"\r", r"\n", r"\t", r"\f", r"\v",
        '"',
        r"\\",
        "h1. ", "h2. ", "h3. ", "h4. ", "h5. ", "h6. ",
    )
    cleaned = text
    for target in targets:
        cleaned = cleaned.replace(target, " ")
    return cleaned
31
+
32
def escape_links(text):
    """Remove bracketed markup links and bare http(s) URLs.

    A bracketed link is ``[target]`` or ``[alias|target]`` where the target
    starts with ``#``, ``^``, ``http://``, ``https://``, ``mailto:``,
    ``file:`` or ``~``. Bare URLs run up to the next whitespace character.

    Args:
        text: Input string.

    Returns:
        ``text`` with the links replaced by the empty string.
    """
    # "malto\:" is the historical misspelling; it is kept so nothing that used
    # to match stops matching, and the correct "mailto\:" is added next to it.
    link_starters = (
        r"\#", r"\^", r"http\:\/\/", r"https\:\/\/",
        r"malto\:", r"mailto\:", r"file\:", r"\~",
    )
    for link_starter in link_starters:
        # Raw string: "\[" / "\]" in a normal literal are invalid escapes.
        text = re.sub(r"\[(.*?\|)?%s(.*?)\]" % link_starter, "", text)

    # Bare URLs are stripped only after all bracketed links are gone; doing it
    # earlier would eat the URL (and the closing bracket) out of
    # "[alias|http://...]" and leave "[alias|" behind.
    text = re.sub(r"\bhttps?://\S+", "", text)

    return text
41
+
42
def escape_hex_character_codes(text):
    """Delete literal ``\\xNN``-style character codes from the text.

    Matches a backslash, the letter ``x`` and the next two word characters
    (not only hex digits) and removes them.

    Args:
        text: Input string.

    Returns:
        ``text`` without those sequences.
    """
    hex_code = re.compile(r"\\x\w\w")
    return hex_code.sub("", text)
46
+
47
def escape_punctuation_boundaries(text):
    """Strip punctuation from both ends of every whitespace-separated word.

    Periods are special-cased: leading periods are removed, trailing periods
    are kept (and shield any punctuation in front of them). A word made up
    only of punctuation becomes an empty string, so the result can contain
    consecutive spaces.

    Args:
        text: Input string.

    Returns:
        The cleaned words joined by single spaces.
    """
    # Everything in string.punctuation except the period.
    strippable = punctuation.replace(".", "")

    cleaned_words = []
    for word in text.split():
        trimmed = word.strip(strippable)
        cleaned_words.append(trimmed.lstrip("."))
    return " ".join(cleaned_words)
52
+
53
def escape_odd_spaces(text):
    """Collapse every run of whitespace into one space and trim both ends.

    Args:
        text: Input string.

    Returns:
        ``text`` with normalized spacing; an all-whitespace or empty input
        yields the empty string.
    """
    # split() with no argument cuts on whitespace runs and drops empty
    # leading/trailing pieces, so re-joining with " " normalizes the spacing.
    return " ".join(text.split())