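# app.py for the "Agile Task Story Point Estimator" Gradio Space.
# Given an issue title and description, it estimates story points with several
# models trained on the TAWOS data and hosted in the giseldo/model_effort_tawos repo.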
import gradio as gr
import pandas as pd
from textblob import TextBlob
import textstat
from huggingface_hub import hf_hub_download
from joblib import load
from util import escape_tags_and_content, escape_tags, escape_strings, escape_links, escape_hex_character_codes, escape_punctuation_boundaries, escape_odd_spaces
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
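
# Example issues (title + description), all paired with the ALOY project;
# they are wired into the Gradio interface below as ready-to-run examples.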
titulo1 = """CLONE - Studio Dashboard: "default" and "Default Project" does not give clear information about Alloy and Project unless description is read."""
descricao1 = """Steps To Reproduce: 1. On dashboard on studio 3.0, navigate to Develop tab. 2. Notice "default" and "Default Project" & "two-tabbed" and "Tabbed Application" names. Actual: User does not get clear information from names that one is alloy project and another one is Titanium project unless he reads the description below. Expected: Naming convention or icon corresponding must suggest type"""
titulo2 = """Ti.UI.Picker has no collection binding"""
descricao2 = """h3. original discussion http://developer.appcelerator.com/question/145992/databinding-on-picker h3. problem Collection binding is not implemented for Ti.UI.Picker as it is for Ti.UI.TableView and other generic Titaniums views (View, Window, ScrollView, etc...). h3. solution Support collection binding on Ti.UI.Picker just as it is on TableView. It will need special handling as the Ti.UI.Picker requires custom parsing for columns and rows. Something like this should be how it would work for devs: {code:xml} <Alloy> <Collection src="book" /> <Window class="container"> <Picker dataCollection="book"> <PickerRow title="{title}" /> </Picker> </Window> </Alloy> {code}"""
titulo3 = """Enable more complex notation in binding"""
descricao3 = """Allow developers to use syntax like the following in collection/model bindings: {code:xml} <Alloy> <Model src=""someModel""/> <Window title=""{someModel.title} {someModel.subtitle}""/> </Alloy> {code} Basically, instead of assuming the whole property needs to be wrapped in \{\}, allow developers to put as many of them in the attribute as they want."""
titulo4 = """Orphan file cleanup deletes builtins and widget assets"""
descricao4 = """During the compile process Alloy will attempt to remove files from the Resources directory that are no longer present anywhere in the ""app"" folder. Alloy searches a number of locations in the ""app"" folder to see if the file is an orphan or not. False negatives should be avoided as they will leave unused files in the project. False positives on the other hand are not really worrisome since those resources will be recreated on the next compile anyway. With that in mind, there are currently false positives for orphan file deletion for builtins and widgets. Builtins and widgets will be pulled in fresh each time. Again, this will not negatively impact a developer's build process or app in any way, it would just be more true to the logic if these files were left alone during the orphan cleanup phase."""
titulo5 = """Resolve suboptimal compression from uglify-js v2 update"""
descricao5 = """The v2 update of uglify-js in Alloy, specifically version 2.2.5, has some suboptimal compressions, which are causing the optimizer.js test spec to fail in certain cases. Specifically the issues are around booleans and cascading of variables in assignments. These issues have been logged with the Uglifyjs2 project in the following links: * https://github.com/mishoo/UglifyJS2/issues/137 * https://github.com/mishoo/UglifyJS2/issues/138 When these issues are resolved and distributed in an npm release, we need to revisit these compressions and testing to ensure that the fixes are in place, and that new uglify-js version has no regressions that impact alloy."""
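
# Baseline estimators (MbR / Median): download a pre-trained model from the Hub
# and predict directly from the concatenated title + description.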
def calcula_MbR(titulo, descricao, nome_projeto):
    context = titulo + descricao
    d = {"context_": [context]}
    df = pd.DataFrame(data=d, columns=["context_"])
    model = load(hf_hub_download("giseldo/model_effort_tawos", "models/tawos/{}/model_tawos_{}_mbr.joblib".format(nome_projeto, nome_projeto), force_download=False))
    story_points_MbR = model.predict(df["context_"])
    return story_points_MbR

def calcula_Median(titulo, descricao, nome_projeto):
    context = titulo + descricao
    d = {"context_": [context]}
    df = pd.DataFrame(data=d, columns=["context_"])
    model = load(hf_hub_download("giseldo/model_effort_tawos", "models/tawos/{}/model_tawos_{}_median.joblib".format(nome_projeto, nome_projeto), force_download=False))
    story_points_Median = model.predict(df["context_"])
    return story_points_Median
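
# NEOSP estimators: clean the text, drop English stop words, then predict from
# handcrafted readability (textstat) and sentiment (TextBlob) features.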
def calcula_NEOSP_SVR(titulo, descricao, nome_projeto):
    model = load(hf_hub_download("giseldo/model_effort_tawos", "models/tawos/{}/model_tawos_{}_neosp_svr.joblib".format(nome_projeto, nome_projeto), force_download=False))
    context = titulo + descricao
    d = {"context": [context]}
    df = pd.DataFrame(data=d, columns=["context"])
    # pre-processing
    df["context"] = df["context"].apply(lambda x: escape_tags_and_content(x))
    df["context"] = df["context"].apply(lambda x: escape_tags(x))
    df["context"] = df["context"].apply(lambda x: escape_strings(x))
    df["context"] = df["context"].apply(lambda x: escape_links(x))
    df["context"] = df["context"].apply(lambda x: escape_hex_character_codes(x))
    df["context"] = df["context"].apply(lambda x: escape_punctuation_boundaries(x))
    df["context"] = df["context"].apply(lambda x: escape_odd_spaces(x))
    # remove stop words
    stop = stopwords.words('english')
    df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in stop]))
    # rename the column, otherwise the NEOSP feature extraction breaks
    df = df.rename(columns={"context": "context_"})
    # readability features
    df["gunning_fog_"] = df['context_'].apply(textstat.gunning_fog)
    df["flesch_reading_ease_"] = df['context_'].apply(textstat.flesch_reading_ease)
    df["flesch_kincaid_grade_"] = df['context_'].apply(textstat.flesch_kincaid_grade)
    df["smog_index_"] = df['context_'].apply(textstat.smog_index)
    df["coleman_liau_index_"] = df['context_'].apply(textstat.coleman_liau_index)
    df["automated_readability_index_"] = df['context_'].apply(textstat.automated_readability_index)
    df["dale_chall_readability_score_"] = df['context_'].apply(textstat.dale_chall_readability_score)
    df["difficult_words_"] = df['context_'].apply(textstat.difficult_words)
    df["linsear_write_formula_"] = df['context_'].apply(textstat.linsear_write_formula)
    # sentiment features
    df["polarity_"] = df["context_"].apply(lambda x: TextBlob(x).sentiment.polarity)
    df["subjectivity_"] = df["context_"].apply(lambda x: TextBlob(x).sentiment.subjectivity)
    X = df[["gunning_fog_", "flesch_reading_ease_", "flesch_kincaid_grade_", "smog_index_", "coleman_liau_index_",
            "automated_readability_index_", "dale_chall_readability_score_", "difficult_words_", "linsear_write_formula_",
            "polarity_", "subjectivity_"]]
    story_points = model.predict(X)
    return story_points

def calcula_NEOSP_Linear(titulo, descricao, nome_projeto):
    model = load(hf_hub_download("giseldo/model_effort_tawos", "models/tawos/{}/model_tawos_{}_neosp_linear.joblib".format(nome_projeto, nome_projeto), force_download=False))
    context = titulo + descricao
    d = {"context": [context]}
    df = pd.DataFrame(data=d, columns=["context"])
    # pre-processing
    df["context"] = df["context"].apply(lambda x: escape_tags_and_content(x))
    df["context"] = df["context"].apply(lambda x: escape_tags(x))
    df["context"] = df["context"].apply(lambda x: escape_strings(x))
    df["context"] = df["context"].apply(lambda x: escape_links(x))
    df["context"] = df["context"].apply(lambda x: escape_hex_character_codes(x))
    df["context"] = df["context"].apply(lambda x: escape_punctuation_boundaries(x))
    df["context"] = df["context"].apply(lambda x: escape_odd_spaces(x))
    # remove stop words
    stop = stopwords.words('english')
    df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in stop]))
    # rename the column, otherwise the NEOSP feature extraction breaks
    df = df.rename(columns={"context": "context_"})
    # readability features
    df["gunning_fog_"] = df['context_'].apply(textstat.gunning_fog)
    df["flesch_reading_ease_"] = df['context_'].apply(textstat.flesch_reading_ease)
    df["flesch_kincaid_grade_"] = df['context_'].apply(textstat.flesch_kincaid_grade)
    df["smog_index_"] = df['context_'].apply(textstat.smog_index)
    df["coleman_liau_index_"] = df['context_'].apply(textstat.coleman_liau_index)
    df["automated_readability_index_"] = df['context_'].apply(textstat.automated_readability_index)
    df["dale_chall_readability_score_"] = df['context_'].apply(textstat.dale_chall_readability_score)
    df["difficult_words_"] = df['context_'].apply(textstat.difficult_words)
    df["linsear_write_formula_"] = df['context_'].apply(textstat.linsear_write_formula)
    # sentiment features
    df["polarity_"] = df["context_"].apply(lambda x: TextBlob(x).sentiment.polarity)
    df["subjectivity_"] = df["context_"].apply(lambda x: TextBlob(x).sentiment.subjectivity)
    X = df[["gunning_fog_", "flesch_reading_ease_", "flesch_kincaid_grade_", "smog_index_", "coleman_liau_index_",
            "automated_readability_index_", "dale_chall_readability_score_", "difficult_words_", "linsear_write_formula_",
            "polarity_", "subjectivity_"]]
    story_points = model.predict(X)
    return story_points
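
# TF-IDF estimators: vectorize the raw text with the project-specific TF-IDF
# vectorizer from the Hub and predict from the resulting term matrix.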
def calcula_TFIDF_SVR(titulo, descricao, nome_projeto):
    model = load(hf_hub_download("giseldo/model_effort_tawos", "models/tawos/{}/model_tawos_{}_tfidf_svr.joblib".format(nome_projeto, nome_projeto), force_download=False))
    context = titulo + descricao
    d = {"context_": [context]}
    df = pd.DataFrame(data=d, columns=["context_"])
    vectorizer = load(hf_hub_download("giseldo/model_effort_tawos", "models/tawos/{}/vectorizer_tawos_{}_tfidf.joblib".format(nome_projeto, nome_projeto), force_download=False))
    X_vec = vectorizer.transform(df["context_"])
    df_vec = pd.DataFrame(data=X_vec.toarray(), columns=vectorizer.get_feature_names_out())
    X = df_vec
    story_points = model.predict(X)
    return story_points

def calcula_TFIDF_Linear(titulo, descricao, nome_projeto):
    model = load(hf_hub_download("giseldo/model_effort_tawos", "models/tawos/{}/model_tawos_{}_tfidf_linear.joblib".format(nome_projeto, nome_projeto), force_download=False))
    context = titulo + descricao
    d = {"context_": [context]}
    df = pd.DataFrame(data=d, columns=["context_"])
    vectorizer = load(hf_hub_download("giseldo/model_effort_tawos", "models/tawos/{}/vectorizer_tawos_{}_tfidf.joblib".format(nome_projeto, nome_projeto), force_download=False))
    X_vec = vectorizer.transform(df["context_"])
    df_vec = pd.DataFrame(data=X_vec.toarray(), columns=vectorizer.get_feature_names_out())
    X = df_vec
    story_points = model.predict(X)
    return story_points
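
# Runs every estimator on the same issue; the Gradio interface below maps each
# returned value to one output textbox.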
def calcula(titulo, descricao, nome_projeto):
    return (calcula_MbR(titulo, descricao, nome_projeto),
            calcula_Median(titulo, descricao, nome_projeto),
            calcula_NEOSP_SVR(titulo, descricao, nome_projeto),
            calcula_NEOSP_Linear(titulo, descricao, nome_projeto),
            calcula_TFIDF_SVR(titulo, descricao, nome_projeto),
            calcula_TFIDF_Linear(titulo, descricao, nome_projeto))

demo = gr.Interface(fn=calcula,
                    inputs=[gr.Textbox(placeholder="Title", label="Title"),
                            gr.Textbox(lines=10, placeholder="Description", label="Description"),
                            gr.Dropdown(["ALOY", "APSTUD", "CLI", "TIMOB", "XD"], label="Project", value="ALOY")],  # info="Project name!"
                    outputs=[gr.Textbox(label="Estimated Story Points - Mean (MbR)"),
                             gr.Textbox(label="Estimated Story Points - Median"),
                             gr.Textbox(label="Estimated Story Points - NEOSP-SVR"),
                             gr.Textbox(label="Estimated Story Points - NEOSP-Linear"),
                             gr.Textbox(label="Estimated Story Points - TFIDF-SVR"),
                             gr.Textbox(label="Estimated Story Points - TFIDF-Linear")],
                    title="Agile Task Story Point Estimator",
                    # interpretation="default",
                    examples=[[titulo1, descricao1, "ALOY"], [titulo2, descricao2, "ALOY"], [titulo3, descricao3, "ALOY"], [titulo4, descricao4, "ALOY"], [titulo5, descricao5, "ALOY"]])

demo.launch()