File size: 2,614 Bytes
b1c9d77
 
 
 
 
fca9b2e
 
 
 
b1c9d77
fca9b2e
b1c9d77
 
 
 
 
 
 
fca9b2e
b1c9d77
fca9b2e
 
 
 
 
b1c9d77
fca9b2e
 
445da3d
 
fca9b2e
 
 
 
445da3d
 
b1c9d77
fca9b2e
 
b1c9d77
 
fca9b2e
445da3d
b1c9d77
 
fca9b2e
b1c9d77
 
 
 
 
 
ce39427
 
b1c9d77
 
ce39427
 
b1c9d77
4d8e8da
 
ce39427
b1c9d77
 
4d8e8da
 
 
 
 
 
445da3d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gradio as gr
import joblib
import pandas as pd
import numpy as np

# Locations of the serialized trained model and its encoder.
model_path = 'tiebreak_model_v1b.pkl'
encoder_path = 'ordinal_encoder_tiebreak_model_v1b.pkl'

# Deserialize both artifacts once, at import time (model first, then encoder),
# so every prediction request reuses the same in-memory objects.
model, encoder = map(joblib.load, (model_path, encoder_path))

# Score every row with the tiebreak model and keep the rows that clear the threshold
def predict_tiebreak(df, threshold=0.9420000000000005):
    """Score rows with the loaded model and return those flagged as entries.

    Args:
        df: DataFrame holding numeric 'Odds 1' and 'Odds 2' columns. The
            caller's object is not modified; all work happens on a copy.
        threshold: Minimum value of the model's second ``predict_proba``
            column for a row to be kept.

    Returns:
        A new DataFrame with the input columns plus 'Mean_Log_Odds',
        'Sum_Prob', 'Ratio_Log_Odds', 'Mean_Log_Odds_bin', 'Sum_Prob_bin',
        'Probability' and 'entrada', restricted to rows where 'entrada' is
        True. A zero-row input yields a zero-row result with those columns.

    Raises:
        KeyError: If 'Odds 1' or 'Odds 2' is missing from ``df``.
    """
    missing = [col for col in ('Odds 1', 'Odds 2') if col not in df.columns]
    if missing:
        raise KeyError(f"Input is missing required column(s): {missing}")

    # Work on a copy so the caller's DataFrame is left untouched.
    df = df.copy()

    odds_1 = pd.to_numeric(df['Odds 1'])
    odds_2 = pd.to_numeric(df['Odds 2'])

    # Element-wise min/max over whole columns: NaN odds propagate to NaN
    # features regardless of which column holds them, and zero-row input
    # needs no special casing here.
    odds_min = np.minimum(odds_1, odds_2)
    odds_max = np.maximum(odds_1, odds_2)

    df['Mean_Log_Odds'] = (np.log(odds_max) + np.log(odds_min)) / 2
    df['Sum_Prob'] = (1 / odds_min) + (1 / odds_max)
    # NOTE(review): despite its name, this feature is the plain odds ratio
    # (min / max), not a ratio of log-odds. It is kept as-is because the
    # model presumably was trained on it -- confirm against the training code.
    df['Ratio_Log_Odds'] = odds_min / odds_max

    bin_columns = ['Mean_Log_Odds', 'Sum_Prob']
    bin_transformed_columns = [f"{col}_bin" for col in bin_columns]

    if df.empty:
        # Nothing to score: skip the binning/encoder/model calls and return
        # an empty frame that still exposes the usual output columns.
        for col in (*bin_transformed_columns, 'Probability'):
            df[col] = pd.Series(index=df.index, dtype=float)
        df['entrada'] = pd.Series(index=df.index, dtype=bool)
        return df

    # Bin each continuous feature with its own fitted binning object
    # (presumably optbinning OptimalBinning instances -- verify).
    for column, binned_column in zip(bin_columns, bin_transformed_columns):
        optb = joblib.load(f'{column}_binning_tiebreak_model_v1b.pkl')
        df[binned_column] = optb.transform(df[column], metric="bins")

    # Encode the binned columns together, in the order listed above.
    df[bin_transformed_columns] = encoder.transform(df[bin_transformed_columns])

    # Feature order passed to the model.
    features = df[['Mean_Log_Odds_bin', 'Sum_Prob_bin', 'Ratio_Log_Odds']]
    df['Probability'] = model.predict_proba(features)[:, 1]

    # Flag rows at or above the threshold and return only those.
    df['entrada'] = df['Probability'] >= threshold
    return df[df['entrada']]

# Read an uploaded Excel file, score it, and export the selected rows
def predict_from_excel(file):
    """Run the tiebreak prediction on an Excel file and export the result.

    Args:
        file: Excel source handed to ``pandas.read_excel`` (a path or a
            file-like object).

    Returns:
        A tuple ``(df_predictions, output_file)``: the DataFrame returned by
        ``predict_tiebreak`` and the path of the Excel file it was saved to.

    Raises:
        ValueError: If ``file`` is None (nothing was uploaded).
    """
    # Imported locally so the function carries its own stdlib dependencies.
    import os
    import tempfile

    if file is None:
        raise ValueError("No Excel file was provided.")

    df = pd.read_excel(file)
    df_predictions = predict_tiebreak(df)

    # Write into a fresh directory per call so simultaneous requests cannot
    # overwrite each other's export, while the file keeps the same name
    # ("predictions.xlsx").
    output_dir = tempfile.mkdtemp(prefix="predictions_")
    output_file = os.path.join(output_dir, "predictions.xlsx")
    df_predictions.to_excel(output_file, index=False)

    return df_predictions, output_file

# Gradio components: one file upload in; a results table and a downloadable
# Excel file out.
inputs = gr.File(label="Upload Excel File")
outputs = [
    gr.DataFrame(label="Tabela de Previsões"),
    gr.File(label="Download Predictions Excel"),
]

# Build the interface around predict_from_excel and start serving it.
gr.Interface(
    title="Previsão de Tiebreaks",
    description=(
        "Faça o upload de um arquivo Excel contendo dados no formato final_df para prever a probabilidade de menos de 1.5 tiebreaks e verificar se deve entrar na aposta."
    ),
    fn=predict_from_excel,
    inputs=inputs,
    outputs=outputs,
).launch()