File size: 6,335 Bytes
62cec1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fcd9be8
 
 
 
62cec1c
fcd9be8
 
62cec1c
 
 
 
fcd9be8
62cec1c
 
fcd9be8
 
 
 
 
 
 
 
62cec1c
fcd9be8
 
 
62cec1c
fcd9be8
 
62cec1c
 
 
 
 
 
fcd9be8
 
 
62cec1c
 
fcd9be8
62cec1c
fcd9be8
 
 
 
62cec1c
 
 
fcd9be8
62cec1c
 
 
fcd9be8
 
 
 
 
62cec1c
 
 
 
 
fcd9be8
62cec1c
 
fcd9be8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62cec1c
 
fcd9be8
 
 
62cec1c
 
 
fcd9be8
 
 
62cec1c
 
fcd9be8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62cec1c
fcd9be8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62cec1c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
### ----------------------------- ###
###           libraries           ###
### ----------------------------- ###

import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics


### ------------------------------ ###
###       data transformation      ###
### ------------------------------ ###

# load dataset
data = pd.read_csv('data.csv')

# remove timestamp from dataset (first column)
data = data.iloc[:, 1:]

# Drop incomplete responses up front. A blank answer is read as NaN, which
# would break the steps below: sorted() cannot order a float NaN against
# string answers, and the classifier rejects NaN in its inputs.
data = data.dropna()

# numerically encoded version of `data`, filled in column by column below
transformed_data = pd.DataFrame()

# keep track of which columns are categorical and what
# those columns' value mappings are:
#   feature columns -> {original value: integer code}
#   target column   -> {integer code: original value}
cat_value_dicts = {}
final_colname = data.columns[-1]  # club recommendation

# for each column...
for colname in data.columns:
    # numeric columns need no encoding; copy them through unchanged
    if pd.api.types.is_numeric_dtype(data[colname]):
        transformed_data[colname] = data[colname].copy()
        continue

    # Create mapping for categorical variables; codes follow the sorted order
    # of the distinct values so the encoding is the same on every run
    unique_vals = data[colname].unique()
    val_dict = {val: idx for idx, val in enumerate(sorted(unique_vals))}

    # Encode with the forward (value -> code) mapping *before* it is possibly
    # reversed below. Mapping the target column with the reversed dict would
    # look up labels among integer keys and turn every row into NaN.
    transformed_data[colname] = data[colname].map(val_dict)

    # If it's the target column, store the reverse mapping so a predicted
    # code can be translated back into its label
    if colname == final_colname:
        val_dict = {idx: val for val, idx in val_dict.items()}

    cat_value_dicts[colname] = val_dict


### -------------------------------- ###
###           model training         ###
### -------------------------------- ###

# separate the encoded table into the inputs (every column but the last)
# and the value to predict (the last column)
X, y = transformed_data.iloc[:, :-1], transformed_data.iloc[:, -1]

# hold out a quarter of the rows for evaluation; the fixed seed keeps the
# split the same from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# fit the classifier on the training rows (fit() hands back the estimator),
# then predict the held-out rows for the accuracy figure
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = model.predict(X_test)


### -------------------------------- ###
###        model evaluation         ###
### -------------------------------- ###

def get_feat(coefs=None, feature_names=None):
    """Return the name of the feature with the largest coefficient magnitude.

    Args:
        coefs: Coefficients to rank, either a 1-D sequence (one value per
            feature) or a 2-D array-like with one row per class. Defaults to
            ``model.coef_``.
        feature_names: Names indexed in the same order as the coefficient
            columns. Defaults to ``data.columns``.

    Returns:
        The entry of ``feature_names`` whose mean absolute coefficient across
        all rows of ``coefs`` is largest. On a tie the first such feature wins.
    """
    if coefs is None:
        coefs = model.coef_
    if feature_names is None:
        feature_names = data.columns

    # A multiclass model has one coefficient row per class. Looking only at
    # row 0 would rank features by their effect on the first class alone, so
    # average the magnitudes over every row. With a single row this is the
    # same as taking the absolute values of that row.
    magnitudes = np.abs(np.atleast_2d(np.asarray(coefs, dtype=float))).mean(axis=0)
    return feature_names[int(np.argmax(magnitudes))]
    
# headline figures shown in the interface: the name of the most influential
# feature, and held-out accuracy as a percentage string with one decimal
most_imp_feat = get_feat()
accuracy_pct = round(metrics.accuracy_score(y_test, y_pred) * 100, 1)
acc = f"{accuracy_pct!s}%"


### ------------------------------- ###
###        predictor function      ###
### ------------------------------- ###

def predict(*args):
    """Turn one set of quiz answers into a recommendation.

    Args:
        *args: One answer per feature column, in the order of
            ``data.columns[:-1]``. An answer for a column listed in
            ``cat_value_dicts`` must be one of that column's known values;
            any other answer must be convertible to ``float``.

    Returns:
        The predicted label looked up in ``cat_value_dicts[final_colname]``
        (or the predicted value as text when the target column has no label
        mapping). If an answer is missing or invalid, or the model fails,
        a human-readable message is returned instead; nothing is raised.
    """
    feature_columns = data.columns[:-1]
    features = []

    # transform categorical input using our mappings
    for colname, arg in zip(feature_columns, args):
        if arg is None:
            return "Please fill in all fields"

        mapping = cat_value_dicts.get(colname)
        if mapping is not None:
            try:
                features.append(mapping[arg])
            except (KeyError, TypeError):
                # KeyError: unknown choice; TypeError: unhashable input
                return f"Invalid value for {colname}"
        else:
            try:
                features.append(float(arg))
            except (TypeError, ValueError):
                return f"Invalid numeric value for {colname}"

    # predict using the model
    try:
        # One-row frame carrying the training column names, so the estimator
        # receives named features just as it did when it was fitted.
        new_input = pd.DataFrame([features], columns=feature_columns)
        result = model.predict(new_input)

        labels = cat_value_dicts.get(final_colname)
        if labels is None:
            # the target column was numeric, so there is no code -> label map
            return str(result[0])
        return labels[result[0]]
    except Exception as e:
        # UI boundary: show the failure in the output box rather than crash
        return f"Error making prediction: {str(e)}"


### ------------------------------- ###
###        interface creation      ###
### ------------------------------- ###

# top-level container for the whole page
block = gr.Blocks()

# everything created inside this context becomes part of the page, laid out
# in the order the components are created
with block:
    # page title and tagline
    gr.Markdown("# Club Recommendation System")
    gr.Markdown("Take the quiz to get a personalized club recommendation using AI.")
    
    with gr.Row():
        # first column: the quiz, its result and the model statistics
        # (a panel-styled Column; this used to be a Box)
        with gr.Column(variant="panel"):
            # input components, collected in feature-column order because
            # predict() pairs its positional arguments with data.columns[:-1]
            inputls = []
            
            # Create input components for each feature
            for colname in data.columns[:-1]:  # Exclude the target column
                if colname in cat_value_dicts:
                    # categorical feature: offer exactly the values that
                    # have an encoding, so predict() can look the answer up
                    choices = list(cat_value_dicts[colname].keys())
                    inputls.append(gr.Dropdown(
                        choices=choices,
                        label=colname,
                        type="value"
                    ))
                else:
                    # numeric feature: free-form number entry
                    inputls.append(gr.Number(label=colname))
                # spacer between questions
                gr.Markdown("<br />")
            
            submit = gr.Button("Click to see your personalized result!", variant="primary")
            gr.Markdown("<br />")
            output = gr.Textbox(
                label="Your recommendation:",
                placeholder="your recommendation will appear here"
            )
            
            # on click, pass every input to predict() and show what it
            # returns (a label or an error message) in the textbox
            submit.click(fn=predict, inputs=inputls, outputs=output)
            gr.Markdown("<br />")
            
            # two side-by-side panels with the figures computed after training
            with gr.Row():
                with gr.Column(variant="panel"):  # accuracy panel (was a Box)
                    gr.Markdown(f"### Model Accuracy\n{acc}")
                with gr.Column(variant="panel"):  # top-feature panel (was a Box)
                    gr.Markdown(f"### Most Important Feature\n{most_imp_feat}")
            
            gr.Markdown("<br />")
            
            # disclaimer about how to read the two figures above (was a Box)
            with gr.Column(variant="panel"):
                gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well 
                           the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy 
                           and most important feature can be helpful for understanding how the model works, but 
                           <em>should not be considered absolute facts about the real world</em>.''')
        
        # second column: static "about" text (was a Box)
        with gr.Column(variant="panel"):
            gr.Markdown("""
            # About the Club Recommendation System
            
            This system uses machine learning to suggest clubs based on your preferences and personality.
            Fill out the questionnaire on the left to get your personalized recommendation.
            
            The system takes into account factors like:
            - Your social preferences
            - Activity preferences
            - Personal strengths
            - Time commitment
            
            Remember that this is just a suggestion - you should always choose the club that interests you most!
            """)

# start serving the interface; runs whenever this module is executed
block.launch()