DanCip committed on
Commit
bb5127a
·
1 Parent(s): 79ab211

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -41
app.py CHANGED
@@ -3,65 +3,190 @@ import hopsworks
3
  import joblib
4
  import pandas as pd
5
 
6
- features = ['fixed_acidity',
7
- 'volatile_acidity',
8
- 'citric_acid',
9
- 'residual_sugar',
10
- 'chlorides',
11
- 'free_sulfur_dioxide',
12
- 'total_sulfur_dioxide',
13
- 'density',
14
- 'pH',
15
- 'sulphates',
16
- 'alcohol',
17
- 'is_white']
18
- labels = ["Low", "Medium", "High"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  project = hopsworks.login()
21
  fs = project.get_feature_store()
22
 
23
  mr = project.get_model_registry()
24
- model = mr.get_model("wine_model", version=1)
25
  model_dir = model.download()
26
- model = joblib.load(model_dir + "/wine_model.pkl")
27
  print("Model downloaded")
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- def wine(fixed_acidity, volatile_acidity, citric_acid, residual_sugar, chlorides, free_sulfur_dioxide,
31
- total_sulfur_dioxide, density, pH, sulphates, alcohol, white) -> str:
32
- print("Calling function")
33
- df = pd.DataFrame([[fixed_acidity, volatile_acidity, citric_acid, residual_sugar, chlorides, free_sulfur_dioxide,
34
- total_sulfur_dioxide, density, pH, sulphates, alcohol, white]], columns=features)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  print("Predicting")
36
  print(df)
37
- # 'res' is a list of predictions returned as the label.
38
  res = model.predict(df)
39
- # We add '[0]' to the result of the transformed 'res', because 'res' is a list, and we only want
40
- # the first element.
41
- # print("Res: {0}").format(res)
42
- print(res)
43
 
44
- return f"{labels[res[0]]} quality"
 
45
 
 
 
 
 
 
 
 
 
46
 
47
  demo = gr.Interface(
48
- fn=wine,
49
- title="Wine Quality Predictive Analytics",
50
- description="Experiment with wine characteristics to get the wine quality (low, medium, high)",
51
  allow_flagging="never",
52
  inputs=[
53
- gr.components.Number(label='fixed acidity'),
54
- gr.components.Number(label='volatile acidity'),
55
- gr.components.Number(label='citric acid'),
56
- gr.components.Number(label='residual sugar'),
57
- gr.components.Number(label='chlorides'),
58
- gr.components.Number(label='free sulfur dioxide'),
59
- gr.components.Number(label='total sulfur dioxide'),
60
- gr.components.Number(label='density'),
61
- gr.components.Number(label='pH'),
62
- gr.components.Number(label='sulphates'),
63
- gr.components.Number(label='alcohol'),
64
- gr.components.Checkbox(label='is white'),
65
  ],
66
  outputs=gr.Text())
67
 
 
3
  import joblib
4
  import pandas as pd
5
 
6
# Names of the model's input columns, grouped by what they encode.
# NOTE(review): presumably this is the column order used at training time --
# confirm against the training pipeline.
features = (
    # Plain numeric / ordinal inputs.
    ['work_year', 'experience_level', 'company_size']
    # One flag per supported salary currency.
    + ['eur', 'gbp', 'usd']
    # Coarse role keywords looked for inside the job title.
    + ['engineer', 'scientist', 'research', 'analyst']
    # One flag per exact job title.
    + [
        'analytics_engineer', 'applied_scientist', 'bi_developer',
        'business_intelligence_analyst', 'business_intelligence_engineer',
        'data_analyst', 'data_architect', 'data_engineer', 'data_manager',
        'data_science_consultant', 'data_science_manager', 'data_scientist',
        'ml_engineer', 'machine_learning_engineer',
        'machine_learning_scientist', 'research_analyst',
        'research_engineer', 'research_scientist',
    ]
    # Country-level economic indicators.
    + ['gdp', 'cpi']
)

# Human-readable salary bands, indexed by the class id the model predicts.
# NOTE(review): these look like pandas interval labels -- confirm they match
# the binning used for the training target.
labels = [
    '(16454.999, 122000.0]',
    '(122000.0, 170000.0]',
    '(170000.0, 329700.0]',
]
39
 
40
# Connect to Hopsworks and grab the handles used by the rest of the app.
project = hopsworks.login()
fs = project.get_feature_store()
mr = project.get_model_registry()

# Download version 4 of the registered salary model and load the pickled
# estimator from the downloaded directory.
model_dir = mr.get_model("salary_model", version=4).download()
model = joblib.load(f"{model_dir}/model.pkl")
print("Model downloaded")
48
 
49
+ import requests
50
+
51
def get_gdp_by_country_code(country_code, year=2023, index='FP.CPI.TOTL'):
    """Fetch one World Bank indicator value for a country and year.

    Despite its name the function is indicator-agnostic: ``index`` selects
    the World Bank indicator code to query (callers in this file pass
    ``'NY.GDP.MKTP.CD'`` for GDP and ``'FP.CPI.TOTL'`` for CPI).

    Args:
        country_code: Country code understood by the World Bank API
            (the UI asks for a 3-letter code).
        year: Year to query. Coerced to ``int`` because the UI number field
            may deliver a float such as ``2023.0``.
        index: World Bank indicator code.

    Returns:
        The indicator value, or ``None`` when the request fails, the
        response is not the expected payload, or no value is published for
        that country/year.
    """
    from urllib.parse import quote

    # The country code comes from a free-text field, so escape it before
    # placing it in the URL path.
    code = quote(str(country_code).strip(), safe='')
    api_url = f'https://api.worldbank.org/v2/country/{code}/indicator/{index}'
    # The API's year filter is called ``date``; the previous ``data`` key was
    # ignored by the server, so the requested year was never applied.
    query = {'date': int(year), 'format': 'json'}

    try:
        # A timeout keeps a stalled API call from hanging the UI request.
        response = requests.get(api_url, params=query, timeout=10)
    except requests.RequestException as exc:
        print(f"Error: Unable to fetch data. {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: Unable to fetch data. Status code: {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError:
        print("Error: Unable to fetch data. Response was not valid JSON")
        return None

    # A successful payload is ``[metadata, [records...]]``. Error payloads
    # carry a single element, and the records entry can be empty or null.
    if not isinstance(data, list) or len(data) < 2 or not data[1]:
        return None

    return data[1][0].get('value')
72
+
73
def salary(work_year,
           experience_level,
           company_size,
           currency,
           job_title,
           country) -> str:
    """Predict the salary band for one job description.

    Encodes the UI inputs into a single-row DataFrame whose columns follow
    the module-level ``features`` list, runs the loaded ``model`` on it and
    returns the matching entry of ``labels``.

    Args:
        work_year: Year of employment; also used for the GDP/CPI lookup.
        experience_level: One of ``'EN'``, ``'MI'``, ``'SE'``, ``'EX'``.
        company_size: One of ``'S'``, ``'M'``, ``'L'``.
        currency: Salary currency; ``'EUR'``, ``'GBP'`` and ``'USD'`` are
            encoded, anything else leaves all currency flags False.
        job_title: Human-readable job title, e.g. ``'Data Engineer'``.
        country: Country code passed to the World Bank lookup.

    Returns:
        The predicted salary band followed by ``" $"``, or an explanatory
        message when no GDP/CPI value is available for the country/year.

    Raises:
        KeyError: If ``experience_level`` or ``company_size`` is not one of
            the values listed above.
    """
    experience_level_map = {'EN': 0, 'MI': 1, 'SE': 2, 'EX': 3}
    company_size_dic = {'S': 0, 'M': 1, 'L': 2}
    currencies = ('eur', 'gbp', 'usd')
    roles = ('engineer', 'scientist', 'research', 'analyst')
    jobs = (
        'analytics_engineer', 'applied_scientist', 'bi_developer',
        'business_intelligence_analyst', 'business_intelligence_engineer',
        'data_analyst', 'data_architect', 'data_engineer', 'data_manager',
        'data_science_consultant', 'data_science_manager', 'data_scientist',
        'ml_engineer', 'machine_learning_engineer',
        'machine_learning_scientist', 'research_analyst',
        'research_engineer', 'research_scientist',
    )

    # The UI number field may deliver a float (2023.0); the lookup and the
    # model both want a plain year.
    year = int(work_year)
    title = job_title.lower()
    title_key = title.replace(' ', '_')
    currency_key = currency.lower()

    gdp = get_gdp_by_country_code(country, year, 'NY.GDP.MKTP.CD')
    cpi = get_gdp_by_country_code(country, year, 'FP.CPI.TOTL')
    if gdp is None or cpi is None:
        # Feeding None into the model would fail or silently skew the
        # prediction, so tell the user what is missing instead.
        return f"No GDP/CPI data available for country '{country}' in {year}"

    row = {
        # work_year was previously never added although it is a feature.
        'work_year': year,
        'experience_level': experience_level_map[experience_level],
        'company_size': company_size_dic[company_size],
        'gdp': gdp,
        'cpi': cpi,
    }
    row.update({code: code == currency_key for code in currencies})
    # Compare each role keyword (not the whole list) against the title.
    row.update({name: name in title for name in roles})
    row.update({name: name == title_key for name in jobs})

    # Wrap the dict in a list: a dict of scalars alone cannot build a frame.
    # Selecting by ``features`` puts the columns in the declared order.
    df = pd.DataFrame([row])[features]
    print("Predicting")
    print(df)

    res = model.predict(df)

    answer = f"{labels[int(res[0])]} $"
    print(answer)
    return answer
168
 
169
# Job titles offered in the UI; salary() lower-cases them and replaces spaces
# with underscores to match its per-title feature flags.
job_title_options = [
    'Analytics Engineer', 'Applied Scientist', 'BI Developer',
    'Business Intelligence Analyst', 'Business Intelligence Engineer',
    'Data Analyst', 'Data Architect', 'Data Engineer', 'Data Manager',
    'Data Science Consultant', 'Data Science Manager', 'Data Scientist',
    'ML Engineer', 'Machine Learning Engineer', 'Machine Learning Scientist',
    'Research Analyst', 'Research Engineer', 'Research Scientist',
]

# Inputs are passed to salary() positionally, so their order must match its
# parameters: work_year, experience_level, company_size, currency, job_title,
# country. Gradio has no Select/TextInput components and no ``bind`` keyword;
# Dropdown(choices=...) and Textbox are the equivalents.
demo = gr.Interface(
    fn=salary,
    title="Salary prediction",
    description="Prediction of the salary in USD",
    allow_flagging="never",
    inputs=[
        gr.components.Number(label='Work Year'),
        gr.components.Dropdown(choices=['EN', 'MI', 'SE', 'EX'], label='Experience Level'),
        gr.components.Dropdown(choices=['S', 'M', 'L'], label='Company Size'),
        gr.components.Dropdown(choices=['EUR', 'GBP', 'USD'], label='Currency'),
        gr.components.Dropdown(choices=job_title_options, label='Job Title'),
        gr.components.Textbox(label='Country (3 letter code)'),
    ],
    outputs=gr.Text())
192