Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
-
from sklearn.preprocessing import MinMaxScaler
|
5 |
from sklearn.feature_extraction.text import CountVectorizer
|
6 |
-
from sklearn.tree import DecisionTreeClassifier
|
7 |
from sklearn.ensemble import RandomForestClassifier
|
8 |
import pickle
|
9 |
|
@@ -27,27 +25,55 @@ if uploaded_file is not None:
|
|
27 |
st.subheader('Data Preview')
|
28 |
st.dataframe(data.head(20))
|
29 |
# Feature selection
|
30 |
-
features = ['a_ApplicableMarkets', 'Number of Unique Finished Packs in BOM',
|
31 |
'Total Number of Finished Packs in BOM', 'GMN', 'Product_Description',
|
32 |
'EA_GTIN', 'CV_GTIN', 'Product_Hierarchy_Code',
|
33 |
'Product_Hierarchy_Units_Per_Pack_L8', 'myPSR_Pack_Variant',
|
34 |
'Stibo_Pack_variant']
|
35 |
df = data[features]
|
36 |
-
|
37 |
-
df[
|
|
|
38 |
df = df.replace(np.nan, 0, regex=True)
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
df = pd.get_dummies(data=df, columns=text_cols)
|
|
|
41 |
v = CountVectorizer()
|
42 |
text_vectors = v.fit_transform(df['Product_Description'])
|
43 |
text_vectors_df = pd.DataFrame(text_vectors.toarray(), columns=v.get_feature_names_out())
|
44 |
df_ext = pd.concat([df, text_vectors_df],axis=1)
|
45 |
-
df = df_ext.drop(
|
46 |
-
|
47 |
-
|
48 |
-
loaded_model = pickle.load(open('rfc_model.pkl', 'rb'))
|
49 |
result = loaded_model.predict(df)
|
50 |
-
data['
|
51 |
|
52 |
out=data.to_csv().encode('utf-8')
|
53 |
-
st.download_button(label='DOWNLOAD RESULT',data=out, file_name='
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
|
|
4 |
from sklearn.feature_extraction.text import CountVectorizer
|
|
|
5 |
from sklearn.ensemble import RandomForestClassifier
|
6 |
import pickle
|
7 |
|
|
|
25 |
st.subheader('Data Preview')
|
26 |
st.dataframe(data.head(20))
|
27 |
# Feature selection
|
28 |
+
features = ['a_ApplicableMarkets', 'Manufacturing Plant','Number of Unique Finished Packs in BOM',
|
29 |
'Total Number of Finished Packs in BOM', 'GMN', 'Product_Description',
|
30 |
'EA_GTIN', 'CV_GTIN', 'Product_Hierarchy_Code',
|
31 |
'Product_Hierarchy_Units_Per_Pack_L8', 'myPSR_Pack_Variant',
|
32 |
'Stibo_Pack_variant']
|
33 |
df = data[features]
|
34 |
+
|
35 |
+
df['Manufacturing Plant'] = df['Manufacturing Plant'].replace({'Commerical Plant':'Commercial Plant'})
|
36 |
+
df['Stibo_Pack_variant'] = df['Stibo_Pack_variant'].replace({'Migration Open Stock':'Migration OpenStock'})
|
37 |
df = df.replace(np.nan, 0, regex=True)
|
38 |
+
df['EA_GTIN'] = df['EA_GTIN'].astype(str)
|
39 |
+
df['CV_GTIN'] = df['CV_GTIN'].astype(str)
|
40 |
+
|
41 |
+
def GTIN_validity(x):
    """Return True when ``x`` ends in a correct GS1-style check digit.

    The value is converted with ``str()`` first, so strings, ints and floats
    are all accepted. Callers in this file pass column values that were
    NaN-filled with 0 and then cast with ``astype(str)``, which means a GTIN
    may arrive either as plain digits ("4006381333931") or in float form
    ("4006381333931.0"), and a missing value as "0" or "0.0".

    Parameters:
        x: The candidate GTIN (any type; it is stringified).

    Returns:
        bool: True if the last digit equals the check digit computed from
        the preceding digits; False for the missing-value placeholder,
        empty input, or input containing non-digit characters.
    """
    gtin = str(x).strip()

    # Only strip a float-style suffix when it is really there. Unconditionally
    # dropping two characters would discard the check digit of plain-digit
    # input and turn "0" into an empty string.
    if gtin.endswith(".0"):
        gtin = gtin[:-2]

    # "" / "0" represent a missing GTIN (NaN was replaced by 0 upstream).
    if gtin in ("", "0"):
        return False

    # Reject anything int() could not parse digit-by-digit instead of raising
    # and aborting the whole DataFrame.apply call.
    if not (gtin.isascii() and gtin.isdigit()):
        return False

    digits = [int(ch) for ch in gtin]
    check_digit = digits[-1]
    payload = digits[:-1]

    # Walking right-to-left from the digit next to the check digit, weights
    # alternate 3, 1, 3, 1, ...
    weighted_sum = sum(
        digit * 3 if position % 2 == 0 else digit
        for position, digit in enumerate(reversed(payload))
    )

    # Distance from the weighted sum up to the next multiple of ten (0 if the
    # sum is already a multiple of ten).
    expected_check_digit = (10 - weighted_sum % 10) % 10
    return check_digit == expected_check_digit
|
62 |
+
|
63 |
+
df['EA_GTIN_valid']=df.apply(lambda x: GTIN_validity(x['EA_GTIN']),axis=1)
|
64 |
+
df['CV_GTIN_valid']=df.apply(lambda x: GTIN_validity(x['CV_GTIN']),axis=1)
|
65 |
+
text_cols = ['a_ApplicableMarkets', 'Manufacturing Plant', 'Product_Hierarchy_Units_Per_Pack_L8', 'myPSR_Pack_Variant', 'Stibo_Pack_variant']
|
66 |
df = pd.get_dummies(data=df, columns=text_cols)
|
67 |
+
|
68 |
v = CountVectorizer()
|
69 |
text_vectors = v.fit_transform(df['Product_Description'])
|
70 |
text_vectors_df = pd.DataFrame(text_vectors.toarray(), columns=v.get_feature_names_out())
|
71 |
df_ext = pd.concat([df, text_vectors_df],axis=1)
|
72 |
+
df = df_ext.drop(['GMN','Product_Description','EA_GTIN','CV_GTIN'],axis=1)
|
73 |
+
|
74 |
+
loaded_model = pickle.load(open('rfc_model_grid.pkl', 'rb'))
|
|
|
75 |
result = loaded_model.predict(df)
|
76 |
+
data['Product_Type_Predicted']=result
|
77 |
|
78 |
out=data.to_csv().encode('utf-8')
|
79 |
+
st.download_button(label='DOWNLOAD RESULT',data=out, file_name='Product_Type_Output.csv',mime='csv')
|