niladridutta committed on
Commit
ad20e1a
·
verified ·
1 Parent(s): f067601

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -12
app.py CHANGED
@@ -1,9 +1,7 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
- from sklearn.preprocessing import MinMaxScaler
5
  from sklearn.feature_extraction.text import CountVectorizer
6
- from sklearn.tree import DecisionTreeClassifier
7
  from sklearn.ensemble import RandomForestClassifier
8
  import pickle
9
 
@@ -27,27 +25,55 @@ if uploaded_file is not None:
27
  st.subheader('Data Preview')
28
  st.dataframe(data.head(20))
29
  # Feature selection
30
- features = ['a_ApplicableMarkets', 'Number of Unique Finished Packs in BOM',
31
  'Total Number of Finished Packs in BOM', 'GMN', 'Product_Description',
32
  'EA_GTIN', 'CV_GTIN', 'Product_Hierarchy_Code',
33
  'Product_Hierarchy_Units_Per_Pack_L8', 'myPSR_Pack_Variant',
34
  'Stibo_Pack_variant']
35
  df = data[features]
36
- df = df.replace(r'^\s*$', np.nan, regex=True)
37
- df[['EA_GTIN', 'CV_GTIN']] = df[['EA_GTIN', 'CV_GTIN']].apply(pd.to_numeric)
 
38
  df = df.replace(np.nan, 0, regex=True)
39
- text_cols = ['a_ApplicableMarkets', 'Product_Hierarchy_Units_Per_Pack_L8', 'myPSR_Pack_Variant', 'Stibo_Pack_variant']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  df = pd.get_dummies(data=df, columns=text_cols)
 
41
  v = CountVectorizer()
42
  text_vectors = v.fit_transform(df['Product_Description'])
43
  text_vectors_df = pd.DataFrame(text_vectors.toarray(), columns=v.get_feature_names_out())
44
  df_ext = pd.concat([df, text_vectors_df],axis=1)
45
- df = df_ext.drop("Product_Description",axis=1)
46
- scaler = MinMaxScaler()
47
- df[['GMN','EA_GTIN', 'CV_GTIN']] = scaler.fit_transform(df[['GMN','EA_GTIN', 'CV_GTIN']])
48
- loaded_model = pickle.load(open('rfc_model.pkl', 'rb'))
49
  result = loaded_model.predict(df)
50
- data['Product_Type']=result
51
 
52
  out=data.to_csv().encode('utf-8')
53
- st.download_button(label='DOWNLOAD RESULT',data=out, file_name='Product_Output.csv',mime='csv')
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
4
  from sklearn.feature_extraction.text import CountVectorizer
 
5
  from sklearn.ensemble import RandomForestClassifier
6
  import pickle
7
 
 
25
  st.subheader('Data Preview')
26
  st.dataframe(data.head(20))
27
  # Feature selection
28
+ features = ['a_ApplicableMarkets', 'Manufacturing Plant','Number of Unique Finished Packs in BOM',
29
  'Total Number of Finished Packs in BOM', 'GMN', 'Product_Description',
30
  'EA_GTIN', 'CV_GTIN', 'Product_Hierarchy_Code',
31
  'Product_Hierarchy_Units_Per_Pack_L8', 'myPSR_Pack_Variant',
32
  'Stibo_Pack_variant']
33
  df = data[features]
34
+
35
+ df['Manufacturing Plant'] = df['Manufacturing Plant'].replace({'Commerical Plant':'Commercial Plant'})
36
+ df['Stibo_Pack_variant'] = df['Stibo_Pack_variant'].replace({'Migration Open Stock':'Migration OpenStock'})
37
  df = df.replace(np.nan, 0, regex=True)
38
+ df['EA_GTIN'] = df['EA_GTIN'].astype(str)
39
+ df['CV_GTIN'] = df['CV_GTIN'].astype(str)
40
+
41
def GTIN_validity(x):
    """Return True when the last digit of *x* is a correct mod-10 check digit.

    The value is converted with ``str`` first, so both strings and numbers
    are accepted.  Columns that went through a float dtype render as
    ``"1234567890128.0"``; that trailing ``".0"`` is removed, but only when
    it is actually present, so integer-like strings keep all their digits.

    The check digit is computed the way GTIN/EAN/UPC codes define it: walking
    the payload (every digit except the last) from right to left, digits are
    weighted 3, 1, 3, 1, ...; the check digit is whatever must be added to
    the weighted sum to reach the next multiple of ten.

    No length check is performed: any digit string whose final digit matches
    the computed check digit is reported as valid.

    Args:
        x: GTIN candidate, typically a string such as ``"4006381333931.0"``.

    Returns:
        bool: ``True`` if the check digit matches.  ``False`` for the missing
        value placeholders ``"0"`` / ``"0.0"``, for empty input, and for any
        value that is not made up solely of ASCII digits.
    """
    gtin = str(x).strip()

    # Values that passed through a float column carry a ".0" suffix; strip it
    # only when present so that plain digit strings are not truncated.
    if gtin.endswith(".0"):
        gtin = gtin[:-2]

    # "0" is what a missing GTIN looks like after NaN was replaced by 0.
    # Anything that is not purely ASCII digits cannot be a GTIN either
    # (isascii() guards against Unicode digits that int() would reject).
    if gtin == "0" or not (gtin.isascii() and gtin.isdigit()):
        return False

    *payload, check_digit = map(int, gtin)

    # Weight 3 goes to the payload digit closest to the check digit, then
    # weights alternate 1, 3, 1, ... towards the left.
    weighted_sum = sum(
        digit * 3 if position % 2 == 0 else digit
        for position, digit in enumerate(reversed(payload))
    )

    # Distance from the weighted sum up to the next multiple of ten
    # (0 when the sum is already a multiple of ten).
    expected_check_digit = (10 - weighted_sum % 10) % 10
    return check_digit == expected_check_digit
62
+
63
+ df['EA_GTIN_valid']=df.apply(lambda x: GTIN_validity(x['EA_GTIN']),axis=1)
64
+ df['CV_GTIN_valid']=df.apply(lambda x: GTIN_validity(x['CV_GTIN']),axis=1)
65
+ text_cols = ['a_ApplicableMarkets', 'Manufacturing Plant', 'Product_Hierarchy_Units_Per_Pack_L8', 'myPSR_Pack_Variant', 'Stibo_Pack_variant']
66
  df = pd.get_dummies(data=df, columns=text_cols)
67
+
68
  v = CountVectorizer()
69
  text_vectors = v.fit_transform(df['Product_Description'])
70
  text_vectors_df = pd.DataFrame(text_vectors.toarray(), columns=v.get_feature_names_out())
71
  df_ext = pd.concat([df, text_vectors_df],axis=1)
72
+ df = df_ext.drop(['GMN','Product_Description','EA_GTIN','CV_GTIN'],axis=1)
73
+
74
+ loaded_model = pickle.load(open('rfc_model_grid.pkl', 'rb'))
 
75
  result = loaded_model.predict(df)
76
+ data['Product_Type_Predicted']=result
77
 
78
  out=data.to_csv().encode('utf-8')
79
+ st.download_button(label='DOWNLOAD RESULT',data=out, file_name='Product_Type_Output.csv',mime='csv')