hardik27 committed on
Commit
9865c91
·
verified ·
1 Parent(s): 9d22702

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -27
app.py CHANGED
@@ -6,18 +6,43 @@ import streamlit as st
6
  import pandas as pd
7
  import os
8
  from google.oauth2.credentials import Credentials
 
 
9
  from googleapiclient.discovery import build
10
  from googleapiclient.http import MediaIoBaseDownload,MediaFileUpload
11
 
12
  # Load credentials from environment variables
13
- credentials_dict = {
14
- "token": os.environ.get("token"),
15
- "refresh_token": os.environ.get("refresh_token"),
16
- "token_uri": os.environ.get("token_uri"),
17
- "client_id": os.environ.get("client_id"),
18
- "client_secret": os.environ.get("client_secret"),
19
- "scopes": [os.environ.get("scopes")]
20
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  MAPPING_FILENAME = "Data Mapping with ItemCode.xlsx"
23
 
@@ -75,8 +100,11 @@ def convert_pdf_to_excel(pdf_file):
75
  whole_data.columns = ["Part No.","Part Color Code","Part Name",'Date Qty']
76
  extracted_file = "Data Extracted.xlsx"
77
  data_for_mapping = "Data Mapping.xlsx"
 
78
  extracted_data_for_mapping = whole_data.drop('Date Qty',axis=1)
79
  extracted_data_for_mapping = extracted_data_for_mapping.drop_duplicates(subset=["Part No.","Part Color Code","Part Name"])
 
 
80
  whole_data.to_excel(extracted_file, index=False)
81
  extracted_data_for_mapping.to_excel(data_for_mapping, index=False)
82
  return extracted_file,data_for_mapping
@@ -85,7 +113,7 @@ def map_data_to_template(excel_file, mapping_file):
85
  # Load Excel file and mapping file
86
  extracted_data = pd.read_excel(excel_file)
87
  mapping_data = pd.read_excel(mapping_file)
88
- mapping_data.to_excel(MAPPING_FILENAME)
89
  save_mapping_file_to_drive()
90
  mapping_data = mapping_data.rename(columns = {'Customer Part no as per pdf':'Part No.'})
91
 
@@ -102,7 +130,9 @@ def map_data_to_template(excel_file, mapping_file):
102
  return mapped_data
103
 
104
  def save_mapping_file_to_drive():
105
- creds = Credentials.from_authorized_user_info(credentials_dict)
 
 
106
  # Authenticate with Google Drive API
107
  service = build('drive', 'v3', credentials=creds)
108
  folder_id = "1HBRUZePST0D0buyU9MxeYg2vQyEL4wLF"
@@ -112,7 +142,7 @@ def save_mapping_file_to_drive():
112
  q=f"'{folder_id}' in parents and mimeType='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'",
113
  fields="files(id, name)").execute()
114
  files = results.get('files', [])
115
- files = [i for i in files if i.get('name')=='Data Mapping with ItemCode.xlsx']
116
 
117
  if not files:
118
  print('No Excel Mapping files found in the folder.')
@@ -130,7 +160,9 @@ def save_mapping_file_to_drive():
130
  service.files().create(body=file_metadata, media_body=media, fields='id').execute()
131
 
132
  def pull_mapping_file_from_drive():
133
- creds = Credentials.from_authorized_user_info(credentials_dict)
 
 
134
  # Authenticate with Google Drive API
135
  service = build('drive', 'v3', credentials=creds)
136
 
@@ -138,7 +170,7 @@ def pull_mapping_file_from_drive():
138
  q="mimeType='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'",
139
  fields="files(id, name)").execute()
140
  files = results.get('files', [])
141
- files = [i for i in files if i.get('name')=='Data Mapping with ItemCode.xlsx']
142
  if files:
143
  file_id = files[0]['id']
144
  file_name = files[0]['name']
@@ -153,8 +185,9 @@ def pull_mapping_file_from_drive():
153
 
154
  fh.close()
155
  return 1
156
- return 0
157
 
 
 
158
 
159
  def main():
160
  st.title("PDF to Excel Converter")
@@ -168,6 +201,14 @@ def main():
168
  # Convert PDF to Excel
169
  extracted_file,data_for_mapping = convert_pdf_to_excel(uploaded_file)
170
 
 
 
 
 
 
 
 
 
171
  # Download link for the Excel file
172
  # st.markdown(f"Download the extracted data in Excel file [here](/{excel_file})")
173
 
@@ -183,23 +224,26 @@ def main():
183
  else:
184
  st.error("Error: Converted Excel file not found")
185
 
186
- file_present = pull_mapping_file_from_drive()
187
- if not os.path.exists("Data Mapping with ItemCode.xlsx"):
188
  st.markdown("## Upload the Data Master file with Item Code mapping")
189
  mapping_uploaded_file = st.file_uploader("Upload the Data Master file with Item Code mapping", type=["xlsx","ods"])
190
  else:
191
- mapping_data = pd.read_excel("Data Mapping with ItemCode.xlsx")
192
- mapping_data = mapping_data.rename(columns = {'Customer Part no as per pdf':'Part No.'})
193
- data_for_mapping = "Data Mapping.xlsx"
194
- extracted_data_for_mapping = pd.read_excel(data_for_mapping)
195
- extracted_data_for_mapping = extracted_data_for_mapping[~extracted_data_for_mapping['Part No.'].isin(mapping_data['Part No.'])]
196
- unmapped_part_no = extracted_data_for_mapping['Part No.'].nunique()
197
- if unmapped_part_no>0:
198
- st.markdown("#### There are {} Part No. with No ItemCode present. Upload a new file after mapping them".format(unmapped_part_no))
199
- mapping_uploaded_file = st.file_uploader("Upload the Data Master file with Item Code mapping", type=["xlsx","ods"])
200
- else:
 
 
 
 
201
  st.markdown("#### Using the Mapping file available in Google Drive")
202
- mapping_uploaded_file = "Data Mapping with ItemCode.xlsx"
203
 
204
  if mapping_uploaded_file is not None:
205
  # st.write("Uploaded Mapping Excel file:", mapping_uploaded_file.name)
 
6
  import pandas as pd
7
  import os
8
  from google.oauth2.credentials import Credentials
9
+ from google.auth.transport.requests import Request
10
+ from google_auth_oauthlib.flow import InstalledAppFlow
11
  from googleapiclient.discovery import build
12
  from googleapiclient.http import MediaIoBaseDownload,MediaFileUpload
13
 
14
  # Load the OAuth client configuration from environment variables
15
# Every entry below except ``redirect_uris`` is read from the environment
# variable of the same name; a variable that is not set yields ``None``.
config = {
    'installed': {
        **{
            key: os.environ.get(key)
            for key in (
                'client_id',
                'project_id',
                'auth_uri',
                'token_uri',
                'auth_provider_x509_cert_url',
                'client_secret',
            )
        },
        'redirect_uris': ['http://localhost'],
    },
}

# OAuth scopes requested when the installed-app flow is started.
SCOPES = ['https://www.googleapis.com/auth/drive']
24
+
25
def authenticate():
    """Return Google OAuth credentials, refreshing or re-authorising when needed.

    Steps, in order:
      1. Load cached credentials from ``token.json`` if that file exists.
      2. If the cached credentials are still valid, return them unchanged.
      3. If they are expired and carry a refresh token, try to refresh them.
      4. Otherwise (no usable cache, or the refresh failed) run the
         installed-app flow built from the module-level ``config`` and
         ``SCOPES`` via a local server on an OS-chosen port.

    Credentials obtained in step 3 or 4 are written back to ``token.json``.

    Returns:
        The credentials object that was loaded, refreshed, or newly authorised.
    """
    # Local import: google.auth is already a dependency of this file (see the
    # ``google.auth.transport.requests`` import), only this name is new.
    from google.auth.exceptions import RefreshError

    token_path = 'token.json'
    creds = None

    # Check if token file exists
    if os.path.exists(token_path):
        try:
            creds = Credentials.from_authorized_user_file(token_path)
        except ValueError:
            # A truncated or malformed token file is treated like a missing
            # one, so the user is sent through the login flow instead of the
            # app crashing on start-up.
            creds = None

    if creds and creds.valid:
        return creds

    # If no valid credentials are available, refresh them or ask the user to log in.
    refreshed = False
    if creds and creds.expired and creds.refresh_token:
        try:
            creds.refresh(Request())
            refreshed = True
        except RefreshError:
            # The refresh token was rejected (e.g. revoked); fall through to
            # the interactive flow below rather than propagating the error.
            refreshed = False

    if not refreshed:
        flow = InstalledAppFlow.from_client_config(config, SCOPES)
        creds = flow.run_local_server(port=0)

    # Save the credentials for next run
    with open(token_path, 'w') as token:
        token.write(creds.to_json())

    return creds
46
 
47
  MAPPING_FILENAME = "Data Mapping with ItemCode.xlsx"
48
 
 
100
  whole_data.columns = ["Part No.","Part Color Code","Part Name",'Date Qty']
101
  extracted_file = "Data Extracted.xlsx"
102
  data_for_mapping = "Data Mapping.xlsx"
103
+
104
  extracted_data_for_mapping = whole_data.drop('Date Qty',axis=1)
105
  extracted_data_for_mapping = extracted_data_for_mapping.drop_duplicates(subset=["Part No.","Part Color Code","Part Name"])
106
+ extracted_data_for_mapping.columns = ['Customer Part no as per pdf','Customer Part name as per pdf','Customer Part color as per pdf']
107
+
108
  whole_data.to_excel(extracted_file, index=False)
109
  extracted_data_for_mapping.to_excel(data_for_mapping, index=False)
110
  return extracted_file,data_for_mapping
 
113
  # Load Excel file and mapping file
114
  extracted_data = pd.read_excel(excel_file)
115
  mapping_data = pd.read_excel(mapping_file)
116
+ mapping_data.to_excel(MAPPING_FILENAME,index=False)
117
  save_mapping_file_to_drive()
118
  mapping_data = mapping_data.rename(columns = {'Customer Part no as per pdf':'Part No.'})
119
 
 
130
  return mapped_data
131
 
132
  def save_mapping_file_to_drive():
133
+ # creds = Credentials.from_authorized_user_info(credentials_dict)
134
+ creds = authenticate()
135
+ service = build('drive', 'v3', credentials=creds)
136
  # Authenticate with Google Drive API
137
  service = build('drive', 'v3', credentials=creds)
138
  folder_id = "1HBRUZePST0D0buyU9MxeYg2vQyEL4wLF"
 
142
  q=f"'{folder_id}' in parents and mimeType='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'",
143
  fields="files(id, name)").execute()
144
  files = results.get('files', [])
145
+ files = [i for i in files if i.get('name')==MAPPING_FILENAME]
146
 
147
  if not files:
148
  print('No Excel Mapping files found in the folder.')
 
160
  service.files().create(body=file_metadata, media_body=media, fields='id').execute()
161
 
162
  def pull_mapping_file_from_drive():
163
+ creds = authenticate()
164
+ service = build('drive', 'v3', credentials=creds)
165
+ # creds = Credentials.from_authorized_user_info(credentials_dict)
166
  # Authenticate with Google Drive API
167
  service = build('drive', 'v3', credentials=creds)
168
 
 
170
  q="mimeType='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'",
171
  fields="files(id, name)").execute()
172
  files = results.get('files', [])
173
+ files = [i for i in files if i.get('name')==MAPPING_FILENAME]
174
  if files:
175
  file_id = files[0]['id']
176
  file_name = files[0]['name']
 
185
 
186
  fh.close()
187
  return 1
 
188
 
189
+ print('No Excel files found.')
190
+ return 0
191
 
192
  def main():
193
  st.title("PDF to Excel Converter")
 
201
  # Convert PDF to Excel
202
  extracted_file,data_for_mapping = convert_pdf_to_excel(uploaded_file)
203
 
204
+ file_present = pull_mapping_file_from_drive()
205
+
206
+ if file_present:
207
+ mapping_data_from_drive = pd.read_excel(MAPPING_FILENAME)
208
+ extracted_data_for_mapping = pd.read_excel(data_for_mapping)
209
+ extracted_data_for_mapping = extracted_data_for_mapping.merge(mapping_data_from_drive, on = ['Customer Part no as per pdf','Customer Part name as per pdf','Customer Part color as per pdf'], how='outer')
210
+ extracted_data_for_mapping.to_excel(data_for_mapping,index=False)
211
+
212
  # Download link for the Excel file
213
  # st.markdown(f"Download the extracted data in Excel file [here](/{excel_file})")
214
 
 
224
  else:
225
  st.error("Error: Converted Excel file not found")
226
 
227
+ if not file_present:
 
228
  st.markdown("## Upload the Data Master file with Item Code mapping")
229
  mapping_uploaded_file = st.file_uploader("Upload the Data Master file with Item Code mapping", type=["xlsx","ods"])
230
  else:
231
+ try:
232
+ mapping_data = pd.read_excel(MAPPING_FILENAME)
233
+ # mapping_data = mapping_data.rename(columns = {'Customer Part no as per pdf':'Part No.'})
234
+ data_for_mapping = "Data Mapping.xlsx"
235
+ extracted_data_for_mapping = pd.read_excel(data_for_mapping)
236
+ extracted_data_for_mapping = extracted_data_for_mapping[~extracted_data_for_mapping['Customer Part no as per pdf'].isin(mapping_data['Customer Part no as per pdf'])]
237
+ unmapped_part_no = extracted_data_for_mapping['Customer Part no as per pdf'].nunique()
238
+ if unmapped_part_no>0:
239
+ st.markdown("#### There are {} Part No. with No ItemCode present. Upload a new file after mapping them".format(unmapped_part_no))
240
+ mapping_uploaded_file = st.file_uploader("Upload the Data Master file with Item Code mapping", type=["xlsx","ods"])
241
+ else:
242
+ st.markdown("#### Using the Mapping file available in Google Drive")
243
+ mapping_uploaded_file = MAPPING_FILENAME
244
+ except:
245
  st.markdown("#### Using the Mapping file available in Google Drive")
246
+ mapping_uploaded_file = MAPPING_FILENAME
247
 
248
  if mapping_uploaded_file is not None:
249
  # st.write("Uploaded Mapping Excel file:", mapping_uploaded_file.name)