Spaces:

hardik27
/

dataextraction

Running

App Files Community

hardik27 committed on Jul 8, 2024

Commit

338af71

verified ·

1 Parent(s): 8e68d7a

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -11

app.py CHANGED Viewed

@@ -11,7 +11,12 @@ from google_auth_oauthlib.flow import InstalledAppFlow
 from googleapiclient.discovery import build
 from googleapiclient.http import MediaIoBaseDownload,MediaFileUpload
 from google.oauth2 import service_account
 # Load credentials from environment variables
 SERVICE_ACCOUNT_INFO = {
   "type": "service_account",
@@ -145,10 +150,10 @@ def map_data_to_template(excel_file, mapping_file):
     extracted_data = extracted_data[~extracted_data['SchDate'].isna()]
     mapped_data = extracted_data.merge(mapping_data, on =['Part No.'],how='outer')[['Item Code','SchDate','Qty','Inventory Category']]
     mapped_data = mapped_data[~mapped_data["SchDate"].isna()]
-    mapped_data = mapped_data[~mapped_data["SchDate"].str.strip().isin(["",None])]
     mapped_data['SOType'] = "R"
-    return mapped_data
 def save_mapping_file_to_drive():
     # creds = Credentials.from_authorized_user_info(credentials_dict)
@@ -213,11 +218,51 @@ def pull_mapping_file_from_drive():
     return 0
 def main():
-    st.title("PDF to Excel Converter")
     # File uploader
-    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
     if uploaded_file is not None:
         st.write("Uploaded PDF file:", uploaded_file.name)
@@ -228,10 +273,20 @@ def main():
         file_present = pull_mapping_file_from_drive()
         if file_present:
-            mapping_data_from_drive = pd.read_excel(MAPPING_FILENAME)
-            extracted_data_for_mapping = pd.read_excel(data_for_mapping)
-            extracted_data_for_mapping = extracted_data_for_mapping[['Customer Part no as per pdf','Customer Part name as per pdf','Customer Part color  as per pdf',"Inventory Category as per pdf"]].merge(mapping_data_from_drive, on = ['Customer Part no as per pdf','Customer Part name as per pdf','Customer Part color  as per pdf'], how='outer')
-            extracted_data_for_mapping.to_excel(data_for_mapping,index=False)
         # Download link for the Excel file
         # st.markdown(f"Download the extracted data in Excel file [here](/{excel_file})")
@@ -247,9 +302,10 @@ def main():
             )
         else:
             st.error("Error: Converted Excel file not found")
         if not file_present:
-            st.markdown("## Upload the Data Master file with Item Code mapping")
             mapping_uploaded_file = st.file_uploader("Upload the Data Master file with Item Code mapping", type=["xlsx","ods"])
         else:
             mapping_data = pd.read_excel(MAPPING_FILENAME)
@@ -275,6 +331,7 @@ def main():
             mapped_data = map_data_to_template(extracted_file, mapping_uploaded_file)
             # Provide a link to download the final Excel file after mapping
             st.markdown("### Final Excel File After Mapping")
             final_excel_file = 'Final Data.xlsx'

 from googleapiclient.discovery import build
 from googleapiclient.http import MediaIoBaseDownload,MediaFileUpload
 from google.oauth2 import service_account
+import base64
+def get_image_as_base64(image_path):
+    with open(image_path, "rb") as img_file:
+        return base64.b64encode(img_file.read()).decode()
 # Load credentials from environment variables
 SERVICE_ACCOUNT_INFO = {
   "type": "service_account",
     extracted_data = extracted_data[~extracted_data['SchDate'].isna()]
     mapped_data = extracted_data.merge(mapping_data, on =['Part No.'],how='outer')[['Item Code','SchDate','Qty','Inventory Category']]
     mapped_data = mapped_data[~mapped_data["SchDate"].isna()]
+    mapped_data = mapped_data[~mapped_data["SchDate"].str.strip().isin(["",None])]
     mapped_data['SOType'] = "R"
+    return mapped_data[["SchDate","SOType","Item Code","Qty","Inventory Category"]]
 def save_mapping_file_to_drive():
     # creds = Credentials.from_authorized_user_info(credentials_dict)
     return 0
+def delete_master_file():
+    creds = authenticate()
+    service = build('drive', 'v3', credentials=creds)
+    # Authenticate with Google Drive API
+    # service = build('drive', 'v3', credentials=creds)
+    folder_id = "1HBRUZePST0D0buyU9MxeYg2vQyEL4wLF"
+    # List all files in the folder
+    results = service.files().list(
+        q=f"mimeType='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'",
+        fields="files(id, name)").execute()
+    files = results.get('files', [])
+    files = [i for i in files if i.get('name')==MAPPING_FILENAME]
+    if not files:
+        print('No Excel Mapping files found in the folder.')
+    else:
+        for file in files:
+            # Get the ID and name of the first Excel file found in the folder
+            existing_file_id = file['id']
+            existing_file_name = file['name']
+            # Delete the existing file
+            service.files().delete(fileId=existing_file_id).execute()
+            print("Deleted master file")
 def main():
+    # Load your logo image
+    logo_path = "logo.jpeg"
+    logo_base64 = get_image_as_base64(logo_path)
+    logo_html = f"""
+    <div style="display: flex; justify-content: center; align-items: center; height: 100px;">
+        <img src="data:image/jpeg;base64,{logo_base64}" style="width: 100px; height: 100px;">
+    </div>
+    """
+    # Display the logo HTML
+    st.markdown(logo_html, unsafe_allow_html=True)
+    st.markdown("<h1 style='text-align: center;'>PDF to Excel Converter</h1>", unsafe_allow_html=True)
     # File uploader
+    st.markdown("### STEP 1")
+    st.markdown("#### Upload a PDF File")
+    uploaded_file = st.file_uploader("### Upload a PDF file", type=["pdf"])
     if uploaded_file is not None:
         st.write("Uploaded PDF file:", uploaded_file.name)
         file_present = pull_mapping_file_from_drive()
         if file_present:
+            try:
+                mapping_data_from_drive = pd.read_excel(MAPPING_FILENAME)
+                extracted_data_for_mapping = pd.read_excel(data_for_mapping)
+                extracted_data_for_mapping.columns = [i.strip() for i in extracted_data_for_mapping.columns]
+                mapping_data_from_drive.columns = [i.strip() for i in mapping_data_from_drive.columns]
+                extracted_data_for_mapping = extracted_data_for_mapping[['Customer Part no as per pdf','Customer Part name as per pdf','Customer Part color  as per pdf',"Inventory Category as per pdf"]].merge(mapping_data_from_drive, on = ['Customer Part no as per pdf','Customer Part name as per pdf','Customer Part color  as per pdf'], how='outer')
+                extracted_data_for_mapping.to_excel(data_for_mapping,index=False)
+            except Exception as e:
+                st.error("Error in the Mapping Master file on Cloud. " + str(e))
+                st.error("Please reupload the Data Master file with Item Code mapping")
+                delete_master_file()
+                file_present = None
         # Download link for the Excel file
         # st.markdown(f"Download the extracted data in Excel file [here](/{excel_file})")
             )
         else:
             st.error("Error: Converted Excel file not found")
+        st.markdown("### STEP 2")
         if not file_present:
+            st.markdown("#### Upload the Data Master file with Item Code mapping")
             mapping_uploaded_file = st.file_uploader("Upload the Data Master file with Item Code mapping", type=["xlsx","ods"])
         else:
             mapping_data = pd.read_excel(MAPPING_FILENAME)
             mapped_data = map_data_to_template(extracted_file, mapping_uploaded_file)
             # Provide a link to download the final Excel file after mapping
+            st.markdown("### FINAL DOWNLOAD")
             st.markdown("### Final Excel File After Mapping")
             final_excel_file = 'Final Data.xlsx'