hardik27 committed on
Commit
3ad62d6
·
verified ·
1 Parent(s): 27a08f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -11
app.py CHANGED
@@ -1,12 +1,11 @@
1
  import PyPDF2
2
  import pandas as pd
3
  import os
4
-
5
  import streamlit as st
6
  import pandas as pd
7
 
8
  def convert_pdf_to_excel(pdf_file):
9
- # Use tabula to extract tables from PDF
10
  inputpdf = PyPDF2.PdfReader(pdf_file)
11
  pages_no = len(inputpdf.pages)
12
  whole_data = []
@@ -19,6 +18,9 @@ def convert_pdf_to_excel(pdf_file):
19
  for each_table in [i for i in page_content.split('Delivery Schedule Sheet') if i]:
20
  data = each_table.split('\n')
21
  each_table_data = []
 
 
 
22
  for index in range(len(data)):
23
  if data[index].strip() == 'Part No.':
24
  each_table_data.append(data[index+1].replace('Part Color Code',""))
@@ -29,14 +31,46 @@ def convert_pdf_to_excel(pdf_file):
29
 
30
  if 'Part Name' in data[index].strip():
31
  each_table_data.append(data[index+1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  whole_data.append(each_table_data)
33
 
34
  whole_data = pd.DataFrame(whole_data)
35
- whole_data.columns = ["Part No.","Part Color Code","Part Name"]
36
- excel_file = pdf_file.name.replace('.pdf', '.xlsx')
37
- whole_data.to_excel(excel_file, index=False)
38
-
39
- return excel_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  def main():
42
  st.title("PDF to Excel Converter")
@@ -48,22 +82,54 @@ def main():
48
  st.write("Uploaded PDF file:", uploaded_file.name)
49
 
50
  # Convert PDF to Excel
51
- excel_file = convert_pdf_to_excel(uploaded_file)
52
 
53
  # Download link for the Excel file
54
  # st.markdown(f"Download the extracted data in Excel file [here](/{excel_file})")
55
 
56
- if os.path.exists(excel_file):
57
- with open(excel_file, "rb") as f:
58
  excel_bytes = f.read()
59
  st.download_button(
60
  label="Download Excel file",
61
  data=excel_bytes,
62
- file_name=excel_file,
63
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
64
  )
65
  else:
66
  st.error("Error: Converted Excel file not found")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  if __name__ == "__main__":
69
  main()
 
1
  import PyPDF2
2
  import pandas as pd
3
  import os
4
+ import ast
5
  import streamlit as st
6
  import pandas as pd
7
 
8
  def convert_pdf_to_excel(pdf_file):
 
9
  inputpdf = PyPDF2.PdfReader(pdf_file)
10
  pages_no = len(inputpdf.pages)
11
  whole_data = []
 
18
  for each_table in [i for i in page_content.split('Delivery Schedule Sheet') if i]:
19
  data = each_table.split('\n')
20
  each_table_data = []
21
+ date_qty = []
22
+ row_start_index = 0
23
+ row_stop_index = 0
24
  for index in range(len(data)):
25
  if data[index].strip() == 'Part No.':
26
  each_table_data.append(data[index+1].replace('Part Color Code',""))
 
31
 
32
  if 'Part Name' in data[index].strip():
33
  each_table_data.append(data[index+1])
34
+
35
+ if data[index].strip() == 'ADJ':
36
+ row_start_index = index + 1
37
+
38
+ if data[index].strip() == 'Total':
39
+ row_stop_index = index
40
+
41
+ if row_start_index>0 and row_stop_index>0:
42
+ for index in range(row_start_index,row_stop_index):
43
+ if '/' in data[index].strip():
44
+ date_qty.append([data[index].strip()[-5:].strip(),data[index+1].strip()])
45
+ if not date_qty:
46
+ date_qty = [["",""]]
47
+ each_table_data.append(date_qty)
48
  whole_data.append(each_table_data)
49
 
50
  whole_data = pd.DataFrame(whole_data)
51
+ whole_data.columns = ["Part No.","Part Color Code","Part Name",'Date Qty']
52
+ extracted_file = "Data Extracted.xlsx"
53
+ data_for_mapping = "Data Mapping.xlsx"
54
+ extracted_data_for_mapping = whole_data.drop('Date Qty',axis=1)
55
+ extracted_data_for_mapping = extracted_data_for_mapping.drop_duplicates(subset=["Part No.","Part Color Code","Part Name"])
56
+ whole_data.to_excel(extracted_file, index=False)
57
+ extracted_data_for_mapping.to_excel(data_for_mapping, index=False)
58
+ return extracted_file,data_for_mapping
59
+
60
+ def map_data_to_template(excel_file, mapping_file):
61
+ # Load Excel file and mapping file
62
+ extracted_data = pd.read_excel(excel_file)
63
+ mapping_data = pd.read_excel(mapping_file)
64
+ mapping_data = mapping_data.rename(columns = {'Customer Part no as per pdf':'Part No.'})
65
+
66
+ # Perform mapping
67
+ extracted_data['Date Qty'] = extracted_data['Date Qty'].apply(lambda x: ast.literal_eval(x))
68
+ extracted_data = extracted_data.explode('Date Qty')
69
+ extracted_data[['SchDate','Qty']]= pd.DataFrame(extracted_data['Date Qty'].to_list(), index= extracted_data.index)
70
+ extracted_data = extracted_data.drop('Date Qty',axis=1)
71
+ mapped_data = extracted_data.merge(mapping_data, on =['Part No.'])[['Item Code','SchDate','Qty']]
72
+
73
+ return mapped_data
74
 
75
  def main():
76
  st.title("PDF to Excel Converter")
 
82
  st.write("Uploaded PDF file:", uploaded_file.name)
83
 
84
  # Convert PDF to Excel
85
+ extracted_file,data_for_mapping = convert_pdf_to_excel(uploaded_file)
86
 
87
  # Download link for the Excel file
88
  # st.markdown(f"Download the extracted data in Excel file [here](/{excel_file})")
89
 
90
+ if os.path.exists(data_for_mapping):
91
+ with open(data_for_mapping, "rb") as f:
92
  excel_bytes = f.read()
93
  st.download_button(
94
  label="Download Excel file",
95
  data=excel_bytes,
96
+ file_name=data_for_mapping,
97
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
98
  )
99
  else:
100
  st.error("Error: Converted Excel file not found")
101
+
102
+
103
+ st.markdown("## Upload the Data Master file with Item Code mapping")
104
+ mapping_uploaded_file = st.file_uploader("Upload the Data Master file with Item Code mapping", type=["xlsx","ods"])
105
+
106
+ if mapping_uploaded_file is not None:
107
+ st.write("Uploaded Mapping Excel file:", mapping_uploaded_file.name)
108
+
109
+ # Perform data mapping
110
+ mapped_data = map_data_to_template(extracted_file, mapping_uploaded_file)
111
+
112
+ # Provide a link to download the final Excel file after mapping
113
+ st.markdown("### Final Excel File After Mapping")
114
+
115
+ final_excel_file = 'Final Data.xlsx'
116
+ mapped_data.to_excel(final_excel_file, index=False)
117
+
118
+ if os.path.exists(final_excel_file):
119
+ with open(final_excel_file, "rb") as f:
120
+ excel_bytes = f.read()
121
+ st.download_button(
122
+ label="Download Excel file",
123
+ data=excel_bytes,
124
+ file_name=final_excel_file,
125
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
126
+ )
127
+ else:
128
+ st.error("Error: Converted Excel file not found")
129
+
130
+
131
+
132
+
133
 
134
  if __name__ == "__main__":
135
  main()