bobpeulen committed on
Commit
03bc446
1 Parent(s): 5e692ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -21
app.py CHANGED
@@ -1,42 +1,59 @@
1
- from sepa import parser
2
- import re
3
- import pandas as pd
4
  import gradio as gr
 
5
  import numpy as np
 
 
6
 
7
- def full_function(xml_file):
8
 
9
- # Utility function to remove additional namespaces from the XML
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def strip_namespace(xml):
11
  return re.sub(' xmlns="[^"]+"', '', xml, count=1)
12
 
13
  # Read file
14
- with open(xml_file, 'r') as f:
15
  input_data = f.read()
16
 
17
  # Parse the bank statement XML to dictionary
 
18
  camt_dict = parser.parse_string(parser.bank_to_customer_statement, bytes(strip_namespace(input_data), 'utf8'))
19
 
20
  statements = pd.DataFrame.from_dict(camt_dict['statements'])
21
  all_entries = []
 
 
 
22
  for i,_ in statements.iterrows():
23
  if 'entries' in camt_dict['statements'][i]:
24
 
25
  #create empty df
26
  df = pd.DataFrame()
27
- dd = pd.DataFrame.from_records(camt_dict['statements'][i]['entries'])
28
 
29
  df['reference'] = dd['reference']
30
  df['credit_debit_indicator'] = dd['credit_debit_indicator']
31
  df['status'] = dd['status']
32
  df['account_servicer_reference'] = dd['account_servicer_reference']
33
-
34
  iban = camt_dict['statements'][i]['account']['id']['iban']
35
  name = camt_dict['statements'][i]['account']['name']
36
  df['iban'] = iban
37
  df['name'] = name
38
  df['currency'] = dd['amount'].str['currency']
39
  df['amount'] = dd['amount'].str['_value']
 
40
 
41
  df['value_date'] = dd['value_date'].str['date']
42
  df['value_date'] = pd.to_datetime(df['value_date']).dt.strftime('%Y-%m-%d')
@@ -54,38 +71,68 @@ def full_function(xml_file):
54
  #transaction details
55
  df['debtor_name'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor'].str['name']
56
  df['debtor_iban'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor_account'].str['id'].str['iban']
 
 
 
 
 
57
 
58
  df['account_servicer_reference'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['account_servicer_reference']
59
  df['end_to_end_id'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['end_to_end_id']
60
 
 
61
 
62
- all_entries.append(df)
63
-
64
  df_entries = pd.concat(all_entries)
65
- df_entries.head()
66
 
67
- return df_entries
 
 
 
 
 
 
 
 
 
68
 
69
 
70
 
 
 
 
71
 
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  desc = "Upload XML file, convert to .csv file, and analyze transactions"
74
 
75
  with gr.Blocks() as demo:
76
 
77
  xml_file = gr.File(label = "Upload XML file here")
78
 
79
- # input_employees = gr.CheckboxGroup(["Transfer Solutions", "Ordina", "PwC", "Quistor", "Full Orbit", "Accenture", "Atos", "AMIS"], label="Oracle Partners", info="Who to judge?")
80
- # input_question = gr.Text(label="What activity is the Oracle Partner performing?")
81
- # additional_info = gr.Text(label="Additional information (optional)")
82
- output_text = gr.Text(label="R")
83
- df_entries = gr.DataFrame(label="Output table")
84
-
85
-
86
- submit_btn = gr.Button("Run analysis on XML file")
87
 
88
 
89
- gr.Interface(fn=full_function, inputs=xml_file, outputs=df_entries, title=desc).launch(share=True)
 
90
 
91
 
 
 
 
 
 
1
  import gradio as gr
2
+ import pandas as pd
3
  import numpy as np
4
+ from sepa import parser
5
+ import re
6
 
 
7
 
8
+ ##################################################################
9
+ ##################################################################
10
+ ##################################################################
11
+
12
+ def full_function(xml_file):
13
+
14
+ #for gradio: swap with xml_file for local testing
15
+ full_name = xml_file.name
16
+ #full_name = xml_file
17
+
18
+
19
+ print("File name in gradio is ")
20
+ print(full_name)
21
+
22
  def strip_namespace(xml):
23
  return re.sub(' xmlns="[^"]+"', '', xml, count=1)
24
 
25
  # Read file
26
+ with open(full_name, 'r') as f:
27
  input_data = f.read()
28
 
29
  # Parse the bank statement XML to dictionary
30
+ print("Parse full xml string")
31
  camt_dict = parser.parse_string(parser.bank_to_customer_statement, bytes(strip_namespace(input_data), 'utf8'))
32
 
33
  statements = pd.DataFrame.from_dict(camt_dict['statements'])
34
  all_entries = []
35
+ dd_all = []
36
+
37
+ print("Start loop all the transactions and add to df")
38
  for i,_ in statements.iterrows():
39
  if 'entries' in camt_dict['statements'][i]:
40
 
41
  #create empty df
42
  df = pd.DataFrame()
43
+ dd = pd.DataFrame.from_records(camt_dict['statements'][i]['entries'])
44
 
45
  df['reference'] = dd['reference']
46
  df['credit_debit_indicator'] = dd['credit_debit_indicator']
47
  df['status'] = dd['status']
48
  df['account_servicer_reference'] = dd['account_servicer_reference']
49
+
50
  iban = camt_dict['statements'][i]['account']['id']['iban']
51
  name = camt_dict['statements'][i]['account']['name']
52
  df['iban'] = iban
53
  df['name'] = name
54
  df['currency'] = dd['amount'].str['currency']
55
  df['amount'] = dd['amount'].str['_value']
56
+ df['reference'] = dd['reference']
57
 
58
  df['value_date'] = dd['value_date'].str['date']
59
  df['value_date'] = pd.to_datetime(df['value_date']).dt.strftime('%Y-%m-%d')
 
71
  #transaction details
72
  df['debtor_name'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor'].str['name']
73
  df['debtor_iban'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor_account'].str['id'].str['iban']
74
+ df['creditor_name'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['creditor'].str['name']
75
+ df['creditor_iban'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['creditor_account'].str['id'].str['iban']
76
+
77
+ df['bic'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_agents'].str['debtor_agent'].str['financial_institution'].str['bic']
78
+ df['remittance_information'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['remittance_information'].str['unstructured'].str[0]
79
 
80
  df['account_servicer_reference'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['account_servicer_reference']
81
  df['end_to_end_id'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['end_to_end_id']
82
 
83
+ all_entries.append(df)
84
 
85
+ print("merge the list into df")
 
86
  df_entries = pd.concat(all_entries)
 
87
 
88
+ #drop duplicates
89
+ print("remove duplicate rows")
90
+ df_entries = df_entries.drop_duplicates(subset=['reference'], keep='last')
91
+
92
+ print("all done")
93
+
94
+ df_entries_example = df_entries[['reference', 'credit_debit_indicator', 'iban', 'name', 'currency', 'amount', 'value_date', 'debtor_name', 'debtor_iban', 'creditor_name', 'creditor_iban', 'remittance_information']].head(20)
95
+ #print(df_entries_example)
96
+
97
+ return df_entries, df_entries_example
98
 
99
 
100
 
101
+ ##################################################################
102
+ ##################################################################
103
+ ##################################################################
104
 
105
 
106
def export_csv(xml_file):
    """Convert the uploaded XML file to CSV and build the Gradio outputs.

    Runs ``full_function`` on ``xml_file``, writes the first table it
    returns to ``./output.csv``, and returns a file-component update that
    points at that CSV together with the second (preview) table.

    Args:
        xml_file: The uploaded file object, passed through unchanged to
            ``full_function``.

    Returns:
        A 2-tuple ``(file_update, preview)``: the ``gr.File.update`` payload
        (``value="output.csv"``, ``visible=True``) and the second value
        returned by ``full_function``.
    """
    all_rows, preview_rows = full_function(xml_file)

    # Written relative to the current working directory; the update below
    # refers to the same file by its bare name.
    all_rows.to_csv("./output.csv")

    return gr.File.update(value="output.csv", visible=True), preview_rows
115
+
116
+ ##################################################################
117
+ ##################################################################
118
+ ##################################################################
119
+
120
  desc = "Upload XML file, convert to .csv file, and analyze transactions"
121
 
122
  with gr.Blocks() as demo:
123
 
124
  xml_file = gr.File(label = "Upload XML file here")
125
 
126
+ #output table.
127
+ df_entries_example = gr.DataFrame(label="Example output table, top 20 rows (not all columns)")
128
+
129
+ with gr.Row():
130
+ #export_button = gr.Button("Export")
131
+ out = gr.File(label = "Output file", interactive=False, visible=False)
 
 
132
 
133
 
134
+ #submit_btn = gr.Button("Run analysis on XML file")
135
+ #export_button.click(export_csv, df_entries, csv)
136
 
137
 
138
+ gr.Interface(fn=export_csv, inputs=xml_file, outputs=[out, df_entries_example], title=desc).launch(share=True, debug =True)