Pranay25 committed on
Commit
f90d327
·
verified ·
1 Parent(s): 3a8a0f9

Create salesforce_ocr_patient_registration.py

Browse files
salesforce_ocr_patient_registration.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from paddleocr import PaddleOCR
3
+ from PIL import Image
4
+ import gradio as gr
5
+ import requests
6
+ import re
7
+ from simple_salesforce import Salesforce
8
+ import pandas as pd
9
+ import matplotlib.pyplot as plt
10
+ from io import BytesIO
11
+ import kaleido
12
+
13
+ # Attribute mappings: readable names to Salesforce API names
14
# Maps the human-readable attribute labels shown in the UI to the API names of
# the corresponding custom fields on the Salesforce object.
ATTRIBUTE_MAPPING = {
    "Name": "Name__c",
    "Age": "Age__c",
    "Gender": "Gender__c",
    "Phone Number": "Phone__c",
}
20
+
21
+ # Salesforce credentials
22
# Salesforce credentials are read from the environment so that no secret lives
# in source control. Any values that were previously committed in this file
# must be treated as compromised and rotated.
# An unset variable yields an empty string, which makes the Salesforce login
# fail with an authentication error instead of crashing at import time.
SALESFORCE_USERNAME = os.environ.get("SALESFORCE_USERNAME", "")
SALESFORCE_PASSWORD = os.environ.get("SALESFORCE_PASSWORD", "")
SALESFORCE_SECURITY_TOKEN = os.environ.get("SALESFORCE_SECURITY_TOKEN", "")
25
+
26
+ # Initialize PaddleOCR
27
# Shared PaddleOCR engine, created once at import time and reused by
# extract_text(). Configured for English with the angle classifier enabled.
ocr = PaddleOCR(use_angle_cls=True, lang='en')
28
+
29
+ # Function to extract text using PaddleOCR
30
def extract_text(image):
    """Run OCR on ``image`` and return the recognized text, one detection per line.

    Args:
        image: The image to read, passed straight through to ``ocr.ocr``.
            ``None`` (no image supplied) is accepted and yields ``""``.

    Returns:
        The recognized text fragments joined with newlines, or an empty
        string when there is no image or nothing was detected.
    """
    if image is None:
        return ""

    result = ocr.ocr(image)

    # NOTE(review): some PaddleOCR releases report a page without detections
    # as ``[None]`` rather than ``[[]]``; both shapes (and an empty result)
    # are treated as "no text" instead of crashing on iteration.
    if not result or not result[0]:
        return ""

    # Each detection is read as ``line[1][0]``, the recognized text.
    return "\n".join(line[1][0] for line in result[0])
36
+
37
+ # Function to extract attributes and their values
38
def extract_attributes(extracted_text):
    """Pull the patient attributes out of OCR text with label-based regexes.

    Each attribute is located by its label (case-insensitive), an optional
    ``:`` or ``-`` separator, optional whitespace (which may include a line
    break, since OCR often puts label and value on separate lines) and then
    the value.

    Args:
        extracted_text: The OCR output, with detections separated by newlines.

    Returns:
        A dict mapping the readable attribute label ("Name", "Age", "Gender",
        "Phone Number") to the stripped value. Attributes that were not found
        are omitted.
    """
    patterns = {
        # The value is restricted to letters, spaces and tabs so it stops at
        # the end of its own line; allowing "\s" inside the value made the
        # name swallow the following label (e.g. "John Doe\nAge").
        "Name": r"Name[:\-]?\s*([A-Za-z][A-Za-z \t]*)",
        "Age": r"Age[:\-]?\s*(\d{1,3})",
        "Gender": r"Gender[:\-]?\s*(Male|Female|Other)",
        # Accept "Phone", "Phone Number" and "Phone No." as the label; the
        # bare "Phone" form alone could not match "Phone Number: ...".
        "Phone Number": r"Phone(?:\s*(?:Number|No\.?))?[:\-]?\s*(\+?\d{10,12})",
    }

    attributes = {}
    for readable_attr, pattern in patterns.items():
        match = re.search(pattern, extracted_text, re.IGNORECASE)
        if match:
            attributes[readable_attr] = match.group(1).strip()

    return attributes
55
+
56
+ # Function to filter attributes for valid Salesforce fields
57
def filter_valid_attributes(attributes, valid_fields):
    """Translate readable attribute labels to Salesforce API names, keeping only known fields.

    Args:
        attributes: Mapping of readable labels (keys of ATTRIBUTE_MAPPING) to values.
        valid_fields: Collection of field API names that exist on the target object.

    Returns:
        A dict keyed by Salesforce API name. Labels that have no entry in
        ATTRIBUTE_MAPPING, or whose API name is not in ``valid_fields``, are
        skipped rather than raising ``KeyError``.
    """
    filtered = {}
    for label, value in attributes.items():
        # .get() tolerates labels the user typed into the editable table that
        # are not part of the mapping.
        api_name = ATTRIBUTE_MAPPING.get(label)
        if api_name is not None and api_name in valid_fields:
            filtered[api_name] = value
    return filtered
59
+
60
+ # Function to interact with Salesforce
61
def interact_with_salesforce(attributes):
    """Create a ``Patient_Registration__c`` record from the given attributes.

    Logs in with the module-level Salesforce credentials, reads the object's
    schema to learn which fields exist, filters ``attributes`` down to those
    fields via ``filter_valid_attributes`` and creates the record.

    Args:
        attributes: Mapping of readable attribute labels to the values to store.

    Returns:
        A human-readable status message: the new record ID on success, or a
        description of the problem. Exceptions are converted to a message
        because the result is shown directly in the UI.
    """
    # Nothing to send: avoid a pointless login round-trip.
    if not attributes:
        return "❌ No attributes were provided to export."

    try:
        sf = Salesforce(
            username=SALESFORCE_USERNAME,
            password=SALESFORCE_PASSWORD,
            security_token=SALESFORCE_SECURITY_TOKEN,
        )

        object_name = "Patient_Registration__c"  # Custom Patient Registration object
        sf_object = getattr(sf, object_name)
        schema = sf_object.describe()
        valid_fields = {field["name"] for field in schema["fields"]}

        filtered_attributes = filter_valid_attributes(attributes, valid_fields)
        # Do not create an empty record when no attribute maps to a real field.
        if not filtered_attributes:
            return f"❌ None of the provided attributes match a field on {object_name}."

        result = sf_object.create(filtered_attributes)
        return f"✅ Successfully created Patient Registration record with ID: {result['id']}."

    except Exception as e:  # UI boundary: report the failure instead of raising
        return f"❌ Error interacting with Salesforce: {str(e)}"
82
+
83
+ # Function to process image and extract attributes
84
def process_image(image):
    """OCR an uploaded image and prepare its attributes for editing.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        A 3-tuple ``(text_message, attributes_df, result)``:
        ``text_message`` is the extracted text (or an explanation of why there
        is none), ``attributes_df`` is a two-column "Attribute"/"Value"
        DataFrame with one row per entry of ATTRIBUTE_MAPPING (``None`` when
        no text was found), and ``result`` is always ``None``.
    """
    # Compare against None explicitly; truthiness of an image array is ambiguous.
    if image is None:
        return "No image provided.", None, None

    extracted_text = extract_text(image)
    if not extracted_text:
        return "No text detected in the image.", None, None

    # Start from every known attribute (blank) so the table always lists them
    # in the same order, then overlay whatever the OCR text actually provided.
    attributes = {label: "" for label in ATTRIBUTE_MAPPING}
    attributes.update(extract_attributes(extracted_text))

    # Two-column frame so the user can correct values before exporting.
    df = pd.DataFrame(list(attributes.items()), columns=["Attribute", "Value"])
    return f"Extracted Text:\n{extracted_text}", df, None
99
+
100
+ # Function to handle edited attributes and export to Salesforce
101
def export_to_salesforce(edited_df):
    """Send the (possibly user-edited) attribute table to Salesforce.

    Args:
        edited_df: DataFrame with "Attribute" and "Value" columns, one row per
            attribute. ``None`` or an empty frame is reported as nothing to
            export.

    Returns:
        The status message produced by ``interact_with_salesforce``, or an
        error message if the table is missing, empty or malformed.
    """
    try:
        if edited_df is None or edited_df.empty:
            return "❌ No attributes to export."

        # Rebuild the label -> value mapping from the two table columns.
        edited_attributes = dict(zip(edited_df["Attribute"], edited_df["Value"]))
        return interact_with_salesforce(edited_attributes)

    except Exception as e:  # UI boundary: report the failure instead of raising
        return f"❌ Error exporting to Salesforce: {str(e)}"
112
+
113
+ # Function to pull structured data from Salesforce and display as a table
114
def _message_frame(message):
    """Wrap a status message in a one-cell DataFrame so a table output can display it."""
    return pd.DataFrame({"Message": [message]})


def _plot_age_distribution(df):
    """Render a bar chart of the number of patients per age and return it as a PIL image."""
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        df.groupby('Age').size().plot(kind='bar', ax=ax)
        ax.set_title("Age Distribution of Patient Registrations")
        ax.set_xlabel("Age")
        ax.set_ylabel("Number of Patients")
        # Style the tick labels on this Axes rather than on pyplot's
        # "current" figure, which is process-global state.
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right", fontsize=10)
        fig.tight_layout()
        buffer = BytesIO()
        fig.savefig(buffer, format="png")
    finally:
        # Figures created through pyplot stay registered until closed.
        plt.close(fig)
    buffer.seek(0)
    return Image.open(buffer)


def pull_data_from_salesforce():
    """Fetch patient registrations from Salesforce as a table, an Excel file and a chart.

    Queries up to 100 ``Patient_Registration__c`` records with a non-null age,
    writes them to ``salesforce_patient_registration.xlsx`` and plots the age
    distribution.

    Returns:
        A 3-tuple ``(table, excel_path, chart)`` matching the three UI outputs
        this function is wired to. On success ``table`` is the records
        DataFrame, ``excel_path`` the path of the written workbook and
        ``chart`` a PIL image. When there is no data or an error occurs,
        ``table`` is a one-cell DataFrame carrying the message and the other
        two items are ``None``.
    """
    try:
        sf = Salesforce(
            username=SALESFORCE_USERNAME,
            password=SALESFORCE_PASSWORD,
            security_token=SALESFORCE_SECURITY_TOKEN,
        )

        query = "SELECT Name__c, Age__c, Gender__c, Phone__c FROM Patient_Registration__c WHERE Age__c != NULL LIMIT 100"
        records = sf.query_all(query).get("records", [])

        if not records:
            return _message_frame("No data found in Salesforce."), None, None

        # Drop the per-record "attributes" column if present; only the queried
        # fields are wanted.
        df = pd.DataFrame(records).drop(columns=['attributes'], errors='ignore')

        # Rename columns for better readability
        df = df.rename(columns={
            "Name__c": "Name",
            "Age__c": "Age",
            "Gender__c": "Gender",
            "Phone__c": "Phone Number",
        })

        excel_path = "salesforce_patient_registration.xlsx"
        df.to_excel(excel_path, index=False)

        # Ages are coerced to numbers for grouping; unparseable values become NaN.
        df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
        img = _plot_age_distribution(df)

        return df, excel_path, img

    except Exception as e:  # UI boundary: report the failure instead of raising
        return _message_frame(f"Error fetching data: {str(e)}"), None, None
161
+
162
+ # Gradio Interface
163
def app():
    """Build the Gradio UI and return the ``gr.Blocks`` app (not yet launched).

    The UI has two tabs: one that OCRs an uploaded image, lets the user edit
    the extracted attributes and exports them to Salesforce, and one that
    pulls existing records back as a table, an Excel download and a chart.

    NOTE(review): the original indentation was lost, so which components sit
    inside the ``gr.Row`` is reconstructed (only the image input is placed
    there) — confirm against the intended layout.
    """
    with gr.Blocks() as demo:
        with gr.Tab("📥 OCR Processing"):
            with gr.Row():
                image_input = gr.Image(type="numpy", label="📄 Upload Image")
            extract_button = gr.Button("Extract Text and Attributes")
            extracted_text_output = gr.Text(label="📝 Extracted Image Data")
            editable_df_output = gr.Dataframe(label="✏️ Edit Attributes (Key-Value Pairs)", interactive=True)
            ok_button = gr.Button("OK")
            result_output = gr.Text(label="🚀 Result")

        with gr.Tab("📊 Salesforce Data"):
            pull_button = gr.Button("Pull Data from Salesforce")
            salesforce_data_output = gr.Dataframe(label="📊 Salesforce Data")
            excel_download_output = gr.File(label="📥 Download Excel")
            graph_output = gr.Image(label="📈 Age Distribution Graph")

        # Define button actions (registered inside the Blocks context).
        extract_button.click(
            fn=process_image,
            inputs=[image_input],
            outputs=[extracted_text_output, editable_df_output, result_output],
        )
        ok_button.click(
            fn=export_to_salesforce,
            inputs=[editable_df_output],
            outputs=[result_output],
        )
        pull_button.click(
            fn=pull_data_from_salesforce,
            inputs=[],
            outputs=[salesforce_data_output, excel_download_output, graph_output],
        )

    return demo
198
+
199
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a publicly reachable link; this
    # app handles patient data, so confirm that exposure is intended.
    app().launch(share=True)