Update app.py
Browse files
app.py
CHANGED
@@ -1,34 +1,11 @@
|
|
1 |
import os
|
2 |
from paddleocr import PaddleOCR
|
|
|
3 |
import gradio as gr
|
4 |
import re
|
5 |
from simple_salesforce import Salesforce
|
6 |
import pandas as pd
|
7 |
|
8 |
-
# ---------------------------
|
9 |
-
# Salesforce credentials setup.
# Values are read from environment variables only. Real credentials must never
# be committed as fallback defaults; an unset variable simply yields None.
SALESFORCE_USERNAME = os.getenv("SALESFORCE_USERNAME")
SALESFORCE_PASSWORD = os.getenv("SALESFORCE_PASSWORD")
SALESFORCE_SECURITY_TOKEN = os.getenv("SALESFORCE_SECURITY_TOKEN")
|
14 |
-
|
15 |
-
print(f"Using Salesforce credentials:")
|
16 |
-
print(f"Username: {SALESFORCE_USERNAME}")
|
17 |
-
print(f"Password set: {'Yes' if SALESFORCE_PASSWORD else 'No'}")
|
18 |
-
print(f"Security Token set: {'Yes' if SALESFORCE_SECURITY_TOKEN else 'No'}")
|
19 |
-
|
20 |
-
# Test Salesforce login once at startup to catch invalid credentials early
|
21 |
-
try:
|
22 |
-
sf_test = Salesforce(
|
23 |
-
username=SALESFORCE_USERNAME,
|
24 |
-
password=SALESFORCE_PASSWORD,
|
25 |
-
security_token=SALESFORCE_SECURITY_TOKEN,
|
26 |
-
domain="login" # Use "test" if sandbox
|
27 |
-
)
|
28 |
-
print(f"β
Successfully connected to Salesforce as {SALESFORCE_USERNAME}")
|
29 |
-
except Exception as e:
|
30 |
-
print(f"β Salesforce login test failed: {e}")
|
31 |
-
|
32 |
# Attribute mappings: readable names to Salesforce API names
|
33 |
ATTRIBUTE_MAPPING = {
|
34 |
"Name": "Patient_Name__c",
|
@@ -37,17 +14,30 @@ ATTRIBUTE_MAPPING = {
|
|
37 |
"Phone Number": "Phone_Number__c"
|
38 |
}
|
39 |
|
|
|
40 |
ATTRIBUTE_ORDER = ["Name", "Age", "Gender", "Phone Number"]
|
41 |
|
|
|
42 |
GENDER_MAPPING = {
|
43 |
"Male": "Male",
|
44 |
"Female": "Female",
|
45 |
"Other": "Others"
|
46 |
}
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
# Initialize PaddleOCR
|
49 |
ocr = PaddleOCR(use_angle_cls=True, lang='en')
|
50 |
|
|
|
51 |
def extract_text(image):
|
52 |
result = ocr.ocr(image)
|
53 |
extracted_text = []
|
@@ -55,92 +45,123 @@ def extract_text(image):
|
|
55 |
extracted_text.append(line[1][0])
|
56 |
return "\n".join(extracted_text)
|
57 |
|
|
|
58 |
def extract_attributes(extracted_text):
|
59 |
attributes = {}
|
|
|
|
|
60 |
patterns = {
|
61 |
"Name": r"Name[:\-]?\s*([A-Za-z\s]+?)(?=\s*(?:Age|Gender|Phone Number|Phone|Mobile|$))",
|
62 |
"Age": r"Age[:\-]?\s*(\d{1,3})",
|
63 |
"Gender": r"Gender[:\-]?\s*(Male|Female|Other)",
|
64 |
"Phone Number": r"(?:(?:Phone Number)|Phone|Mobile|Phonenumber)[:\-]?\s*(?:\+91)?([6-9]\d{9})"
|
65 |
}
|
|
|
66 |
for readable_attr, pattern in patterns.items():
|
67 |
match = re.search(pattern, extracted_text, re.IGNORECASE)
|
68 |
if match:
|
69 |
attributes[readable_attr] = match.group(1).strip()
|
|
|
70 |
if "Gender" in attributes:
|
71 |
attributes["Gender"] = GENDER_MAPPING.get(attributes["Gender"], attributes["Gender"])
|
|
|
72 |
return attributes
|
73 |
|
|
|
74 |
def filter_valid_attributes(attributes, valid_fields):
|
75 |
-
|
|
|
76 |
|
|
|
77 |
def interact_with_salesforce(attributes):
|
78 |
try:
|
79 |
-
# Validate credentials
|
80 |
if not all([SALESFORCE_USERNAME, SALESFORCE_PASSWORD, SALESFORCE_SECURITY_TOKEN]):
|
81 |
-
raise ValueError("Salesforce credentials are missing
|
82 |
|
|
|
83 |
sf = Salesforce(
|
84 |
username=SALESFORCE_USERNAME,
|
85 |
password=SALESFORCE_PASSWORD,
|
86 |
security_token=SALESFORCE_SECURITY_TOKEN,
|
87 |
-
domain="login"
|
|
|
88 |
)
|
|
|
|
|
|
|
89 |
object_name = "Patient_Registration__c"
|
90 |
sf_object = sf.__getattr__(object_name)
|
91 |
|
92 |
-
#
|
93 |
schema = sf_object.describe()
|
94 |
valid_fields = {field["name"] for field in schema["fields"]}
|
|
|
95 |
|
|
|
96 |
field_details = {field["name"]: {
|
97 |
"createable": field["createable"],
|
98 |
"required": not field["nillable"] and not field["defaultedOnCreate"],
|
99 |
"picklist_values": [val["value"] for val in field.get("picklistValues", [])] if field.get("picklistValues") else None
|
100 |
} for field in schema["fields"]}
|
|
|
101 |
|
102 |
-
# Filter attributes
|
103 |
filtered_attributes = filter_valid_attributes(attributes, valid_fields)
|
104 |
|
105 |
-
#
|
106 |
if "Patient_Name__c" not in filtered_attributes or not filtered_attributes["Patient_Name__c"]:
|
107 |
-
raise ValueError("Patient_Name__c is required but not provided.")
|
|
|
|
|
|
|
108 |
|
109 |
-
#
|
110 |
if "Age__c" in filtered_attributes:
|
111 |
filtered_attributes["Age__c"] = int(filtered_attributes["Age__c"])
|
112 |
|
113 |
-
# Validate Gender__c against picklist
|
114 |
if "Gender__c" in filtered_attributes:
|
115 |
gender_values = field_details.get("Gender__c", {}).get("picklist_values", [])
|
116 |
if gender_values and filtered_attributes["Gender__c"] not in gender_values:
|
117 |
-
raise ValueError(f"Invalid Gender__c
|
118 |
|
119 |
-
# Create
|
120 |
result = sf_object.create(filtered_attributes)
|
121 |
return f"β
Successfully created Patient Registration record with ID: {result['id']}."
|
122 |
|
123 |
except Exception as e:
|
124 |
return f"β Error interacting with Salesforce: {str(e)}"
|
125 |
|
|
|
126 |
def process_image(image):
|
127 |
extracted_text = extract_text(image)
|
128 |
if not extracted_text:
|
129 |
return "No text detected in the image.", None, None
|
130 |
|
131 |
attributes = extract_attributes(extracted_text)
|
|
|
|
|
132 |
ordered_attributes = {attr: attributes.get(attr, "") for attr in ATTRIBUTE_ORDER}
|
|
|
|
|
133 |
df = pd.DataFrame(list(ordered_attributes.items()), columns=["Attribute", "Value"])
|
134 |
return f"Extracted Text:\n{extracted_text}", df, None
|
135 |
|
|
|
136 |
def export_to_salesforce(edited_df):
|
137 |
try:
|
|
|
138 |
edited_attributes = dict(zip(edited_df["Attribute"], edited_df["Value"]))
|
|
|
|
|
139 |
message = interact_with_salesforce(edited_attributes)
|
140 |
return message
|
|
|
141 |
except Exception as e:
|
142 |
return f"β Error exporting to Salesforce: {str(e)}"
|
143 |
|
|
|
144 |
def app():
|
145 |
with gr.Blocks() as demo:
|
146 |
with gr.Tab("π₯ OCR Processing"):
|
@@ -152,6 +173,7 @@ def app():
|
|
152 |
ok_button = gr.Button("OK")
|
153 |
result_output = gr.Text(label="π Result")
|
154 |
|
|
|
155 |
extract_button.click(
|
156 |
fn=process_image,
|
157 |
inputs=[image_input],
|
@@ -166,4 +188,4 @@ def app():
|
|
166 |
return demo
|
167 |
|
168 |
if __name__ == "__main__":
|
169 |
-
app().launch(share=True)
|
|
|
1 |
import os
|
2 |
from paddleocr import PaddleOCR
|
3 |
+
from PIL import Image
|
4 |
import gradio as gr
|
5 |
import re
|
6 |
from simple_salesforce import Salesforce
|
7 |
import pandas as pd
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
# Attribute mappings: readable names to Salesforce API names
|
10 |
ATTRIBUTE_MAPPING = {
|
11 |
"Name": "Patient_Name__c",
|
|
|
14 |
"Phone Number": "Phone_Number__c"
|
15 |
}
|
16 |
|
17 |
+
# Desired order of attributes for display
|
18 |
ATTRIBUTE_ORDER = ["Name", "Age", "Gender", "Phone Number"]
|
19 |
|
20 |
+
# Maps the gender word captured from the OCR text to the value stored in Gender__c (note: "Other" becomes "Others")
|
21 |
GENDER_MAPPING = {
|
22 |
"Male": "Male",
|
23 |
"Female": "Female",
|
24 |
"Other": "Others"
|
25 |
}
|
26 |
|
27 |
+
# Salesforce credentials are read from environment variables by NAME.
# The argument to os.getenv is the variable name, never the secret itself;
# an unset variable yields None and is reported as missing at export time.
SALESFORCE_USERNAME = os.getenv("SALESFORCE_USERNAME")
SALESFORCE_PASSWORD = os.getenv("SALESFORCE_PASSWORD")
SALESFORCE_SECURITY_TOKEN = os.getenv("SALESFORCE_SECURITY_TOKEN")

# Startup diagnostics: show which credentials are configured without echoing
# the password or the security token.
print(f"Using Salesforce credentials - Username: {SALESFORCE_USERNAME}")
print(f"Password set: {'Yes' if SALESFORCE_PASSWORD else 'No'}")
print(f"Security token set: {'Yes' if SALESFORCE_SECURITY_TOKEN else 'No'}")
|
36 |
+
|
37 |
# Initialize PaddleOCR
|
38 |
ocr = PaddleOCR(use_angle_cls=True, lang='en')
|
39 |
|
40 |
+
# Function to extract text from an image using PaddleOCR
|
41 |
def extract_text(image):
|
42 |
result = ocr.ocr(image)
|
43 |
extracted_text = []
|
|
|
45 |
extracted_text.append(line[1][0])
|
46 |
return "\n".join(extracted_text)
|
47 |
|
48 |
+
# Function to extract attributes using regex
|
49 |
def extract_attributes(extracted_text):
    """Extract patient attributes from OCR text using regular expressions.

    Args:
        extracted_text: The OCR output as one string (detected lines joined
            with newlines). ``None`` or an empty string yields ``{}``.

    Returns:
        A dict keyed by readable attribute name ("Name", "Age", "Gender",
        "Phone Number") holding the captured text. Attributes that are not
        found are omitted. "Gender" is normalised and passed through
        GENDER_MAPPING.
    """
    if not extracted_text:
        return {}

    # Every label is anchored with \b. Matching is case-insensitive, so
    # without the boundary the letters "age" inside a name such as "Page"
    # would end the Name capture early or be mistaken for the Age label.
    patterns = {
        "Name": r"\bName[:\-]?\s*([A-Za-z\s]+?)(?=\s*(?:\b(?:Age|Gender|Phone Number|Phone|Mobile)|$))",
        "Age": r"\bAge[:\-]?\s*(\d{1,3})",
        "Gender": r"\bGender[:\-]?\s*(Male|Female|Other)",
        # An optional "+91"/"91" country prefix, with or without a separator,
        # is skipped so that only the 10-digit number is captured.
        "Phone Number": r"\b(?:Phone Number|Phonenumber|Phone|Mobile)[:\-]?\s*(?:\+?91[\s\-]?)?([6-9]\d{9})",
    }

    attributes = {}
    for readable_attr, pattern in patterns.items():
        match = re.search(pattern, extracted_text, re.IGNORECASE)
        if match:
            attributes[readable_attr] = match.group(1).strip()

    if "Gender" in attributes:
        # The regex accepts any letter case ("male", "FEMALE"), but the
        # mapping keys are capitalised, so normalise before the lookup.
        gender = attributes["Gender"].capitalize()
        attributes["Gender"] = GENDER_MAPPING.get(gender, gender)

    return attributes
|
69 |
|
70 |
+
# Function to filter attributes for valid Salesforce fields
|
71 |
def filter_valid_attributes(attributes, valid_fields):
    """Translate readable attribute names to Salesforce API field names.

    Args:
        attributes: dict keyed by readable attribute name (the keys of
            ATTRIBUTE_MAPPING).
        valid_fields: collection of API field names that exist on the target
            Salesforce object.

    Returns:
        A new dict keyed by API field name. Entries are dropped when the
        readable name has no mapping or the mapped field is not in
        ``valid_fields``.
    """
    filtered = {}
    for key, value in attributes.items():
        # .get() instead of indexing: the attribute table is user-editable, so
        # an unknown row name must be ignored rather than raise KeyError.
        api_name = ATTRIBUTE_MAPPING.get(key)
        if api_name is not None and api_name in valid_fields:
            filtered[api_name] = value
    return filtered
|
74 |
|
75 |
+
# Function to create a record in Salesforce
|
76 |
def interact_with_salesforce(attributes):
    """Create a Patient_Registration__c record in Salesforce.

    Args:
        attributes: dict keyed by readable attribute name (e.g. "Name",
            "Age"); it is translated to API field names with
            filter_valid_attributes.

    Returns:
        A status string: a success message containing the new record ID, or
        an error message describing what went wrong. No exception is
        propagated to the caller; every failure is reported via the string.
    """
    try:
        # Fail fast with a clear message instead of an opaque login error.
        if not all([SALESFORCE_USERNAME, SALESFORCE_PASSWORD, SALESFORCE_SECURITY_TOKEN]):
            raise ValueError("One or more Salesforce credentials are missing. Check environment variables.")

        sf = Salesforce(
            username=SALESFORCE_USERNAME,
            password=SALESFORCE_PASSWORD,
            security_token=SALESFORCE_SECURITY_TOKEN,
            domain="login",  # "login" = production/developer org ("test" would be a sandbox)
            version="60.0",
        )
        print(f"Successfully connected to Salesforce as {SALESFORCE_USERNAME}")

        object_name = "Patient_Registration__c"
        sf_object = getattr(sf, object_name)

        # Describe the object so only fields that really exist are sent.
        schema = sf_object.describe()
        valid_fields = {field["name"] for field in schema["fields"]}
        print(f"Valid fields for {object_name}: {valid_fields}")

        field_details = {
            field["name"]: {
                "createable": field["createable"],
                "required": not field["nillable"] and not field["defaultedOnCreate"],
                "picklist_values": (
                    [val["value"] for val in field.get("picklistValues", [])]
                    if field.get("picklistValues")
                    else None
                ),
            }
            for field in schema["fields"]
        }
        print(f"Field details: {field_details}")

        filtered_attributes = filter_valid_attributes(attributes, valid_fields)

        if not filtered_attributes.get("Patient_Name__c"):
            raise ValueError("Patient_Name__c is required but was not provided.")

        # Attributes that were not detected arrive as empty strings. Drop them
        # so an absent optional field is omitted instead of breaking the
        # integer conversion or the picklist check below.
        filtered_attributes = {
            name: value
            for name, value in filtered_attributes.items()
            if value is not None and str(value).strip() != ""
        }

        print(f"Attributes being sent to Salesforce: {filtered_attributes}")

        if "Age__c" in filtered_attributes:
            raw_age = filtered_attributes["Age__c"]
            try:
                filtered_attributes["Age__c"] = int(raw_age)
            except (TypeError, ValueError) as err:
                raise ValueError(f"Age__c must be a whole number, got {raw_age!r}.") from err

        if "Gender__c" in filtered_attributes:
            # picklist_values is None for non-picklist fields, which skips the check.
            gender_values = field_details.get("Gender__c", {}).get("picklist_values", [])
            if gender_values and filtered_attributes["Gender__c"] not in gender_values:
                raise ValueError(
                    f"Invalid value for Gender__c: '{filtered_attributes['Gender__c']}'. "
                    f"Allowed values: {gender_values}"
                )

        result = sf_object.create(filtered_attributes)
        return f"✅ Successfully created Patient Registration record with ID: {result['id']}."

    except Exception as e:
        # UI boundary: convert any failure into a message the user can read.
        return f"❌ Error interacting with Salesforce: {e}"
|
135 |
|
136 |
+
# Function to process the image and extract attributes
|
137 |
def process_image(image):
    """Run OCR on an image and build the editable attribute table.

    Args:
        image: The image handed over by the UI; ``None`` when nothing was
            uploaded.

    Returns:
        A 3-tuple ``(status_text, table, None)``. ``table`` is a DataFrame
        with "Attribute" and "Value" columns listing every entry of
        ATTRIBUTE_ORDER (an empty string when not detected), or ``None`` when
        there is nothing to show. The third element is always ``None`` here.
    """
    # Guard against the button being clicked before an image is uploaded;
    # otherwise None would be passed straight into the OCR engine.
    if image is None:
        return "No image provided.", None, None

    extracted_text = extract_text(image)
    if not extracted_text:
        return "No text detected in the image.", None, None

    attributes = extract_attributes(extracted_text)

    # Always show every attribute, in a fixed order, so missing values can be
    # filled in by hand before export.
    ordered_attributes = {attr: attributes.get(attr, "") for attr in ATTRIBUTE_ORDER}

    df = pd.DataFrame(list(ordered_attributes.items()), columns=["Attribute", "Value"])
    return f"Extracted Text:\n{extracted_text}", df, None
|
150 |
|
151 |
+
# Function to handle edited attributes and export to Salesforce
|
152 |
def export_to_salesforce(edited_df):
    """Send the (possibly user-edited) attribute table to Salesforce.

    Args:
        edited_df: Table with "Attribute" and "Value" columns, as produced by
            process_image. May be ``None`` or empty when nothing has been
            extracted yet.

    Returns:
        The status string from interact_with_salesforce, or an error message
        when the table is missing, empty or malformed.
    """
    try:
        # Give a readable message instead of "'NoneType' object is not subscriptable".
        if edited_df is None or len(edited_df) == 0:
            return "❌ Error exporting to Salesforce: no attributes to export. Extract text from an image first."

        # Rebuild {attribute name: value} from the two table columns.
        edited_attributes = dict(zip(edited_df["Attribute"], edited_df["Value"]))

        return interact_with_salesforce(edited_attributes)

    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        return f"❌ Error exporting to Salesforce: {e}"
|
163 |
|
164 |
+
# Gradio Interface
|
165 |
def app():
|
166 |
with gr.Blocks() as demo:
|
167 |
with gr.Tab("π₯ OCR Processing"):
|
|
|
173 |
ok_button = gr.Button("OK")
|
174 |
result_output = gr.Text(label="π Result")
|
175 |
|
176 |
+
# Define button actions
|
177 |
extract_button.click(
|
178 |
fn=process_image,
|
179 |
inputs=[image_input],
|
|
|
188 |
return demo
|
189 |
|
190 |
if __name__ == "__main__":
|
191 |
+
app().launch(share=True)
|