Pranay25 committed on
Commit
82a8657
·
verified ·
1 Parent(s): dea052c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -37
app.py CHANGED
@@ -1,34 +1,11 @@
1
  import os
2
  from paddleocr import PaddleOCR
 
3
  import gradio as gr
4
  import re
5
  from simple_salesforce import Salesforce
6
  import pandas as pd
7
 
8
- # ---------------------------
9
- # Salesforce Credentials Setup
10
- # Replace these with your actual credentials or set them as environment variables
11
- SALESFORCE_USERNAME = os.getenv("SALESFORCE_USERNAME", "[email protected]")
12
- SALESFORCE_PASSWORD = os.getenv("SALESFORCE_PASSWORD", "Hms@2025")
13
- SALESFORCE_SECURITY_TOKEN = os.getenv("SALESFORCE_SECURITY_TOKEN", "2TM7U3O9HGSiJuFMUbVd3xuY")
14
-
15
- print(f"Using Salesforce credentials:")
16
- print(f"Username: {SALESFORCE_USERNAME}")
17
- print(f"Password set: {'Yes' if SALESFORCE_PASSWORD else 'No'}")
18
- print(f"Security Token set: {'Yes' if SALESFORCE_SECURITY_TOKEN else 'No'}")
19
-
20
- # Test Salesforce login once at startup to catch invalid credentials early
21
- try:
22
- sf_test = Salesforce(
23
- username=SALESFORCE_USERNAME,
24
- password=SALESFORCE_PASSWORD,
25
- security_token=SALESFORCE_SECURITY_TOKEN,
26
- domain="login" # Use "test" if sandbox
27
- )
28
- print(f"βœ… Successfully connected to Salesforce as {SALESFORCE_USERNAME}")
29
- except Exception as e:
30
- print(f"❌ Salesforce login test failed: {e}")
31
-
32
  # Attribute mappings: readable names to Salesforce API names
33
  ATTRIBUTE_MAPPING = {
34
  "Name": "Patient_Name__c",
@@ -37,17 +14,30 @@ ATTRIBUTE_MAPPING = {
37
  "Phone Number": "Phone_Number__c"
38
  }
39
 
 
40
  ATTRIBUTE_ORDER = ["Name", "Age", "Gender", "Phone Number"]
41
 
 
42
  GENDER_MAPPING = {
43
  "Male": "Male",
44
  "Female": "Female",
45
  "Other": "Others"
46
  }
47
 
 
 
 
 
 
 
 
 
 
 
48
  # Initialize PaddleOCR
49
  ocr = PaddleOCR(use_angle_cls=True, lang='en')
50
 
 
51
  def extract_text(image):
52
  result = ocr.ocr(image)
53
  extracted_text = []
@@ -55,92 +45,123 @@ def extract_text(image):
55
  extracted_text.append(line[1][0])
56
  return "\n".join(extracted_text)
57
 
 
58
  def extract_attributes(extracted_text):
59
  attributes = {}
 
 
60
  patterns = {
61
  "Name": r"Name[:\-]?\s*([A-Za-z\s]+?)(?=\s*(?:Age|Gender|Phone Number|Phone|Mobile|$))",
62
  "Age": r"Age[:\-]?\s*(\d{1,3})",
63
  "Gender": r"Gender[:\-]?\s*(Male|Female|Other)",
64
  "Phone Number": r"(?:(?:Phone Number)|Phone|Mobile|Phonenumber)[:\-]?\s*(?:\+91)?([6-9]\d{9})"
65
  }
 
66
  for readable_attr, pattern in patterns.items():
67
  match = re.search(pattern, extracted_text, re.IGNORECASE)
68
  if match:
69
  attributes[readable_attr] = match.group(1).strip()
 
70
  if "Gender" in attributes:
71
  attributes["Gender"] = GENDER_MAPPING.get(attributes["Gender"], attributes["Gender"])
 
72
  return attributes
73
 
 
74
  def filter_valid_attributes(attributes, valid_fields):
75
- return {ATTRIBUTE_MAPPING[key]: value for key, value in attributes.items() if ATTRIBUTE_MAPPING[key] in valid_fields}
 
76
 
 
77
  def interact_with_salesforce(attributes):
78
  try:
79
- # Validate credentials presence
80
  if not all([SALESFORCE_USERNAME, SALESFORCE_PASSWORD, SALESFORCE_SECURITY_TOKEN]):
81
- raise ValueError("Salesforce credentials are missing or incomplete.")
82
 
 
83
  sf = Salesforce(
84
  username=SALESFORCE_USERNAME,
85
  password=SALESFORCE_PASSWORD,
86
  security_token=SALESFORCE_SECURITY_TOKEN,
87
- domain="login"
 
88
  )
 
 
 
89
  object_name = "Patient_Registration__c"
90
  sf_object = sf.__getattr__(object_name)
91
 
92
- # Describe object to get valid fields and picklist values
93
  schema = sf_object.describe()
94
  valid_fields = {field["name"] for field in schema["fields"]}
 
95
 
 
96
  field_details = {field["name"]: {
97
  "createable": field["createable"],
98
  "required": not field["nillable"] and not field["defaultedOnCreate"],
99
  "picklist_values": [val["value"] for val in field.get("picklistValues", [])] if field.get("picklistValues") else None
100
  } for field in schema["fields"]}
 
101
 
102
- # Filter attributes for valid Salesforce fields
103
  filtered_attributes = filter_valid_attributes(attributes, valid_fields)
104
 
105
- # Check required field Patient_Name__c
106
  if "Patient_Name__c" not in filtered_attributes or not filtered_attributes["Patient_Name__c"]:
107
- raise ValueError("Patient_Name__c is required but not provided.")
 
 
 
108
 
109
- # Convert Age__c to int if present
110
  if "Age__c" in filtered_attributes:
111
  filtered_attributes["Age__c"] = int(filtered_attributes["Age__c"])
112
 
113
- # Validate Gender__c against picklist
114
  if "Gender__c" in filtered_attributes:
115
  gender_values = field_details.get("Gender__c", {}).get("picklist_values", [])
116
  if gender_values and filtered_attributes["Gender__c"] not in gender_values:
117
- raise ValueError(f"Invalid Gender__c value '{filtered_attributes['Gender__c']}'. Allowed: {gender_values}")
118
 
119
- # Create Salesforce record
120
  result = sf_object.create(filtered_attributes)
121
  return f"βœ… Successfully created Patient Registration record with ID: {result['id']}."
122
 
123
  except Exception as e:
124
  return f"❌ Error interacting with Salesforce: {str(e)}"
125
 
 
126
  def process_image(image):
127
  extracted_text = extract_text(image)
128
  if not extracted_text:
129
  return "No text detected in the image.", None, None
130
 
131
  attributes = extract_attributes(extracted_text)
 
 
132
  ordered_attributes = {attr: attributes.get(attr, "") for attr in ATTRIBUTE_ORDER}
 
 
133
  df = pd.DataFrame(list(ordered_attributes.items()), columns=["Attribute", "Value"])
134
  return f"Extracted Text:\n{extracted_text}", df, None
135
 
 
136
  def export_to_salesforce(edited_df):
137
  try:
 
138
  edited_attributes = dict(zip(edited_df["Attribute"], edited_df["Value"]))
 
 
139
  message = interact_with_salesforce(edited_attributes)
140
  return message
 
141
  except Exception as e:
142
  return f"❌ Error exporting to Salesforce: {str(e)}"
143
 
 
144
  def app():
145
  with gr.Blocks() as demo:
146
  with gr.Tab("πŸ“₯ OCR Processing"):
@@ -152,6 +173,7 @@ def app():
152
  ok_button = gr.Button("OK")
153
  result_output = gr.Text(label="πŸš€ Result")
154
 
 
155
  extract_button.click(
156
  fn=process_image,
157
  inputs=[image_input],
@@ -166,4 +188,4 @@ def app():
166
  return demo
167
 
168
  if __name__ == "__main__":
169
- app().launch(share=True)
 
1
  import os
2
  from paddleocr import PaddleOCR
3
+ from PIL import Image
4
  import gradio as gr
5
  import re
6
  from simple_salesforce import Salesforce
7
  import pandas as pd
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Attribute mappings: readable names to Salesforce API names
10
  ATTRIBUTE_MAPPING = {
11
  "Name": "Patient_Name__c",
 
14
  "Phone Number": "Phone_Number__c"
15
  }
16
 
17
+ # Desired order of attributes for display
18
  ATTRIBUTE_ORDER = ["Name", "Age", "Gender", "Phone Number"]
19
 
20
+ # Mapping for Gender__c picklist values
21
  GENDER_MAPPING = {
22
  "Male": "Male",
23
  "Female": "Female",
24
  "Other": "Others"
25
  }
26
 
27
# Salesforce credentials are read from environment variables. The argument to
# os.getenv() is the NAME of the variable, never the secret itself; each value
# is None when its variable is unset.
# NOTE(review): secret-looking literals were previously passed here in place
# of the variable names - treat those values as leaked and rotate them.
SALESFORCE_USERNAME = os.getenv("SALESFORCE_USERNAME")
SALESFORCE_PASSWORD = os.getenv("SALESFORCE_PASSWORD")
SALESFORCE_SECURITY_TOKEN = os.getenv("SALESFORCE_SECURITY_TOKEN")

# Startup diagnostics: report only whether the secrets are present, never
# their values.
print(f"Using Salesforce credentials - Username: {SALESFORCE_USERNAME}")
print(f"Password set: {'Yes' if SALESFORCE_PASSWORD else 'No'}")
print(f"Security token set: {'Yes' if SALESFORCE_SECURITY_TOKEN else 'No'}")
36
+
37
  # Initialize PaddleOCR
38
  ocr = PaddleOCR(use_angle_cls=True, lang='en')
39
 
40
+ # Function to extract text from an image using PaddleOCR
41
  def extract_text(image):
42
  result = ocr.ocr(image)
43
  extracted_text = []
 
45
  extracted_text.append(line[1][0])
46
  return "\n".join(extracted_text)
47
 
48
# Function to extract attributes using regex
def extract_attributes(extracted_text):
    """Pull patient attributes out of OCR text with labelled-field regexes.

    Args:
        extracted_text: OCR output as one newline-joined string.

    Returns:
        dict mapping the readable names "Name", "Age", "Gender" and
        "Phone Number" to the matched text. A field whose label is not
        found is omitted. "Gender" is translated through GENDER_MAPPING.
    """
    attributes = {}

    patterns = {
        # The lazy capture stops at the next field label. The \b keeps a
        # label from matching inside the name itself ("Page", "Sage"), which
        # matters because matching is case-insensitive.
        "Name": r"Name[:\-]?\s*([A-Za-z\s]+?)(?=\s*(?:\b(?:Age|Gender|Phone Number|Phone|Mobile)|$))",
        # \b stops "Page 12" from being read as an age.
        "Age": r"\bAge[:\-]?\s*(\d{1,3})",
        "Gender": r"Gender[:\-]?\s*(Male|Female|Other)",
        # Optional +91 prefix, then a 10-digit number starting with 6-9.
        "Phone Number": r"(?:(?:Phone Number)|Phone|Mobile|Phonenumber)[:\-]?\s*(?:\+91)?([6-9]\d{9})",
    }

    for readable_attr, pattern in patterns.items():
        match = re.search(pattern, extracted_text, re.IGNORECASE)
        if match:
            attributes[readable_attr] = match.group(1).strip()

    if "Gender" in attributes:
        # The regex is case-insensitive but GENDER_MAPPING keys are
        # capitalised, so normalise "male" / "OTHER" before the lookup.
        gender = attributes["Gender"].capitalize()
        attributes["Gender"] = GENDER_MAPPING.get(gender, gender)

    return attributes
69
 
70
# Function to filter attributes for valid Salesforce fields
def filter_valid_attributes(attributes, valid_fields):
    """Translate readable attribute names to Salesforce API field names.

    Args:
        attributes: dict keyed by readable names (the keys of
            ATTRIBUTE_MAPPING).
        valid_fields: collection of field API names that exist on the
            target object.

    Returns:
        dict keyed by API name, holding only the attributes whose mapped
        field is in valid_fields. Readable names with no entry in
        ATTRIBUTE_MAPPING are dropped instead of raising KeyError.
    """
    filtered = {}
    for key, value in attributes.items():
        api_name = ATTRIBUTE_MAPPING.get(key)
        if api_name is not None and api_name in valid_fields:
            filtered[api_name] = value
    return filtered
74
 
75
def _is_blank(value):
    """Return True for None, NaN, or a string that is empty after stripping."""
    if value is None:
        return True
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return True
    return isinstance(value, str) and not value.strip()


# Function to create a record in Salesforce
def interact_with_salesforce(attributes):
    """Create a Patient_Registration__c record from readable attributes.

    Args:
        attributes: dict keyed by readable attribute names (the keys of
            ATTRIBUTE_MAPPING); values are the text to store.

    Returns:
        A status string: a success message carrying the new record ID, or
        an error message. Every exception is caught and reported in the
        returned string, so this function does not raise.
    """
    try:
        # Validate that credentials are not empty
        if not all([SALESFORCE_USERNAME, SALESFORCE_PASSWORD, SALESFORCE_SECURITY_TOKEN]):
            raise ValueError("One or more Salesforce credentials are missing. Check environment variables.")

        # Initialize Salesforce connection
        sf = Salesforce(
            username=SALESFORCE_USERNAME,
            password=SALESFORCE_PASSWORD,
            security_token=SALESFORCE_SECURITY_TOKEN,
            domain="login",  # production/developer org; a sandbox would use "test"
            version="60.0",
        )
        print(f"Successfully connected to Salesforce as {SALESFORCE_USERNAME}")

        # Reference the Patient_Registration__c object
        object_name = "Patient_Registration__c"
        sf_object = getattr(sf, object_name)

        # Get the object's schema to validate fields
        schema = sf_object.describe()
        valid_fields = {field["name"] for field in schema["fields"]}
        print(f"Valid fields for {object_name}: {valid_fields}")

        # Per-field metadata; only the picklist values are consulted below.
        field_details = {field["name"]: {
            "createable": field["createable"],
            "required": not field["nillable"] and not field["defaultedOnCreate"],
            "picklist_values": [val["value"] for val in field.get("picklistValues", [])] if field.get("picklistValues") else None
        } for field in schema["fields"]}
        print(f"Field details: {field_details}")

        # Filter attributes to match valid Salesforce fields
        filtered_attributes = filter_valid_attributes(attributes, valid_fields)

        # Attributes that were not extracted arrive as blanks. Omit them so a
        # blank age or gender is left unset instead of failing int() or the
        # picklist check below.
        filtered_attributes = {
            name: value
            for name, value in filtered_attributes.items()
            if not _is_blank(value)
        }

        # Ensure Patient_Name__c is provided (likely required)
        if not filtered_attributes.get("Patient_Name__c"):
            raise ValueError("Patient_Name__c is required but was not provided.")

        # Log the attributes being sent for debugging
        print(f"Attributes being sent to Salesforce: {filtered_attributes}")

        # Ensure Age__c is a number
        if "Age__c" in filtered_attributes:
            age = filtered_attributes["Age__c"]
            try:
                filtered_attributes["Age__c"] = int(age)
            except (TypeError, ValueError):
                raise ValueError(f"Age__c must be a whole number, got '{age}'.") from None

        # Validate Gender__c against picklist values
        if "Gender__c" in filtered_attributes:
            gender_values = field_details.get("Gender__c", {}).get("picklist_values", [])
            if gender_values and filtered_attributes["Gender__c"] not in gender_values:
                raise ValueError(f"Invalid value for Gender__c: '{filtered_attributes['Gender__c']}'. Allowed values: {gender_values}")

        # Create the record
        result = sf_object.create(filtered_attributes)
        return f"✅ Successfully created Patient Registration record with ID: {result['id']}."

    except Exception as e:
        # UI boundary: surface every failure as text instead of raising.
        return f"❌ Error interacting with Salesforce: {str(e)}"
135
 
136
# Function to process the image and extract attributes
def process_image(image):
    """Run OCR on an image and tabulate the attributes found in its text.

    Args:
        image: the image handed to extract_text. None is handled explicitly
            and reported as a message instead of being sent to OCR.

    Returns:
        A 3-tuple (status text, table, None). The table is a DataFrame with
        columns "Attribute" and "Value", one row per name in
        ATTRIBUTE_ORDER (blank where nothing was extracted), or None when
        there is no image or no text. The third element is always None.
    """
    if image is None:
        return "No image provided.", None, None

    extracted_text = extract_text(image)
    if not extracted_text:
        return "No text detected in the image.", None, None

    attributes = extract_attributes(extracted_text)

    # Ensure all attributes are present, even if empty, in the desired order
    ordered_attributes = {attr: attributes.get(attr, "") for attr in ATTRIBUTE_ORDER}

    # Convert attributes to DataFrame for display
    df = pd.DataFrame(list(ordered_attributes.items()), columns=["Attribute", "Value"])
    return f"Extracted Text:\n{extracted_text}", df, None
150
 
151
# Function to handle edited attributes and export to Salesforce
def export_to_salesforce(edited_df):
    """Send the edited attribute table to Salesforce.

    Args:
        edited_df: table with "Attribute" and "Value" columns, as built by
            process_image. None or an empty table is reported as an error
            without contacting Salesforce.

    Returns:
        The status string from interact_with_salesforce, or an error
        message if the table is missing or cannot be converted.
    """
    # Nothing to export: say so, instead of the cryptic
    # "'NoneType' object is not subscriptable" from the conversion below.
    if edited_df is None or len(edited_df) == 0:
        return "❌ Error exporting to Salesforce: no attributes to export."

    try:
        # Convert edited DataFrame back to dictionary
        edited_attributes = dict(zip(edited_df["Attribute"], edited_df["Value"]))

        # Export to Salesforce
        return interact_with_salesforce(edited_attributes)

    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"❌ Error exporting to Salesforce: {str(e)}"
163
 
164
+ # Gradio Interface
165
  def app():
166
  with gr.Blocks() as demo:
167
  with gr.Tab("πŸ“₯ OCR Processing"):
 
173
  ok_button = gr.Button("OK")
174
  result_output = gr.Text(label="πŸš€ Result")
175
 
176
+ # Define button actions
177
  extract_button.click(
178
  fn=process_image,
179
  inputs=[image_input],
 
188
  return demo
189
 
190
  if __name__ == "__main__":
191
+ app().launch(share=True)