AuditEdge committed
Commit 3bb9361 · 1 Parent(s): 8538469

S3 configuration done

Files changed (5)
  1. .gitignore +1 -0
  2. app.py +98 -44
  3. requirements.txt +2 -1
  4. s3_setup.py +44 -0
  5. sample.py +8 -7
.gitignore CHANGED
@@ -19,3 +19,4 @@ test_images_folder
 uploads
 pause_space.py
 .DS_Store
+test_s3_client.py
app.py CHANGED
@@ -4,6 +4,7 @@ from typing import Dict
 import os
 import shutil
 import logging
+from s3_setup import s3_client
 
 import torch
 from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
@@ -159,57 +160,98 @@ def perform_inference(file_paths: Dict[str, str]):
         "cheque_file": cheque_model,
         "gst_file": gst_model,
     }
-
-    # Dictionary to store results for each document type
-    inference_results = {}
-
-    # Loop through the file paths and perform inference
-    for doc_type, file_path in file_paths.items():
-        if doc_type in model_dirs:
-            print(f"Processing {doc_type} using model at {model_dirs[doc_type]}")
-
-            # Prepare batch for inference
-            images_path = [file_path]
-            inference_batch = prepare_batch_for_inference(images_path)
-
-            # Prepare context for the specific document type
-            # context = {"model_dir": model_dirs[doc_type]}
-            # context = aadhar_model
-            if doc_type == "aadhar_file":
-                context = aadhar_model
-                processor = processor_aadhar
-                name = "aadhar"
-                attachemnt_num = 3
-
-            if doc_type == "pan_file":
-                context = pan_model
-                processor = processor_pan
-                name = "pan"
-                attachemnt_num = 2
-
-            if doc_type == "gst_file":
-                context = gst_model
-                processor = processor_gst
-                name = "gst"
-                attachemnt_num = 4
-
-            if doc_type == "cheque_file":
-                context = cheque_model
-                processor = processor_cheque
-                name = "cheque"
-                attachemnt_num = 8
-
-            # Perform inference (replace `handle` with your actual function)
-            result = handle(inference_batch, context, processor, name)
-
-            # Store the result
-            inference_results["attachment_{}".format(attachemnt_num)] = result
-        else:
-            print(f"Model directory not found for {doc_type}. Skipping.")
-
-    return inference_results
+    try:
+        # Dictionary to store results for each document type
+        inference_results = {}
+
+        # Loop through the file paths and perform inference
+        for doc_type, file_path in file_paths.items():
+            if doc_type in model_dirs:
+                print(f"Processing {doc_type} using model at {model_dirs[doc_type]}")
+
+                # Prepare batch for inference
+                processed_file_p = file_path.split("&&")[0]
+                unprocessed_file_path = file_path.split("&&")[1]
+
+                images_path = [processed_file_p]
+                inference_batch = prepare_batch_for_inference(images_path)
+
+                # Prepare context for the specific document type
+                # context = {"model_dir": model_dirs[doc_type]}
+                # Initialize the S3 client
+                client = s3_client()
+
+                local_file_path = unprocessed_file_path
+                bucket_name = "edgekycdocs"
+                file_name = unprocessed_file_path.split("/")[-1]
+
+                # context = aadhar_model
+                if doc_type == "aadhar_file":
+                    context = aadhar_model
+                    processor = processor_aadhar
+                    name = "aadhar"
+                    attachemnt_num = 3
+                    folder_name = "aadhardocs"
+
+                if doc_type == "pan_file":
+                    context = pan_model
+                    processor = processor_pan
+                    name = "pan"
+                    attachemnt_num = 2
+                    folder_name = "pandocs"
+
+                if doc_type == "gst_file":
+                    context = gst_model
+                    processor = processor_gst
+                    name = "gst"
+                    attachemnt_num = 4
+                    folder_name = "gstdocs"
+
+                if doc_type == "cheque_file":
+                    context = cheque_model
+                    processor = processor_cheque
+                    name = "cheque"
+                    attachemnt_num = 8
+                    folder_name = "bankchequedocs"
+
+                # Upload the original document to the S3 bucket
+                response = client.upload_file(local_file_path, bucket_name, folder_name, file_name)
+                print("The file has been uploaded to s3 bucket", response)
+
+                # Perform inference (replace `handle` with your actual function)
+                result = handle(inference_batch, context, processor, name)
+                result["attachment_url"] = response["url"]
+                result["detect"] = True
+
+                print("result required", result)
+
+                # Store the result
+                inference_results["attachment_{}".format(attachemnt_num)] = result
+            else:
+                print(f"Model directory not found for {doc_type}. Skipping.")
+
+        return inference_results
+    except:
+        return {
+            "status": "error",
+            "message": "Text extraction failed."
+        }
@@ -247,18 +289,30 @@ async def aadhar_ocr(
             f_path = value
             preprocessing = doc_processing(name, id_type, doc_type, f_path)
             response = preprocessing.process()
-            files[key] = response["output_p"]
+            files[key] = response["output_p"] + "&&" + f_path
+            # files["unprocessed_file_path"] = f_path
             print("response", response)
 
         # Perform inference
         result = perform_inference(files)
 
+        print("this is the result we got", result)
+        if "status" in list(result.keys()):
+            raise Exception("Custom error message")
+
         return {"status": "success", "result": result}
 
     except Exception as e:
         logging.error(f"Error processing files: {e}")
         # raise HTTPException(status_code=500, detail="Internal Server Error")
-        return {"status": 400}
+        return {
+            "status": 400,
+            "message": "Text extraction failed."
+        }
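The commit threads the original upload path through perform_inference by packing two paths into one string with a "&&" separator. A minimal sketch of that convention, outside the diff (both paths are placeholders, not files from this repo):

    # Packing: what aadhar_ocr now stores in files[key]
    processed_path = "uploads/aadhar_processed.jpg"  # response["output_p"] from doc_processing
    original_path = "uploads/aadhar_raw.jpg"         # f_path, the raw upload
    packed = processed_path + "&&" + original_path

    # Unpacking: what perform_inference does with each file_path
    processed_file_p = packed.split("&&")[0]       # image sent to prepare_batch_for_inference
    unprocessed_file_path = packed.split("&&")[1]  # original file uploaded to S3

This holds only while neither path contains "&&"; passing a tuple or a small dataclass through files[key] would avoid relying on a sentinel substring.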
requirements.txt CHANGED
@@ -8,4 +8,5 @@ pillow
 google-cloud-vision
 python-dotenv
 pymupdf
-pillow
+pillow
+boto3
s3_setup.py ADDED
@@ -0,0 +1,44 @@
+import boto3
+# AWS credentials (if not set in environment variables or AWS CLI config)
+from dotenv import load_dotenv
+import os
+import sys
+
+from utils import doc_processing
+
+# Load .env file
+load_dotenv()
+
+# Access variables
+AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
+AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
+print("AWS_ACCESS_KEY_ID", AWS_ACCESS_KEY_ID)
+print("AWS_SECRET_ACCESS_KEY", AWS_SECRET_ACCESS_KEY)
+
+# Initialize S3 client
+class s3_client:
+    def __init__(self):
+        self.aws_access_key_id = AWS_ACCESS_KEY_ID
+        self.aws_secret_access_key = AWS_SECRET_ACCESS_KEY
+
+    def initialize(self):
+        return boto3.client(
+            's3',
+            aws_access_key_id=self.aws_access_key_id,
+            aws_secret_access_key=self.aws_secret_access_key
+        )
+
+    def upload_file(self, local_file_path, bucket_name, folder_name, file_name):
+        try:
+            client = self.initialize()
+            client.upload_file(local_file_path, bucket_name, f"{folder_name}/{file_name}")
+            print(f"File uploaded successfully to {bucket_name}/{folder_name}/{file_name}")
+            url = f"https://edgekycdocs.s3.eu-north-1.amazonaws.com/{folder_name}/{file_name}"
+            print("file url", url)
+            return {"status": 200, "message": "file uploaded successfully", "url": url}
+        except Exception as e:
+            print("Error uploading file:", e)
+            return {"status": 400, "message": e}
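A quick usage sketch for the new helper, assuming valid AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY values in .env and access to the edgekycdocs bucket (the local path and file name are placeholders):

    from s3_setup import s3_client

    client = s3_client()
    # Copies uploads/pan_raw.jpg to s3://edgekycdocs/pandocs/pan_raw.jpg
    resp = client.upload_file("uploads/pan_raw.jpg", "edgekycdocs", "pandocs", "pan_raw.jpg")

    if resp["status"] == 200:
        print("stored at:", resp["url"])  # region-specific URL built by upload_file
    else:
        print("upload failed:", resp["message"])

Two things worth noting: the module prints both AWS keys at import time, which is debug output best removed before deployment, and the returned URL hardcodes the eu-north-1 bucket endpoint.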
sample.py CHANGED
@@ -2,17 +2,18 @@ import requests
 import sys
 
 # Define the API endpoint
-# url = "http://127.0.0.0:7860/api/home"
-test_url = "http://127.0.0.1:7860/"
+# url = "http://localhost:7680/"
+# # test_url = "http://localhost:7860/"
 
-response = requests.get(test_url)
+# response = requests.get(url)
 
-post_url = "http://127.0.0.1:7860/api/aadhar_ocr"
-print("Status Code:", response.status_code)
-print("Response Text:", response.text)
+# print("Status Code:", response.status_code)
+# print("Response Text:", response.text)
+
+# sys.exit()
 
-sys.exit()
+post_url = "http://localhost:7680/api/aadhar_ocr"
 
 # response = requests.get(url)
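With the GET smoke test commented out, sample.py now only defines the POST target (note the new URL uses port 7680 while the old test used 7860). A sketch of exercising the endpoint with requests, assuming the route accepts multipart fields named after the *_file keys seen in app.py; the field name and file path are assumptions, not from this commit:

    import requests

    post_url = "http://localhost:7680/api/aadhar_ocr"

    # Field name mirrors the doc_type keys perform_inference handles;
    # the real form schema of /api/aadhar_ocr may differ.
    with open("test_images_folder/aadhar.jpg", "rb") as f:
        response = requests.post(post_url, files={"aadhar_file": f})

    print("Status Code:", response.status_code)
    print("Response Text:", response.text)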