thejagstudio committed on
Commit
f757564
·
verified ·
1 Parent(s): 161f2e6

Update api/views.py

Browse files
Files changed (1) hide show
  1. api/views.py +44 -19
api/views.py CHANGED
@@ -11,13 +11,15 @@ import threading
11
  import random
12
  import google.generativeai as genai
13
  import google.ai.generativelanguage as glm
 
 
14
  import os
15
  from .models import UseCases, DocumentTypes
16
 
17
 
18
-
19
  # host_url = "http://16.170.244.54"
20
- host_url = "https://thejagstudio-absoluteai.hf.space/"
21
  googleAPIKey = "AIzaSyBeo4NGA__U6Xxy-aBE6yFm19pgq8TY-TM"
22
  genai.configure(api_key='AIzaSyALFCivW9GP25mbxL3W7Fv6u7m2ZHVlC8w')
23
 
@@ -25,9 +27,19 @@ genai.configure(api_key='AIzaSyALFCivW9GP25mbxL3W7Fv6u7m2ZHVlC8w')
25
  def getAnswer(images):
26
  url = "https://content-vision.googleapis.com/v1/images:annotate?alt=json&key="+googleAPIKey
27
  payload = {"requests": []}
28
- for i in images:
 
 
 
 
 
 
 
 
 
 
29
  temp = {
30
- "image": {"source": {"imageUri": i}},
31
  "features": [
32
  {
33
  "type": "DOCUMENT_TEXT_DETECTION",
@@ -54,7 +66,7 @@ def getAnswer(images):
54
  for i in range(len(response.json()["responses"])):
55
  OCRString += "\n\n\n"+response.json()["responses"][i]["fullTextAnnotation"]["text"]
56
  except Exception as e:
57
- print(e)
58
  return OCRString
59
 
60
 
@@ -65,27 +77,40 @@ def dataExtract(request, link):
65
  pdf_file = request.FILES["pdf"]
66
  randomUUID = str(uuid.uuid4())
67
  fs = FileSystemStorage(location="static/pdf/")
68
- filename = fs.save(f"{randomUUID}.pdf", pdf_file)
69
- os.mkdir(f"./static/pages/{randomUUID}")
70
  image_list = []
71
- images = convert_from_path(
72
- f"./static/pdf/{randomUUID}.pdf",
73
  dpi=150,
74
- output_folder=f"./static/pages/{randomUUID}",
75
  fmt="png",
76
  output_file=f"image",
77
  thread_count=5,
78
  poppler_path="./poppler-23.05.0/Library/bin/"
79
  )
80
- for filename in os.listdir(f"./static/pages/{randomUUID}"):
81
- image_list.append(f"/static/pages/{randomUUID}/{filename}")
82
- image_Array = []
83
- for i in range(len(image_list)):
84
- image_Array.append(host_url + image_list[i])
85
-
86
- OCRString = getAnswer(image_Array)
87
- with open(f"./OCR.txt", "w", encoding="utf-8") as f:
88
- f.write(OCRString)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  fields = documentData.fields
90
  properties = {}
91
  for field in fields:
 
11
  import random
12
  import google.generativeai as genai
13
  import google.ai.generativelanguage as glm
14
+ import io
15
+ import base64
16
  import os
17
  from .models import UseCases, DocumentTypes
18
 
19
 
20
+ os.environ["_BARD_API_KEY"] = "WwhMDr8qCuXIPs1pwR-lOgPsB51q86WuevPCH5VpcCcxVEEQqywHYShAfkE19lCRXUOQaQ."
21
  # host_url = "http://16.170.244.54"
22
+ host_url = "https://qbh39rzw-8000.euw.devtunnels.ms"
23
  googleAPIKey = "AIzaSyBeo4NGA__U6Xxy-aBE6yFm19pgq8TY-TM"
24
  genai.configure(api_key='AIzaSyALFCivW9GP25mbxL3W7Fv6u7m2ZHVlC8w')
25
 
 
27
  def getAnswer(images):
28
  url = "https://content-vision.googleapis.com/v1/images:annotate?alt=json&key="+googleAPIKey
29
  payload = {"requests": []}
30
+ for img in images:
31
+ # temp = {
32
+ # "image": {"source": {"imageUri": i}},
33
+ # "features": [
34
+ # {
35
+ # "type": "DOCUMENT_TEXT_DETECTION",
36
+ # "maxResults": 50,
37
+ # "model": "builtin/latest",
38
+ # }
39
+ # ],
40
+ # }
41
  temp = {
42
+ "image": {"content": img},
43
  "features": [
44
  {
45
  "type": "DOCUMENT_TEXT_DETECTION",
 
66
  for i in range(len(response.json()["responses"])):
67
  OCRString += "\n\n\n"+response.json()["responses"][i]["fullTextAnnotation"]["text"]
68
  except Exception as e:
69
+ print(e, response.text)
70
  return OCRString
71
 
72
 
 
77
  pdf_file = request.FILES["pdf"]
78
  randomUUID = str(uuid.uuid4())
79
  fs = FileSystemStorage(location="static/pdf/")
80
+ # filename = fs.save(f"{randomUUID}.pdf", pdf_file)
81
+ # os.mkdir(f"./static/pages/{randomUUID}")
82
  image_list = []
83
+ images = convert_from_bytes(
84
+ pdf_file.read(),
85
  dpi=150,
 
86
  fmt="png",
87
  output_file=f"image",
88
  thread_count=5,
89
  poppler_path="./poppler-23.05.0/Library/bin/"
90
  )
91
+ for img in images:
92
+ buffer = io.BytesIO()
93
+ img.save(buffer, format='PNG')
94
+ img_bytes = buffer.getvalue()
95
+ # Encode the bytes to base64
96
+ img_base64 = base64.b64encode(img_bytes).decode()
97
+ image_list.append(img_base64)
98
+ # images = convert_from_path(
99
+ # f"./static/pdf/{randomUUID}.pdf",
100
+ # dpi=150,
101
+ # output_folder=f"./static/pages/{randomUUID}",
102
+ # fmt="png",
103
+ # output_file=f"image",
104
+ # thread_count=5,
105
+ # poppler_path="./poppler-23.05.0/Library/bin/"
106
+ # )
107
+ # for filename in os.listdir(f"./static/pages/{randomUUID}"):
108
+ # image_list.append(f"/static/pages/{randomUUID}/{filename}")
109
+ # image_Array = []
110
+ # for i in range(len(image_list)):
111
+ # image_Array.append(host_url + image_list[i])
112
+
113
+ OCRString = getAnswer(image_list)
114
  fields = documentData.fields
115
  properties = {}
116
  for field in fields: