Spaces:
Running
Running
Update api/views.py
Browse files- api/views.py +44 -19
api/views.py
CHANGED
@@ -11,13 +11,15 @@ import threading
|
|
11 |
import random
|
12 |
import google.generativeai as genai
|
13 |
import google.ai.generativelanguage as glm
|
|
|
|
|
14 |
import os
|
15 |
from .models import UseCases, DocumentTypes
|
16 |
|
17 |
|
18 |
-
|
19 |
# host_url = "http://16.170.244.54"
|
20 |
-
host_url = "https://
|
21 |
googleAPIKey = "AIzaSyBeo4NGA__U6Xxy-aBE6yFm19pgq8TY-TM"
|
22 |
genai.configure(api_key='AIzaSyALFCivW9GP25mbxL3W7Fv6u7m2ZHVlC8w')
|
23 |
|
@@ -25,9 +27,19 @@ genai.configure(api_key='AIzaSyALFCivW9GP25mbxL3W7Fv6u7m2ZHVlC8w')
|
|
25 |
def getAnswer(images):
|
26 |
url = "https://content-vision.googleapis.com/v1/images:annotate?alt=json&key="+googleAPIKey
|
27 |
payload = {"requests": []}
|
28 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
temp = {
|
30 |
-
"image": {"
|
31 |
"features": [
|
32 |
{
|
33 |
"type": "DOCUMENT_TEXT_DETECTION",
|
@@ -54,7 +66,7 @@ def getAnswer(images):
|
|
54 |
for i in range(len(response.json()["responses"])):
|
55 |
OCRString += "\n\n\n"+response.json()["responses"][i]["fullTextAnnotation"]["text"]
|
56 |
except Exception as e:
|
57 |
-
print(e)
|
58 |
return OCRString
|
59 |
|
60 |
|
@@ -65,27 +77,40 @@ def dataExtract(request, link):
|
|
65 |
pdf_file = request.FILES["pdf"]
|
66 |
randomUUID = str(uuid.uuid4())
|
67 |
fs = FileSystemStorage(location="static/pdf/")
|
68 |
-
filename = fs.save(f"{randomUUID}.pdf", pdf_file)
|
69 |
-
os.mkdir(f"./static/pages/{randomUUID}")
|
70 |
image_list = []
|
71 |
-
images =
|
72 |
-
|
73 |
dpi=150,
|
74 |
-
output_folder=f"./static/pages/{randomUUID}",
|
75 |
fmt="png",
|
76 |
output_file=f"image",
|
77 |
thread_count=5,
|
78 |
poppler_path="./poppler-23.05.0/Library/bin/"
|
79 |
)
|
80 |
-
for
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
fields = documentData.fields
|
90 |
properties = {}
|
91 |
for field in fields:
|
|
|
11 |
import random
|
12 |
import google.generativeai as genai
|
13 |
import google.ai.generativelanguage as glm
|
14 |
+
import io
|
15 |
+
import base64
|
16 |
import os
|
17 |
from .models import UseCases, DocumentTypes
|
18 |
|
19 |
|
20 |
+
os.environ["_BARD_API_KEY"] = "WwhMDr8qCuXIPs1pwR-lOgPsB51q86WuevPCH5VpcCcxVEEQqywHYShAfkE19lCRXUOQaQ."
|
21 |
# host_url = "http://16.170.244.54"
|
22 |
+
host_url = "https://qbh39rzw-8000.euw.devtunnels.ms"
|
23 |
googleAPIKey = "AIzaSyBeo4NGA__U6Xxy-aBE6yFm19pgq8TY-TM"
|
24 |
genai.configure(api_key='AIzaSyALFCivW9GP25mbxL3W7Fv6u7m2ZHVlC8w')
|
25 |
|
|
|
27 |
def getAnswer(images):
|
28 |
url = "https://content-vision.googleapis.com/v1/images:annotate?alt=json&key="+googleAPIKey
|
29 |
payload = {"requests": []}
|
30 |
+
for img in images:
|
31 |
+
# temp = {
|
32 |
+
# "image": {"source": {"imageUri": i}},
|
33 |
+
# "features": [
|
34 |
+
# {
|
35 |
+
# "type": "DOCUMENT_TEXT_DETECTION",
|
36 |
+
# "maxResults": 50,
|
37 |
+
# "model": "builtin/latest",
|
38 |
+
# }
|
39 |
+
# ],
|
40 |
+
# }
|
41 |
temp = {
|
42 |
+
"image": {"content": img},
|
43 |
"features": [
|
44 |
{
|
45 |
"type": "DOCUMENT_TEXT_DETECTION",
|
|
|
66 |
for i in range(len(response.json()["responses"])):
|
67 |
OCRString += "\n\n\n"+response.json()["responses"][i]["fullTextAnnotation"]["text"]
|
68 |
except Exception as e:
|
69 |
+
print(e, response.text)
|
70 |
return OCRString
|
71 |
|
72 |
|
|
|
77 |
pdf_file = request.FILES["pdf"]
|
78 |
randomUUID = str(uuid.uuid4())
|
79 |
fs = FileSystemStorage(location="static/pdf/")
|
80 |
+
# filename = fs.save(f"{randomUUID}.pdf", pdf_file)
|
81 |
+
# os.mkdir(f"./static/pages/{randomUUID}")
|
82 |
image_list = []
|
83 |
+
images = convert_from_bytes(
|
84 |
+
pdf_file.read(),
|
85 |
dpi=150,
|
|
|
86 |
fmt="png",
|
87 |
output_file=f"image",
|
88 |
thread_count=5,
|
89 |
poppler_path="./poppler-23.05.0/Library/bin/"
|
90 |
)
|
91 |
+
for img in images:
|
92 |
+
buffer = io.BytesIO()
|
93 |
+
img.save(buffer, format='PNG')
|
94 |
+
img_bytes = buffer.getvalue()
|
95 |
+
# Encode the bytes to base64
|
96 |
+
img_base64 = base64.b64encode(img_bytes).decode()
|
97 |
+
image_list.append(img_base64)
|
98 |
+
# images = convert_from_path(
|
99 |
+
# f"./static/pdf/{randomUUID}.pdf",
|
100 |
+
# dpi=150,
|
101 |
+
# output_folder=f"./static/pages/{randomUUID}",
|
102 |
+
# fmt="png",
|
103 |
+
# output_file=f"image",
|
104 |
+
# thread_count=5,
|
105 |
+
# poppler_path="./poppler-23.05.0/Library/bin/"
|
106 |
+
# )
|
107 |
+
# for filename in os.listdir(f"./static/pages/{randomUUID}"):
|
108 |
+
# image_list.append(f"/static/pages/{randomUUID}/{filename}")
|
109 |
+
# image_Array = []
|
110 |
+
# for i in range(len(image_list)):
|
111 |
+
# image_Array.append(host_url + image_list[i])
|
112 |
+
|
113 |
+
OCRString = getAnswer(image_list)
|
114 |
fields = documentData.fields
|
115 |
properties = {}
|
116 |
for field in fields:
|