Spaces:
Running
Running
Update api/views.py
Browse files- api/views.py +215 -215
api/views.py
CHANGED
@@ -1,215 +1,215 @@
|
|
1 |
-
from django.shortcuts import render, redirect
|
2 |
-
from django.http import HttpResponse, JsonResponse, StreamingHttpResponse
|
3 |
-
import requests
|
4 |
-
import uuid
|
5 |
-
import json
|
6 |
-
import os
|
7 |
-
from pdf2image import convert_from_path, convert_from_bytes
|
8 |
-
from django.views.decorators.csrf import csrf_exempt
|
9 |
-
from django.core.files.storage import FileSystemStorage
|
10 |
-
import threading
|
11 |
-
import random
|
12 |
-
import google.generativeai as genai
|
13 |
-
import google.ai.generativelanguage as glm
|
14 |
-
import os
|
15 |
-
from .models import UseCases, DocumentTypes
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
# host_url = "http://16.170.244.54"
|
20 |
-
host_url = "https://
|
21 |
-
# SECURITY: the Vision and Gemini API keys used to be hard-coded on these
# lines. Anything committed to the repository must be treated as leaked, so
# those keys need to be revoked/rotated, and the replacements are supplied
# through the process environment instead of the source tree.
# GOOGLE_VISION_API_KEY: key appended to the Vision `images:annotate` URL.
googleAPIKey = os.environ.get("GOOGLE_VISION_API_KEY", "")
# GEMINI_API_KEY: key handed to the google.generativeai client.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
|
23 |
-
|
24 |
-
|
25 |
-
def getAnswer(images):
    """Run Google Cloud Vision document OCR on image URLs and return the text.

    Args:
        images: Iterable of image URLs. Each URL is sent as an ``imageUri``
            source, so it has to be fetchable by the Vision service.

    Returns:
        str: The ``fullTextAnnotation`` text of every image that has one, in
        response order, each chunk prefixed with three newlines. Returns an
        empty string when ``images`` is empty or when the response body
        cannot be interpreted.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    images = list(images)
    if not images:
        # Nothing to annotate, so skip the network round trip.
        return ""

    url = "https://content-vision.googleapis.com/v1/images:annotate?alt=json&key="+googleAPIKey
    payload = {
        "requests": [
            {
                "image": {"source": {"imageUri": image_url}},
                "features": [
                    {
                        "type": "DOCUMENT_TEXT_DETECTION",
                        "maxResults": 50,
                        "model": "builtin/latest",
                    }
                ],
            }
            for image_url in images
        ]
    }

    headers = {
        "authority": "content-vision.googleapis.com",
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9,gu;q=0.8",
        "content-type": "application/json",
        "origin": "https://content-vision.googleapis.com",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
        "x-origin": "https://explorer.apis.google.com",
        "x-requested-with": "XMLHttpRequest",
    }
    # Bound the wait so a stalled upstream cannot hang the worker forever.
    response = requests.request("POST", url, headers=headers, data=json.dumps(payload), timeout=120)

    try:
        # Parse the body once instead of re-parsing it on every loop step.
        results = response.json()["responses"]
    except (ValueError, KeyError, TypeError) as e:
        # Non-JSON body or an error payload without "responses".
        print(e)
        return ""

    chunks = []
    for result in results:
        try:
            chunks.append("\n\n\n" + result["fullTextAnnotation"]["text"])
        except (KeyError, TypeError) as e:
            # An image without recognised text (or with a per-image error)
            # must not discard the text of the images that follow it.
            print(e)
    return "".join(chunks)
|
59 |
-
|
60 |
-
|
61 |
-
def _buildEntityTool(fields):
    """Return the Gemini function-calling tool with one STRING property per field."""
    return {
        'function_declarations': [
            {
                'name': 'entityTool',
                'description': 'List of entities and value extracted from the text.',
                'parameters': {
                    'type_': 'OBJECT',
                    'properties': {field: {'type_': 'STRING'} for field in fields},
                    'required': []
                }
            }
        ]
    }


def _renderPdfPages(pdf_file):
    """Store the uploaded PDF, rasterise it to PNG files and return their static paths."""
    randomUUID = str(uuid.uuid4())
    FileSystemStorage(location="static/pdf/").save(f"{randomUUID}.pdf", pdf_file)
    pages_dir = f"./static/pages/{randomUUID}"
    # makedirs also creates ./static/pages itself on a fresh checkout.
    os.makedirs(pages_dir, exist_ok=True)
    # Called for its side effect of writing the page images to pages_dir.
    convert_from_path(
        f"./static/pdf/{randomUUID}.pdf",
        dpi=150,
        output_folder=pages_dir,
        fmt="png",
        output_file="image",
        thread_count=5,
        poppler_path="./poppler-23.05.0/Library/bin/"
    )
    # os.listdir() returns entries in arbitrary order; sort so the result is
    # deterministic. NOTE(review): this presumes the generated file names
    # sort in page order (zero-padded counters) - confirm with pdf2image.
    return [
        f"/static/pages/{randomUUID}/{page_name}"
        for page_name in sorted(os.listdir(pages_dir))
    ]


@csrf_exempt
def dataExtract(request, link):
    """Extract the configured fields of a document type from an uploaded PDF.

    The PDF from ``request.FILES["pdf"]`` is rendered to page images, the
    images are OCR'd through ``getAnswer`` and the OCR text is sent to Gemini
    with a function-calling tool listing the document type's fields.

    Args:
        request: Django request; only POST is processed.
        link: ``url`` value identifying the ``DocumentTypes`` row.

    Returns:
        HttpResponse: JSON ``{"images": [...], "data": {...}}`` on success,
        a JSON error with status 404 (unknown ``link``) or 400 (no ``pdf``
        upload), or the plain text ``Error`` for non-POST requests.
    """
    if request.method != "POST":
        return HttpResponse("Error")

    documentData = DocumentTypes.objects.filter(url=link).first()
    if documentData is None:
        # Fail before doing any file or API work for an unknown document type.
        return HttpResponse(
            json.dumps({"error": "Unknown document type"}),
            content_type="application/json",
            status=404,
        )

    pdf_file = request.FILES.get("pdf")
    if pdf_file is None:
        return HttpResponse(
            json.dumps({"error": "Missing 'pdf' file upload"}),
            content_type="application/json",
            status=400,
        )

    image_list = _renderPdfPages(pdf_file)
    # host_url may end with "/" while the paths start with "/"; avoid "//".
    base_url = host_url.rstrip("/")
    OCRString = getAnswer([base_url + page_path for page_path in image_list])
    # Dump of the most recent OCR text; overwritten on every request.
    with open("./OCR.txt", "w", encoding="utf-8") as f:
        f.write(OCRString)

    fields = documentData.fields
    safety_settings = [
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_NONE"
        },
        {
            "category": "HARM_CATEGORY_HATE_SPEECH",
            "threshold": "BLOCK_NONE"
        },
        {
            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "threshold": "BLOCK_NONE"
        },
        {
            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
            "threshold": "BLOCK_NONE"
        },
    ]

    model = genai.GenerativeModel(model_name='gemini-1.5-pro-latest', tools=_buildEntityTool(fields), safety_settings=safety_settings)
    chat = model.start_chat()
    response = chat.send_message('PDF Data : \n\n'+OCRString)

    data = {}
    try:
        fc = response.candidates[0].content.parts[0].function_call
    except (IndexError, AttributeError):
        # No candidate/part came back; answer with an empty extraction.
        fc = None
    if fc is not None and fc.name == "entityTool":
        for field in fields:
            try:
                data[field] = fc.args[field]
            except (KeyError, TypeError):
                # The model did not supply this field; leave it out.
                continue

    print(data)
    return HttpResponse(
        json.dumps({"images": image_list, "data": data}), content_type="application/json"
    )
|
143 |
-
|
144 |
-
|
145 |
-
@csrf_exempt
def imageToText(request):
    """OCR a list of already-hosted page images and return the text as JSON.

    The POST body must be JSON of the form ``{"images": [path, ...]}`` where
    each path is appended to ``host_url``. Images are sent to ``getAnswer``
    in batches of 10.

    Returns:
        HttpResponse: JSON ``{"text": [...], "box": [...]}`` with one text
        entry per batch of up to 10 images. ``box`` is always empty because
        ``getAnswer`` returns only a text string (the previous code tried to
        unpack that string into a text/box pair, which cannot work). Status
        400 with a JSON error for a malformed body; plain ``Error`` for
        non-POST requests.
    """
    if request.method != "POST":
        return HttpResponse("Error")

    # host_url may end with "/" while the paths start with "/"; avoid "//".
    base_url = host_url.rstrip("/")
    try:
        imageArr = [base_url + image_path for image_path in json.loads(request.body)["images"]]
    except (ValueError, KeyError, TypeError):
        return HttpResponse(
            json.dumps({"error": "Expected JSON body {\"images\": [\"/path\", ...]}"}),
            content_type="application/json",
            status=400,
        )

    text = []
    box = []
    for start in range(0, len(imageArr), 10):
        text.append(getAnswer(imageArr[start: start + 10]))
    return HttpResponse(
        json.dumps({"text": text, "box": box}), content_type="application/json"
    )
|
165 |
-
|
166 |
-
|
167 |
-
def documentAIData(request):
    """Return every use-case heading and every document type as JSON.

    Returns:
        HttpResponse: JSON ``{"usecases": [heading, ...], "docTypes": [...]}``
        where each document type carries ``img``, ``name``, ``url`` and the
        headings of its own use cases.
    """
    # Prefetch the related use cases so they are loaded in one extra query
    # instead of one query per document type. Assumes `usecases` is a model
    # relation on DocumentTypes (it is used as a related manager below).
    documentTypes = DocumentTypes.objects.prefetch_related("usecases")
    documentTypesArr = [
        {
            "img": doc.img,
            "name": doc.name,
            "url": doc.url,
            "usecases": [usecase.heading for usecase in doc.usecases.all()],
        }
        for doc in documentTypes
    ]
    usecasesArr = [usecase.heading for usecase in UseCases.objects.all()]
    return HttpResponse(json.dumps({"usecases": usecasesArr, "docTypes": documentTypesArr}), content_type="application/json")
|
184 |
-
|
185 |
-
|
186 |
-
def docPages(request, link):
    """Return the page data of one document type as JSON.

    Args:
        request: Django request (not inspected).
        link: ``url`` value identifying the ``DocumentTypes`` row.

    Returns:
        HttpResponse: JSON with ``title``, ``name``, ``subtitle``, ``img``,
        ``usecases`` (heading/paragraph pairs), ``fields`` and ``url``; or a
        JSON error with status 404 when no document type has that ``url``.
    """
    documentData = DocumentTypes.objects.filter(url=link).first()
    if documentData is None:
        # .first() yields None for an unknown link; answer 404 instead of
        # crashing on the attribute access below.
        return HttpResponse(
            json.dumps({"error": "Unknown document type"}),
            content_type="application/json",
            status=404,
        )

    data = {
        "title": documentData.title,
        "name": documentData.name,
        "subtitle": documentData.subtitle,
        "img": documentData.img,
        "usecases": [
            {"heading": usecase.heading, "paragraph": usecase.paragraph}
            for usecase in documentData.usecases.all()
        ],
        "fields": documentData.fields,
        "url": documentData.url,
    }

    return HttpResponse(json.dumps(data), content_type="application/json")
|
202 |
-
|
203 |
-
|
204 |
-
def edditor(request):
    """Copy the ``link`` of each JSON product onto the same-named document type.

    Reads ``./api/nanonetProducts2.json`` (a list of objects with ``name``
    and ``link``), and for every ``DocumentTypes`` row whose ``name`` appears
    there sets ``url`` to that entry's ``link`` and saves the row.

    Returns:
        HttpResponse: The plain text ``Hello World``.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open("./api/nanonetProducts2.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    # One pass to index links by name (first entry wins, as the earlier
    # nested scan stopped at the first match) instead of rescanning the
    # whole list for every document.
    links_by_name = {}
    for entry in data:
        if "name" in entry and "link" in entry:
            links_by_name.setdefault(entry["name"], entry["link"])

    for doc in DocumentTypes.objects.all():
        if doc.name in links_by_name:
            doc.url = links_by_name[doc.name]
            doc.save()
            print(doc.name, "Updated", doc.url)
    return HttpResponse("Hello World")
|
|
|
1 |
+
from django.shortcuts import render, redirect
|
2 |
+
from django.http import HttpResponse, JsonResponse, StreamingHttpResponse
|
3 |
+
import requests
|
4 |
+
import uuid
|
5 |
+
import json
|
6 |
+
import os
|
7 |
+
from pdf2image import convert_from_path, convert_from_bytes
|
8 |
+
from django.views.decorators.csrf import csrf_exempt
|
9 |
+
from django.core.files.storage import FileSystemStorage
|
10 |
+
import threading
|
11 |
+
import random
|
12 |
+
import google.generativeai as genai
|
13 |
+
import google.ai.generativelanguage as glm
|
14 |
+
import os
|
15 |
+
from .models import UseCases, DocumentTypes
|
16 |
+
|
17 |
+
|
18 |
+
|
19 |
+
# host_url = "http://16.170.244.54"
# Base URL that is prefixed to the /static/... page-image paths before they
# are handed to the Vision API as image URIs.
host_url = "https://thejagstudio-absoluteai.hf.space/"
# SECURITY: the Vision and Gemini API keys used to be hard-coded on these
# lines. Anything committed to the repository must be treated as leaked, so
# those keys need to be revoked/rotated, and the replacements are supplied
# through the process environment instead of the source tree.
# GOOGLE_VISION_API_KEY: key appended to the Vision `images:annotate` URL.
googleAPIKey = os.environ.get("GOOGLE_VISION_API_KEY", "")
# GEMINI_API_KEY: key handed to the google.generativeai client.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
|
23 |
+
|
24 |
+
|
25 |
+
def getAnswer(images):
    """Run Google Cloud Vision document OCR on image URLs and return the text.

    Args:
        images: Iterable of image URLs. Each URL is sent as an ``imageUri``
            source, so it has to be fetchable by the Vision service.

    Returns:
        str: The ``fullTextAnnotation`` text of every image that has one, in
        response order, each chunk prefixed with three newlines. Returns an
        empty string when ``images`` is empty or when the response body
        cannot be interpreted.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    images = list(images)
    if not images:
        # Nothing to annotate, so skip the network round trip.
        return ""

    url = "https://content-vision.googleapis.com/v1/images:annotate?alt=json&key="+googleAPIKey
    payload = {
        "requests": [
            {
                "image": {"source": {"imageUri": image_url}},
                "features": [
                    {
                        "type": "DOCUMENT_TEXT_DETECTION",
                        "maxResults": 50,
                        "model": "builtin/latest",
                    }
                ],
            }
            for image_url in images
        ]
    }

    headers = {
        "authority": "content-vision.googleapis.com",
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9,gu;q=0.8",
        "content-type": "application/json",
        "origin": "https://content-vision.googleapis.com",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
        "x-origin": "https://explorer.apis.google.com",
        "x-requested-with": "XMLHttpRequest",
    }
    # Bound the wait so a stalled upstream cannot hang the worker forever.
    response = requests.request("POST", url, headers=headers, data=json.dumps(payload), timeout=120)

    try:
        # Parse the body once instead of re-parsing it on every loop step.
        results = response.json()["responses"]
    except (ValueError, KeyError, TypeError) as e:
        # Non-JSON body or an error payload without "responses".
        print(e)
        return ""

    chunks = []
    for result in results:
        try:
            chunks.append("\n\n\n" + result["fullTextAnnotation"]["text"])
        except (KeyError, TypeError) as e:
            # An image without recognised text (or with a per-image error)
            # must not discard the text of the images that follow it.
            print(e)
    return "".join(chunks)
|
59 |
+
|
60 |
+
|
61 |
+
def _buildEntityTool(fields):
    """Return the Gemini function-calling tool with one STRING property per field."""
    return {
        'function_declarations': [
            {
                'name': 'entityTool',
                'description': 'List of entities and value extracted from the text.',
                'parameters': {
                    'type_': 'OBJECT',
                    'properties': {field: {'type_': 'STRING'} for field in fields},
                    'required': []
                }
            }
        ]
    }


def _renderPdfPages(pdf_file):
    """Store the uploaded PDF, rasterise it to PNG files and return their static paths."""
    randomUUID = str(uuid.uuid4())
    FileSystemStorage(location="static/pdf/").save(f"{randomUUID}.pdf", pdf_file)
    pages_dir = f"./static/pages/{randomUUID}"
    # makedirs also creates ./static/pages itself on a fresh checkout.
    os.makedirs(pages_dir, exist_ok=True)
    # Called for its side effect of writing the page images to pages_dir.
    convert_from_path(
        f"./static/pdf/{randomUUID}.pdf",
        dpi=150,
        output_folder=pages_dir,
        fmt="png",
        output_file="image",
        thread_count=5,
        poppler_path="./poppler-23.05.0/Library/bin/"
    )
    # os.listdir() returns entries in arbitrary order; sort so the result is
    # deterministic. NOTE(review): this presumes the generated file names
    # sort in page order (zero-padded counters) - confirm with pdf2image.
    return [
        f"/static/pages/{randomUUID}/{page_name}"
        for page_name in sorted(os.listdir(pages_dir))
    ]


@csrf_exempt
def dataExtract(request, link):
    """Extract the configured fields of a document type from an uploaded PDF.

    The PDF from ``request.FILES["pdf"]`` is rendered to page images, the
    images are OCR'd through ``getAnswer`` and the OCR text is sent to Gemini
    with a function-calling tool listing the document type's fields.

    Args:
        request: Django request; only POST is processed.
        link: ``url`` value identifying the ``DocumentTypes`` row.

    Returns:
        HttpResponse: JSON ``{"images": [...], "data": {...}}`` on success,
        a JSON error with status 404 (unknown ``link``) or 400 (no ``pdf``
        upload), or the plain text ``Error`` for non-POST requests.
    """
    if request.method != "POST":
        return HttpResponse("Error")

    documentData = DocumentTypes.objects.filter(url=link).first()
    if documentData is None:
        # Fail before doing any file or API work for an unknown document type.
        return HttpResponse(
            json.dumps({"error": "Unknown document type"}),
            content_type="application/json",
            status=404,
        )

    pdf_file = request.FILES.get("pdf")
    if pdf_file is None:
        return HttpResponse(
            json.dumps({"error": "Missing 'pdf' file upload"}),
            content_type="application/json",
            status=400,
        )

    image_list = _renderPdfPages(pdf_file)
    # host_url may end with "/" while the paths start with "/"; avoid "//".
    base_url = host_url.rstrip("/")
    OCRString = getAnswer([base_url + page_path for page_path in image_list])
    # Dump of the most recent OCR text; overwritten on every request.
    with open("./OCR.txt", "w", encoding="utf-8") as f:
        f.write(OCRString)

    fields = documentData.fields
    safety_settings = [
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_NONE"
        },
        {
            "category": "HARM_CATEGORY_HATE_SPEECH",
            "threshold": "BLOCK_NONE"
        },
        {
            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "threshold": "BLOCK_NONE"
        },
        {
            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
            "threshold": "BLOCK_NONE"
        },
    ]

    model = genai.GenerativeModel(model_name='gemini-1.5-pro-latest', tools=_buildEntityTool(fields), safety_settings=safety_settings)
    chat = model.start_chat()
    response = chat.send_message('PDF Data : \n\n'+OCRString)

    data = {}
    try:
        fc = response.candidates[0].content.parts[0].function_call
    except (IndexError, AttributeError):
        # No candidate/part came back; answer with an empty extraction.
        fc = None
    if fc is not None and fc.name == "entityTool":
        for field in fields:
            try:
                data[field] = fc.args[field]
            except (KeyError, TypeError):
                # The model did not supply this field; leave it out.
                continue

    print(data)
    return HttpResponse(
        json.dumps({"images": image_list, "data": data}), content_type="application/json"
    )
|
143 |
+
|
144 |
+
|
145 |
+
@csrf_exempt
def imageToText(request):
    """OCR a list of already-hosted page images and return the text as JSON.

    The POST body must be JSON of the form ``{"images": [path, ...]}`` where
    each path is appended to ``host_url``. Images are sent to ``getAnswer``
    in batches of 10.

    Returns:
        HttpResponse: JSON ``{"text": [...], "box": [...]}`` with one text
        entry per batch of up to 10 images. ``box`` is always empty because
        ``getAnswer`` returns only a text string (the previous code tried to
        unpack that string into a text/box pair, which cannot work). Status
        400 with a JSON error for a malformed body; plain ``Error`` for
        non-POST requests.
    """
    if request.method != "POST":
        return HttpResponse("Error")

    # host_url may end with "/" while the paths start with "/"; avoid "//".
    base_url = host_url.rstrip("/")
    try:
        imageArr = [base_url + image_path for image_path in json.loads(request.body)["images"]]
    except (ValueError, KeyError, TypeError):
        return HttpResponse(
            json.dumps({"error": "Expected JSON body {\"images\": [\"/path\", ...]}"}),
            content_type="application/json",
            status=400,
        )

    text = []
    box = []
    for start in range(0, len(imageArr), 10):
        text.append(getAnswer(imageArr[start: start + 10]))
    return HttpResponse(
        json.dumps({"text": text, "box": box}), content_type="application/json"
    )
|
165 |
+
|
166 |
+
|
167 |
+
def documentAIData(request):
    """Return every use-case heading and every document type as JSON.

    Returns:
        HttpResponse: JSON ``{"usecases": [heading, ...], "docTypes": [...]}``
        where each document type carries ``img``, ``name``, ``url`` and the
        headings of its own use cases.
    """
    # Prefetch the related use cases so they are loaded in one extra query
    # instead of one query per document type. Assumes `usecases` is a model
    # relation on DocumentTypes (it is used as a related manager below).
    documentTypes = DocumentTypes.objects.prefetch_related("usecases")
    documentTypesArr = [
        {
            "img": doc.img,
            "name": doc.name,
            "url": doc.url,
            "usecases": [usecase.heading for usecase in doc.usecases.all()],
        }
        for doc in documentTypes
    ]
    usecasesArr = [usecase.heading for usecase in UseCases.objects.all()]
    return HttpResponse(json.dumps({"usecases": usecasesArr, "docTypes": documentTypesArr}), content_type="application/json")
|
184 |
+
|
185 |
+
|
186 |
+
def docPages(request, link):
    """Return the page data of one document type as JSON.

    Args:
        request: Django request (not inspected).
        link: ``url`` value identifying the ``DocumentTypes`` row.

    Returns:
        HttpResponse: JSON with ``title``, ``name``, ``subtitle``, ``img``,
        ``usecases`` (heading/paragraph pairs), ``fields`` and ``url``; or a
        JSON error with status 404 when no document type has that ``url``.
    """
    documentData = DocumentTypes.objects.filter(url=link).first()
    if documentData is None:
        # .first() yields None for an unknown link; answer 404 instead of
        # crashing on the attribute access below.
        return HttpResponse(
            json.dumps({"error": "Unknown document type"}),
            content_type="application/json",
            status=404,
        )

    data = {
        "title": documentData.title,
        "name": documentData.name,
        "subtitle": documentData.subtitle,
        "img": documentData.img,
        "usecases": [
            {"heading": usecase.heading, "paragraph": usecase.paragraph}
            for usecase in documentData.usecases.all()
        ],
        "fields": documentData.fields,
        "url": documentData.url,
    }

    return HttpResponse(json.dumps(data), content_type="application/json")
|
202 |
+
|
203 |
+
|
204 |
+
def edditor(request):
    """Copy the ``link`` of each JSON product onto the same-named document type.

    Reads ``./api/nanonetProducts2.json`` (a list of objects with ``name``
    and ``link``), and for every ``DocumentTypes`` row whose ``name`` appears
    there sets ``url`` to that entry's ``link`` and saves the row.

    Returns:
        HttpResponse: The plain text ``Hello World``.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open("./api/nanonetProducts2.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    # One pass to index links by name (first entry wins, as the earlier
    # nested scan stopped at the first match) instead of rescanning the
    # whole list for every document.
    links_by_name = {}
    for entry in data:
        if "name" in entry and "link" in entry:
            links_by_name.setdefault(entry["name"], entry["link"])

    for doc in DocumentTypes.objects.all():
        if doc.name in links_by_name:
            doc.url = links_by_name[doc.name]
            doc.save()
            print(doc.name, "Updated", doc.url)
    return HttpResponse("Hello World")
|