thejagstudio committed on
Commit
161f2e6
·
verified ·
1 Parent(s): 2e51161

Update api/views.py

Browse files
Files changed (1) hide show
  1. api/views.py +215 -215
api/views.py CHANGED
@@ -1,215 +1,215 @@
1
- from django.shortcuts import render, redirect
2
- from django.http import HttpResponse, JsonResponse, StreamingHttpResponse
3
- import requests
4
- import uuid
5
- import json
6
- import os
7
- from pdf2image import convert_from_path, convert_from_bytes
8
- from django.views.decorators.csrf import csrf_exempt
9
- from django.core.files.storage import FileSystemStorage
10
- import threading
11
- import random
12
- import google.generativeai as genai
13
- import google.ai.generativelanguage as glm
14
- import os
15
- from .models import UseCases, DocumentTypes
16
-
17
-
18
# Deployment configuration.
#
# Secrets are read from the process environment instead of being hardcoded,
# so that API keys never end up in version control.
# NOTE(review): the keys that used to be written here (including the value
# assigned to os.environ["_BARD_API_KEY"]) are part of the commit history and
# should be rotated. _BARD_API_KEY must now be provided by the environment.

# Base URL under which this app's /static/ files are publicly reachable.
# Image paths are joined as host_url + "/static/...", so any trailing slash
# is stripped to avoid producing "//static/..." URLs.
host_url = os.environ.get("HOST_URL", "https://qbh39rzw-8000.euw.devtunnels.ms").rstrip("/")
# API key appended to the Cloud Vision request URL built in getAnswer().
googleAPIKey = os.environ.get("GOOGLE_VISION_API_KEY", "")
# API key for the google.generativeai client used by dataExtract().
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
23
-
24
-
25
def getAnswer(images):
    """Run Cloud Vision document OCR on a batch of image URLs.

    Args:
        images: Iterable of image URLs. Each URL becomes one
            DOCUMENT_TEXT_DETECTION request in a single batch call.

    Returns:
        One string containing the text of every image that produced a
        ``fullTextAnnotation``, each entry prefixed with three newlines.
        Entries without text are skipped (and reported on stdout); a reply
        that cannot be parsed yields an empty string.

    Raises:
        requests.exceptions.RequestException: If the HTTP call itself fails
            or exceeds the timeout.
    """
    url = "https://content-vision.googleapis.com/v1/images:annotate?alt=json&key=" + googleAPIKey
    payload = {
        "requests": [
            {
                "image": {"source": {"imageUri": image_url}},
                "features": [
                    {
                        "type": "DOCUMENT_TEXT_DETECTION",
                        "maxResults": 50,
                        "model": "builtin/latest",
                    }
                ],
            }
            for image_url in images
        ]
    }
    headers = {
        "authority": "content-vision.googleapis.com",
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9,gu;q=0.8",
        "content-type": "application/json",
        "origin": "https://content-vision.googleapis.com",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
        "x-origin": "https://explorer.apis.google.com",
        "x-requested-with": "XMLHttpRequest",
    }
    # A timeout keeps a stalled upstream call from blocking the worker forever.
    response = requests.request(
        "POST", url, headers=headers, data=json.dumps(payload), timeout=60
    )

    try:
        # Decode the body once rather than once per page.
        annotations = response.json().get("responses", [])
    except (ValueError, AttributeError) as e:
        # Body was not JSON, or not a JSON object.
        print(e)
        return ""

    page_texts = []
    for annotation in annotations:
        text = (annotation.get("fullTextAnnotation") or {}).get("text")
        if text is None:
            # Skip only this entry so that one image without text does not
            # discard the text of the images that follow it.
            print(annotation.get("error", "no fullTextAnnotation in response"))
            continue
        page_texts.append("\n\n\n" + text)
    return "".join(page_texts)
59
-
60
-
61
@csrf_exempt
def dataExtract(request, link):
    """Extract the configured fields of a document type from an uploaded PDF.

    The upload is stored under ``static/pdf/``, rendered to PNG pages under
    ``static/pages/<uuid>/``, OCR'd through getAnswer(), and the OCR text is
    sent to a Gemini model that is offered a single ``entityTool`` function
    whose parameters are the document type's fields.

    Args:
        request: Django request; must be a POST with the PDF in
            ``request.FILES["pdf"]``.
        link: Value matched against ``DocumentTypes.url``.

    Returns:
        A JSON response ``{"images": [...], "data": {...}}`` on success,
        404 if no document type matches ``link``, 400 if the upload is
        missing, and the plain body ``"Error"`` for non-POST requests.
    """
    if request.method != "POST":
        return HttpResponse("Error")

    documentData = DocumentTypes.objects.filter(url=link).first()
    if documentData is None:
        return HttpResponse("Unknown document type", status=404)
    pdf_file = request.FILES.get("pdf")
    if pdf_file is None:
        return HttpResponse("Missing 'pdf' file upload", status=400)

    randomUUID = str(uuid.uuid4())
    FileSystemStorage(location="static/pdf/").save(f"{randomUUID}.pdf", pdf_file)
    pages_dir = f"./static/pages/{randomUUID}"
    # makedirs also creates ./static/pages itself on a fresh checkout.
    os.makedirs(pages_dir, exist_ok=True)
    # The PNG pages are written into pages_dir; the return value is not needed.
    # NOTE(review): poppler_path looks like a Windows poppler layout - confirm
    # it exists on the deployment host.
    convert_from_path(
        f"./static/pdf/{randomUUID}.pdf",
        dpi=150,
        output_folder=pages_dir,
        fmt="png",
        output_file="image",
        thread_count=5,
        poppler_path="./poppler-23.05.0/Library/bin/",
    )

    # os.listdir returns entries in arbitrary order; sort for a stable order.
    image_list = [
        f"/static/pages/{randomUUID}/{page_name}"
        for page_name in sorted(os.listdir(pages_dir))
    ]
    # Paths start with "/", so drop any trailing slash on the base URL.
    base_url = host_url.rstrip("/")
    OCRString = getAnswer([base_url + path for path in image_list])
    # Debug dump of the latest OCR result; overwritten on every request.
    with open("./OCR.txt", "w", encoding="utf-8") as f:
        f.write(OCRString)

    fields = documentData.fields
    entityTool = {
        'function_declarations': [
            {
                'name': 'entityTool',
                'description': 'List of entities and value extracted from the text.',
                'parameters': {
                    'type_': 'OBJECT',
                    'properties': {field: {'type_': 'STRING'} for field in fields},
                    'required': []
                }
            }
        ]
    }
    safety_settings = [
        {"category": category, "threshold": "BLOCK_NONE"}
        for category in (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
    ]

    model = genai.GenerativeModel(model_name='gemini-1.5-pro-latest', tools=entityTool, safety_settings=safety_settings)
    chat = model.start_chat()
    response = chat.send_message('PDF Data : \n\n'+OCRString)

    data = {}
    try:
        fc = response.candidates[0].content.parts[0].function_call
    except (IndexError, AttributeError) as e:
        # No candidate/part came back; answer with empty data instead of a 500.
        print(e)
        fc = None
    if fc is not None and fc.name == "entityTool":
        for field in fields:
            try:
                data[field] = fc.args[field]
            except (KeyError, TypeError):
                # The model did not supply this field; leave it out.
                continue

    print(data)
    return HttpResponse(
        json.dumps({"images": image_list, "data": data}), content_type="application/json"
    )
143
-
144
-
145
@csrf_exempt
def imageToText(request):
    """OCR a list of already-hosted images in batches of ten.

    Args:
        request: Django request; must be a POST whose JSON body has an
            ``"images"`` list of paths that are appended to ``host_url``.

    Returns:
        A JSON response ``{"text": [...], "box": [...]}`` where ``text`` holds
        one OCR string per batch of up to ten images. ``box`` is always empty
        because getAnswer() returns text only. Malformed bodies get a 400;
        non-POST requests get the plain body ``"Error"``.
    """
    if request.method != "POST":
        return HttpResponse("Error")

    try:
        image_paths = json.loads(request.body)["images"]
    except (ValueError, KeyError, TypeError):
        return HttpResponse("Expected a JSON body with an 'images' list", status=400)
    if not isinstance(image_paths, list) or not all(isinstance(p, str) for p in image_paths):
        return HttpResponse("'images' must be a list of strings", status=400)

    base_url = host_url.rstrip("/")
    image_urls = [base_url + path for path in image_paths]

    text = []
    box = []
    for start in range(0, len(image_urls), 10):
        # getAnswer() returns a single string, not a (text, box) pair, so it
        # must not be tuple-unpacked.
        text.append(getAnswer(image_urls[start:start + 10]))

    return HttpResponse(
        json.dumps({"text": text, "box": box}), content_type="application/json"
    )
165
-
166
-
167
def documentAIData(request):
    """Return every use-case heading and a summary of every document type.

    The JSON body has two keys: ``"usecases"`` (list of headings of all
    UseCases rows) and ``"docTypes"`` (one dict per DocumentTypes row with
    its ``img``, ``name``, ``url`` and the headings of its own use cases).
    """
    doc_type_summaries = [
        {
            "img": doc_type.img,
            "name": doc_type.name,
            "url": doc_type.url,
            "usecases": [linked.heading for linked in doc_type.usecases.all()],
        }
        for doc_type in DocumentTypes.objects.all()
    ]
    all_headings = [use_case.heading for use_case in UseCases.objects.all()]
    body = json.dumps({"usecases": all_headings, "docTypes": doc_type_summaries})
    return HttpResponse(body, content_type="application/json")
184
-
185
-
186
def docPages(request, link):
    """Return the detail record of the document type whose ``url`` is ``link``.

    Args:
        request: Django request (not inspected).
        link: Value matched against ``DocumentTypes.url``.

    Returns:
        A JSON response with ``title``, ``name``, ``subtitle``, ``img``,
        ``usecases`` (list of ``{"heading", "paragraph"}``), ``fields`` and
        ``url``; or a 404 JSON error when no document type matches.
    """
    documentData = DocumentTypes.objects.filter(url=link).first()
    if documentData is None:
        # .first() yields None for an unknown link; answer 404 rather than
        # failing with AttributeError below.
        return HttpResponse(
            json.dumps({"error": "Unknown document type"}),
            content_type="application/json",
            status=404,
        )

    data = {
        "title": documentData.title,
        "name": documentData.name,
        "subtitle": documentData.subtitle,
        "img": documentData.img,
        "usecases": [
            {"heading": usecase.heading, "paragraph": usecase.paragraph}
            for usecase in documentData.usecases.all()
        ],
        "fields": documentData.fields,
        "url": documentData.url,
    }
    return HttpResponse(json.dumps(data), content_type="application/json")
202
-
203
-
204
def edditor(request):
    """Copy ``link`` values from nanonetProducts2.json onto matching documents.

    For every DocumentTypes row, the first JSON entry whose ``"name"`` equals
    the row's name supplies the new ``url``; the row is saved and the change
    is printed. Rows without a matching entry are left untouched.
    """
    with open("./api/nanonetProducts2.json", "r") as f:
        entries = json.load(f)

    for doc in DocumentTypes.objects.all():
        # First entry with the same name wins, if there is one.
        matched = next((entry for entry in entries if entry["name"] == doc.name), None)
        if matched is None:
            continue
        doc.url = matched["link"]
        doc.save()
        print(doc.name, "Updated", matched["link"])

    return HttpResponse("Hello World")
 
1
+ from django.shortcuts import render, redirect
2
+ from django.http import HttpResponse, JsonResponse, StreamingHttpResponse
3
+ import requests
4
+ import uuid
5
+ import json
6
+ import os
7
+ from pdf2image import convert_from_path, convert_from_bytes
8
+ from django.views.decorators.csrf import csrf_exempt
9
+ from django.core.files.storage import FileSystemStorage
10
+ import threading
11
+ import random
12
+ import google.generativeai as genai
13
+ import google.ai.generativelanguage as glm
14
+ import os
15
+ from .models import UseCases, DocumentTypes
16
+
17
+
18
+
19
# Deployment configuration.
#
# Secrets are read from the process environment instead of being hardcoded,
# so that API keys never end up in version control.
# NOTE(review): the keys that used to be written here are part of the commit
# history and should be rotated.

# Base URL under which this app's /static/ files are publicly reachable.
# Image paths are joined as host_url + "/static/...", so the trailing slash
# is stripped to avoid producing "//static/..." URLs.
host_url = os.environ.get("HOST_URL", "https://thejagstudio-absoluteai.hf.space").rstrip("/")
# API key appended to the Cloud Vision request URL built in getAnswer().
googleAPIKey = os.environ.get("GOOGLE_VISION_API_KEY", "")
# API key for the google.generativeai client used by dataExtract().
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
23
+
24
+
25
def getAnswer(images):
    """Run Cloud Vision document OCR on a batch of image URLs.

    Args:
        images: Iterable of image URLs. Each URL becomes one
            DOCUMENT_TEXT_DETECTION request in a single batch call.

    Returns:
        One string containing the text of every image that produced a
        ``fullTextAnnotation``, each entry prefixed with three newlines.
        Entries without text are skipped (and reported on stdout); a reply
        that cannot be parsed yields an empty string.

    Raises:
        requests.exceptions.RequestException: If the HTTP call itself fails
            or exceeds the timeout.
    """
    url = "https://content-vision.googleapis.com/v1/images:annotate?alt=json&key=" + googleAPIKey
    payload = {
        "requests": [
            {
                "image": {"source": {"imageUri": image_url}},
                "features": [
                    {
                        "type": "DOCUMENT_TEXT_DETECTION",
                        "maxResults": 50,
                        "model": "builtin/latest",
                    }
                ],
            }
            for image_url in images
        ]
    }
    headers = {
        "authority": "content-vision.googleapis.com",
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9,gu;q=0.8",
        "content-type": "application/json",
        "origin": "https://content-vision.googleapis.com",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
        "x-origin": "https://explorer.apis.google.com",
        "x-requested-with": "XMLHttpRequest",
    }
    # A timeout keeps a stalled upstream call from blocking the worker forever.
    response = requests.request(
        "POST", url, headers=headers, data=json.dumps(payload), timeout=60
    )

    try:
        # Decode the body once rather than once per page.
        annotations = response.json().get("responses", [])
    except (ValueError, AttributeError) as e:
        # Body was not JSON, or not a JSON object.
        print(e)
        return ""

    page_texts = []
    for annotation in annotations:
        text = (annotation.get("fullTextAnnotation") or {}).get("text")
        if text is None:
            # Skip only this entry so that one image without text does not
            # discard the text of the images that follow it.
            print(annotation.get("error", "no fullTextAnnotation in response"))
            continue
        page_texts.append("\n\n\n" + text)
    return "".join(page_texts)
59
+
60
+
61
@csrf_exempt
def dataExtract(request, link):
    """Extract the configured fields of a document type from an uploaded PDF.

    The upload is stored under ``static/pdf/``, rendered to PNG pages under
    ``static/pages/<uuid>/``, OCR'd through getAnswer(), and the OCR text is
    sent to a Gemini model that is offered a single ``entityTool`` function
    whose parameters are the document type's fields.

    Args:
        request: Django request; must be a POST with the PDF in
            ``request.FILES["pdf"]``.
        link: Value matched against ``DocumentTypes.url``.

    Returns:
        A JSON response ``{"images": [...], "data": {...}}`` on success,
        404 if no document type matches ``link``, 400 if the upload is
        missing, and the plain body ``"Error"`` for non-POST requests.
    """
    if request.method != "POST":
        return HttpResponse("Error")

    documentData = DocumentTypes.objects.filter(url=link).first()
    if documentData is None:
        return HttpResponse("Unknown document type", status=404)
    pdf_file = request.FILES.get("pdf")
    if pdf_file is None:
        return HttpResponse("Missing 'pdf' file upload", status=400)

    randomUUID = str(uuid.uuid4())
    FileSystemStorage(location="static/pdf/").save(f"{randomUUID}.pdf", pdf_file)
    pages_dir = f"./static/pages/{randomUUID}"
    # makedirs also creates ./static/pages itself on a fresh checkout.
    os.makedirs(pages_dir, exist_ok=True)
    # The PNG pages are written into pages_dir; the return value is not needed.
    # NOTE(review): poppler_path looks like a Windows poppler layout - confirm
    # it exists on the deployment host.
    convert_from_path(
        f"./static/pdf/{randomUUID}.pdf",
        dpi=150,
        output_folder=pages_dir,
        fmt="png",
        output_file="image",
        thread_count=5,
        poppler_path="./poppler-23.05.0/Library/bin/",
    )

    # os.listdir returns entries in arbitrary order; sort for a stable order.
    image_list = [
        f"/static/pages/{randomUUID}/{page_name}"
        for page_name in sorted(os.listdir(pages_dir))
    ]
    # Paths start with "/", so drop any trailing slash on the base URL to
    # avoid "//static/..." links.
    base_url = host_url.rstrip("/")
    OCRString = getAnswer([base_url + path for path in image_list])
    # Debug dump of the latest OCR result; overwritten on every request.
    with open("./OCR.txt", "w", encoding="utf-8") as f:
        f.write(OCRString)

    fields = documentData.fields
    entityTool = {
        'function_declarations': [
            {
                'name': 'entityTool',
                'description': 'List of entities and value extracted from the text.',
                'parameters': {
                    'type_': 'OBJECT',
                    'properties': {field: {'type_': 'STRING'} for field in fields},
                    'required': []
                }
            }
        ]
    }
    safety_settings = [
        {"category": category, "threshold": "BLOCK_NONE"}
        for category in (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
    ]

    model = genai.GenerativeModel(model_name='gemini-1.5-pro-latest', tools=entityTool, safety_settings=safety_settings)
    chat = model.start_chat()
    response = chat.send_message('PDF Data : \n\n'+OCRString)

    data = {}
    try:
        fc = response.candidates[0].content.parts[0].function_call
    except (IndexError, AttributeError) as e:
        # No candidate/part came back; answer with empty data instead of a 500.
        print(e)
        fc = None
    if fc is not None and fc.name == "entityTool":
        for field in fields:
            try:
                data[field] = fc.args[field]
            except (KeyError, TypeError):
                # The model did not supply this field; leave it out.
                continue

    print(data)
    return HttpResponse(
        json.dumps({"images": image_list, "data": data}), content_type="application/json"
    )
143
+
144
+
145
@csrf_exempt
def imageToText(request):
    """OCR a list of already-hosted images in batches of ten.

    Args:
        request: Django request; must be a POST whose JSON body has an
            ``"images"`` list of paths that are appended to ``host_url``.

    Returns:
        A JSON response ``{"text": [...], "box": [...]}`` where ``text`` holds
        one OCR string per batch of up to ten images. ``box`` is always empty
        because getAnswer() returns text only. Malformed bodies get a 400;
        non-POST requests get the plain body ``"Error"``.
    """
    if request.method != "POST":
        return HttpResponse("Error")

    try:
        image_paths = json.loads(request.body)["images"]
    except (ValueError, KeyError, TypeError):
        return HttpResponse("Expected a JSON body with an 'images' list", status=400)
    if not isinstance(image_paths, list) or not all(isinstance(p, str) for p in image_paths):
        return HttpResponse("'images' must be a list of strings", status=400)

    # Drop any trailing slash on the base URL so joined paths do not contain
    # a double slash.
    base_url = host_url.rstrip("/")
    image_urls = [base_url + path for path in image_paths]

    text = []
    box = []
    for start in range(0, len(image_urls), 10):
        # getAnswer() returns a single string, not a (text, box) pair, so it
        # must not be tuple-unpacked.
        text.append(getAnswer(image_urls[start:start + 10]))

    return HttpResponse(
        json.dumps({"text": text, "box": box}), content_type="application/json"
    )
165
+
166
+
167
def documentAIData(request):
    """Return every use-case heading and a summary of every document type.

    The JSON body has two keys: ``"usecases"`` (list of headings of all
    UseCases rows) and ``"docTypes"`` (one dict per DocumentTypes row with
    its ``img``, ``name``, ``url`` and the headings of its own use cases).
    """
    doc_type_summaries = [
        {
            "img": doc_type.img,
            "name": doc_type.name,
            "url": doc_type.url,
            "usecases": [linked.heading for linked in doc_type.usecases.all()],
        }
        for doc_type in DocumentTypes.objects.all()
    ]
    all_headings = [use_case.heading for use_case in UseCases.objects.all()]
    body = json.dumps({"usecases": all_headings, "docTypes": doc_type_summaries})
    return HttpResponse(body, content_type="application/json")
184
+
185
+
186
def docPages(request, link):
    """Return the detail record of the document type whose ``url`` is ``link``.

    Args:
        request: Django request (not inspected).
        link: Value matched against ``DocumentTypes.url``.

    Returns:
        A JSON response with ``title``, ``name``, ``subtitle``, ``img``,
        ``usecases`` (list of ``{"heading", "paragraph"}``), ``fields`` and
        ``url``; or a 404 JSON error when no document type matches.
    """
    documentData = DocumentTypes.objects.filter(url=link).first()
    if documentData is None:
        # .first() yields None for an unknown link; answer 404 rather than
        # failing with AttributeError below.
        return HttpResponse(
            json.dumps({"error": "Unknown document type"}),
            content_type="application/json",
            status=404,
        )

    data = {
        "title": documentData.title,
        "name": documentData.name,
        "subtitle": documentData.subtitle,
        "img": documentData.img,
        "usecases": [
            {"heading": usecase.heading, "paragraph": usecase.paragraph}
            for usecase in documentData.usecases.all()
        ],
        "fields": documentData.fields,
        "url": documentData.url,
    }
    return HttpResponse(json.dumps(data), content_type="application/json")
202
+
203
+
204
def edditor(request):
    """Copy ``link`` values from nanonetProducts2.json onto matching documents.

    For every DocumentTypes row, the first JSON entry whose ``"name"`` equals
    the row's name supplies the new ``url``; the row is saved and the change
    is printed. Rows without a matching entry are left untouched.
    """
    with open("./api/nanonetProducts2.json", "r") as f:
        entries = json.load(f)

    for doc in DocumentTypes.objects.all():
        # First entry with the same name wins, if there is one.
        matched = next((entry for entry in entries if entry["name"] == doc.name), None)
        if matched is None:
            continue
        doc.url = matched["link"]
        doc.save()
        print(doc.name, "Updated", matched["link"])

    return HttpResponse("Hello World")