Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -2,6 +2,7 @@ import os , json
|
|
2 |
from flask import Flask, render_template
|
3 |
import threading
|
4 |
import time
|
|
|
5 |
import requests
|
6 |
|
7 |
from langchain_core.tools import Tool
|
@@ -9,8 +10,6 @@ from langchain_google_community import GoogleSearchAPIWrapper, search
|
|
9 |
from langchain_community.tools import DuckDuckGoSearchResults
|
10 |
|
11 |
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
|
12 |
-
|
13 |
-
|
14 |
|
15 |
|
16 |
API_URL0 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
|
@@ -19,7 +18,6 @@ API_URL2 = "https://api-inference.huggingface.co/models/sentence-transformers/al
|
|
19 |
API_URL3 = "https://api-inference.huggingface.co/models/Snowflake/snowflake-arctic-embed-l-v2.0"
|
20 |
# API_URL4 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
|
21 |
|
22 |
-
|
23 |
|
24 |
|
25 |
search = GoogleSearchAPIWrapper()
|
@@ -47,62 +45,175 @@ def server_one():
|
|
47 |
|
48 |
@app.route('/')
|
49 |
async def server_1():
|
50 |
-
# check
|
51 |
-
|
|
|
52 |
duck_results = []
|
53 |
all_results = []
|
|
|
54 |
try:
|
55 |
-
searchduck = DuckDuckGoSearchResults(output_format="list",
|
56 |
duck_results = searchduck.invoke(query_sentence)
|
57 |
-
|
58 |
-
|
59 |
except:
|
60 |
print("An exception occurred")
|
61 |
duck_results = []
|
62 |
-
|
63 |
-
|
|
|
64 |
|
65 |
tool = Tool(
|
66 |
name="google_search",
|
67 |
description="Search Google for recent results.",
|
68 |
func=search.run,
|
69 |
)
|
|
|
70 |
try:
|
71 |
google_results = search.results( query_sentence , 10 )
|
72 |
print("type(duck_results)")
|
73 |
print(type(duck_results))
|
74 |
print(type(all_results))
|
75 |
-
if type(google_results) == list :
|
76 |
-
all_results = all_results + google_results
|
77 |
except:
|
78 |
print("An exception occurred")
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
# get the snippet put into list
|
81 |
-
split_query_words = query_sentence.split(); important_keywords = []
|
82 |
for x in split_query_words:
|
83 |
-
|
84 |
-
|
85 |
-
|
|
|
|
|
|
|
86 |
important_keywords.append(x)
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
|
107 |
payload = { "inputs": { "source_sentence": "Manila is the capital city of the Philippines", "sentences": ["The current capital city, Manila, has been the countrys capital throughout most","Manila officially the City of Manila (Filipino: Lungsod ng Maynila),","Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre ","Quezon City is the capital of the Philippines","Manila is the capital of the philippines","For sometime Manila has been the capital of of the Philippines" ,"What is the capital of Philippines","Manila is not the capital of the Phillipines","Quezon city was the capital of the Philippines, until President Ferdinand "] } , }
|
108 |
response0 = requests.post(API_URL0, headers=headers, json=payload)
|
@@ -133,7 +244,7 @@ async def server_1():
|
|
133 |
if varcontinue_similarity == 1 :
|
134 |
# call processing with 10 google search results or 15 search results
|
135 |
if len(all_results) == 10 :
|
136 |
-
result_processed =
|
137 |
if len(all_results) > 10 :
|
138 |
result_processed = process_similarity_15(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json() )
|
139 |
# return all_results
|
@@ -154,8 +265,15 @@ def process_similarity_15(sorted0, sorted1, sorted2, sorted3, actualscore0, actu
|
|
154 |
print("length")
|
155 |
# print(len(similarity_scores))
|
156 |
key_index = 0
|
157 |
-
#copy + loop to get index
|
158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
print("the sorted0-3")
|
160 |
print(sorted0)
|
161 |
print(sorted1)
|
@@ -219,251 +337,86 @@ def process_similarity_15(sorted0, sorted1, sorted2, sorted3, actualscore0, actu
|
|
219 |
print("sorted_with_index")
|
220 |
print(sorted3_with_index)
|
221 |
|
|
|
222 |
print("sorted0-3_with_index")
|
223 |
print(sorted0_with_index)
|
224 |
-
print(sorted1_with_index)
|
225 |
print(sorted2_with_index)
|
226 |
print(sorted3_with_index)
|
227 |
-
|
228 |
-
index_sorted0 = [] ; index_sorted1 = [] ; index_sorted2 = [] ; index_sorted3 = []
|
229 |
-
# lines 158 onwards is about scenario when sorted0_with_index values are greater then .78
|
230 |
-
# then combine the top 5 values from each list to get the top 3 of 4 llm
|
231 |
-
varcontinue = False
|
232 |
-
# we will only continue if each llm has resulted with values greater than .78 & if these llm result list has at least 2
|
233 |
|
234 |
-
if ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted0_with_index) >= 2 ):
|
235 |
-
# continue variable set to true
|
236 |
-
# indent this here # if( check if avarage of each any3 resp_list0-3 average is 0.85 or above ) !!then only continue!!!
|
237 |
-
varcontinue = True
|
238 |
-
print("continue variable set to true")
|
239 |
-
if ( len(sorted0_with_index) >= 2 and len(sorted1_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ):
|
240 |
-
# continue variable set to true
|
241 |
-
# if( check if avarage of any3 resp_list0-3 average is 0.85 or above )!!then only continue!!!
|
242 |
-
varcontinue = True
|
243 |
-
print("continue variable set to true")
|
244 |
-
|
245 |
-
# check if llm 1 - 3 has minimum 3
|
246 |
-
|
247 |
-
if varcontinue == True:
|
248 |
-
if len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0 :
|
249 |
-
print("len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0")
|
250 |
-
print(len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0)
|
251 |
-
print("sorted0_with_index")
|
252 |
-
print(sorted0_with_index)
|
253 |
-
for x in sorted0_with_index :
|
254 |
-
index_sorted0.append(x)
|
255 |
-
remaining_padding = 5 - len(index_sorted0)
|
256 |
-
while remaining_padding > 0 :
|
257 |
-
remaining_padding= remaining_padding - 1
|
258 |
-
index_sorted0.append(index_sorted0[0])
|
259 |
-
|
260 |
-
if len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0 :
|
261 |
-
print("len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0")
|
262 |
-
print(len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0)
|
263 |
-
print("sorted1_with_index")
|
264 |
-
print(sorted1_with_index)
|
265 |
-
for x in sorted1_with_index :
|
266 |
-
index_sorted1.append(x)
|
267 |
-
remaining_padding = 5 - len(index_sorted1)
|
268 |
-
while remaining_padding > 0 :
|
269 |
-
remaining_padding= remaining_padding - 1
|
270 |
-
index_sorted1.append(index_sorted1[0])
|
271 |
-
|
272 |
-
if len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0 :
|
273 |
-
print("len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0")
|
274 |
-
print(len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0)
|
275 |
-
print("sorted2_with_index")
|
276 |
-
print(sorted2_with_index)
|
277 |
-
for x in sorted2_with_index :
|
278 |
-
index_sorted2.append(x)
|
279 |
-
remaining_padding = 5 - len(index_sorted2)
|
280 |
-
while remaining_padding > 0 :
|
281 |
-
remaining_padding= remaining_padding - 1
|
282 |
-
index_sorted2.append(index_sorted2[0])
|
283 |
-
|
284 |
-
if len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0 :
|
285 |
-
print("len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0")
|
286 |
-
print(len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0)
|
287 |
-
print("sorted3_with_index")
|
288 |
-
print(sorted3_with_index)
|
289 |
-
for x in sorted3_with_index :
|
290 |
-
index_sorted3.append(x)
|
291 |
-
remaining_padding = 5 - len(index_sorted3)
|
292 |
-
while remaining_padding > 0 :
|
293 |
-
remaining_padding= remaining_padding - 1
|
294 |
-
index_sorted3.append(index_sorted3[0])
|
295 |
-
|
296 |
-
print("index_sorted0-1")
|
297 |
-
print(index_sorted0)
|
298 |
-
print(index_sorted1)
|
299 |
-
print(index_sorted2)
|
300 |
-
print(index_sorted3)
|
301 |
-
|
302 |
|
303 |
-
|
304 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
|
306 |
-
|
307 |
-
|
308 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
|
310 |
-
|
311 |
-
|
312 |
-
print(
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
sorted0_with_index = []
|
326 |
-
for x in sorted0:
|
327 |
-
for y in actualscore0:
|
328 |
-
if x == y:
|
329 |
-
print("index of sorted0")
|
330 |
-
print(actualscore0.index(y))
|
331 |
-
if x > 0.90:
|
332 |
-
sorted0_with_index.append(actualscore0.index(y))
|
333 |
-
print("sorted_with_index")
|
334 |
-
print(sorted0_with_index)
|
335 |
-
print("sorted0_with_index")
|
336 |
-
print(sorted0_with_index)
|
337 |
-
sorted1_with_index = []
|
338 |
-
for x in sorted1:
|
339 |
-
for y in actualscore1:
|
340 |
-
if x == y:
|
341 |
-
print("index of sorted1")
|
342 |
-
print(actualscore1.index(y))
|
343 |
-
if y > 0.90:
|
344 |
-
sorted1_with_index.append(actualscore1.index(y))
|
345 |
-
print("sorted_with_index")
|
346 |
-
print(sorted1_with_index)
|
347 |
|
348 |
-
print("
|
349 |
-
print(
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
print(sorted2_with_index)
|
364 |
-
|
365 |
-
print("sorted2_with_index")
|
366 |
-
print(sorted2_with_index)
|
367 |
-
sorted3_with_index = []
|
368 |
-
print("b4 for x in sorted3:")
|
369 |
-
print("resp_list3:" + str(actualscore3))
|
370 |
-
for x in sorted3:
|
371 |
-
for y in actualscore3:
|
372 |
-
if x == y:
|
373 |
-
print("index of sorted3")
|
374 |
-
print(actualscore3.index(y))
|
375 |
-
if y > 0.90:
|
376 |
-
sorted3_with_index.append(actualscore3.index(y))
|
377 |
-
print("sorted_with_index")
|
378 |
-
print(sorted3_with_index)
|
379 |
-
|
380 |
-
print("sorted0-3_with_index")
|
381 |
-
print(sorted0_with_index)
|
382 |
-
print(sorted1_with_index)
|
383 |
-
print(sorted2_with_index)
|
384 |
-
print(sorted3_with_index)
|
385 |
-
|
386 |
-
index_sorted0 = [] ; index_sorted1 = [] ; index_sorted2 = [] ; index_sorted3 = []
|
387 |
-
# lines 158 onwards is about scenario when sorted0_with_index values are greater then .78
|
388 |
-
# then combine the top 5 values from each list to get the top 3 of 4 llm
|
389 |
-
varcontinue = False
|
390 |
-
# we will only continue if each llm has resulted with values greater than .78 & if these llm result list has at least 2
|
391 |
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
if ( len(sorted0_with_index) >= 2 and len(sorted1_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ):
|
398 |
-
# continue variable set to true
|
399 |
-
# if( check if avarage of any3 resp_list0-3 average is 0.85 or above )!!then only continue!!!
|
400 |
-
varcontinue = True
|
401 |
-
print("continue variable set to true")
|
402 |
-
|
403 |
-
# check if llm 1 - 3 has minimum 3
|
404 |
-
|
405 |
-
if varcontinue == True:
|
406 |
-
if len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0 :
|
407 |
-
print("len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0")
|
408 |
-
print(len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0)
|
409 |
-
print("sorted0_with_index")
|
410 |
-
print(sorted0_with_index)
|
411 |
-
for x in sorted0_with_index :
|
412 |
-
index_sorted0.append(x)
|
413 |
-
remaining_padding = 5 - len(index_sorted0)
|
414 |
-
while remaining_padding > 0 :
|
415 |
-
remaining_padding= remaining_padding - 1
|
416 |
-
index_sorted0.append(index_sorted0[0])
|
417 |
-
|
418 |
-
if len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0 :
|
419 |
-
print("len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0")
|
420 |
-
print(len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0)
|
421 |
-
print("sorted1_with_index")
|
422 |
-
print(sorted1_with_index)
|
423 |
-
for x in sorted1_with_index :
|
424 |
-
index_sorted1.append(x)
|
425 |
-
remaining_padding = 5 - len(index_sorted1)
|
426 |
-
while remaining_padding > 0 :
|
427 |
-
remaining_padding= remaining_padding - 1
|
428 |
-
index_sorted1.append(index_sorted1[0])
|
429 |
-
|
430 |
-
if len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0 :
|
431 |
-
print("len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0")
|
432 |
-
print(len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0)
|
433 |
-
print("sorted2_with_index")
|
434 |
-
print(sorted2_with_index)
|
435 |
-
for x in sorted2_with_index :
|
436 |
-
index_sorted2.append(x)
|
437 |
-
remaining_padding = 5 - len(index_sorted2)
|
438 |
-
while remaining_padding > 0 :
|
439 |
-
remaining_padding= remaining_padding - 1
|
440 |
-
index_sorted2.append(index_sorted2[0])
|
441 |
-
|
442 |
-
if len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0 :
|
443 |
-
print("len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0")
|
444 |
-
print(len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0)
|
445 |
-
print("sorted3_with_index")
|
446 |
-
print(sorted3_with_index)
|
447 |
-
for x in sorted3_with_index :
|
448 |
-
index_sorted3.append(x)
|
449 |
-
remaining_padding = 5 - len(index_sorted3)
|
450 |
-
while remaining_padding > 0 :
|
451 |
-
remaining_padding= remaining_padding - 1
|
452 |
-
index_sorted3.append(index_sorted3[0])
|
453 |
-
|
454 |
-
print("index_sorted0-1")
|
455 |
-
print(index_sorted0)
|
456 |
-
print(index_sorted1)
|
457 |
-
print(index_sorted2)
|
458 |
-
print(index_sorted3)
|
459 |
|
460 |
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
return str( index_sorted0 ) + "," + str( index_sorted1 ) + "," + str( index_sorted2 ) + "," + str( index_sorted3 )
|
465 |
-
|
466 |
|
467 |
if __name__ == '__main__':
|
468 |
-
app.run(host='0.0.0.0', port=
|
469 |
|
|
|
2 |
from flask import Flask, render_template
|
3 |
import threading
|
4 |
import time
|
5 |
+
from pydantic.v1.utils import unique_list
|
6 |
import requests
|
7 |
|
8 |
from langchain_core.tools import Tool
|
|
|
10 |
from langchain_community.tools import DuckDuckGoSearchResults
|
11 |
|
12 |
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
|
|
|
|
|
13 |
|
14 |
|
15 |
API_URL0 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
|
|
|
18 |
API_URL3 = "https://api-inference.huggingface.co/models/Snowflake/snowflake-arctic-embed-l-v2.0"
|
19 |
# API_URL4 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
|
20 |
|
|
|
21 |
|
22 |
|
23 |
search = GoogleSearchAPIWrapper()
|
|
|
45 |
|
46 |
@app.route('/')
|
47 |
async def server_1():
|
48 |
+
# TODO :: check html first then check similarity
|
49 |
+
# TODO :: check parts of snipp to pass in the processing func
|
50 |
+
query_sentence = "capital city of the Philippines"
|
51 |
duck_results = []
|
52 |
all_results = []
|
53 |
+
|
54 |
try:
|
55 |
+
searchduck = DuckDuckGoSearchResults(output_format="list", num_results=20)
|
56 |
duck_results = searchduck.invoke(query_sentence)
|
57 |
+
print("type of duck")
|
58 |
+
print(type(duck_results))
|
59 |
except:
|
60 |
print("An exception occurred")
|
61 |
duck_results = []
|
62 |
+
|
63 |
+
if type(duck_results) == list and len(duck_results) > 0 :
|
64 |
+
all_results = duck_results
|
65 |
|
66 |
tool = Tool(
|
67 |
name="google_search",
|
68 |
description="Search Google for recent results.",
|
69 |
func=search.run,
|
70 |
)
|
71 |
+
|
72 |
try:
|
73 |
google_results = search.results( query_sentence , 10 )
|
74 |
print("type(duck_results)")
|
75 |
print(type(duck_results))
|
76 |
print(type(all_results))
|
|
|
|
|
77 |
except:
|
78 |
print("An exception occurred")
|
79 |
+
|
80 |
+
if type(google_results) == list and len(google_results) > 0:
|
81 |
+
all_results = all_results + google_results
|
82 |
+
print("len of google and duck")
|
83 |
+
print(len(all_results))
|
84 |
+
print(len(google_results))
|
85 |
+
print(len(duck_results))
|
86 |
+
print("type of google")
|
87 |
+
print(type(google_results))
|
88 |
+
print(all_results)
|
89 |
+
all_snipps = []
|
90 |
+
new_results = []
|
91 |
# get the snippet put into list
|
92 |
+
split_query_words = query_sentence.split(); important_keywords = []; uppercased_keywords = [];
|
93 |
for x in split_query_words:
|
94 |
+
print(" x.isupper() ")
|
95 |
+
print(x)
|
96 |
+
print( x[0].isupper() )
|
97 |
+
if x[0].isupper() == True :
|
98 |
+
uppercased_keywords.append(x)
|
99 |
+
if ( len(x) > 3 ) & ( x[0].isupper() == False ):
|
100 |
important_keywords.append(x)
|
101 |
+
print("what is important and upper")
|
102 |
+
print(important_keywords)
|
103 |
+
print(uppercased_keywords)
|
104 |
+
snipp_score = 0
|
105 |
+
capitalized_score = 0
|
106 |
+
for x in all_results:
|
107 |
+
snipp_score = 0
|
108 |
+
capitalized_score = 0
|
109 |
+
for words in important_keywords:
|
110 |
+
print("The important words " )
|
111 |
+
print(words)
|
112 |
+
print("x[snippet].find(words)")
|
113 |
+
print(x["snippet"].find(words))
|
114 |
+
if x["snippet"].find(words) != -1 :
|
115 |
+
print("Found word")
|
116 |
+
snipp_score = snipp_score + 1
|
117 |
+
for words in uppercased_keywords:
|
118 |
+
print("The important words capitalized" )
|
119 |
+
print(words)
|
120 |
+
if x["snippet"].find(words) != -1 :
|
121 |
+
snipp_score = snipp_score + 1
|
122 |
+
capitalized_score = capitalized_score + 1
|
123 |
+
|
124 |
+
if ( snipp_score >= len(important_keywords) ) and ( ( capitalized_score <= len(uppercased_keywords) and capitalized_score > 0 ) or ( len(uppercased_keywords) == 0 ) ):
|
125 |
+
new_results.append(x)
|
126 |
+
continue
|
127 |
+
if ( (snipp_score <= len(important_keywords) and snipp_score >= 2 ) and (len(important_keywords) <= 4) ) and ( (capitalized_score <= len(uppercased_keywords) and capitalized_score >= 1) or ( len(uppercased_keywords) == 0 ) ):
|
128 |
+
new_results.append(x)
|
129 |
+
continue
|
130 |
+
if ( ( snipp_score <= len(important_keywords) and snipp_score >= 4 ) and ( len(important_keywords) >= 5 and len(important_keywords) <= 7 ) ) and ( ( capitalized_score <= len(uppercased_keywords) and capitalized_score > 0 ) or ( len(uppercased_keywords) == 0 ) ) :
|
131 |
+
new_results.append(x)
|
132 |
+
continue
|
133 |
+
else :
|
134 |
+
# skip the result
|
135 |
+
print("This is not added")
|
136 |
+
print(x["snippet"])
|
137 |
+
print("important keywords")
|
138 |
+
print(important_keywords)
|
139 |
+
print("capitalized_score")
|
140 |
+
print(capitalized_score)
|
141 |
+
print("snipp_score")
|
142 |
+
print(snipp_score)
|
143 |
+
|
144 |
+
print("these are new_results")
|
145 |
+
print("===============================")
|
146 |
+
|
147 |
+
print(new_results)
|
148 |
|
149 |
+
print("these are new_results")
|
150 |
+
print("===============================")
|
151 |
+
|
152 |
+
print( " len( new_results) ")
|
153 |
+
print( len( new_results) )
|
154 |
+
print("type of all_results")
|
155 |
+
# TODO :: check html first then check similarity
|
156 |
+
# TODO :: check parts of snipp to pass in the processing func
|
157 |
+
# TODO :: pull pages, split each html and count occurrences of important keywords here & check snipp: if snipp occurs between . and <p> it's good, not img
|
158 |
+
|
159 |
+
n_results = {}
|
160 |
+
iter_x = 0
|
161 |
+
for x in new_results:
|
162 |
+
n_results[iter_x] = []
|
163 |
+
print("x[snippet]")
|
164 |
+
print(x["snippet"])
|
165 |
+
for y in (x["snippet"]).split('.') :
|
166 |
+
score = 0 ; cap_score = 0 ;
|
167 |
+
for words in important_keywords :
|
168 |
+
if y.find(words) != -1 :
|
169 |
+
print(y)
|
170 |
+
print(score)
|
171 |
+
score = score + 1
|
172 |
+
for words in uppercased_keywords :
|
173 |
+
if y.find(words) != -1 :
|
174 |
+
print(y)
|
175 |
+
print(cap_score)
|
176 |
+
cap_score = cap_score + 1
|
177 |
+
if ( score == ( len(important_keywords) ) ) and ( cap_score >= ( len(uppercased_keywords) ) ):
|
178 |
+
n_results[iter_x].append(y)
|
179 |
+
if ( score >= ( len(important_keywords)-1 ) ) or ( cap_score >= len(uppercased_keywords) and (len(uppercased_keywords) > 0) ):
|
180 |
+
n_results[iter_x].append(y)
|
181 |
+
iter_x = iter_x + 1
|
182 |
+
print("iterator")
|
183 |
+
print(iter_x)
|
184 |
+
|
185 |
+
print("n_results")
|
186 |
+
print(n_results)
|
187 |
+
print(len(n_results))
|
188 |
+
print("nresults")
|
189 |
+
print(n_results[1])
|
190 |
+
# nresults={}
|
191 |
+
# new_results loop
|
192 |
+
# sentences loop
|
193 |
+
# score = 0 ; cap_score = 0
|
194 |
+
# words loop
|
195 |
+
# if found score ++
|
196 |
+
#
|
197 |
+
# capitalized loop
|
198 |
+
# if found cap_score ++
|
199 |
+
# if cap_score >= len words && if score >= len words
|
200 |
+
#
|
201 |
+
#
|
202 |
+
# nresults[i].append(x)
|
203 |
+
|
204 |
+
|
205 |
+
|
206 |
|
207 |
+
|
208 |
+
|
209 |
+
|
210 |
+
|
211 |
+
|
212 |
+
|
213 |
+
|
214 |
+
# TODO :: check parts of snipp
|
215 |
+
# TODO :: check parts of snipp
|
216 |
+
# TODO :: check parts of snipp
|
217 |
|
218 |
payload = { "inputs": { "source_sentence": "Manila is the capital city of the Philippines", "sentences": ["The current capital city, Manila, has been the countrys capital throughout most","Manila officially the City of Manila (Filipino: Lungsod ng Maynila),","Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre ","Quezon City is the capital of the Philippines","Manila is the capital of the philippines","For sometime Manila has been the capital of of the Philippines" ,"What is the capital of Philippines","Manila is not the capital of the Phillipines","Quezon city was the capital of the Philippines, until President Ferdinand "] } , }
|
219 |
response0 = requests.post(API_URL0, headers=headers, json=payload)
|
|
|
244 |
if varcontinue_similarity == 1 :
|
245 |
# call processing with 10 google search results or 15 search results
|
246 |
if len(all_results) == 10 :
|
247 |
+
result_processed = process_similarity_15(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json() )
|
248 |
if len(all_results) > 10 :
|
249 |
result_processed = process_similarity_15(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json() )
|
250 |
# return all_results
|
|
|
265 |
print("length")
|
266 |
# print(len(similarity_scores))
|
267 |
key_index = 0
|
268 |
+
# copy + loop to get index
|
269 |
+
print("actual scores")
|
270 |
+
print("actual scores")
|
271 |
+
print(actualscore0)
|
272 |
+
print(actualscore1)
|
273 |
+
print(actualscore2)
|
274 |
+
print(actualscore3)
|
275 |
+
|
276 |
+
print("the sorted0-3")
|
277 |
print("the sorted0-3")
|
278 |
print(sorted0)
|
279 |
print(sorted1)
|
|
|
337 |
print("sorted_with_index")
|
338 |
print(sorted3_with_index)
|
339 |
|
340 |
+
print("sorted0-3_with_index")
|
341 |
print("sorted0-3_with_index")
|
342 |
print(sorted0_with_index)
|
343 |
+
print(sorted1_with_index)
|
344 |
print(sorted2_with_index)
|
345 |
print(sorted3_with_index)
|
346 |
+
print("sorted0-3_with_index")
|
|
|
|
|
|
|
|
|
|
|
347 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
348 |
|
349 |
+
# At this point the scores have been sorted and the indexes are stored in lists
|
350 |
+
# At this point the scores have been sorted and the indexes are stored in lists
|
351 |
+
this_unique_list = set( sorted0_with_index + sorted1_with_index + sorted2_with_index + sorted3_with_index )
|
352 |
+
webgraph_list = []
|
353 |
+
iterator_x = 0
|
354 |
+
for x in sorted0_with_index:
|
355 |
+
print(x)
|
356 |
+
if ( x in sorted3_with_index and x in sorted1_with_index and x in sorted2_with_index ) :
|
357 |
+
webgraph_list.append(x)
|
358 |
+
if ( x in sorted1_with_index and x in sorted2_with_index ) or ( x in sorted3_with_index and x in sorted2_with_index ) or ( x in sorted1_with_index and x in sorted3_with_index ):
|
359 |
+
webgraph_list.append(x)
|
360 |
+
if (x in sorted1_with_index or x in sorted2_with_index or x in sorted3_with_index ) and actualscore0[iterator_x] > 0.96 :
|
361 |
+
webgraph_list.append(x)
|
362 |
+
iterator_x = iterator_x + 1
|
363 |
|
364 |
+
print("webgraph_list0")
|
365 |
+
print("webgraph_list0")
|
366 |
+
print(webgraph_list)
|
367 |
+
iterator_x = 0
|
368 |
+
for x in sorted1_with_index:
|
369 |
+
print(x)
|
370 |
+
if x in sorted3_with_index and x in sorted0_with_index and x in sorted2_with_index :
|
371 |
+
webgraph_list.append(x)
|
372 |
+
if ( x in sorted0_with_index and x in sorted2_with_index ) or ( x in sorted3_with_index and x in sorted2_with_index ) or ( x in sorted0_with_index and x in sorted3_with_index ):
|
373 |
+
webgraph_list.append(x)
|
374 |
+
if (x in sorted0_with_index or x in sorted2_with_index or x in sorted3_with_index ) and actualscore1[iterator_x] > 0.96 :
|
375 |
+
webgraph_list.append(x)
|
376 |
+
iterator_x = iterator_x + 1
|
377 |
|
378 |
+
print("webgraph_list1")
|
379 |
+
print("webgraph_list1")
|
380 |
+
print(webgraph_list)
|
381 |
+
|
382 |
+
|
383 |
+
iterator_x = 0
|
384 |
+
for x in sorted2_with_index:
|
385 |
+
print(x)
|
386 |
+
if x in sorted3_with_index and x in sorted0_with_index and x in sorted1_with_index :
|
387 |
+
webgraph_list.append(x)
|
388 |
+
if ( x in sorted0_with_index and x in sorted1_with_index ) or ( x in sorted3_with_index and x in sorted1_with_index ) or ( x in sorted0_with_index and x in sorted3_with_index ):
|
389 |
+
webgraph_list.append(x)
|
390 |
+
if (x in sorted0_with_index or x in sorted1_with_index or x in sorted3_with_index ) and actualscore2[iterator_x] > 0.96 :
|
391 |
+
webgraph_list.append(x)
|
392 |
+
iterator_x = iterator_x + 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
|
394 |
+
print("webgraph_list2")
|
395 |
+
print("webgraph_list2")
|
396 |
+
print(webgraph_list)
|
397 |
+
|
398 |
+
|
399 |
+
iterator_x = 0
|
400 |
+
for x in sorted3_with_index:
|
401 |
+
print(x)
|
402 |
+
if x in sorted1_with_index and x in sorted0_with_index and x in sorted2_with_index :
|
403 |
+
webgraph_list.append(x)
|
404 |
+
if ( x in sorted0_with_index and x in sorted2_with_index ) or ( x in sorted1_with_index and x in sorted2_with_index ) or ( x in sorted0_with_index and x in sorted1_with_index ):
|
405 |
+
webgraph_list.append(x)
|
406 |
+
if (x in sorted0_with_index or x in sorted2_with_index or x in sorted1_with_index ) and actualscore3[iterator_x] > 0.96 :
|
407 |
+
webgraph_list.append(x)
|
408 |
+
iterator_x = iterator_x + 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
409 |
|
410 |
+
print("webgraph_list3")
|
411 |
+
print("webgraph_list3")
|
412 |
+
print(webgraph_list)
|
413 |
+
print("webgraph_list")
|
414 |
+
print(webgraph_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
|
416 |
|
417 |
+
return str( list(set(webgraph_list ) ) )
|
418 |
+
|
|
|
|
|
|
|
419 |
|
420 |
if __name__ == '__main__':
|
421 |
+
app.run(host='0.0.0.0', port=8081)
|
422 |
|