aibmedia committed on
Commit
9500a11
·
verified ·
1 Parent(s): eb98b33

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +223 -270
main.py CHANGED
@@ -2,6 +2,7 @@ import os , json
2
  from flask import Flask, render_template
3
  import threading
4
  import time
 
5
  import requests
6
 
7
  from langchain_core.tools import Tool
@@ -9,8 +10,6 @@ from langchain_google_community import GoogleSearchAPIWrapper, search
9
  from langchain_community.tools import DuckDuckGoSearchResults
10
 
11
  from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
12
-
13
-
14
 
15
 
16
  API_URL0 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
@@ -19,7 +18,6 @@ API_URL2 = "https://api-inference.huggingface.co/models/sentence-transformers/al
19
  API_URL3 = "https://api-inference.huggingface.co/models/Snowflake/snowflake-arctic-embed-l-v2.0"
20
  # API_URL4 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
21
 
22
-
23
 
24
 
25
  search = GoogleSearchAPIWrapper()
@@ -47,62 +45,175 @@ def server_one():
47
 
48
  @app.route('/')
49
  async def server_1():
50
- # check docs first then check similarity
51
- query_sentence = "Obama's first name"
 
52
  duck_results = []
53
  all_results = []
 
54
  try:
55
- searchduck = DuckDuckGoSearchResults(output_format="list", max_results=5, num_results=5)
56
  duck_results = searchduck.invoke(query_sentence)
57
- if type(duck_results) == list :
58
- all_results = duck_results
59
  except:
60
  print("An exception occurred")
61
  duck_results = []
62
-
63
-
 
64
 
65
  tool = Tool(
66
  name="google_search",
67
  description="Search Google for recent results.",
68
  func=search.run,
69
  )
 
70
  try:
71
  google_results = search.results( query_sentence , 10 )
72
  print("type(duck_results)")
73
  print(type(duck_results))
74
  print(type(all_results))
75
- if type(google_results) == list :
76
- all_results = all_results + google_results
77
  except:
78
  print("An exception occurred")
79
-
 
 
 
 
 
 
 
 
 
 
 
80
  # get the snippet put into list
81
- split_query_words = query_sentence.split(); important_keywords = []
82
  for x in split_query_words:
83
- if x.isupper():
84
- important_keywords.append(x)
85
- if len(x) > 3 & x.isupper() == False:
 
 
 
86
  important_keywords.append(x)
87
-
88
- # pull pages and split each html and count occurance of important keywords here & check snipp if snipp occurs between . and <p> its good not img
89
- #
90
-
91
- # get the longest word in sentence
92
- # res = "" ; iteratorx = 0
93
- # for word in split_query_words:
94
- # if len(word) > len(res):
95
- # res = word
96
-
97
- # get google 20 items
98
- # get user query in the url param
99
- # truncate 130 characters
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- # if still no passed 2x4 matrix in log print increase chars by 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
- # payload = { "inputs": { "source_sentence": "That is a green painted house", "sentences": ["The house paint is green", "That house is green","That house was painted","The house is green", "The house was bought yesterday", "This house was painted green", "That house looks green", "Today the house is clean " ] } , }
104
- # payload = { "inputs": { "source_sentence": "Manila is the capital city of the Philippines", "sentences": ["The current capital city, Manila, has been the countrys capital throughout most of its history and regained the title through a presidential order in 1976", "Manila officially the City of Manila (Filipino: Lungsod ng Maynila), is the capital and second-most populous city of the Philippines, after Quezon City.", "Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre of the countrys economic, political, social, and cultural activity.", "Quezon City is the capital of the Philippines", "Manila is the capital of the philippines", "For sometime Manila has been the capital of of the Philippines" , "What is the capital of Philippines", "Manila is not the capital of the Phillipines", "Quezon city was the capital of the Philippines, until President Ferdinand Marcos Sr. moved the capital to back to Manila. " ] } , }
105
- # payload = { "inputs": { "source_sentence": "Manila is the capital city of the Philippines", "sentences": ["The current capital city, Manila, has been the country's capital throughout most of its history and regained the title through a presidential order in 1976"] } , }
 
 
 
 
 
 
 
106
 
107
  payload = { "inputs": { "source_sentence": "Manila is the capital city of the Philippines", "sentences": ["The current capital city, Manila, has been the countrys capital throughout most","Manila officially the City of Manila (Filipino: Lungsod ng Maynila),","Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre ","Quezon City is the capital of the Philippines","Manila is the capital of the philippines","For sometime Manila has been the capital of of the Philippines" ,"What is the capital of Philippines","Manila is not the capital of the Phillipines","Quezon city was the capital of the Philippines, until President Ferdinand "] } , }
108
  response0 = requests.post(API_URL0, headers=headers, json=payload)
@@ -133,7 +244,7 @@ async def server_1():
133
  if varcontinue_similarity == 1 :
134
  # call processing with 10 google search result or 15 search results
135
  if len(all_results) == 10 :
136
- result_processed = process_similarity_10(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json() )
137
  if len(all_results) > 10 :
138
  result_processed = process_similarity_15(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json() )
139
  # return all_results
@@ -154,8 +265,15 @@ def process_similarity_15(sorted0, sorted1, sorted2, sorted3, actualscore0, actu
154
  print("length")
155
  # print(len(similarity_scores))
156
  key_index = 0
157
- #copy + loop to get index
158
-
 
 
 
 
 
 
 
159
  print("the sorted0-3")
160
  print(sorted0)
161
  print(sorted1)
@@ -219,251 +337,86 @@ def process_similarity_15(sorted0, sorted1, sorted2, sorted3, actualscore0, actu
219
  print("sorted_with_index")
220
  print(sorted3_with_index)
221
 
 
222
  print("sorted0-3_with_index")
223
  print(sorted0_with_index)
224
- print(sorted1_with_index)
225
  print(sorted2_with_index)
226
  print(sorted3_with_index)
227
-
228
- index_sorted0 = [] ; index_sorted1 = [] ; index_sorted2 = [] ; index_sorted3 = []
229
- # lines 158 onwards is about scenario when sorted0_with_index values are greater then .78
230
- # then combine the top 5 values from each list to get the top 3 of 4 llm
231
- varcontinue = False
232
- # we will only continue if each llm has resulted with values greater than .78 & if these llm result list has at least 2
233
 
234
- if ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted0_with_index) >= 2 ):
235
- # continue variable set to true
236
- # indent this here # if( check if avarage of each any3 resp_list0-3 average is 0.85 or above ) !!then only continue!!!
237
- varcontinue = True
238
- print("continue variable set to true")
239
- if ( len(sorted0_with_index) >= 2 and len(sorted1_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ):
240
- # continue variable set to true
241
- # if( check if avarage of any3 resp_list0-3 average is 0.85 or above )!!then only continue!!!
242
- varcontinue = True
243
- print("continue variable set to true")
244
-
245
- # check if llm 1 - 3 has minimum 3
246
-
247
- if varcontinue == True:
248
- if len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0 :
249
- print("len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0")
250
- print(len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0)
251
- print("sorted0_with_index")
252
- print(sorted0_with_index)
253
- for x in sorted0_with_index :
254
- index_sorted0.append(x)
255
- remaining_padding = 5 - len(index_sorted0)
256
- while remaining_padding > 0 :
257
- remaining_padding= remaining_padding - 1
258
- index_sorted0.append(index_sorted0[0])
259
-
260
- if len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0 :
261
- print("len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0")
262
- print(len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0)
263
- print("sorted1_with_index")
264
- print(sorted1_with_index)
265
- for x in sorted1_with_index :
266
- index_sorted1.append(x)
267
- remaining_padding = 5 - len(index_sorted1)
268
- while remaining_padding > 0 :
269
- remaining_padding= remaining_padding - 1
270
- index_sorted1.append(index_sorted1[0])
271
-
272
- if len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0 :
273
- print("len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0")
274
- print(len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0)
275
- print("sorted2_with_index")
276
- print(sorted2_with_index)
277
- for x in sorted2_with_index :
278
- index_sorted2.append(x)
279
- remaining_padding = 5 - len(index_sorted2)
280
- while remaining_padding > 0 :
281
- remaining_padding= remaining_padding - 1
282
- index_sorted2.append(index_sorted2[0])
283
-
284
- if len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0 :
285
- print("len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0")
286
- print(len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0)
287
- print("sorted3_with_index")
288
- print(sorted3_with_index)
289
- for x in sorted3_with_index :
290
- index_sorted3.append(x)
291
- remaining_padding = 5 - len(index_sorted3)
292
- while remaining_padding > 0 :
293
- remaining_padding= remaining_padding - 1
294
- index_sorted3.append(index_sorted3[0])
295
-
296
- print("index_sorted0-1")
297
- print(index_sorted0)
298
- print(index_sorted1)
299
- print(index_sorted2)
300
- print(index_sorted3)
301
-
302
 
303
- else:
304
- print("No reliable similarity found by 4 llms")
 
 
 
 
 
 
 
 
 
 
 
 
305
 
306
- return str( index_sorted0 ) + "," + str( index_sorted1 ) + "," + str( index_sorted2 ) + "," + str( index_sorted3 )
307
-
308
- def process_similarity_10(sorted0, sorted1, sorted2, sorted3, actualscore0, actualscore1, actualscore2, actualscore3):
 
 
 
 
 
 
 
 
 
 
309
 
310
- # print(similarity_scores)
311
- # print(type(similarity_scores))
312
- print("length")
313
- # print(len(similarity_scores))
314
- key_index = 0
315
- #copy + loop to get index
316
-
317
- print("the sorted0-3")
318
- print(sorted0)
319
- print(sorted1)
320
- print(sorted2)
321
- print(sorted3)
322
- print("end the sorted0-3")
323
- # Get the index of the sorted list for resp_list0
324
-
325
- sorted0_with_index = []
326
- for x in sorted0:
327
- for y in actualscore0:
328
- if x == y:
329
- print("index of sorted0")
330
- print(actualscore0.index(y))
331
- if x > 0.90:
332
- sorted0_with_index.append(actualscore0.index(y))
333
- print("sorted_with_index")
334
- print(sorted0_with_index)
335
- print("sorted0_with_index")
336
- print(sorted0_with_index)
337
- sorted1_with_index = []
338
- for x in sorted1:
339
- for y in actualscore1:
340
- if x == y:
341
- print("index of sorted1")
342
- print(actualscore1.index(y))
343
- if y > 0.90:
344
- sorted1_with_index.append(actualscore1.index(y))
345
- print("sorted_with_index")
346
- print(sorted1_with_index)
347
 
348
- print("sorted1_with_index")
349
- print(sorted1_with_index)
350
-
351
- sorted2_with_index = []
352
- print("b4 for x in sorted2:")
353
- print("resp_list2:" + str(actualscore2))
354
- print("sorted:" + str(sorted2))
355
- for x in sorted2:
356
- for y in actualscore2:
357
- if x == y:
358
- print("index of sorted2")
359
- print(actualscore2.index(y))
360
- if y > 0.90:
361
- sorted2_with_index.append(actualscore2.index(y))
362
- print("sorted_with_index")
363
- print(sorted2_with_index)
364
-
365
- print("sorted2_with_index")
366
- print(sorted2_with_index)
367
- sorted3_with_index = []
368
- print("b4 for x in sorted3:")
369
- print("resp_list3:" + str(actualscore3))
370
- for x in sorted3:
371
- for y in actualscore3:
372
- if x == y:
373
- print("index of sorted3")
374
- print(actualscore3.index(y))
375
- if y > 0.90:
376
- sorted3_with_index.append(actualscore3.index(y))
377
- print("sorted_with_index")
378
- print(sorted3_with_index)
379
-
380
- print("sorted0-3_with_index")
381
- print(sorted0_with_index)
382
- print(sorted1_with_index)
383
- print(sorted2_with_index)
384
- print(sorted3_with_index)
385
-
386
- index_sorted0 = [] ; index_sorted1 = [] ; index_sorted2 = [] ; index_sorted3 = []
387
- # lines 158 onwards is about scenario when sorted0_with_index values are greater then .78
388
- # then combine the top 5 values from each list to get the top 3 of 4 llm
389
- varcontinue = False
390
- # we will only continue if each llm has resulted with values greater than .78 & if these llm result list has at least 2
391
 
392
- if ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted0_with_index) >= 2 ):
393
- # continue variable set to true
394
- # indent this here # if( check if avarage of each any3 resp_list0-3 average is 0.85 or above ) !!then only continue!!!
395
- varcontinue = True
396
- print("continue variable set to true")
397
- if ( len(sorted0_with_index) >= 2 and len(sorted1_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ):
398
- # continue variable set to true
399
- # if( check if avarage of any3 resp_list0-3 average is 0.85 or above )!!then only continue!!!
400
- varcontinue = True
401
- print("continue variable set to true")
402
-
403
- # check if llm 1 - 3 has minimum 3
404
-
405
- if varcontinue == True:
406
- if len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0 :
407
- print("len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0")
408
- print(len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0)
409
- print("sorted0_with_index")
410
- print(sorted0_with_index)
411
- for x in sorted0_with_index :
412
- index_sorted0.append(x)
413
- remaining_padding = 5 - len(index_sorted0)
414
- while remaining_padding > 0 :
415
- remaining_padding= remaining_padding - 1
416
- index_sorted0.append(index_sorted0[0])
417
-
418
- if len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0 :
419
- print("len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0")
420
- print(len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0)
421
- print("sorted1_with_index")
422
- print(sorted1_with_index)
423
- for x in sorted1_with_index :
424
- index_sorted1.append(x)
425
- remaining_padding = 5 - len(index_sorted1)
426
- while remaining_padding > 0 :
427
- remaining_padding= remaining_padding - 1
428
- index_sorted1.append(index_sorted1[0])
429
-
430
- if len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0 :
431
- print("len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0")
432
- print(len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0)
433
- print("sorted2_with_index")
434
- print(sorted2_with_index)
435
- for x in sorted2_with_index :
436
- index_sorted2.append(x)
437
- remaining_padding = 5 - len(index_sorted2)
438
- while remaining_padding > 0 :
439
- remaining_padding= remaining_padding - 1
440
- index_sorted2.append(index_sorted2[0])
441
-
442
- if len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0 :
443
- print("len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0")
444
- print(len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0)
445
- print("sorted3_with_index")
446
- print(sorted3_with_index)
447
- for x in sorted3_with_index :
448
- index_sorted3.append(x)
449
- remaining_padding = 5 - len(index_sorted3)
450
- while remaining_padding > 0 :
451
- remaining_padding= remaining_padding - 1
452
- index_sorted3.append(index_sorted3[0])
453
-
454
- print("index_sorted0-1")
455
- print(index_sorted0)
456
- print(index_sorted1)
457
- print(index_sorted2)
458
- print(index_sorted3)
459
 
460
 
461
- else:
462
- print("No reliable similarity found by 4 llms")
463
-
464
- return str( index_sorted0 ) + "," + str( index_sorted1 ) + "," + str( index_sorted2 ) + "," + str( index_sorted3 )
465
-
466
 
467
  if __name__ == '__main__':
468
- app.run(host='0.0.0.0', port=8080)
469
 
 
2
  from flask import Flask, render_template
3
  import threading
4
  import time
5
+ from pydantic.v1.utils import unique_list
6
  import requests
7
 
8
  from langchain_core.tools import Tool
 
10
  from langchain_community.tools import DuckDuckGoSearchResults
11
 
12
  from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
 
 
13
 
14
 
15
  API_URL0 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
 
18
  API_URL3 = "https://api-inference.huggingface.co/models/Snowflake/snowflake-arctic-embed-l-v2.0"
19
  # API_URL4 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
20
 
 
21
 
22
 
23
  search = GoogleSearchAPIWrapper()
 
45
 
46
  @app.route('/')
47
  async def server_1():
48
+ # TODO :: check html first then check similarity
49
+ # TODO :: check parts of snipp to pass in the processing func
50
+ query_sentence = "capital city of the Philippines"
51
  duck_results = []
52
  all_results = []
53
+
54
  try:
55
+ searchduck = DuckDuckGoSearchResults(output_format="list", num_results=20)
56
  duck_results = searchduck.invoke(query_sentence)
57
+ print("type of duck")
58
+ print(type(duck_results))
59
  except:
60
  print("An exception occurred")
61
  duck_results = []
62
+
63
+ if type(duck_results) == list and len(duck_results) > 0 :
64
+ all_results = duck_results
65
 
66
  tool = Tool(
67
  name="google_search",
68
  description="Search Google for recent results.",
69
  func=search.run,
70
  )
71
+
72
  try:
73
  google_results = search.results( query_sentence , 10 )
74
  print("type(duck_results)")
75
  print(type(duck_results))
76
  print(type(all_results))
 
 
77
  except:
78
  print("An exception occurred")
79
+
80
+ if type(google_results) == list and len(google_results) > 0:
81
+ all_results = all_results + google_results
82
+ print("len of google and duck")
83
+ print(len(all_results))
84
+ print(len(google_results))
85
+ print(len(duck_results))
86
+ print("type of google")
87
+ print(type(google_results))
88
+ print(all_results)
89
+ all_snipps = []
90
+ new_results = []
91
  # get the snippet put into list
92
+ split_query_words = query_sentence.split(); important_keywords = []; uppercased_keywords = [];
93
  for x in split_query_words:
94
+ print(" x.isupper() ")
95
+ print(x)
96
+ print( x[0].isupper() )
97
+ if x[0].isupper() == True :
98
+ uppercased_keywords.append(x)
99
+ if ( len(x) > 3 ) & ( x[0].isupper() == False ):
100
  important_keywords.append(x)
101
+ print("what is important and upper")
102
+ print(important_keywords)
103
+ print(uppercased_keywords)
104
+ snipp_score = 0
105
+ capitalized_score = 0
106
+ for x in all_results:
107
+ snipp_score = 0
108
+ capitalized_score = 0
109
+ for words in important_keywords:
110
+ print("The important words " )
111
+ print(words)
112
+ print("x[snippet].find(words)")
113
+ print(x["snippet"].find(words))
114
+ if x["snippet"].find(words) != -1 :
115
+ print("Found word")
116
+ snipp_score = snipp_score + 1
117
+ for words in uppercased_keywords:
118
+ print("The important words capitalized" )
119
+ print(words)
120
+ if x["snippet"].find(words) != -1 :
121
+ snipp_score = snipp_score + 1
122
+ capitalized_score = capitalized_score + 1
123
+
124
+ if ( snipp_score >= len(important_keywords) ) and ( ( capitalized_score <= len(uppercased_keywords) and capitalized_score > 0 ) or ( len(uppercased_keywords) == 0 ) ):
125
+ new_results.append(x)
126
+ continue
127
+ if ( (snipp_score <= len(important_keywords) and snipp_score >= 2 ) and (len(important_keywords) <= 4) ) and ( (capitalized_score <= len(uppercased_keywords) and capitalized_score >= 1) or ( len(uppercased_keywords) == 0 ) ):
128
+ new_results.append(x)
129
+ continue
130
+ if ( ( snipp_score <= len(important_keywords) and snipp_score >= 4 ) and ( len(important_keywords) >= 5 and len(important_keywords) <= 7 ) ) and ( ( capitalized_score <= len(uppercased_keywords) and capitalized_score > 0 ) or ( len(uppercased_keywords) == 0 ) ) :
131
+ new_results.append(x)
132
+ continue
133
+ else :
134
+ # skip the result
135
+ print("This is not added")
136
+ print(x["snippet"])
137
+ print("important keywords")
138
+ print(important_keywords)
139
+ print("capitalized_score")
140
+ print(capitalized_score)
141
+ print("snipp_score")
142
+ print(snipp_score)
143
+
144
+ print("these are new_results")
145
+ print("===============================")
146
+
147
+ print(new_results)
148
 
149
+ print("these are new_results")
150
+ print("===============================")
151
+
152
+ print( " len( new_results) ")
153
+ print( len( new_results) )
154
+ print("type of all_results")
155
+ # TODO :: check html first then check similarity
156
+ # TODO :: check parts of snipp to pass in the processing func
157
+ # TODO :: pull pages and split each html and count occurrences of important keywords here & check snipp: if snipp occurs between . and <p> it's good, not img
158
+
159
+ n_results = {}
160
+ iter_x = 0
161
+ for x in new_results:
162
+ n_results[iter_x] = []
163
+ print("x[snippet]")
164
+ print(x["snippet"])
165
+ for y in (x["snippet"]).split('.') :
166
+ score = 0 ; cap_score = 0 ;
167
+ for words in important_keywords :
168
+ if y.find(words) != -1 :
169
+ print(y)
170
+ print(score)
171
+ score = score + 1
172
+ for words in uppercased_keywords :
173
+ if y.find(words) != -1 :
174
+ print(y)
175
+ print(cap_score)
176
+ cap_score = cap_score + 1
177
+ if ( score == ( len(important_keywords) ) ) and ( cap_score >= ( len(uppercased_keywords) ) ):
178
+ n_results[iter_x].append(y)
179
+ if ( score >= ( len(important_keywords)-1 ) ) or ( cap_score >= len(uppercased_keywords) and (len(uppercased_keywords) > 0) ):
180
+ n_results[iter_x].append(y)
181
+ iter_x = iter_x + 1
182
+ print("iterator")
183
+ print(iter_x)
184
+
185
+ print("n_results")
186
+ print(n_results)
187
+ print(len(n_results))
188
+ print("nresults")
189
+ print(n_results[1])
190
+ # nresults={}
191
+ # new_results loop
192
+ # sentences loop
193
+ # score = 0 ; cap_score = 0
194
+ # words loop
195
+ # if found score ++
196
+ #
197
+ # capitalized loop
198
+ # if found cap_score ++
199
+ # if cap_score >= len words && if score >= len words
200
+ #
201
+ #
202
+ # nresults[i].append(x)
203
+
204
+
205
+
206
 
207
+
208
+
209
+
210
+
211
+
212
+
213
+
214
+ # TODO :: check parts of snipp
215
+ # TODO :: check parts of snipp
216
+ # TODO :: check parts of snipp
217
 
218
  payload = { "inputs": { "source_sentence": "Manila is the capital city of the Philippines", "sentences": ["The current capital city, Manila, has been the countrys capital throughout most","Manila officially the City of Manila (Filipino: Lungsod ng Maynila),","Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre ","Quezon City is the capital of the Philippines","Manila is the capital of the philippines","For sometime Manila has been the capital of of the Philippines" ,"What is the capital of Philippines","Manila is not the capital of the Phillipines","Quezon city was the capital of the Philippines, until President Ferdinand "] } , }
219
  response0 = requests.post(API_URL0, headers=headers, json=payload)
 
244
  if varcontinue_similarity == 1 :
245
  # call processing with 10 google search result or 15 search results
246
  if len(all_results) == 10 :
247
+ result_processed = process_similarity_15(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json() )
248
  if len(all_results) > 10 :
249
  result_processed = process_similarity_15(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json() )
250
  # return all_results
 
265
  print("length")
266
  # print(len(similarity_scores))
267
  key_index = 0
268
+ # copy + loop to get index
269
+ print("actual scores")
270
+ print("actual scores")
271
+ print(actualscore0)
272
+ print(actualscore1)
273
+ print(actualscore2)
274
+ print(actualscore3)
275
+
276
+ print("the sorted0-3")
277
  print("the sorted0-3")
278
  print(sorted0)
279
  print(sorted1)
 
337
  print("sorted_with_index")
338
  print(sorted3_with_index)
339
 
340
+ print("sorted0-3_with_index")
341
  print("sorted0-3_with_index")
342
  print(sorted0_with_index)
343
+ print(sorted1_with_index)
344
  print(sorted2_with_index)
345
  print(sorted3_with_index)
346
+ print("sorted0-3_with_index")
 
 
 
 
 
347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
+ # At this point the scores have been sorted and their indexes are stored in lists
350
+ # At this point the scores have been sorted and their indexes are stored in lists
351
+ this_unique_list = set( sorted0_with_index + sorted1_with_index + sorted2_with_index + sorted3_with_index )
352
+ webgraph_list = []
353
+ iterator_x = 0
354
+ for x in sorted0_with_index:
355
+ print(x)
356
+ if ( x in sorted3_with_index and x in sorted1_with_index and x in sorted2_with_index ) :
357
+ webgraph_list.append(x)
358
+ if ( x in sorted1_with_index and x in sorted2_with_index ) or ( x in sorted3_with_index and x in sorted2_with_index ) or ( x in sorted1_with_index and x in sorted3_with_index ):
359
+ webgraph_list.append(x)
360
+ if (x in sorted1_with_index or x in sorted2_with_index or x in sorted3_with_index ) and actualscore0[iterator_x] > 0.96 :
361
+ webgraph_list.append(x)
362
+ iterator_x = iterator_x + 1
363
 
364
+ print("webgraph_list0")
365
+ print("webgraph_list0")
366
+ print(webgraph_list)
367
+ iterator_x = 0
368
+ for x in sorted1_with_index:
369
+ print(x)
370
+ if x in sorted3_with_index and x in sorted0_with_index and x in sorted2_with_index :
371
+ webgraph_list.append(x)
372
+ if ( x in sorted0_with_index and x in sorted2_with_index ) or ( x in sorted3_with_index and x in sorted2_with_index ) or ( x in sorted0_with_index and x in sorted3_with_index ):
373
+ webgraph_list.append(x)
374
+ if (x in sorted0_with_index or x in sorted2_with_index or x in sorted3_with_index ) and actualscore1[iterator_x] > 0.96 :
375
+ webgraph_list.append(x)
376
+ iterator_x = iterator_x + 1
377
 
378
+ print("webgraph_list1")
379
+ print("webgraph_list1")
380
+ print(webgraph_list)
381
+
382
+
383
+ iterator_x = 0
384
+ for x in sorted2_with_index:
385
+ print(x)
386
+ if x in sorted3_with_index and x in sorted0_with_index and x in sorted1_with_index :
387
+ webgraph_list.append(x)
388
+ if ( x in sorted0_with_index and x in sorted1_with_index ) or ( x in sorted3_with_index and x in sorted1_with_index ) or ( x in sorted0_with_index and x in sorted3_with_index ):
389
+ webgraph_list.append(x)
390
+ if (x in sorted0_with_index or x in sorted1_with_index or x in sorted3_with_index ) and actualscore2[iterator_x] > 0.96 :
391
+ webgraph_list.append(x)
392
+ iterator_x = iterator_x + 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
 
394
+ print("webgraph_list2")
395
+ print("webgraph_list2")
396
+ print(webgraph_list)
397
+
398
+
399
+ iterator_x = 0
400
+ for x in sorted3_with_index:
401
+ print(x)
402
+ if x in sorted1_with_index and x in sorted0_with_index and x in sorted2_with_index :
403
+ webgraph_list.append(x)
404
+ if ( x in sorted0_with_index and x in sorted2_with_index ) or ( x in sorted1_with_index and x in sorted2_with_index ) or ( x in sorted0_with_index and x in sorted1_with_index ):
405
+ webgraph_list.append(x)
406
+ if (x in sorted0_with_index or x in sorted2_with_index or x in sorted1_with_index ) and actualscore3[iterator_x] > 0.96 :
407
+ webgraph_list.append(x)
408
+ iterator_x = iterator_x + 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
 
410
+ print("webgraph_list3")
411
+ print("webgraph_list3")
412
+ print(webgraph_list)
413
+ print("webgraph_list")
414
+ print(webgraph_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
 
417
+ return str( list(set(webgraph_list ) ) )
418
+
 
 
 
419
 
420
  if __name__ == '__main__':
421
+ app.run(host='0.0.0.0', port=8081)
422