Spaces:
Sleeping
Sleeping
File size: 16,875 Bytes
3d10977 d8edfd0 bd4cf9f c7cccfe 9500a11 b245886 3d10977 7b33127 3d10977 f2b36f2 5c1eb47 85498e2 3d10977 f2b36f2 7b33127 dec3f0d 3d10977 cdefac5 f1deeaa 0ff8527 ecd8d62 e2c1771 c302b97 2abc03b f2b36f2 a7d861a baa7056 3d10977 004a7b1 c7cccfe baa7056 db10537 9500a11 ea189c9 3df47dd 9500a11 ea189c9 9500a11 ea189c9 9500a11 ea189c9 3df47dd 9500a11 3d10977 9500a11 3df47dd 4ea57ed e4ecf7c 3df47dd 9500a11 27b4106 9500a11 3d10977 9500a11 3d10977 9500a11 75e83a9 9500a11 3df47dd 9500a11 75e83a9 016504f 9500a11 75e83a9 9500a11 75e83a9 9500a11 f443d89 016504f 75e83a9 9500a11 f443d89 9500a11 27b4106 3d10977 f443d89 9500a11 85498e2 9500a11 75e83a9 9500a11 016504f 9500a11 016504f 9500a11 75e83a9 9500a11 27b4106 9500a11 27b4106 eea4fac 27b4106 85498e2 9d94f56 85498e2 9b56a83 27b4106 85498e2 9500a11 3d10977 9500a11 dec3f0d 3d10977 15b3851 b1cff41 dec3f0d 5c1eb47 3d10977 dec3f0d 3d10977 dec3f0d eb98b33 5c1eb47 3d10977 5c1eb47 3d10977 016504f 5c1eb47 9500a11 5c1eb47 dec3f0d 5c1eb47 dec3f0d 5c1eb47 3d10977 dec3f0d 3d10977 9500a11 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 dec3f0d 3d10977 9500a11 3d10977 9500a11 3d10977 9500a11 3d10977 9500a11 5c1eb47 9500a11 5c1eb47 9500a11 5c1eb47 9500a11 5c1eb47 9500a11 5c1eb47 9500a11 3d10977 bb8f849 27b4106 bb8f849 b1efb37 bb8f849 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 |
import json
import os
import subprocess
import threading
import time

import requests
from flask import Flask, render_template
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langchain_core.tools import Tool
from langchain_google_community import GoogleSearchAPIWrapper, search
from pydantic.v1.utils import unique_list
# Hugging Face inference endpoints for the four sentence-similarity models
# whose scores are cross-referenced by process_similarity_15().
API_URL0 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
API_URL1 = "https://api-inference.huggingface.co/models/sentence-transformers/all-mpnet-base-v2"
API_URL2 = "https://api-inference.huggingface.co/models/sentence-transformers/all-roberta-large-v1"
API_URL3 = "https://api-inference.huggingface.co/models/Snowflake/snowflake-arctic-embed-l-v2.0"
# API_URL4 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"

# Google programmable-search client.  NOTE(review): this rebinds the `search`
# name imported from langchain_google_community above; kept for compatibility
# with the existing routes.
search = GoogleSearchAPIWrapper()

# The HF API token must come from the environment.  Fail fast with a clear
# message instead of the opaque TypeError that `"Bearer " + None` would raise.
_token = os.getenv('TOKEN')
if _token is None:
    raise RuntimeError("Environment variable TOKEN (Hugging Face API token) is not set")
bearer = "Bearer " + _token
headers = {"Authorization": bearer}
# SECURITY: never print `headers` or `bearer` -- they contain the API secret
# and would leak it into the Space logs.

app = Flask(__name__)
@app.route('/app')
def server_app():
    """Launch the llamafile server on a background thread and return at once."""
    worker = threading.Thread(target=threadserver)
    print('This /app will start the llamafile server on thread')
    worker.start()
    return 'llamafile.start()'
@app.route('/findsimilarity')
def server_one():
    """Render the similarity page with placeholder result text."""
    placeholder = "Results"
    return render_template(
        "similarity_1.html",
        sourcetxt=placeholder,
        s1=placeholder,
        headertxt=bearer,
    )
def _split_keywords(query_sentence):
    """Split a query into (important_keywords, uppercased_keywords).

    Words whose first letter is uppercase are treated as proper-noun keywords;
    remaining words longer than three characters are the "important" keywords.
    Short lowercase words (stopwords) are dropped.
    """
    important_keywords = []
    uppercased_keywords = []
    for word in query_sentence.split():
        if word[0].isupper():
            uppercased_keywords.append(word)
        elif len(word) > 3:
            important_keywords.append(word)
    return important_keywords, uppercased_keywords


def _keyword_hits(text, important_keywords, uppercased_keywords):
    """Count substring hits of each keyword list in *text*.

    Returns (important_hits, capitalized_hits); each keyword is counted at
    most once, so capitalized_hits <= len(uppercased_keywords) always holds.
    """
    important_hits = sum(1 for word in important_keywords if word in text)
    capitalized_hits = sum(1 for word in uppercased_keywords if word in text)
    return important_hits, capitalized_hits


@app.route('/')
async def server_1():
    """Search DuckDuckGo + Google for a (currently hard-coded) query, filter
    result snippets by keyword coverage, then score candidate sentences with
    four HF sentence-similarity models and return the consensus computed by
    process_similarity_15().

    Returns the processed consensus string, "" when too few results were
    gathered, or an error message when any model response is not a score list.
    """
    # TODO :: check html first then check similarity
    # TODO :: check parts of snipp to pass in the processing func
    query_sentence = "capital city of the Philippines"
    duck_results = []
    # Initialized up front so a failed Google call below cannot leave the
    # name unbound (the original code raised NameError in that path).
    google_results = []
    all_results = []
    try:
        searchduck = DuckDuckGoSearchResults(output_format="list", num_results=20)
        duck_results = searchduck.invoke(query_sentence)
    except Exception as exc:  # narrowed from a bare except
        print("DuckDuckGo search failed:", exc)
        duck_results = []
    if isinstance(duck_results, list) and len(duck_results) > 0:
        all_results = duck_results
    try:
        google_results = search.results(query_sentence, 10)
    except Exception as exc:  # narrowed from a bare except
        print("Google search failed:", exc)
    if isinstance(google_results, list) and len(google_results) > 0:
        all_results = all_results + google_results
    print("result counts (all/google/duck):", len(all_results), len(google_results), len(duck_results))

    important_keywords, uppercased_keywords = _split_keywords(query_sentence)

    # First pass: keep results whose snippet covers enough keywords.  The
    # three tiers mirror the original heuristic for different keyword counts.
    new_results = []
    for result in all_results:
        important_hits, capitalized_hits = _keyword_hits(
            result["snippet"], important_keywords, uppercased_keywords)
        snipp_score = important_hits + capitalized_hits
        # A capitalized hit is required whenever capitalized keywords exist.
        cap_ok = (capitalized_hits > 0) or (len(uppercased_keywords) == 0)
        if snipp_score >= len(important_keywords) and cap_ok:
            new_results.append(result)
            continue
        if (2 <= snipp_score <= len(important_keywords)) and len(important_keywords) <= 4 and cap_ok:
            new_results.append(result)
            continue
        if (4 <= snipp_score <= len(important_keywords)) and (5 <= len(important_keywords) <= 7) and cap_ok:
            new_results.append(result)
            continue

    print("len(new_results):", len(new_results))
    # TODO :: pull pages and split each html and count occurrence of important
    # keywords here & check snipp if snipp occurs between . and <p>

    # Second pass: split each kept snippet into sentences and keep the ones
    # that cover (almost) all keywords.
    n_results = {}
    for iter_x, result in enumerate(new_results):
        n_results[iter_x] = []
        for sentence in result["snippet"].split('.'):
            important_hits, capitalized_hits = _keyword_hits(
                sentence, important_keywords, uppercased_keywords)
            if important_hits == len(important_keywords) and capitalized_hits >= len(uppercased_keywords):
                n_results[iter_x].append(sentence)
            # Deliberately NOT an elif: a sentence matching both tests is kept
            # twice, preserving the original duplicate-weighting behavior.
            if important_hits >= len(important_keywords) - 1 or (capitalized_hits >= len(uppercased_keywords) and len(uppercased_keywords) > 0):
                n_results[iter_x].append(sentence)

    sentences_comparison = []
    for key in n_results:
        sentences_comparison.extend(n_results[key])
    # TODO(review): sentences_comparison is built but never sent to the
    # models -- the payload below is still hard-coded test data.

    payload = {
        "inputs": {
            "source_sentence": "Manila is the capital city of the Philippines",
            "sentences": [
                "The current capital city, Manila, has been the countrys capital throughout most",
                "Manila officially the City of Manila (Filipino: Lungsod ng Maynila),",
                "Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre ",
                "Quezon City is the capital of the Philippines",
                "Manila is the capital of the philippines",
                "For sometime Manila has been the capital of of the Philippines",
                "What is the capital of Philippines",
                "Manila is not the capital of the Phillipines",
                "Quezon city was the capital of the Philippines, until President Ferdinand ",
            ],
        },
    }
    response0 = requests.post(API_URL0, headers=headers, json=payload)
    response1 = requests.post(API_URL1, headers=headers, json=payload)
    response2 = requests.post(API_URL2, headers=headers, json=payload)
    response3 = requests.post(API_URL3, headers=headers, json=payload)
    # Parse each body once instead of re-parsing JSON on every use.
    scores0 = response0.json()
    scores1 = response1.json()
    scores2 = response2.json()
    scores3 = response3.json()
    if not (isinstance(scores0, list) and isinstance(scores1, list)
            and isinstance(scores2, list) and isinstance(scores3, list)):
        return "There's an error in llm similarity search retrieval"
    sorted0 = sorted(scores0, reverse=True)
    sorted1 = sorted(scores1, reverse=True)
    sorted2 = sorted(scores2, reverse=True)
    sorted3 = sorted(scores3, reverse=True)
    time.sleep(2)  # presumably throttles the HF API -- TODO confirm it is needed
    result_processed = ""
    # The original branched on == 10 and > 10 with identical calls; fewer
    # than 10 combined results still yields the empty string.
    if len(all_results) >= 10:
        result_processed = process_similarity_15(
            sorted0, sorted1, sorted2, sorted3, scores0, scores1, scores2, scores3)
    return result_processed
def threadserver():
    """Run the local llamafile embedding server; blocks until it exits.

    Intended to be the target of a daemon-style thread (see /app route).
    """
    print('hi')
    # subprocess.run with an argument list avoids invoking a shell, which
    # sidesteps injection risks and the stray-leading-space pitfall of the
    # previous os.system shell string.
    subprocess.run(["./mxbai-embed-large-v1-f16.llamafile", "--server", "--nobrowser"])
def _match_indices(sorted_scores, actual_scores, threshold=0.90):
    """Map each sorted score above *threshold* back to its index in *actual_scores*.

    Mirrors the original nested-loop lookup exactly: every occurrence of a
    matching value contributes the index of its FIRST occurrence in
    *actual_scores*, so duplicate scores yield repeated indices (the caller
    de-duplicates at the end).
    """
    indices = []
    for score in sorted_scores:
        for candidate in actual_scores:
            if score == candidate and score > threshold:
                indices.append(actual_scores.index(candidate))
    return indices


def _graph_votes(own_indices, own_scores, other_index_lists, strong=0.96):
    """Collect consensus votes for the sentence indices of one model.

    For each index: one vote if it appears in all three other models' lists,
    one vote if it appears in at least two of them, and one vote if it appears
    in at least one while this model's raw score at the same list position
    exceeds *strong*.  Duplicate votes are removed by the caller.

    NOTE(review): `own_scores[position]` indexes the raw score list by the
    POSITION within the sorted-index list, not by the sentence index itself.
    This reproduces the original behavior; confirm it is intended.
    """
    others_a, others_b, others_c = other_index_lists
    votes = []
    for position, idx in enumerate(own_indices):
        in_a = idx in others_a
        in_b = idx in others_b
        in_c = idx in others_c
        if in_a and in_b and in_c:
            votes.append(idx)
        if (in_a and in_b) or (in_b and in_c) or (in_a and in_c):
            votes.append(idx)
        if (in_a or in_b or in_c) and own_scores[position] > strong:
            votes.append(idx)
    return votes


def process_similarity_15(sorted0, sorted1, sorted2, sorted3, actualscore0, actualscore1, actualscore2, actualscore3):
    """Cross-reference four models' similarity rankings into a consensus.

    Args:
        sorted0..sorted3: each model's scores sorted descending.
        actualscore0..actualscore3: the same scores in original sentence order.

    Returns:
        str(list(set(...))) of the sentence indices that at least two models
        rank above 0.90, or that one model scores above 0.96 while another
        also ranks it above 0.90.
    """
    idx0 = _match_indices(sorted0, actualscore0)
    idx1 = _match_indices(sorted1, actualscore1)
    idx2 = _match_indices(sorted2, actualscore2)
    idx3 = _match_indices(sorted3, actualscore3)
    webgraph_list = []
    webgraph_list += _graph_votes(idx0, actualscore0, (idx1, idx2, idx3))
    webgraph_list += _graph_votes(idx1, actualscore1, (idx0, idx2, idx3))
    webgraph_list += _graph_votes(idx2, actualscore2, (idx0, idx1, idx3))
    webgraph_list += _graph_votes(idx3, actualscore3, (idx0, idx1, idx2))
    return str(list(set(webgraph_list)))
if __name__ == "__main__":
    # Start the Flask development server, reachable on all interfaces.
    app.run(host="0.0.0.0", port=8081)
    # server_app()
|