Update main.py
main.py (CHANGED)
@@ -1,24 +1,28 @@
-import os
+import os , json
 from flask import Flask, render_template
 import threading
-import asyncio
 import time
 import requests

+from langchain_core.tools import Tool
+from langchain_google_community import GoogleSearchAPIWrapper, search
+from langchain_community.tools import DuckDuckGoSearchResults

-from
+from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
+

-# app = Flask(__name__)
-# client = OpenAI(
-# # This base_url points to the local Llamafile server running on port 8080
-# base_url="http://127.0.0.1:8080/v1",
-# api_key="sk-no-key-required"
-# )

+API_URL0 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
+API_URL1 = "https://api-inference.huggingface.co/models/sentence-transformers/all-mpnet-base-v2"
+API_URL2 = "https://api-inference.huggingface.co/models/sentence-transformers/all-roberta-large-v1"
+API_URL3 = "https://api-inference.huggingface.co/models/Snowflake/snowflake-arctic-embed-l-v2.0"
+# API_URL4 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"

+


-
+search = GoogleSearchAPIWrapper(k=20)
+
 bearer = "Bearer " + os.getenv('TOKEN')
 headers = {"Authorization": bearer }
 print("headers")
@@ -26,7 +30,6 @@ print(headers)

 app = Flask(__name__)

-
 @app.route('/app')
 def server_app():
     llamafile = threading.Thread(target=threadserver)
@@ -35,57 +38,284 @@ def server_app():
     return 'llamafile.start()'

 @app.route('/findsimilarity')
-def server_one():
-
-
-    s1 = "Results"
-
+def server_one():
+    sourcesim = "Results"
+    s1 = "Results"
     return render_template("similarity_1.html", sourcetxt = sourcesim, s1 = s1 , headertxt = bearer )


-
-
 @app.route('/')
 def server_1():
-
-
-
-
-
-
+
+    query_sentence = "Obama's first name"
+    searchduck = DuckDuckGoSearchResults(output_format="list" , num_results=20)
+    duck_results = searchduck.invoke(query_sentence)
+
+    tool = Tool(
+        name="google_search",
+        description="Search Google for recent results.",
+        func=search.run,
+    )
+
+    google_results = search.results( query_sentence , 10 )
+    print("type(duck_results)") ; print(type(duck_results)) ; print(type(google_results))
+
+    all_results = duck_results + google_results

+    # get the snippet put into list
+    split_query_words = query_sentence.split(); important_keywords = []
+    for x in split_query_words:
+        if x.isupper():
+            important_keywords.append(x)

-
-#
-#
-#
-
-
-#
-#
-#
-
-#
-
-
-#
-#
-
-
-
-
+    ## get the longest word in sentence
+    # res = "" ; iteratorx = 0
+    # for word in split_query_words:
+    #     if len(word) > len(res):
+    #         res = word
+
+    # get google 20 items
+    # get user query in the url param
+    # truncate 130 characters
+
+    # if still no passed 2x4 matrix in log print increase chars by 200
+
+    # payload = { "inputs": { "source_sentence": "That is a green painted house", "sentences": ["The house paint is green", "That house is green","That house was painted","The house is green", "The house was bought yesterday", "This house was painted green", "That house looks green", "Today the house is clean " ] } , }
+    # payload = { "inputs": { "source_sentence": "Manila is the capital city of the Philippines", "sentences": ["The current capital city, Manila, has been the countrys capital throughout most of its history and regained the title through a presidential order in 1976", "Manila officially the City of Manila (Filipino: Lungsod ng Maynila), is the capital and second-most populous city of the Philippines, after Quezon City.", "Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre of the countrys economic, political, social, and cultural activity.", "Quezon City is the capital of the Philippines", "Manila is the capital of the philippines", "For sometime Manila has been the capital of of the Philippines" , "What is the capital of Philippines", "Manila is not the capital of the Phillipines", "Quezon city was the capital of the Philippines, until President Ferdinand Marcos Sr. moved the capital to back to Manila. " ] } , }
+    # payload = { "inputs": { "source_sentence": "Manila is the capital city of the Philippines", "sentences": ["The current capital city, Manila, has been the country's capital throughout most of its history and regained the title through a presidential order in 1976"] } , }
+    payload = { "inputs": { "source_sentence": "Manila is the capital city of the Philippines", "sentences": ["The current capital city, Manila, has been the countrys capital throughout most","Manila officially the City of Manila (Filipino: Lungsod ng Maynila),","Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre ","Quezon City is the capital of the Philippines","Manila is the capital of the philippines","For sometime Manila has been the capital of of the Philippines" ,"What is the capital of Philippines","Manila is not the capital of the Phillipines","Quezon city was the capital of the Philippines, until President Ferdinand "] } , }
+    response0 = requests.post(API_URL0, headers=headers, json=payload)
+    response1 = requests.post(API_URL1, headers=headers, json=payload)
+    response2 = requests.post(API_URL2, headers=headers, json=payload)
+    response3 = requests.post(API_URL3, headers=headers, json=payload)
+
+    print("type( response0.json() )")
+    print(type( response0.json() ))
+    print(type( response1.json() ))
+    print(type( response2.json() ))
+    print(type( response3.json() ))
+    if type(response0.json()) == list and type(response1.json()) == list and type(response2.json()) == list and type(response3.json()) == list :
+        similarity_scores = response0.json() + response1.json() + response2.json() + response3.json()
+    else:
+        similarity_scores = "There's an error in llm similarity search retrieval"
+        return similarity_scores
+
+    time.sleep(4)
+    print(similarity_scores)
+    print(type(similarity_scores))
+    print("length")
+    print(len(similarity_scores))
+    key_index = 0
+    #copy + loop to get index
+
+    r_iterator = 0
+    resp_list0 = []
+    resp_list1 = []
+    resp_list2 = []
+    resp_list3 = []
+
+    for value_inlist in similarity_scores:
+        print(value_inlist)
+        print("index ")
+        print(key_index)
+        if key_index <= 8 :
+            resp_list0.append(value_inlist)
+        if key_index <= 17 and key_index > 8 :
+            resp_list1.append(value_inlist)
+        if key_index <= 26 and key_index > 17 :
+            resp_list2.append(value_inlist)
+        if key_index <= 35 and key_index > 26 :
+            resp_list3.append(value_inlist)
+        key_index = key_index + 1
+
+    print("The Response list 0 ")
+    print(resp_list0)
+    print("The Response list 1 ")
+    print(resp_list1)
+    print("The Response list 2 ")
+    print(resp_list2)
+    print("The Response list 3 ")
+    print(resp_list3)
+    # sorted 0 - 3 are sorted lists of score ; we must get their indices which is 0-8 that will be mapped to sentence index
+    sorted0 = sorted(resp_list0 , reverse=True)
+    sorted1 = sorted(resp_list1 , reverse=True)
+    sorted2 = sorted(resp_list2 , reverse=True)
+    sorted3 = sorted(resp_list3 , reverse=True)
+
+    print("the sorted0-3")
+    print(sorted0)
+    print(sorted1)
+    print(sorted2)
+    print(sorted3)
+    print("end the sorted0-3")
+    # Get the index of the sorted list for resp_list0
+
+    sorted0_with_index = []
+    for x in sorted0:
+        for y in resp_list0:
+            if x == y:
+                print("index of sorted0")
+                print(resp_list0.index(y))
+                if x > 0.90:
+                    sorted0_with_index.append(resp_list0.index(y))
+                    print("sorted_with_index")
+                    print(sorted0_with_index)
+    print("sorted0_with_index")
+    print(sorted0_with_index)
+    sorted1_with_index = []
+    for x in sorted1:
+        for y in resp_list1:
+            if x == y:
+                print("index of sorted1")
+                print(resp_list1.index(y))
+                if y > 0.90:
+                    sorted1_with_index.append(resp_list1.index(y))
+                    print("sorted_with_index")
+                    print(sorted1_with_index)
+
+    print("sorted1_with_index")
+    print(sorted1_with_index)
+
+    sorted2_with_index = []
+    print("b4 for x in sorted2:")
+    print("resp_list2:" + str(resp_list2))
+    print("sorted:" + str(sorted2))
+    for x in sorted2:
+        for y in resp_list2:
+            if x == y:
+                print("index of sorted2")
+                print(resp_list2.index(y))
+                if y > 0.90:
+                    sorted2_with_index.append(resp_list2.index(y))
+                    print("sorted_with_index")
+                    print(sorted2_with_index)
+
+    print("sorted2_with_index")
+    print(sorted2_with_index)
+    sorted3_with_index = []
+    print("b4 for x in sorted3:")
+    print("resp_list3:" + str(resp_list3))
+    for x in sorted3:
+        for y in resp_list3:
+            if x == y:
+                print("index of sorted3")
+                print(resp_list3.index(y))
+                if y > 0.90:
+                    sorted3_with_index.append(resp_list3.index(y))
+                    print("sorted_with_index")
+                    print(sorted3_with_index)
+
+    print("sorted0-3_with_index")
+    print(sorted0_with_index)
+    print(sorted1_with_index)
+    print(sorted2_with_index)
+    print(sorted3_with_index)
+
+    index_sorted0 = [] ; index_sorted1 = [] ; index_sorted2 = [] ; index_sorted3 = []
+    # lines 158 onwards is about scenario when sorted0_with_index values are greater then .78
+    # then combine the top 5 values from each list to get the top 3 of 4 llm
+    varcontinue = False
+    # we will only continue if each llm has resulted with values greater than .78 & if these llm result list has at least 2
+
+    if ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted0_with_index) >= 2 ):
+        # continue variable set to true
+        # indent this here # if( check if avarage of each any3 resp_list0-3 average is 0.85 or above ) !!then only continue!!!
+        varcontinue = True
+        print("continue variable set to true")
+    if ( len(sorted0_with_index) >= 2 and len(sorted1_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or ( len(sorted1_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or ( len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ):
+        # continue variable set to true
+        # if( check if avarage of any3 resp_list0-3 average is 0.85 or above )!!then only continue!!!
+        varcontinue = True
+        print("continue variable set to true")
+
+    # check if llm 1 - 3 has minimum 3
+
+    if varcontinue == True:
+        if len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0 :
+            print("len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0")
+            print(len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0)
+            print("sorted0_with_index")
+            print(sorted0_with_index)
+            for x in sorted0_with_index :
+                index_sorted0.append(x)
+            remaining_padding = 5 - len(index_sorted0)
+            while remaining_padding > 0 :
+                remaining_padding= remaining_padding - 1
+                index_sorted0.append(index_sorted0[0])
+
+        if len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0 :
+            print("len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0")
+            print(len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0)
+            print("sorted1_with_index")
+            print(sorted1_with_index)
+            for x in sorted1_with_index :
+                index_sorted1.append(x)
+            remaining_padding = 5 - len(index_sorted1)
+            while remaining_padding > 0 :
+                remaining_padding= remaining_padding - 1
+                index_sorted1.append(index_sorted1[0])

-if
-
+        if len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0 :
+            print("len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0")
+            print(len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0)
+            print("sorted2_with_index")
+            print(sorted2_with_index)
+            for x in sorted2_with_index :
+                index_sorted2.append(x)
+            remaining_padding = 5 - len(index_sorted2)
+            while remaining_padding > 0 :
+                remaining_padding= remaining_padding - 1
+                index_sorted2.append(index_sorted2[0])
+
+        if len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0 :
+            print("len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0")
+            print(len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0)
+            print("sorted3_with_index")
+            print(sorted3_with_index)
+            for x in sorted3_with_index :
+                index_sorted3.append(x)
+            remaining_padding = 5 - len(index_sorted3)
+            while remaining_padding > 0 :
+                remaining_padding= remaining_padding - 1
+                index_sorted3.append(index_sorted3[0])

+        print("index_sorted0-1")
+        print(index_sorted0)
+        print(index_sorted1)
+        print(index_sorted2)
+        print(index_sorted3)
+
+
+    else:
+        print("No reliable similarity found by 4 llms")
+    # index_sorted0 = sorted0_with_index[:4]
+    # index_sorted1 = sorted1_with_index[:4]
+    # index_sorted2 = sorted2_with_index[:4]
+    # index_sorted3 = sorted3_with_index[:4]
+
+    # combined_indexes = index_sorted0 +index_sorted1 +index_sorted2 +index_sorted3
+    # uniq_list = []
+    # print("combined_indexes")
+    # print(combined_indexes)
+    # for item in combined_indexes:
+    #     if item not in uniq_list:
+    #         uniq_list.append(item)
+    # print("uniq_list")
+    # print(uniq_list)
+    # top_3_indexes = []
+    # get the top 3 from the combined_indexes
+    # the top 3 indexes must be above .78 similarity score
+    # the top 3 must have occured 4 times or more in combined_indexes
+
+
+    return all_results
+
+
 def threadserver():
     print('hi')
     os.system(' ./mxbai-embed-large-v1-f16.llamafile --server --nobrowser')

+

-
-
-    response = await requests.post(API_URL, headers=headers, json=data)
-    return response.json()
-
-
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=8080)
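Note on the four API_URL endpoints: they all target the Hugging Face Inference API sentence-similarity task, which takes a source_sentence plus a list of candidate sentences and normally returns one float score per candidate; that is why the handler checks type(response0.json()) == list before concatenating the four result lists. Below is a minimal sketch of that call pattern, separate from the committed code; the helper name query_similarity, the timeout, and the error handling are illustrative assumptions, while the payload shape and the Bearer token header mirror the diff.

import os
import requests

# Same endpoint style as API_URL1 in the diff; the helper below is hypothetical.
API_URL = "https://api-inference.huggingface.co/models/sentence-transformers/all-mpnet-base-v2"
HEADERS = {"Authorization": "Bearer " + os.getenv("TOKEN", "")}

def query_similarity(source_sentence, candidates):
    # The sentence-similarity task takes one source sentence plus candidates
    # and is expected to return one float score per candidate.
    payload = {"inputs": {"source_sentence": source_sentence, "sentences": candidates}}
    resp = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
    scores = resp.json()
    if not isinstance(scores, list):
        # Model-loading or auth problems typically come back as a dict, e.g. {"error": "..."}.
        raise RuntimeError("similarity request failed: " + str(scores))
    return scores

if __name__ == "__main__":
    scores = query_similarity(
        "Manila is the capital city of the Philippines",
        ["Manila is the capital of the philippines",
         "Quezon City is the capital of the Philippines"],
    )
    # Pair each candidate index with its score, highest first.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    print(ranked)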
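The nested loops that build sorted0_with_index through sorted3_with_index recover the positions of the top scores by comparing values and calling resp_listN.index(y), which always returns the first occurrence, so tied scores map to the same index repeatedly. A hedged alternative sketch with the same intent (indices of scores above 0.90, best first) but without the quadratic scan; the function name and threshold parameter are illustrative and not part of the commit.

def top_indices(scores, threshold=0.90):
    # Indices of scores sorted by value, highest first (an argsort without numpy).
    order = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
    # Keep only the positions whose score clears the threshold,
    # mirroring the "if x > 0.90:" filter in the diff.
    return [i for i in order if scores[i] > threshold]

# Example with 9 scores, one per candidate sentence:
resp_list0 = [0.97, 0.41, 0.88, 0.93, 0.93, 0.12, 0.55, 0.91, 0.30]
print(top_indices(resp_list0))   # [0, 3, 4, 7], and ties keep distinct indices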
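The commented-out block near the end of server_1() (combined_indexes, uniq_list, top_3_indexes) describes a final vote that is not implemented yet: merge the per-model index lists, keep sentence indexes that occur often enough across the four models, and return the top 3. A sketch of that described step, assuming the per-model lists were already threshold-filtered upstream; the function name and the min_votes=4 default (taken from the "occured 4 times or more" comment) are assumptions.

from collections import Counter

def top_consensus_indexes(index_lists, min_votes=4, top_n=3):
    # index_lists would be [index_sorted0, index_sorted1, index_sorted2, index_sorted3].
    # Count how often each sentence index appears across all model lists
    # (duplicates inside one list count too, matching the padded lists in the diff).
    votes = Counter(i for lst in index_lists for i in lst)
    # Keep indexes with enough votes, then take the top_n most common.
    agreed = [(idx, n) for idx, n in votes.most_common() if n >= min_votes]
    return [idx for idx, _ in agreed[:top_n]]

# Example: four models, five (padded) index slots each.
print(top_consensus_indexes([[4, 1, 0, 4, 4], [4, 1, 1, 1, 1], [4, 0, 4, 4, 4], [1, 4, 4, 4, 4]]))
# -> [4, 1]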