aibmedia committed on
Commit
3d10977
·
verified ·
1 Parent(s): ce3d5df

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +281 -51
main.py CHANGED
@@ -1,24 +1,28 @@
1
- import os
2
  from flask import Flask, render_template
3
  import threading
4
- import asyncio
5
  import time
6
  import requests
7
 
 
 
 
8
 
9
- from openai import OpenAI
 
10
 
11
- # app = Flask(__name__)
12
- # client = OpenAI(
13
- # # This base_url points to the local Llamafile server running on port 8080
14
- # base_url="http://127.0.0.1:8080/v1",
15
- # api_key="sk-no-key-required"
16
- # )
17
 
 
 
 
 
 
18
 
 
19
 
20
 
21
- API_URL = "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2"
 
22
  bearer = "Bearer " + os.getenv('TOKEN')
23
  headers = {"Authorization": bearer }
24
  print("headers")
@@ -26,7 +30,6 @@ print(headers)
26
 
27
  app = Flask(__name__)
28
 
29
-
30
  @app.route('/app')
31
  def server_app():
32
  llamafile = threading.Thread(target=threadserver)
@@ -35,57 +38,284 @@ def server_app():
35
  return 'llamafile.start()'
36
 
37
  @app.route('/findsimilarity')
38
- def server_one():
39
-
40
- sourcesim = "Results"
41
- s1 = "Results"
42
-
43
  return render_template("similarity_1.html", sourcetxt = sourcesim, s1 = s1 , headertxt = bearer )
44
 
45
 
46
-
47
-
48
  @app.route('/')
49
  def server_1():
50
- payload = { "inputs": { "source_sentence": "That is a happy person", "sentences": [ "That is a happy dog", "That is a very happy person", "Today is a sunny day" ] } , }
51
- response = requests.post(API_URL, headers=headers, json=payload)
52
- time.sleep(6)
53
- return response.json()
54
-
55
- # response = os.system(" curl https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2 -X POST -d '{ "inputs": { "source_sentence": "That is a happy person", "sentences": [ "That is a happy dog", "That is a very happy person", "Today is a sunny day" , ] } , } ' -H 'Content-Type: application/json' -H 'Authorization: `+bearer+`' " )
 
 
 
 
 
 
 
 
 
56
 
 
 
 
 
 
57
 
58
- # @app.route('/chat', methods=['POST'])
59
- # def chat():
60
- # try:
61
- # user_message = request.json['message']
62
-
63
- # completion = client.chat.completions.create(
64
- # model="LLaMA_CPP",
65
- # messages=[
66
- # {"role": "system", "content": "You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests."},
67
- # {"role": "user", "content": user_message}
68
- # ]
69
- # )
70
-
71
- # ai_response = completion.choices[0].message.content
72
- # ai_response = ai_response.replace('</s>', '').strip()
73
- # return jsonify({'response': ai_response})
74
- # except Exception as e:
75
- # print(f"Error: {str(e)}")
76
- # return jsonify({'response': f"Sorry, there was an error processing your request: {str(e)}"}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
- if __name__ == '__main__':
79
- app.run(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def threadserver():
82
  print('hi')
83
  os.system(' ./mxbai-embed-large-v1-f16.llamafile --server --nobrowser')
84
 
 
85
 
86
-
87
- async def query(data):
88
- response = await requests.post(API_URL, headers=headers, json=data)
89
- return response.json()
90
-
91
-
 
1
+ import os , json
2
  from flask import Flask, render_template
3
  import threading
 
4
  import time
5
  import requests
6
 
7
+ from langchain_core.tools import Tool
8
+ from langchain_google_community import GoogleSearchAPIWrapper, search
9
+ from langchain_community.tools import DuckDuckGoSearchResults
10
 
11
+ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
12
+
13
 
 
 
 
 
 
 
14
 
15
# Hugging Face Inference API endpoints for four sentence-similarity models;
# all four are queried with the same payload and cross-checked downstream.
API_URL0 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
API_URL1 = "https://api-inference.huggingface.co/models/sentence-transformers/all-mpnet-base-v2"
API_URL2 = "https://api-inference.huggingface.co/models/sentence-transformers/all-roberta-large-v1"
API_URL3 = "https://api-inference.huggingface.co/models/Snowflake/snowflake-arctic-embed-l-v2.0"

# Google Custom Search wrapper, returning up to 20 results per query.
search = GoogleSearchAPIWrapper(k=20)

# Build the HF auth header from the TOKEN env var.  Fail fast with a clear
# message instead of the opaque TypeError ("can only concatenate str ...")
# that `"Bearer " + None` would raise when TOKEN is unset.
_token = os.getenv('TOKEN')
if _token is None:
    raise RuntimeError("TOKEN environment variable is not set")
bearer = "Bearer " + _token
headers = {"Authorization": bearer}
# NOTE(review): the following debug output (and the existing `print(headers)`
# just below this block) writes the bearer token to stdout/logs — consider
# removing before deployment.
print("headers")
 
30
 
31
  app = Flask(__name__)
32
 
 
33
  @app.route('/app')
34
  def server_app():
35
  llamafile = threading.Thread(target=threadserver)
 
38
  return 'llamafile.start()'
39
 
40
  @app.route('/findsimilarity')
41
+ def server_one():
42
+ sourcesim = "Results"
43
+ s1 = "Results"
 
 
44
  return render_template("similarity_1.html", sourcetxt = sourcesim, s1 = s1 , headertxt = bearer )
45
 
46
 
 
 
47
@app.route('/')
def server_1():
    """Search the web for a query, score nine candidate sentences against a
    source sentence with four HF similarity models, and cross-check the models.

    Returns the combined DuckDuckGo + Google result list on success, or an
    error string when any model call does not yield a score list.

    Refactor notes: the four copy-pasted sort/index/padding stanzas are now
    the `_indices_above_threshold` / `_pad_selection` helpers; the unused
    `Tool` wrapper, unused keyword extraction, dead commented-out payloads,
    the no-op `time.sleep(4)`, and per-iteration debug prints were removed.
    """
    query_sentence = "Obama's first name"

    # Gather candidate documents from two engines (20 DuckDuckGo + 10 Google).
    searchduck = DuckDuckGoSearchResults(output_format="list", num_results=20)
    duck_results = searchduck.invoke(query_sentence)
    google_results = search.results(query_sentence, 10)
    all_results = duck_results + google_results

    # One fixed payload, four models: each model scores the same nine
    # candidate sentences against the same source sentence.
    payload = { "inputs": { "source_sentence": "Manila is the capital city of the Philippines", "sentences": ["The current capital city, Manila, has been the countrys capital throughout most","Manila officially the City of Manila (Filipino: Lungsod ng Maynila),","Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre ","Quezon City is the capital of the Philippines","Manila is the capital of the philippines","For sometime Manila has been the capital of of the Philippines" ,"What is the capital of Philippines","Manila is not the capital of the Phillipines","Quezon city was the capital of the Philippines, until President Ferdinand "] } , }
    responses = [
        requests.post(url, headers=headers, json=payload)
        for url in (API_URL0, API_URL1, API_URL2, API_URL3)
    ]
    score_lists = [r.json() for r in responses]

    # Each model must return a plain list of scores; anything else (e.g. an
    # HF error/loading dict) aborts with a readable message for the client.
    if not all(isinstance(scores, list) for scores in score_lists):
        return "There's an error in llm similarity search retrieval"

    # Per-model selections: indices of sentences scoring above 0.90,
    # ordered by descending score.
    selections = [_indices_above_threshold(scores) for scores in score_lists]
    for i, sel in enumerate(selections):
        print("sorted%d_with_index" % i)
        print(sel)

    # Cross-model agreement: proceed only when at least two models produced
    # two or more confident matches.  (The original's "any 3 of 4" and
    # "any 2 of 4" checks both reduce to this, since 3-of-4 implies 2-of-4.)
    if sum(len(sel) >= 2 for sel in selections) >= 2:
        padded = [_pad_selection(sel) for sel in selections]
        print("index_sorted0-3")
        for sel in padded:
            print(sel)
    else:
        print("No reliable similarity found by 4 llms")

    return all_results


def _indices_above_threshold(scores, threshold=0.90):
    """Return the indices of *scores* strictly above *threshold*, best first.

    Sorts index positions by score so tied scores keep their own indices
    (the original's repeated `list.index(y)` lookup returned the first
    occurrence for every duplicate value).
    """
    order = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
    return [i for i in order if scores[i] > threshold]


def _pad_selection(indices, target=5):
    """Pad a non-empty selection to *target* entries by repeating its top index.

    NOTE(review): mirrors the original logic exactly — a selection that is
    empty or already has >= target entries yields [] here; `indices[:target]`
    may be the real intent for long lists, confirm before changing.
    """
    if 0 < len(indices) < target:
        padded = list(indices)
        padded.extend([padded[0]] * (target - len(padded)))
        return padded
    return []
312
+
313
+
314
def threadserver():
    """Launch the local llamafile embedding server (blocks until it exits)."""
    print('hi')
    command = ' ./mxbai-embed-large-v1-f16.llamafile --server --nobrowser'
    os.system(command)
317
 
318
+
319
 
320
if __name__ == '__main__':
    # Listen on all interfaces, port 8080 (container-style deployment).
    app.run(host='0.0.0.0', port=8080)