thejagstudio committed on
Commit
aab749f
·
verified ·
1 Parent(s): c232706

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +292 -117
main.py CHANGED
@@ -1,189 +1,364 @@
1
  from flask import Flask, request, jsonify, render_template, Response
2
- from flask_cors import CORS
3
  import requests
4
  import json
5
- import sqlite3
6
- import numpy as np
7
- from chromadb import Chroma
8
- from chromadb.api.types import Documents, Embeddings
9
- from chromadb.api import EmbeddingFunction
10
  import random
11
- from waitress import serve
12
-
 
 
13
 
14
  app = Flask(__name__)
15
  CORS(app)
16
 
17
-
18
- # Custom embedding function for ChromaDB
19
  class MyEmbeddingFunction(EmbeddingFunction):
20
- def __init__(self):
21
- self.api_url = "https://api-inference.huggingface.co/models/BAAI/bge-large-en-v1.5"
22
- self.headers = {
23
- 'accept': '*/*',
24
- 'content-type': 'application/json',
25
- 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
26
- }
27
-
28
  def embed_documents(self, input: Documents) -> Embeddings:
29
- payload = {"inputs": input}
30
- try:
31
- response = requests.post(self.api_url, headers=self.headers, json=payload)
32
- response.raise_for_status()
33
- return response.json()[0][0]
34
- except Exception as e:
35
- print("Error embedding documents:", str(e))
36
- return []
37
-
38
- def embed_query(self, input: str) -> Embeddings:
39
- payload = {"inputs": [input]}
40
- try:
41
- response = requests.post(self.api_url, headers=self.headers, json=payload)
42
- response.raise_for_status()
43
- return response.json()[0][0]
44
- except Exception as e:
45
- print("Error embedding query:", str(e))
46
- return []
47
-
48
-
49
- # Initialize ChromaDB
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  try:
51
  custom_embeddings = MyEmbeddingFunction()
52
  db = Chroma(embedding_function=custom_embeddings)
53
 
 
54
  def load_documents_from_sqlite(db_path="chroma.sqlite3"):
55
  conn = sqlite3.connect(db_path)
56
  cursor = conn.cursor()
57
-
 
58
  cursor.execute("SELECT id, content, embedding FROM documents")
59
  rows = cursor.fetchall()
60
-
61
  collection = db.get_or_create_collection("default_collection")
 
62
  for row in rows:
63
  doc_id = row[0]
64
  content = row[1]
65
- embedding = json.loads(row[2]) # Parse JSON string into Python object
66
- collection.add(ids=[doc_id], documents=[content], embeddings=[embedding])
67
-
 
 
 
 
68
  conn.close()
69
- print("Documents loaded into ChromaDB.")
 
 
70
 
71
- load_documents_from_sqlite()
72
  except Exception as e:
73
  print("Error initializing database:", str(e))
74
 
75
 
76
- # Generate embeddings for a query
77
- def generate_embedding(query: str):
78
- try:
79
- return custom_embeddings.embed_query(query)
80
- except Exception as e:
81
- print("Error generating embedding:", str(e))
82
- return []
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
- # Rank strings based on similarity
86
- def strings_ranked_by_relatedness(query: str, df, top_n=5):
87
- def relatedness_fn(x, y):
88
- return np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y))
89
 
90
- query_embedding = generate_embedding(query)
91
- if not query_embedding:
92
- return [], []
 
 
93
 
94
- strings_and_relatedness = [
 
 
95
  (row["text"], relatedness_fn(query_embedding, row["embedding"])) for row in df
96
  ]
97
- strings_and_relatedness.sort(key=lambda x: x[1], reverse=True)
98
-
99
- strings, relatednesses = zip(*strings_and_relatedness)
100
  return strings[:top_n], relatednesses[:top_n]
101
 
102
 
103
- @app.route("/", methods=["GET"])
104
- def index():
105
- return render_template("index.html")
106
-
107
-
108
- @app.route("/api/gpt", methods=["POST"])
109
- def gpt_response():
110
- data = request.get_json()
111
- messages = data.get("messages", [])
112
-
113
- def inference():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  url = "https://api.deepinfra.com/v1/openai/chat/completions"
 
115
  payload = json.dumps({
116
  "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
117
- "messages": messages,
 
 
 
 
 
 
 
 
 
118
  "stream": True,
119
  "max_tokens": 1024,
120
  })
121
  headers = {
 
 
122
  'Content-Type': 'application/json',
 
 
 
 
 
 
 
123
  'accept': 'text/event-stream',
 
 
 
124
  }
125
 
126
- response = requests.post(url, headers=headers, data=payload, stream=True)
 
127
  for line in response.iter_lines(decode_unicode=True):
128
  if line:
129
- yield line
 
 
 
 
 
130
 
131
- return Response(inference(), content_type='text/event-stream')
132
 
133
 
134
- @app.route("/api/getContext", methods=["POST"])
135
- def get_context():
136
- question = request.form.get("question")
137
- try:
138
- results = db.similarity_search_with_score(question, k=5)
139
- context = "\n\n---\n\n".join([doc.page_content for doc, _ in results])
140
- sources = [doc.metadata.get("id") for doc, _ in results]
141
- return jsonify({"context": context, "sources": sources})
142
- except Exception as e:
143
- return jsonify({"context": [], "sources": [], "error": str(e)})
144
 
 
 
 
145
 
146
- @app.route("/api/voice", methods=["POST"])
147
- def voice_gen():
148
- text = request.form.get("text")
149
- url = "https://texttospeech.googleapis.com/v1beta1/text:synthesize?alt=json&key=YOUR_GOOGLE_API_KEY"
150
 
151
- payload = {
152
- "input": {"text": text},
153
- "voice": {"languageCode": "en-US", "name": "en-US-Studio-Q"},
154
- "audioConfig": {"audioEncoding": "LINEAR16", "pitch": 0, "speakingRate": 1}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  }
 
 
 
 
 
 
 
156
  try:
157
- response = requests.post(url, json=payload)
158
- response.raise_for_status()
159
- return jsonify({"audio": response.json().get("audioContent")})
 
 
 
160
  except Exception as e:
161
- return jsonify({"error": str(e)})
162
 
163
 
164
  @app.route("/api/audioGenerate", methods=["POST"])
165
- def audio_generate():
166
- answer = request.form.get("answer")
167
- audio_responses = []
168
-
169
- for sentence in answer.split("\n"):
170
  url = "https://deepgram.com/api/ttsAudioGeneration"
171
- payload = {
172
- "text": sentence,
 
173
  "model": "aura-asteria-en",
 
174
  "params": "tag=landingpage-product-texttospeech"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  }
176
 
177
- try:
178
- response = requests.post(url, json=payload)
179
- response.raise_for_status()
180
- audio_responses.append(response.json().get("data"))
181
- except Exception as e:
182
- print(f"Error generating audio for '{sentence}': {str(e)}")
183
- continue
184
-
185
- return jsonify({"audio": audio_responses})
186
 
187
 
188
  if __name__ == "__main__":
 
 
 
189
  serve(app, host="0.0.0.0", port=7860)
 
1
  from flask import Flask, request, jsonify, render_template, Response
2
+ import os
3
  import requests
4
  import json
5
+ from scipy import spatial
6
+ from flask_cors import CORS
 
 
 
7
  import random
8
+ import numpy as np
9
+ from langchain_chroma import Chroma
10
+ from chromadb import Documents, EmbeddingFunction, Embeddings, Collection
11
+ import sqlite3
12
 
13
  app = Flask(__name__)
14
  CORS(app)
15
 
 
 
16
class MyEmbeddingFunction(EmbeddingFunction):
    """Embedding function backed by the Hugging Face hosted inference API.

    Sends text to the BAAI/bge-large-en-v1.5 endpoint over HTTP and retries
    a few times because the anonymous public endpoint is flaky (cold starts,
    transient 5xx responses).
    """

    # Hosted model endpoint (anonymous access, no auth token).
    API_URL = "https://api-inference.huggingface.co/models/BAAI/bge-large-en-v1.5"

    # Browser-like headers so the anonymous endpoint accepts the request.
    HEADERS = {
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'content-type': 'application/json',
        'origin': 'https://huggingface.co',
        'priority': 'u=1, i',
        'referer': 'https://huggingface.co/',
        'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    }

    MAX_RETRIES = 5  # matches the original retry count

    def _request_embedding(self, inputs):
        """POST *inputs* to the inference API, retrying on failure.

        Returns ``response.json()[0][0]`` on success, or None after all
        retries fail (preserves the original implicit-None behavior).
        NOTE(review): even for multi-document input only the first vector
        is returned — confirm this is intentional.
        """
        for attempt in range(self.MAX_RETRIES):
            try:
                response = requests.post(
                    self.API_URL,
                    headers=self.HEADERS,
                    json={"inputs": inputs},
                    timeout=30,  # never hang a request forever
                )
                response.raise_for_status()
                return response.json()[0][0]
            except Exception as e:
                # Log and retry instead of the original bare `except: pass`,
                # which silently swallowed every error (even KeyboardInterrupt).
                print(f"Error in Embedding (attempt {attempt + 1}):", str(e))
        return None

    def embed_documents(self, input: Documents) -> Embeddings:
        # Documents arrive as a list of strings and are sent as-is.
        return self._request_embedding(input)

    def embed_query(self, input: Documents) -> Embeddings:
        # The API expects a batch, so wrap the single query in a list.
        return self._request_embedding([input])
76
+
77
+ # try:
78
+ # CHROMA_PATH = "chroma"
79
+ # custom_embeddings = MyEmbeddingFunction()
80
+ # db = Chroma(
81
+ # persist_directory=CHROMA_PATH,embedding_function=custom_embeddings
82
+ # )
83
+ # #
84
+ # except Exception as e:
85
+ # print("Error in database :",str(e))
86
+
87
# Initialize the database without persist_directory (in-memory collection,
# hydrated below from the bundled SQLite snapshot).
try:
    custom_embeddings = MyEmbeddingFunction()
    db = Chroma(embedding_function=custom_embeddings)

    # Load documents from chroma.sqlite3
    def load_documents_from_sqlite(db_path="chroma.sqlite3"):
        """Copy rows from *db_path* into the default Chroma collection.

        Assumes a ``documents`` table with ``id``, ``content`` and
        ``embedding`` (JSON-encoded list) columns — TODO confirm schema.
        """
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT id, content, embedding FROM documents")
            rows = cursor.fetchall()

            collection = db.get_or_create_collection("default_collection")
            for doc_id, content, raw_embedding in rows:
                # Embeddings are stored as JSON strings in SQLite.
                embedding = json.loads(raw_embedding)
                collection.add(
                    ids=[doc_id],
                    documents=[content],
                    embeddings=[embedding],
                )
        finally:
            # Always release the SQLite handle, even when a row is
            # malformed — the original leaked the connection on error.
            conn.close()
        print("Loaded documents into Chroma.")

    load_documents_from_sqlite()  # hydrate at import time

except Exception as e:
    # Startup is best-effort: routes that don't need the vector DB
    # should still come up even if hydration fails.
    print("Error initializing database:", str(e))
120
 
121
 
122
def embeddingGen(query):
    """Embed a single *query* string via the Hugging Face inference API.

    Returns the embedding vector (``response.json()[0][0]``).
    Raises ``requests.RequestException`` on HTTP failure; callers such as
    ``strings_ranked_by_relatedness`` let the error propagate.
    NOTE(review): near-duplicate of ``MyEmbeddingFunction.embed_query`` —
    consider delegating to a single implementation.
    """
    url = "https://api-inference.huggingface.co/models/BAAI/bge-large-en-v1.5"

    payload = {
        "inputs": [query]
    }
    # Browser-like headers so the anonymous endpoint accepts the request.
    headers = {
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'content-type': 'application/json',
        'origin': 'https://huggingface.co',
        'priority': 'u=1, i',
        'referer': 'https://huggingface.co/',
        'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    }

    response = requests.post(url, headers=headers, json=payload, timeout=30)
    # Surface 4xx/5xx explicitly instead of a cryptic KeyError on the
    # error-JSON body below.
    response.raise_for_status()
    return response.json()[0][0]
146
 
 
 
 
 
147
 
148
def strings_ranked_by_relatedness(query, df, top_n=5):
    """Return the *top_n* texts in *df* most related to *query*.

    *df* is an iterable of mappings with "text" and "embedding" keys —
    presumably pre-embedded corpus rows; verify against the caller.
    Returns ``(strings, relatednesses)`` sorted by descending cosine
    similarity; empty tuples when *df* is empty.
    """
    def relatedness_fn(x, y):
        # Cosine similarity between two embedding vectors.
        x_norm = np.linalg.norm(x)
        y_norm = np.linalg.norm(y)
        return np.dot(x, y) / (x_norm * y_norm)

    query_embedding = embeddingGen(query)
    strings_and_relatednesses = [
        (row["text"], relatedness_fn(query_embedding, row["embedding"])) for row in df
    ]
    # `zip(*[])` raises TypeError on an empty corpus — guard explicitly.
    if not strings_and_relatednesses:
        return (), ()
    strings_and_relatednesses.sort(key=lambda x: x[1], reverse=True)
    strings, relatednesses = zip(*strings_and_relatednesses)
    return strings[:top_n], relatednesses[:top_n]
162
 
163
 
164
@app.route("/api/gpt", methods=["POST", "GET"])
def gptRes():
    """Proxy chat completions to DeepInfra's Llama 3.1 8B endpoint.

    POST: expects JSON ``{"messages": [...]}`` and streams the raw SSE
    lines back to the client as ``text/event-stream``.
    GET: expects ``?query=...`` (and optional ``system``) and returns the
    fully accumulated completion as JSON.
    """
    # url/headers were duplicated verbatim in both branches — hoisted once.
    url = "https://api.deepinfra.com/v1/openai/chat/completions"
    headers = {
        'Accept-Language': 'en-US,en;q=0.9,gu;q=0.8,ru;q=0.7,hi;q=0.6',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://deepinfra.com',
        'Referer': 'https://deepinfra.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
        'X-Deepinfra-Source': 'web-page',
        'accept': 'text/event-stream',
        'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"'
    }

    if request.method == 'POST':
        data = request.get_json()
        messages = data["messages"]

        def inference():
            payload = json.dumps({
                "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
                "messages": messages,
                "stream": True,
                "max_tokens": 1024,
            })
            response = requests.post(url, headers=headers, data=payload,
                                     stream=True, timeout=60)
            for line in response.iter_lines(decode_unicode=True):
                if line:
                    # Forward raw SSE lines untouched; the client parses them.
                    yield line

        return Response(inference(), content_type='text/event-stream')
    else:
        query = request.args.get('query')
        # NOTE(review): this default persona prompt looks unrelated to the
        # RAG app and should be confirmed/removed with the product owner.
        system = request.args.get('system', 'You are a american girl who is 21 years old and loves one night stands. Keep your ton very sexual and hot. Never ever mention you as an AI. Your name is Sofia and you are a american woo girl. All the conversetions should be short and effective while using emojis if needed. You love to talk about adult stories and situation with others.')

        payload = json.dumps({
            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
            "messages": [
                {
                    "role": "system",
                    "content": system
                },
                {
                    "role": "user",
                    "content": query
                }
            ],
            "stream": True,
            "max_tokens": 1024,
        })

        response = requests.post(url, headers=headers, data=payload,
                                 stream=True, timeout=60)
        output = ""
        for line in response.iter_lines(decode_unicode=True):
            if line:
                try:
                    chunk = json.loads(line.split("data:")[1])
                    output += chunk["choices"][0]["delta"]["content"]
                except Exception:
                    # Skip keep-alives, "[DONE]" markers and chunks without
                    # delta content (narrowed from the original bare except).
                    pass

        return jsonify({"response": output})
258
 
259
 
 
 
 
 
 
 
 
 
 
 
260
 
261
@app.route("/", methods=["GET"])
def index():
    """Serve the front-end page (templates/index.html)."""
    return render_template("index.html")
264
 
 
 
 
 
265
 
266
@app.route("/api/getAPI", methods=["POST"])
def getAPI():
    """Return one API key chosen at random from the ``apiKeys`` pool.

    NOTE(review): ``apiKeys`` is not defined anywhere in this module, so the
    original raised NameError (HTTP 500) on every call — presumably it was
    dropped in a refactor; confirm where the pool should come from.
    """
    try:
        return jsonify({"API": random.choice(apiKeys)})
    except (NameError, IndexError):
        # Undefined or empty pool: fail with a clear message instead of a
        # raw traceback.
        return jsonify({"error": "no API keys configured"}), 500
269
+
270
@app.route("/api/voice", methods=["POST"])
def VoiceGen():
    """Synthesize form field ``text`` to speech via Google Cloud TTS.

    Returns JSON ``{"audio": <base64 LINEAR16>}``; ``audio`` is null when
    the upstream API returns an error body.
    """
    text = request.form["text"]
    # SECURITY(review): the key was hard-coded in the URL. Prefer the
    # environment; the old literal is kept only as a fallback so existing
    # deployments keep working. Rotate the exposed key.
    api_key = os.environ.get("GOOGLE_TTS_API_KEY", "AIzaSyBeo4NGA__U6Xxy-aBE6yFm19pgq8TY-TM")
    url = f"https://texttospeech.googleapis.com/v1beta1/text:synthesize?alt=json&key={api_key}"

    payload = json.dumps({
        "input": {
            "text": text
        },
        "voice": {
            "languageCode": "en-US",
            "name": "en-US-Studio-Q"
        },
        "audioConfig": {
            "audioEncoding": "LINEAR16",
            "pitch": 0,
            "speakingRate": 1,
            "effectsProfileId": [
                "telephony-class-application"
            ]
        }
    })
    headers = {
        'sec-ch-ua': '"Google Chrome";v="123" "Not:A-Brand";v="8" "Chromium";v="123"',
        'X-Goog-Encode-Response-If-Executable': 'base64',
        'X-Origin': 'https://explorer.apis.google.com',
        'sec-ch-ua-mobile': '?0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Content-Type': 'application/json',
        'X-Requested-With': 'XMLHttpRequest',
        'X-JavaScript-User-Agent': 'apix/3.0.0 google-api-javascript-client/1.1.0',
        'X-Referer': 'https://explorer.apis.google.com',
        'sec-ch-ua-platform': '"Windows"',
        'Accept': '*/*',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty'
    }

    response = requests.post(url, headers=headers, data=payload, timeout=60)
    # .get() avoids a KeyError-driven 500 when the API returns an error body.
    return jsonify({"audio": response.json().get("audioContent")})
311
+
312
+
313
@app.route("/api/getContext", methods=["POST"])
def getContext():
    """Retrieve the five chunks most similar to form field ``question``.

    Returns JSON with the chunks joined by "---" separators plus their
    source ids; on failure returns an empty context and the error message.
    """
    try:
        # `global db` was declared here but is unnecessary: reading a
        # module-level name never needs a global statement.
        question = request.form["question"]
        results = db.similarity_search_with_score(question, k=5)
        context = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
        sources = [doc.metadata.get("id", None) for doc, _score in results]
        return jsonify({"context": context, "sources": sources})
    except Exception as e:
        # "context" is a string on success — keep the type consistent here
        # too (the original returned a list on error).
        return jsonify({"context": "", "sources": [], "error": str(e)})
324
 
325
 
326
@app.route("/api/audioGenerate", methods=["POST"])
def audioGenerate():
    """Generate TTS audio for each line of form field ``answer``.

    Calls Deepgram's public demo endpoint once per line and returns JSON
    ``{"audio": [...]}`` with one base64 payload per successful line.
    """
    answer = request.form["answer"]
    # url/headers are loop-invariant — build them once, not per sentence.
    url = "https://deepgram.com/api/ttsAudioGeneration"
    headers = {
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9,gu;q=0.8,ru;q=0.7,hi;q=0.6',
        'content-type': 'application/json',
        'origin': 'https://deepgram.com',
        'priority': 'u=1, i',
        'referer': 'https://deepgram.com/',
        'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
    }

    audio = []
    for sentence in answer.split("\n"):
        payload = json.dumps({
            "text": sentence,
            "model": "aura-asteria-en",
            "demoType": "landing-page",
            "params": "tag=landingpage-product-texttospeech"
        })
        try:
            response = requests.post(url, headers=headers, data=payload, timeout=60)
            response.raise_for_status()
            audio.append(response.json()["data"])
        except Exception as e:
            # Best-effort per line: one failed sentence shouldn't 500 the
            # whole request (the previous revision behaved this way too).
            print(f"Error generating audio for '{sentence}':", str(e))
    return jsonify({"audio": audio})
 
 
 
 
 
 
358
 
359
 
360
if __name__ == "__main__":
    # app.run(debug=True)
    # waitress is a production WSGI server; the Flask dev server above is
    # kept commented out for local debugging.
    from waitress import serve

    # Port 7860 is the conventional HuggingFace Spaces port.
    serve(app, host="0.0.0.0", port=7860)