yasirme committed on
Commit
106e2f6
·
verified ·
1 Parent(s): 971f0dc

Update rag/RAG.py

Browse files
Files changed (1) hide show
  1. rag/RAG.py +25 -17
rag/RAG.py CHANGED
@@ -27,32 +27,40 @@ class RAG:
27
  except Exception as e:
28
  raise ValueError(f"an error occured: {e}")
29
 
 
 
 
 
 
 
 
 
30
  def generate_embedding(self, text, task_type=None):
31
  try:
32
  if not task_type:
33
  task_type = self.TASK_TYPE
34
-
35
  chunks = self.split_text(text)
36
- batches = [chunks[i:i + self.MAX_BATCH_SIZE] for i in range(0, len(chunks), self.MAX_BATCH_SIZE)]
37
-
38
- def embed_batch(batch):
39
- response = client.models.embed_content(
40
- model=self.MODEL,
41
- contents=batch,
42
- config=types.EmbedContentConfig(task_type=task_type)
43
- )
44
- return [embedding.values for embedding in response.embeddings]
45
-
46
  embeddings = []
47
- with ThreadPoolExecutor(max_workers=100) as executor:
48
- futures = [executor.submit(embed_batch, batch) for batch in batches]
 
 
 
 
49
  for future in as_completed(futures):
50
- embeddings.extend(future.result())
51
-
 
52
  return {"embeddings": embeddings, "chunks": chunks}, 200
53
-
54
  except Exception as e:
55
- return {"an error occurred": str(e)}, 500
56
 
57
 
58
 
 
27
  except Exception as e:
28
  raise ValueError(f"an error occured: {e}")
29
 
30
def _embed_batch(self, chunk_batch, task_type):
    """Embed one batch of chunks and return their embedding values.

    Sends ``chunk_batch`` to ``client.models.embed_content`` using
    ``self.MODEL`` and an ``EmbedContentConfig`` built from ``task_type``.

    Returns:
        A list holding the ``values`` attribute of every entry in the
        response's ``embeddings``, in the order the response lists them.
    """
    api_response = client.models.embed_content(
        model=self.MODEL,
        contents=chunk_batch,
        config=types.EmbedContentConfig(task_type=task_type),
    )
    vectors = []
    for item in api_response.embeddings:
        vectors.append(item.values)
    return vectors
37
+
38
  def generate_embedding(self, text, task_type=None):
39
  try:
40
  if not task_type:
41
  task_type = self.TASK_TYPE
42
+
43
  chunks = self.split_text(text)
44
+ batches = [
45
+ chunks[i:i + self.MAX_BATCH_SIZE]
46
+ for i in range(0, len(chunks), self.MAX_BATCH_SIZE)
47
+ ]
48
+
 
 
 
 
 
49
  embeddings = []
50
+ with ThreadPoolExecutor(max_workers=50) as executor:
51
+ futures = {
52
+ executor.submit(self._embed_batch, batch, task_type): batch
53
+ for batch in batches
54
+ }
55
+
56
  for future in as_completed(futures):
57
+ result = future.result()
58
+ embeddings.extend(result)
59
+
60
  return {"embeddings": embeddings, "chunks": chunks}, 200
61
+
62
  except Exception as e:
63
+ return {"an error occurred": f"{e}"}, 500
64
 
65
 
66