Almaatla committed on
Commit
97f5451
·
verified ·
1 Parent(s): 3a1f579

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -0
app.py CHANGED
@@ -39,6 +39,37 @@ def read_root(request: Request):
39
 
40
  @app.post("/embed")
41
  def embed_strings(request: EmbedRequest):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  new_documents = request.texts
43
  new_embeddings = model.encode(new_documents)
44
  index.add(np.array(new_embeddings))
 
39
 
40
  @app.post("/embed")
41
  def embed_strings(request: EmbedRequest):
42
+ new_documents = request.texts
43
+ batch_size = 20
44
+
45
+ # Split the new_documents list into batches of 10 documents
46
+ batches = [new_documents[i:i+batch_size] for i in range(0, len(new_documents), batch_size)]
47
+
48
+ # Perform embedding for each batch
49
+ new_embeddings = []
50
+ for batch in batches:
51
+ batch_embeddings = model.encode(batch)
52
+ new_embeddings.extend(batch_embeddings)
53
+ print(f"embeded {batch_size} docs")
54
+
55
+ # Handle remaining documents less than batch_size
56
+ remaining_docs = len(new_documents) % batch_size
57
+ print(f"embedind remaining {remaining_docs} docs")
58
+
59
+ if remaining_docs > 0:
60
+ remaining_batch = new_documents[-remaining_docs:]
61
+ remaining_embeddings = model.encode(remaining_batch)
62
+ new_embeddings.extend(remaining_embeddings)
63
+
64
+ index.add(np.array(new_embeddings))
65
+ new_size = index.ntotal
66
+ documents.extend(new_documents)
67
+ print(f"End embedding {len(new_documents)} docs, new DB size: {new_size}")
68
+ return {
69
+ "message": f"{len(new_documents)} new strings embedded and added to FAISS database. New size of the database: {new_size}"
70
+ }
71
+
72
+ def embed_strings_v0(request: EmbedRequest):
73
  new_documents = request.texts
74
  new_embeddings = model.encode(new_documents)
75
  index.add(np.array(new_embeddings))