Update app.py
Browse files
app.py
CHANGED
@@ -39,6 +39,37 @@ def read_root(request: Request):
|
|
39 |
|
40 |
@app.post("/embed")
|
41 |
def embed_strings(request: EmbedRequest):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
new_documents = request.texts
|
43 |
new_embeddings = model.encode(new_documents)
|
44 |
index.add(np.array(new_embeddings))
|
|
|
39 |
|
40 |
@app.post("/embed")
def embed_strings(request: EmbedRequest):
    """Embed the submitted texts in batches and add them to the FAISS index.

    The texts in ``request.texts`` are encoded ``batch_size`` at a time with
    the module-level ``model``; the resulting vectors are appended to
    ``index`` and the raw texts to ``documents``, in the same order, so that
    vector positions keep lining up with document positions.

    Args:
        request: Request body whose ``texts`` attribute is the list of
            strings to embed.

    Returns:
        A dict with a single ``"message"`` key reporting how many strings
        were added and the new total number of vectors in the index.
    """
    new_documents = request.texts
    batch_size = 20

    # Stepping through the list in strides of batch_size already produces the
    # final, shorter batch (slicing clamps at the end of the list), so every
    # document is encoded exactly once. A separate "remaining documents" pass
    # would add the tail to the index a second time and desynchronise
    # index.ntotal from len(documents).
    new_embeddings = []
    for start in range(0, len(new_documents), batch_size):
        batch = new_documents[start:start + batch_size]
        new_embeddings.extend(model.encode(batch))
        # Report the real batch length; the last batch may be shorter.
        print(f"embeded {len(batch)} docs")

    # With no input, np.array([]) is a 1-D zero-length array rather than the
    # 2-D (n, d) matrix the index is given otherwise, so skip the add.
    if new_embeddings:
        index.add(np.array(new_embeddings))

    new_size = index.ntotal
    documents.extend(new_documents)
    print(f"End embedding {len(new_documents)} docs, new DB size: {new_size}")
    return {
        "message": f"{len(new_documents)} new strings embedded and added to FAISS database. New size of the database: {new_size}"
    }
|
71 |
+
|
72 |
+
def embed_strings_v0(request: EmbedRequest):
|
73 |
new_documents = request.texts
|
74 |
new_embeddings = model.encode(new_documents)
|
75 |
index.add(np.array(new_embeddings))
|