Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -40,17 +40,17 @@ class SemanticSearch:
|
|
40 |
|
41 |
|
42 |
def get_text_embedding(self, texts, batch=1000):
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
|
55 |
|
56 |
def pdf_to_text(pdf_path, start_page=1):
|
|
|
40 |
|
41 |
|
42 |
def get_text_embedding(self, texts, batch=1000):
    """Embed ``texts`` in chunks and stack the per-chunk results row-wise.

    ``texts`` is sliced into consecutive chunks of at most ``batch`` items.
    Each chunk is passed to ``self.use`` and the returned values are combined
    with ``np.vstack``. Progress, every chunk, every chunk result and the
    final stacked array are printed to stdout.

    Args:
        texts: Sequence of inputs; must support ``len()`` and slicing.
        batch: Maximum number of items handed to ``self.use`` per call.
            Must be at least 1.

    Returns:
        The ``np.vstack`` of all per-chunk results, in input order.

    Raises:
        ValueError: If ``batch`` is smaller than 1 or ``texts`` is empty.
    """
    if batch < 1:
        # A zero step would make range() fail and a negative one would yield
        # no chunks at all; report the real cause instead.
        raise ValueError(f"batch must be >= 1, got {batch}")

    total = len(texts)
    if total == 0:
        # np.vstack([]) raises an opaque "need at least one array" error;
        # keep the exception type but say what actually went wrong.
        raise ValueError("texts must contain at least one item")

    # Ceiling division: an exact multiple of `batch` must not count an
    # extra, non-existent trailing batch.
    num_batches = (total + batch - 1) // batch

    embeddings = []
    for batch_no, start in enumerate(range(0, total, batch), start=1):
        text_batch = texts[start:start + batch]
        print(f"Processing batch {batch_no} of {num_batches}")
        print(f"Text batch: {text_batch}")
        emb_batch = self.use(text_batch)
        print(f"Embedding batch: {emb_batch}")
        embeddings.append(emb_batch)

    embeddings = np.vstack(embeddings)
    print(f"Final embeddings: {embeddings}")
    return embeddings
|
54 |
|
55 |
|
56 |
def pdf_to_text(pdf_path, start_page=1):
|