Spaces:
Runtime error
Runtime error
Kushwanth Chowday Kandala
committed on
maximum metadata size
Browse files
app.py
CHANGED
@@ -172,6 +172,14 @@ def combine_text(pages):
|
|
172 |
st.write(f"There are {len(concatenates_text)} characters in the pdf with {mbsize}MB size")
|
173 |
return concatenates_text
|
174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
def create_embeddings():
|
176 |
# Get the uploaded file
|
177 |
inputtext = ""
|
@@ -188,7 +196,7 @@ def create_embeddings():
|
|
188 |
pinecone = connect_pinecone()
|
189 |
index = get_pinecone_semantic_index(pinecone)
|
190 |
|
191 |
-
# The maximum metadata size per vector is 40KB
|
192 |
batch_size = 10000
|
193 |
for i in tqdm(range(0, len(inputtext), batch_size)):
|
194 |
# find end of batch
|
|
|
172 |
st.write(f"There are {len(concatenates_text)} characters in the pdf with {mbsize}MB size")
|
173 |
return concatenates_text
|
174 |
|
175 |
+
def split_into_chunks(text, chunk_size):
|
176 |
+
|
177 |
+
chunks = []
|
178 |
+
for i in range(0, len(text), chunk_size):
|
179 |
+
chunks.append(text[i:i + chunk_size])
|
180 |
+
|
181 |
+
return chunks
|
182 |
+
|
183 |
def create_embeddings():
|
184 |
# Get the uploaded file
|
185 |
inputtext = ""
|
|
|
196 |
pinecone = connect_pinecone()
|
197 |
index = get_pinecone_semantic_index(pinecone)
|
198 |
|
199 |
+
# The maximum metadata size per vector is 40KB (~40,000 bytes), and each text character takes roughly 1 to 2 bytes, so a batch size of roughly 10000 to 40000 characters fits within the limit
|
200 |
batch_size = 10000
|
201 |
for i in tqdm(range(0, len(inputtext), batch_size)):
|
202 |
# find end of batch
|