Abhilashvj committed on
Commit
69f292c
·
1 Parent(s): 9d4b1c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -23
app.py CHANGED
@@ -200,29 +200,34 @@ if len(ALL_FILES) > 0:
200
  # we will use batches of 64
201
  batch_size = 128
202
  # docs = docs['documents']
203
- with st.spinner(
204
- "🧠    Performing indexing of uplaoded documents... \n "
205
- ):
206
- for i in range(0, len(docs), batch_size):
207
- # find end of batch
208
- i_end = min(i+batch_size, len(docs))
209
- # extract batch
210
- batch = [doc.content for doc in docs[i:i_end]]
211
- # generate embeddings for batch
212
- emb = sentence_encoder.encode(batch).tolist()
213
- # get metadata
214
- # meta = [doc.meta for doc in docs[i:i_end]]
215
- meta = []
216
- for doc in docs[i:i_end]:
217
- meta_dict = doc.meta
218
- meta_dict["text"] = doc.content
219
- meta.append(meta_dict)
220
- # create unique IDs
221
- ids = [doc.id for doc in docs[i:i_end]]
222
- # add all to upsert list
223
- to_upsert = list(zip(ids, emb, meta))
224
- # upsert/insert these records to pinecone
225
- _ = index.upsert(vectors=to_upsert)
 
 
 
 
 
226
 
227
  top_k_reader = st.sidebar.slider(
228
  "Max. number of answers",
 
200
  # we will use batches of 64
201
  batch_size = 128
202
  # docs = docs['documents']
203
+ # with st.spinner(
204
+ # "🧠    Performing indexing of uplaoded documents... \n "
205
+ # ):
206
+ my_bar = st.progress(0)
207
+ upload_count = 0
208
+ for i in range(0, len(docs), batch_size):
209
+ # find end of batch
210
+ i_end = min(i+batch_size, len(docs))
211
+ # extract batch
212
+ batch = [doc.content for doc in docs[i:i_end]]
213
+ # generate embeddings for batch
214
+ emb = sentence_encoder.encode(batch).tolist()
215
+ # get metadata
216
+ # meta = [doc.meta for doc in docs[i:i_end]]
217
+ meta = []
218
+ for doc in docs[i:i_end]:
219
+ meta_dict = doc.meta
220
+ meta_dict["text"] = doc.content
221
+ meta.append(meta_dict)
222
+ # create unique IDs
223
+ ids = [doc.id for doc in docs[i:i_end]]
224
+ # add all to upsert list
225
+ to_upsert = list(zip(ids, emb, meta))
226
+ # upsert/insert these records to pinecone
227
+ _ = index.upsert(vectors=to_upsert)
228
+ upload_count+=batch_size
229
+ upload_percentage = int((upload_count/len(docs))*100)
230
+ my_bar.progress(percent_complete + upload_percentage)
231
 
232
  top_k_reader = st.sidebar.slider(
233
  "Max. number of answers",