Chris4K committed on
Commit
142d17f
·
verified ·
1 Parent(s): ebf441f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -20,6 +20,10 @@ print("-----------")
20
  print(documents[0])
21
  print("-----------")
22
 
 
 
 
 
23
 
24
  # Extract the embedding arrays from the PDF documents
25
  embeddings = []
@@ -27,7 +31,9 @@ for doc in documents:
27
  embeddings.extend(doc['embeddings'])
28
 
29
  # Create Chroma vector store for API embeddings
30
- api_db = Chroma.from_texts(embeddings, api_hf_embeddings, collection_name="api-collection")
 
 
31
 
32
 
33
  # Define the PDF retrieval function
 
20
  print(documents[0])
21
  print("-----------")
22
 
23
+ # Split the documents into chunks and embed them using the HfApiEmbeddingTool
24
+ text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
25
+ vdocuments = text_splitter.split_documents(documents)
26
+
27
 
28
  # Extract the embedding arrays from the PDF documents
29
  embeddings = []
 
31
  embeddings.extend(doc['embeddings'])
32
 
33
  # Create Chroma vector store for API embeddings
34
+ api_db = Chroma.from_documents(vdocuments, HfApiEmbeddingRetriever, collection_name="api-collection")
35
+
36
+ #api_db = Chroma.from_texts(embeddings, api_hf_embeddings, collection_name="api-collection")
37
 
38
 
39
  # Define the PDF retrieval function