bstraehle committed on
Commit
56a0e0e
·
1 Parent(s): 90a2028

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +5 -0
rag.py CHANGED
@@ -41,10 +41,12 @@ def document_loading():
41
  # PDF
42
  loader = PyPDFLoader(PDF_URL)
43
  docs.extend(loader.load())
 
44
 
45
  # Web
46
  loader = WebBaseLoader(WEB_URL)
47
  docs.extend(loader.load())
 
48
 
49
  # YouTube
50
  #loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL], YOUTUBE_DIR),
@@ -57,14 +59,17 @@ def document_splitting(config, docs):
57
  text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = config["chunk_overlap"],
58
  chunk_size = config["chunk_size"])
59
 
 
60
  return text_splitter.split_documents(docs)
61
 
62
  def document_storage_chroma(chunks):
 
63
  Chroma.from_documents(documents = chunks,
64
  embedding = OpenAIEmbeddings(disallowed_special = ()),
65
  persist_directory = CHROMA_DIR)
66
 
67
  def document_storage_mongodb(chunks):
 
68
  MongoDBAtlasVectorSearch.from_documents(documents = chunks,
69
  embedding = OpenAIEmbeddings(disallowed_special = ()),
70
  collection = collection,
 
41
  # PDF
42
  loader = PyPDFLoader(PDF_URL)
43
  docs.extend(loader.load())
44
+ print("### Load PDF")
45
 
46
  # Web
47
  loader = WebBaseLoader(WEB_URL)
48
  docs.extend(loader.load())
49
+ print("### Load Web")
50
 
51
  # YouTube
52
  #loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL], YOUTUBE_DIR),
 
59
  text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = config["chunk_overlap"],
60
  chunk_size = config["chunk_size"])
61
 
62
+ print("### Split")
63
  return text_splitter.split_documents(docs)
64
 
65
  def document_storage_chroma(chunks):
66
+ print("### Store")
67
  Chroma.from_documents(documents = chunks,
68
  embedding = OpenAIEmbeddings(disallowed_special = ()),
69
  persist_directory = CHROMA_DIR)
70
 
71
  def document_storage_mongodb(chunks):
72
+ print("### Store")
73
  MongoDBAtlasVectorSearch.from_documents(documents = chunks,
74
  embedding = OpenAIEmbeddings(disallowed_special = ()),
75
  collection = collection,