bstraehle committed on
Commit
5dce867
·
1 Parent(s): f28ec17

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +0 -6
rag.py CHANGED
@@ -41,18 +41,15 @@ def document_loading():
41
  # PDF
42
  loader = PyPDFLoader(PDF_URL)
43
  docs.extend(loader.load())
44
- print("### Load PDF")
45
 
46
  # Web
47
  loader = WebBaseLoader(WEB_URL)
48
  docs.extend(loader.load())
49
- print("### Load Web")
50
 
51
  # YouTube
52
  loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL], YOUTUBE_DIR),
53
  OpenAIWhisperParser())
54
  docs.extend(loader.load())
55
- print("### Load YouTube")
56
 
57
  return docs
58
 
@@ -60,17 +57,14 @@ def document_splitting(config, docs):
60
  text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = config["chunk_overlap"],
61
  chunk_size = config["chunk_size"])
62
 
63
- print("### Split")
64
  return text_splitter.split_documents(docs)
65
 
66
  def document_storage_chroma(chunks):
67
- print("### Store Chroma")
68
  Chroma.from_documents(documents = chunks,
69
  embedding = OpenAIEmbeddings(disallowed_special = ()),
70
  persist_directory = CHROMA_DIR)
71
 
72
  def document_storage_mongodb(chunks):
73
- print("### Store MongoDB")
74
  MongoDBAtlasVectorSearch.from_documents(documents = chunks,
75
  embedding = OpenAIEmbeddings(disallowed_special = ()),
76
  collection = collection,
 
41
  # PDF
42
  loader = PyPDFLoader(PDF_URL)
43
  docs.extend(loader.load())
 
44
 
45
  # Web
46
  loader = WebBaseLoader(WEB_URL)
47
  docs.extend(loader.load())
 
48
 
49
  # YouTube
50
  loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL], YOUTUBE_DIR),
51
  OpenAIWhisperParser())
52
  docs.extend(loader.load())
 
53
 
54
  return docs
55
 
 
57
  text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = config["chunk_overlap"],
58
  chunk_size = config["chunk_size"])
59
 
 
60
  return text_splitter.split_documents(docs)
61
 
62
  def document_storage_chroma(chunks):
 
63
  Chroma.from_documents(documents = chunks,
64
  embedding = OpenAIEmbeddings(disallowed_special = ()),
65
  persist_directory = CHROMA_DIR)
66
 
67
  def document_storage_mongodb(chunks):
 
68
  MongoDBAtlasVectorSearch.from_documents(documents = chunks,
69
  embedding = OpenAIEmbeddings(disallowed_special = ()),
70
  collection = collection,