bstraehle committed on
Commit
6ea7ef9
·
1 Parent(s): 1645955

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +6 -6
rag.py CHANGED
@@ -22,8 +22,8 @@ WEB_URL = "https://openai.com/research/gpt-4"
22
  YOUTUBE_URL_1 = "https://www.youtube.com/watch?v=--khbXchTeE"
23
  YOUTUBE_URL_2 = "https://www.youtube.com/watch?v=hdhZwyf24mE"
24
 
25
- YOUTUBE_DIR = "/data/yt"
26
- CHROMA_DIR = "/data/db"
27
 
28
  MONGODB_ATLAS_CLUSTER_URI = os.environ["MONGODB_ATLAS_CLUSTER_URI"]
29
  MONGODB_DB_NAME = "langchain_db"
@@ -63,23 +63,23 @@ def document_splitting(config, docs):
63
 
64
  def document_storage_chroma(chunks):
65
  Chroma.from_documents(documents = chunks,
66
- embedding = OpenAIEmbeddings(),
67
  persist_directory = CHROMA_DIR)
68
 
69
  def document_storage_mongodb(chunks):
70
  MongoDBAtlasVectorSearch.from_documents(documents = chunks,
71
- embedding = OpenAIEmbeddings(),
72
  collection = collection,
73
  index_name = MONGODB_INDEX_NAME)
74
 
75
  def document_retrieval_chroma():
76
- return Chroma(embedding_function = OpenAIEmbeddings(),
77
  persist_directory = CHROMA_DIR)
78
 
79
  def document_retrieval_mongodb():
80
  return MongoDBAtlasVectorSearch.from_connection_string(MONGODB_ATLAS_CLUSTER_URI,
81
  MONGODB_DB_NAME + "." + MONGODB_COLLECTION_NAME,
82
- OpenAIEmbeddings(),
83
  index_name = MONGODB_INDEX_NAME)
84
 
85
  def rag_batch(config):
 
22
  YOUTUBE_URL_1 = "https://www.youtube.com/watch?v=--khbXchTeE"
23
  YOUTUBE_URL_2 = "https://www.youtube.com/watch?v=hdhZwyf24mE"
24
 
25
+ YOUTUBE_DIR = "/data/youtube"
26
+ CHROMA_DIR = "/data/chroma"
27
 
28
  MONGODB_ATLAS_CLUSTER_URI = os.environ["MONGODB_ATLAS_CLUSTER_URI"]
29
  MONGODB_DB_NAME = "langchain_db"
 
63
 
64
  def document_storage_chroma(chunks):
65
  Chroma.from_documents(documents = chunks,
66
+ embedding = OpenAIEmbeddings(disallowed_special = ()),
67
  persist_directory = CHROMA_DIR)
68
 
69
  def document_storage_mongodb(chunks):
70
  MongoDBAtlasVectorSearch.from_documents(documents = chunks,
71
+ embedding = OpenAIEmbeddings(disallowed_special = ()),
72
  collection = collection,
73
  index_name = MONGODB_INDEX_NAME)
74
 
75
  def document_retrieval_chroma():
76
+ return Chroma(embedding_function = OpenAIEmbeddings(disallowed_special = ()),
77
  persist_directory = CHROMA_DIR)
78
 
79
  def document_retrieval_mongodb():
80
  return MongoDBAtlasVectorSearch.from_connection_string(MONGODB_ATLAS_CLUSTER_URI,
81
  MONGODB_DB_NAME + "." + MONGODB_COLLECTION_NAME,
82
+ OpenAIEmbeddings(disallowed_special = ()),
83
  index_name = MONGODB_INDEX_NAME)
84
 
85
  def rag_batch(config):