bstraehle committed
Commit eceefb4 · 1 Parent(s): 3ede494

Update rag.py

Files changed (1)
  1. rag.py +14 -6
rag.py CHANGED
@@ -1,5 +1,19 @@
  import os
 
+ from langchain.chains import LLMChain, RetrievalQA
+ from langchain.chat_models import ChatOpenAI
+ from langchain.document_loaders import PyPDFLoader, WebBaseLoader
+ from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
+ from langchain.document_loaders.generic import GenericLoader
+ from langchain.document_loaders.parsers import OpenAIWhisperParser
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.prompts import PromptTemplate
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import Chroma
+ from langchain.vectorstores import MongoDBAtlasVectorSearch
+
+ from pymongo import MongoClient
+
  PDF_URL = "https://arxiv.org/pdf/2303.08774.pdf"
  WEB_URL = "https://openai.com/research/gpt-4"
  YOUTUBE_URL_1 = "https://www.youtube.com/watch?v=--khbXchTeE"
@@ -17,10 +31,6 @@ MONGODB_INDEX_NAME = "default"
  LLM_CHAIN_PROMPT = PromptTemplate(input_variables = ["question"], template = os.environ["LLM_TEMPLATE"])
  RAG_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"], template = os.environ["RAG_TEMPLATE"])
 
- RAG_OFF = "Off"
- RAG_CHROMA = "Chroma"
- RAG_MONGODB = "MongoDB"
-
  client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)
  collection = client[MONGODB_DB_NAME][MONGODB_COLLECTION_NAME]
 
@@ -28,8 +38,6 @@ config = {
      "chunk_overlap": 150,
      "chunk_size": 1500,
      "k": 3,
-     "model_name": "gpt-4-0613",
-     "temperature": 0,
  }
 
  def document_loading_splitting():
 
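Note: the diff only touches imports and module-level constants; the function bodies that consume them are not shown. The snippet below is a rough sketch of how these pieces are typically wired together in a LangChain RAG script of this vintage: it loads the PDF, splits it with the chunking values from config, and answers a question through RetrievalQA. The function name rag_sketch, the Chroma-only retrieval path, and the hard-coded model settings are assumptions for illustration, not part of this commit.

# Hedged sketch only -- names below that do not appear in the diff are assumptions.
import os

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

PDF_URL = "https://arxiv.org/pdf/2303.08774.pdf"
config = {"chunk_overlap": 150, "chunk_size": 1500, "k": 3}

# Prompt template comes from the environment, as in the diff
# (requires the RAG_TEMPLATE and OPENAI_API_KEY variables to be set).
RAG_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=os.environ["RAG_TEMPLATE"],
)

def document_loading_splitting():
    # Load the PDF and split it into overlapping chunks using the config values.
    docs = PyPDFLoader(PDF_URL).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=config["chunk_size"],
        chunk_overlap=config["chunk_overlap"],
    )
    return splitter.split_documents(docs)

def rag_sketch(question):
    # Embed the chunks into an in-memory Chroma store and answer via RetrievalQA.
    splits = document_loading_splitting()
    vector_db = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
    chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model_name="gpt-4", temperature=0),  # model settings hard-coded only for this sketch
        retriever=vector_db.as_retriever(search_kwargs={"k": config["k"]}),
        chain_type_kwargs={"prompt": RAG_CHAIN_PROMPT},
    )
    return chain.run(question)

print(rag_sketch("What is GPT-4?"))

For the MongoDB Atlas path implied by the diff, the Chroma call would typically be replaced with MongoDBAtlasVectorSearch.from_documents(splits, OpenAIEmbeddings(), collection=collection, index_name=MONGODB_INDEX_NAME), reusing the collection and index name defined at module level.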