Spaces:
Build error
Build error
Update rag.py
Browse files
rag.py
CHANGED
@@ -1,5 +1,19 @@
|
|
1 |
import os
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
PDF_URL = "https://arxiv.org/pdf/2303.08774.pdf"
|
4 |
WEB_URL = "https://openai.com/research/gpt-4"
|
5 |
YOUTUBE_URL_1 = "https://www.youtube.com/watch?v=--khbXchTeE"
|
@@ -17,10 +31,6 @@ MONGODB_INDEX_NAME = "default"
|
|
17 |
LLM_CHAIN_PROMPT = PromptTemplate(input_variables = ["question"], template = os.environ["LLM_TEMPLATE"])
|
18 |
RAG_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"], template = os.environ["RAG_TEMPLATE"])
|
19 |
|
20 |
-
RAG_OFF = "Off"
|
21 |
-
RAG_CHROMA = "Chroma"
|
22 |
-
RAG_MONGODB = "MongoDB"
|
23 |
-
|
24 |
client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)
|
25 |
collection = client[MONGODB_DB_NAME][MONGODB_COLLECTION_NAME]
|
26 |
|
@@ -28,8 +38,6 @@ config = {
|
|
28 |
"chunk_overlap": 150,
|
29 |
"chunk_size": 1500,
|
30 |
"k": 3,
|
31 |
-
"model_name": "gpt-4-0613",
|
32 |
-
"temperature": 0,
|
33 |
}
|
34 |
|
35 |
def document_loading_splitting():
|
|
|
1 |
import os
|
2 |
|
3 |
+
from langchain.chains import LLMChain, RetrievalQA
|
4 |
+
from langchain.chat_models import ChatOpenAI
|
5 |
+
from langchain.document_loaders import PyPDFLoader, WebBaseLoader
|
6 |
+
from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
|
7 |
+
from langchain.document_loaders.generic import GenericLoader
|
8 |
+
from langchain.document_loaders.parsers import OpenAIWhisperParser
|
9 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
10 |
+
from langchain.prompts import PromptTemplate
|
11 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
12 |
+
from langchain.vectorstores import Chroma
|
13 |
+
from langchain.vectorstores import MongoDBAtlasVectorSearch
|
14 |
+
|
15 |
+
from pymongo import MongoClient
|
16 |
+
|
17 |
PDF_URL = "https://arxiv.org/pdf/2303.08774.pdf"
|
18 |
WEB_URL = "https://openai.com/research/gpt-4"
|
19 |
YOUTUBE_URL_1 = "https://www.youtube.com/watch?v=--khbXchTeE"
|
|
|
31 |
LLM_CHAIN_PROMPT = PromptTemplate(input_variables = ["question"], template = os.environ["LLM_TEMPLATE"])
|
32 |
RAG_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"], template = os.environ["RAG_TEMPLATE"])
|
33 |
|
|
|
|
|
|
|
|
|
34 |
client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)
|
35 |
collection = client[MONGODB_DB_NAME][MONGODB_COLLECTION_NAME]
|
36 |
|
|
|
38 |
"chunk_overlap": 150,
|
39 |
"chunk_size": 1500,
|
40 |
"k": 3,
|
|
|
|
|
41 |
}
|
42 |
|
43 |
def document_loading_splitting():
|