Update app.py
app.py CHANGED
@@ -30,8 +30,9 @@ RAG_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"],
 CHROMA_DIR = "/data/chroma"
 YOUTUBE_DIR = "/data/youtube"
 
-YOUTUBE_URL_01 = "https://www.youtube.com/watch?v
+YOUTUBE_URL_01 = "https://www.youtube.com/watch?v=Iy1IpvcJH7I&list=PL2yQDdvlhXf9XsB2W76_seM6dJxcE2Pdc&index=1"
 YOUTUBE_URL_02 = "https://www.youtube.com/watch?v=Iy1IpvcJH7I&list=PL2yQDdvlhXf9XsB2W76_seM6dJxcE2Pdc&index=2"
+YOUTUBE_URL_03 = "https://www.youtube.com/watch?v=Iy1IpvcJH7I&list=PL2yQDdvlhXf9XsB2W76_seM6dJxcE2Pdc&index=3"
 
 MODEL_NAME = "gpt-4"
 
@@ -41,16 +42,17 @@ def invoke(openai_api_key, use_rag, prompt):
                      temperature = 0)
     if (use_rag):
         # Document loading, splitting, and storage
-
-
-
-
-
-
-
-
-
-
+        loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL_01,
+                                                   YOUTUBE_URL_02,
+                                                   YOUTUBE_URL_03], YOUTUBE_DIR),
+                               OpenAIWhisperParser())
+        docs = loader.load()
+        text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = 150,
+                                                       chunk_size = 1500)
+        splits = text_splitter.split_documents(docs)
+        vector_db = Chroma.from_documents(documents = splits,
+                                          embedding = OpenAIEmbeddings(),
+                                          persist_directory = CHROMA_DIR)
         # Document retrieval
         vector_db = Chroma(embedding_function = OpenAIEmbeddings(),
                            persist_directory = CHROMA_DIR)
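For reference, the added lines implement the classic LangChain (0.0.x-era) YouTube-audio ingestion pipeline: YoutubeAudioLoader downloads the audio tracks, OpenAIWhisperParser transcribes them, and the transcripts are chunked and embedded into a persistent Chroma store. A minimal standalone sketch of that path, assuming the import locations of that LangChain era and that yt-dlp/ffmpeg are installed and OPENAI_API_KEY is set:

# Standalone sketch of the ingestion path added above. Assumptions: classic
# LangChain (0.0.x-era) import paths, yt-dlp and ffmpeg installed
# (YoutubeAudioLoader shells out to them), and OPENAI_API_KEY set for both
# OpenAIWhisperParser and OpenAIEmbeddings.
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
from langchain.document_loaders.parsers import OpenAIWhisperParser
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

CHROMA_DIR = "/data/chroma"
YOUTUBE_DIR = "/data/youtube"
urls = ["https://www.youtube.com/watch?v=Iy1IpvcJH7I&list=PL2yQDdvlhXf9XsB2W76_seM6dJxcE2Pdc&index=1",
        "https://www.youtube.com/watch?v=Iy1IpvcJH7I&list=PL2yQDdvlhXf9XsB2W76_seM6dJxcE2Pdc&index=2",
        "https://www.youtube.com/watch?v=Iy1IpvcJH7I&list=PL2yQDdvlhXf9XsB2W76_seM6dJxcE2Pdc&index=3"]

# Download the audio tracks and transcribe each one with Whisper
loader = GenericLoader(YoutubeAudioLoader(urls, YOUTUBE_DIR), OpenAIWhisperParser())
docs = loader.load()

# Split the transcripts into overlapping chunks sized for retrieval
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1500, chunk_overlap = 150)
splits = text_splitter.split_documents(docs)

# Embed the chunks and persist them to the on-disk Chroma store
vector_db = Chroma.from_documents(documents = splits,
                                  embedding = OpenAIEmbeddings(),
                                  persist_directory = CHROMA_DIR)

Note that in the diff this block runs inside invoke(), so every call with use_rag enabled re-downloads, re-transcribes, and re-embeds the videos, and the vector_db it builds is immediately rebound by the pre-existing Chroma(embedding_function = ..., persist_directory = CHROMA_DIR) line, which simply reopens the same persisted store. Hoisting ingestion into a one-time setup step would avoid repeating the Whisper and embedding costs on each request.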
@@ -63,7 +65,6 @@ def invoke(openai_api_key, use_rag, prompt):
     else:
         chain = LLMChain(llm = llm, prompt = LLM_CHAIN_PROMPT)
         result = chain.run({"question": prompt})
-    print(os.listdir("/data/chroma/"))
     return result
 
 description = """<strong>Overview:</strong> The app demonstrates how to use a Large Language Model (LLM) with Retrieval Augmented Generation (RAG) on external data