bstraehle committed
Commit 8c128f9 · 1 Parent(s): 24b21f4

Update app.py

Files changed (1): app.py +13 -12
app.py CHANGED
@@ -30,8 +30,9 @@ RAG_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"],
 CHROMA_DIR = "/data/chroma"
 YOUTUBE_DIR = "/data/youtube"
 
-YOUTUBE_URL_01 = "https://www.youtube.com/watch?v=--khbXchTeE"
+YOUTUBE_URL_01 = "https://www.youtube.com/watch?v=Iy1IpvcJH7I&list=PL2yQDdvlhXf9XsB2W76_seM6dJxcE2Pdc&index=1"
 YOUTUBE_URL_02 = "https://www.youtube.com/watch?v=Iy1IpvcJH7I&list=PL2yQDdvlhXf9XsB2W76_seM6dJxcE2Pdc&index=2"
+YOUTUBE_URL_03 = "https://www.youtube.com/watch?v=Iy1IpvcJH7I&list=PL2yQDdvlhXf9XsB2W76_seM6dJxcE2Pdc&index=3"
 
 MODEL_NAME = "gpt-4"
 
@@ -41,16 +42,17 @@ def invoke(openai_api_key, use_rag, prompt):
                      temperature = 0)
     if (use_rag):
         # Document loading, splitting, and storage
-        #loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL_01,
-        #                                           YOUTUBE_URL_02], YOUTUBE_DIR),
-        #                       OpenAIWhisperParser())
-        #docs = loader.load()
-        #text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = 150,
-        #                                               chunk_size = 1500)
-        #splits = text_splitter.split_documents(docs)
-        #vector_db = Chroma.from_documents(documents = splits,
-        #                                  embedding = OpenAIEmbeddings(),
-        #                                  persist_directory = CHROMA_DIR)
+        loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL_01,
+                                                   YOUTUBE_URL_02,
+                                                   YOUTUBE_URL_03], YOUTUBE_DIR),
+                               OpenAIWhisperParser())
+        docs = loader.load()
+        text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = 150,
+                                                       chunk_size = 1500)
+        splits = text_splitter.split_documents(docs)
+        vector_db = Chroma.from_documents(documents = splits,
+                                          embedding = OpenAIEmbeddings(),
+                                          persist_directory = CHROMA_DIR)
         # Document retrieval
         vector_db = Chroma(embedding_function = OpenAIEmbeddings(),
                            persist_directory = CHROMA_DIR)
@@ -63,7 +65,6 @@ def invoke(openai_api_key, use_rag, prompt):
     else:
         chain = LLMChain(llm = llm, prompt = LLM_CHAIN_PROMPT)
     result = chain.run({"question": prompt})
-    print(os.listdir("/data/chroma/"))
     return result
 
 description = """<strong>Overview:</strong> The app demonstrates how to use a Large Language Model (LLM) with Retrieval Augmented Generation (RAG) on external data
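Note: as committed, the uncommented ingestion block (audio download, Whisper transcription, splitting, embedding) runs on every RAG invocation, and the vector_db built by Chroma.from_documents is immediately replaced by the Chroma instance reopened from CHROMA_DIR. Below is a minimal sketch, not part of this commit, of how the ingestion could be guarded so it runs only when the persisted index does not yet exist. It assumes the module-level constants from app.py (CHROMA_DIR, YOUTUBE_DIR, YOUTUBE_URL_01 through YOUTUBE_URL_03) and the LangChain 0.0.x-era import paths this app appears to use; get_vector_db is a hypothetical helper name.

import os

from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import OpenAIWhisperParser
from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

def get_vector_db():
    # Hypothetical helper (not in app.py): build the index once, reuse it afterwards.
    # CHROMA_DIR, YOUTUBE_DIR, and the YOUTUBE_URL_* constants are assumed to be
    # defined at module level, as in app.py.
    if not (os.path.isdir(CHROMA_DIR) and os.listdir(CHROMA_DIR)):
        # First run: download audio, transcribe with Whisper, split, embed, persist
        loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL_01,
                                                   YOUTUBE_URL_02,
                                                   YOUTUBE_URL_03], YOUTUBE_DIR),
                               OpenAIWhisperParser())
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = 150,
                                                       chunk_size = 1500)
        splits = text_splitter.split_documents(docs)
        return Chroma.from_documents(documents = splits,
                                     embedding = OpenAIEmbeddings(),
                                     persist_directory = CHROMA_DIR)
    # Subsequent runs: reopen the persisted index instead of re-transcribing
    return Chroma(embedding_function = OpenAIEmbeddings(),
                  persist_directory = CHROMA_DIR)

With a guard like this, the Whisper transcription and embedding costs are paid once; later invocations reopen the persisted Chroma index directly.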