bstraehle committed
Commit d871888 · 1 Parent(s): 865d70c

Update app.py

Files changed (1)
  1. app.py +22 -21
app.py CHANGED
@@ -33,10 +33,10 @@ YOUTUBE_DIR = "/data/youtube"
 YOUTUBE_URL_1 = "https://www.youtube.com/watch?v=--khbXchTeE"
 YOUTUBE_URL_2 = "https://www.youtube.com/watch?v=hdhZwyf24mE"
 YOUTUBE_URL_3 = "https://www.youtube.com/watch?v=vw-KWfKwvTQ"
-
-YOUTUBE_URL_4 = "https://www.youtube.com/shorts/3x95mw35dJY"
-YOUTUBE_URL_5 = "https://www.youtube.com/shorts/zg-DS23wq0c"
-YOUTUBE_URL_6 = "https://www.youtube.com/shorts/cS4fyhKZ8bQ"
+YOUTUBE_URL_4 = "https://www.youtube.com/watch?v=kiHpqXNCPj8"
+YOUTUBE_URL_5 = "https://www.youtube.com/shorts/3x95mw35dJY"
+YOUTUBE_URL_6 = "https://www.youtube.com/shorts/zg-DS23wq0c"
+YOUTUBE_URL_7 = "https://www.youtube.com/shorts/cS4fyhKZ8bQ"
 
 MODEL_NAME = "gpt-4"
 
@@ -46,20 +46,21 @@ def invoke(openai_api_key, use_rag, prompt):
                      temperature = 0)
     if (use_rag):
         # Document loading, splitting, and storage
-        #loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL_1,
-        #                                           YOUTUBE_URL_2,
-        #                                           YOUTUBE_URL_3,
-        #                                           YOUTUBE_URL_4,
-        #                                           YOUTUBE_URL_5,
-        #                                           YOUTUBE_URL_6], YOUTUBE_DIR),
-        #                       OpenAIWhisperParser())
-        #docs = loader.load()
-        #text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = 150,
-        #                                               chunk_size = 1500)
-        #splits = text_splitter.split_documents(docs)
-        #vector_db = Chroma.from_documents(documents = splits,
-        #                                  embedding = OpenAIEmbeddings(),
-        #                                  persist_directory = CHROMA_DIR)
+        loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL_1,
+                                                   YOUTUBE_URL_2,
+                                                   YOUTUBE_URL_3,
+                                                   YOUTUBE_URL_4,
+                                                   YOUTUBE_URL_5,
+                                                   YOUTUBE_URL_6,
+                                                   YOUTUBE_URL_7], YOUTUBE_DIR),
+                               OpenAIWhisperParser())
+        docs = loader.load()
+        text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = 150,
+                                                       chunk_size = 1500)
+        splits = text_splitter.split_documents(docs)
+        vector_db = Chroma.from_documents(documents = splits,
+                                          embedding = OpenAIEmbeddings(),
+                                          persist_directory = CHROMA_DIR)
         # Document retrieval
         vector_db = Chroma(embedding_function = OpenAIEmbeddings(),
                            persist_directory = CHROMA_DIR)
@@ -78,9 +79,9 @@ description = """<strong>Overview:</strong> The app demonstrates how to use a La
 (YouTube videos, PDFs, URLs, or other <a href='https://raw.githubusercontent.com/bstraehle/ai-ml-dl/c38b224c196fc984aab6b6cc6bdc666f8f4fbcff/langchain/document-loaders.png'>data sources</a>).\n\n
 <strong>Instructions:</strong> Enter an OpenAI API key and perform LLM use cases (semantic search, sentiment analysis, summarization, translation, etc.) on YouTube videos about GPT-4.
 <ul style="list-style-type:square;">
-<li>Set "Retrieval Augmented Generation" to "<strong>False</strong>" and submit prompt "explain gpt-4". The LLM <strong>without</strong> RAG does not know the answer.</li>
-<li>Set "Retrieval Augmented Generation" to "<strong>True</strong>" and submit prompt "explain gpt-4". The LLM <strong>with</strong> RAG knows the answer.</li>
-<li>Experiment with different prompts, for example "explain gpt-4 in german", "list pros and cons of gpt-4", or "compare gtp-4 and gpt-3.5 in JSON".</li>
+<li>Set "Retrieval Augmented Generation" to "<strong>False</strong>" and submit prompt "Explain GPT-4". The LLM <strong>without</strong> RAG does not know the answer.</li>
+<li>Set "Retrieval Augmented Generation" to "<strong>True</strong>" and submit prompt "Explain GPT-4". The LLM <strong>with</strong> RAG knows the answer.</li>
+<li>Experiment with different prompts, for example "Explain GPT-4 in one sentence, output in German", "List pros and cons of GPT-4", or "Compare GPT-4 and Claude 2, output in JSON".</li>
 </ul>\n\n
 <strong>Technology:</strong> <a href='https://www.gradio.app/'>Gradio</a> UI using <a href='https://platform.openai.com/'>OpenAI</a> API via AI-first
 <a href='https://www.langchain.com/'>LangChain</a> toolkit with <a href='https://openai.com/research/whisper'>Whisper</a> (speech-to-text) and
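
Note on the pipeline this commit re-enables: GenericLoader downloads the audio of each YouTube URL into YOUTUBE_DIR, OpenAIWhisperParser transcribes it, RecursiveCharacterTextSplitter cuts the transcripts into 1500-character chunks with 150-character overlap, and Chroma.from_documents embeds and persists the chunks under CHROMA_DIR. Ingestion runs once; the store is then reopened for retrieval. The diff ends right after that reopen, so what follows is only a minimal sketch of how such a persisted store is typically queried with a retrieval QA chain in 0.0.x-era LangChain; the answer() function, the rag_chain name, and the CHROMA_DIR value are illustrative assumptions, not code from this commit.

# Minimal sketch (assumptions noted above); classic LangChain imports.
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

CHROMA_DIR = "/data/chroma"  # assumed value; the constant is defined outside this diff
MODEL_NAME = "gpt-4"

def answer(openai_api_key, prompt):
    # Reopen the vector store persisted by the ingestion step in the diff.
    vector_db = Chroma(embedding_function = OpenAIEmbeddings(openai_api_key = openai_api_key),
                       persist_directory = CHROMA_DIR)
    llm = ChatOpenAI(model_name = MODEL_NAME,
                     openai_api_key = openai_api_key,
                     temperature = 0)
    # Embed the prompt, fetch the most similar transcript chunks, and answer over them.
    rag_chain = RetrievalQA.from_chain_type(llm = llm,
                                            retriever = vector_db.as_retriever())
    return rag_chain.run(prompt)

Because Whisper transcription and embedding happen only at ingestion, the retrieval path embeds just the incoming prompt, so reopening vector_db from persist_directory on every call is cheap and never re-downloads the videos.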