Spaces: Build error
Update app.py
Browse files
app.py
CHANGED
@@ -33,10 +33,10 @@ YOUTUBE_DIR = "/data/youtube"
|
|
33 |
YOUTUBE_URL_1 = "https://www.youtube.com/watch?v=--khbXchTeE"
|
34 |
YOUTUBE_URL_2 = "https://www.youtube.com/watch?v=hdhZwyf24mE"
|
35 |
YOUTUBE_URL_3 = "https://www.youtube.com/watch?v=vw-KWfKwvTQ"
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
|
41 |
MODEL_NAME = "gpt-4"
|
42 |
|
@@ -46,20 +46,21 @@ def invoke(openai_api_key, use_rag, prompt):
|
|
46 |
temperature = 0)
|
47 |
if (use_rag):
|
48 |
# Document loading, splitting, and storage
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
63 |
# Document retrieval
|
64 |
vector_db = Chroma(embedding_function = OpenAIEmbeddings(),
|
65 |
persist_directory = CHROMA_DIR)
|
@@ -78,9 +79,9 @@ description = """<strong>Overview:</strong> The app demonstrates how to use a La
|
|
78 |
(YouTube videos, PDFs, URLs, or other <a href='https://raw.githubusercontent.com/bstraehle/ai-ml-dl/c38b224c196fc984aab6b6cc6bdc666f8f4fbcff/langchain/document-loaders.png'>data sources</a>).\n\n
|
79 |
<strong>Instructions:</strong> Enter an OpenAI API key and perform LLM use cases (semantic search, sentiment analysis, summarization, translation, etc.) on YouTube videos about GPT-4.
|
80 |
<ul style="list-style-type:square;">
|
81 |
-
<li>Set "Retrieval Augmented Generation" to "<strong>False</strong>" and submit prompt "
|
82 |
-
<li>Set "Retrieval Augmented Generation" to "<strong>True</strong>" and submit prompt "
|
83 |
-
<li>Experiment with different prompts, for example "
|
84 |
</ul>\n\n
|
85 |
<strong>Technology:</strong> <a href='https://www.gradio.app/'>Gradio</a> UI using <a href='https://platform.openai.com/'>OpenAI</a> API via AI-first
|
86 |
<a href='https://www.langchain.com/'>LangChain</a> toolkit with <a href='https://openai.com/research/whisper'>Whisper</a> (speech-to-text) and
|
|
|
33 |
YOUTUBE_URL_1 = "https://www.youtube.com/watch?v=--khbXchTeE"
|
34 |
YOUTUBE_URL_2 = "https://www.youtube.com/watch?v=hdhZwyf24mE"
|
35 |
YOUTUBE_URL_3 = "https://www.youtube.com/watch?v=vw-KWfKwvTQ"
|
36 |
+
YOUTUBE_URL_4 = "https://www.youtube.com/watch?v=kiHpqXNCPj8"
|
37 |
+
YOUTUBE_URL_5 = "https://www.youtube.com/shorts/3x95mw35dJY"
|
38 |
+
YOUTUBE_URL_6 = "https://www.youtube.com/shorts/zg-DS23wq0c"
|
39 |
+
YOUTUBE_URL_7 = "https://www.youtube.com/shorts/cS4fyhKZ8bQ"
|
40 |
|
41 |
MODEL_NAME = "gpt-4"
|
42 |
|
|
|
46 |
temperature = 0)
|
47 |
if (use_rag):
|
48 |
# Document loading, splitting, and storage
|
49 |
+
loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL_1,
|
50 |
+
YOUTUBE_URL_2,
|
51 |
+
YOUTUBE_URL_3,
|
52 |
+
YOUTUBE_URL_4,
|
53 |
+
YOUTUBE_URL_5,
|
54 |
+
YOUTUBE_URL_6,
|
55 |
+
YOUTUBE_URL_7], YOUTUBE_DIR),
|
56 |
+
OpenAIWhisperParser())
|
57 |
+
docs = loader.load()
|
58 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = 150,
|
59 |
+
chunk_size = 1500)
|
60 |
+
splits = text_splitter.split_documents(docs)
|
61 |
+
vector_db = Chroma.from_documents(documents = splits,
|
62 |
+
embedding = OpenAIEmbeddings(),
|
63 |
+
persist_directory = CHROMA_DIR)
|
64 |
# Document retrieval
|
65 |
vector_db = Chroma(embedding_function = OpenAIEmbeddings(),
|
66 |
persist_directory = CHROMA_DIR)
|
|
|
79 |
(YouTube videos, PDFs, URLs, or other <a href='https://raw.githubusercontent.com/bstraehle/ai-ml-dl/c38b224c196fc984aab6b6cc6bdc666f8f4fbcff/langchain/document-loaders.png'>data sources</a>).\n\n
|
80 |
<strong>Instructions:</strong> Enter an OpenAI API key and perform LLM use cases (semantic search, sentiment analysis, summarization, translation, etc.) on YouTube videos about GPT-4.
|
81 |
<ul style="list-style-type:square;">
|
82 |
+
<li>Set "Retrieval Augmented Generation" to "<strong>False</strong>" and submit prompt "Explain GPT-4". The LLM <strong>without</strong> RAG does not know the answer.</li>
|
83 |
+
<li>Set "Retrieval Augmented Generation" to "<strong>True</strong>" and submit prompt "Explain GPT-4". The LLM <strong>with</strong> RAG knows the answer.</li>
|
84 |
+
<li>Experiment with different prompts, for example "Explain GPT-4 in one sentence, output in German", "List pros and cons of GPT-4", or "Compare GPT-4 and Claude 2, output in JSON".</li>
|
85 |
</ul>\n\n
|
86 |
<strong>Technology:</strong> <a href='https://www.gradio.app/'>Gradio</a> UI using <a href='https://platform.openai.com/'>OpenAI</a> API via AI-first
|
87 |
<a href='https://www.langchain.com/'>LangChain</a> toolkit with <a href='https://openai.com/research/whisper'>Whisper</a> (speech-to-text) and
|