Update app.py
app.py
CHANGED
@@ -31,10 +31,8 @@ def invoke(openai_api_key, youtube_url, process_video, prompt):
     openai.api_key = openai_api_key
     if (process_video):
         if (os.path.isdir(CHROMA_DIR)):
-            print(os.listdir(CHROMA_DIR))
             shutil.rmtree(CHROMA_DIR)
         if (os.path.isdir(YOUTUBE_DIR)):
-            print(os.listdir(YOUTUBE_DIR))
             shutil.rmtree(YOUTUBE_DIR)
         loader = GenericLoader(YoutubeAudioLoader([youtube_url], YOUTUBE_DIR), OpenAIWhisperParser())
         docs = loader.load()
@@ -42,10 +40,6 @@ def invoke(openai_api_key, youtube_url, process_video, prompt):
         splits = text_splitter.split_documents(docs)
         vector_db = Chroma.from_documents(documents = splits, embedding = OpenAIEmbeddings(), persist_directory = CHROMA_DIR)
     else:
-        if (os.path.isdir(CHROMA_DIR)):
-            print(os.listdir(CHROMA_DIR))
-        if (os.path.isdir(YOUTUBE_DIR)):
-            print(os.listdir(YOUTUBE_DIR))
         vector_db = Chroma(persist_directory = CHROMA_DIR, embedding_function = OpenAIEmbeddings())
     llm = ChatOpenAI(model_name = MODEL_NAME, temperature = 0)
     qa_chain = RetrievalQA.from_chain_type(llm, retriever = vector_db.as_retriever(), return_source_documents = True, chain_type_kwargs = {"prompt": QA_CHAIN_PROMPT})
@@ -56,9 +50,8 @@ description = """<strong>Overview:</strong> The app demonstrates how to use a <s
 (RAG) on external data (YouTube videos in this case, but could be PDFs, URLs, databases, etc.)\n\n
 <strong>Instructions:</strong> Enter an OpenAI API key, YouTube URL, and prompt to perform semantic search, sentiment analysis, summarization,
 translation, etc. "Process Video" specifies whether or not to perform speech-to-text processing. To ask multiple questions related to the same video,
-typically set it to "True" the first
-
-"what is gpt-4, answer in german" or "write a poem about gpt-4".\n\n
+typically set it to "True" the first run and then to "False". The example is a 3:12 min. video about GPT-4 and takes about 20 sec. to process.
+Try different prompts, for example "what is gpt-4, answer in german" or "write a poem about gpt-4".\n\n
 <strong>Technology:</strong> <a href='https://www.gradio.app/'>Gradio</a> UI using <a href='https://platform.openai.com/'>OpenAI</a> API
 via AI-first <a href='https://www.langchain.com/'>LangChain</a> toolkit with <a href='https://openai.com/research/whisper'>Whisper</a> (speech-to-text)
 and <a href='https://openai.com/research/gpt-4'>GPT-4</a> (LLM) foundation models as well as AI-native
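For context, a minimal sketch of how the invoke() pipeline reads once the debug prints and the redundant directory checks are removed. This is an approximation, not the full app.py: the directory constants, model name, prompt template, and splitter settings are assumed values (only the names come from the diff), and the import paths assume a 2023-era langchain release.

# Sketch only -- CHROMA_DIR, YOUTUBE_DIR, MODEL_NAME, QA_CHAIN_PROMPT and the
# splitter settings are assumptions; only the identifiers appear in the diff.
import os
import shutil

import openai
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import OpenAIWhisperParser
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

CHROMA_DIR = "data/chroma"    # assumed path
YOUTUBE_DIR = "data/youtube"  # assumed path
MODEL_NAME = "gpt-4"          # assumed model
QA_CHAIN_PROMPT = PromptTemplate.from_template(
    "Use the following context to answer the question.\n"
    "{context}\n"
    "Question: {question}\n"
    "Answer:")                # assumed prompt template

def invoke(openai_api_key, youtube_url, process_video, prompt):
    openai.api_key = openai_api_key
    if process_video:
        # First run for a video: clear any previous index and audio, then
        # download, transcribe (Whisper), split, and embed into Chroma.
        if os.path.isdir(CHROMA_DIR):
            shutil.rmtree(CHROMA_DIR)
        if os.path.isdir(YOUTUBE_DIR):
            shutil.rmtree(YOUTUBE_DIR)
        loader = GenericLoader(YoutubeAudioLoader([youtube_url], YOUTUBE_DIR),
                               OpenAIWhisperParser())
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
        splits = text_splitter.split_documents(docs)
        vector_db = Chroma.from_documents(documents=splits,
                                          embedding=OpenAIEmbeddings(),
                                          persist_directory=CHROMA_DIR)
    else:
        # Follow-up question: reuse the persisted Chroma index, no re-transcription.
        vector_db = Chroma(persist_directory=CHROMA_DIR,
                           embedding_function=OpenAIEmbeddings())
    llm = ChatOpenAI(model_name=MODEL_NAME, temperature=0)
    qa_chain = RetrievalQA.from_chain_type(llm,
                                           retriever=vector_db.as_retriever(),
                                           return_source_documents=True,
                                           chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})
    result = qa_chain({"query": prompt})
    return result["result"]

This maps onto the "Process Video" flag described above: "True" re-runs the Whisper transcription and rebuilds the index for a new video, "False" answers a further prompt against the already-persisted Chroma index.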