GPT4ApplicationWithDALLE3

Runtime error

App Files Files Community

Todd Deshane committed on Nov 17, 2023

Commit

a5f4443

1 Parent(s): 4097737

add in youtube rag

Browse files

Files changed (1) hide show

tools.py +76 -0

tools.py CHANGED Viewed

@@ -55,6 +55,27 @@ def _generate_image(prompt: str):
     cl.user_session.set("generated_image", name)
     return name
 def generate_image(prompt: str):
     image_name = _generate_image(prompt)
@@ -71,3 +92,58 @@ generate_image_tool = Tool.from_function(
     description=f"Useful to create an image from a text prompt. Input should be a single string strictly in the following JSON format: {generate_image_format}",
     return_direct=True,
 )

     cl.user_session.set("generated_image", name)
     return name
+def _youtube_rag(prompt: str):
+    """Answer ``prompt`` using documents retrieved from the Chroma database.
+
+    Args:
+        prompt: The question to ask; it is used both to retrieve the relevant
+            documents and as the question handed to the QA chain.
+
+    Returns:
+        Whatever ``chain.run`` returns for the retrieved documents and ``prompt``.
+
+    Raises:
+        KeyError: If the ``OPENAI_API_KEY`` environment variable is not set.
+    """
+    openai.api_key = os.environ["OPENAI_API_KEY"]
+    # No transcripts are processed in this function, so the list stays empty and
+    # is only consumed if initialize_chroma_db has to create a new database.
+    flattened_texts = []
+    # Check whether the database directory already exists.
+    if os.path.exists(persist_directory):
+        if debug:
+            print("Database exists, skipping transcript processing...")
+    else:
+        print("Database does not exist")
+    if debug:
+        print("Initializing database...")
+    docsearch = initialize_chroma_db(flattened_texts)
+    docs = docsearch.get_relevant_documents(prompt)
+    chat_model = ChatOpenAI(model_name="gpt-4-1106-preview")
+    chain = load_qa_chain(llm=chat_model, chain_type="stuff")
+    # The question is the caller's prompt; the previous `query` name was never
+    # defined and raised NameError on every call.
+    answer = chain.run(input_documents=docs, question=prompt)
+    return answer
 def generate_image(prompt: str):
     image_name = _generate_image(prompt)
     description=f"Useful to create an image from a text prompt. Input should be a single string strictly in the following JSON format: {generate_image_format}",
     return_direct=True,
 )
+import os
+import openai
+from langchain.chat_models import ChatOpenAI
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.chains.question_answering import load_qa_chain
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+# When True, the functions in this file print progress messages while checking, loading or creating the database.
+debug = False
+# Directory whose existence decides whether the Chroma database is loaded or created.
+persist_directory = 'db'
+# Sentence-transformer embedding model passed to Chroma both when loading and when creating the database.
+embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+# Function to initialize or load the Chroma database
+def initialize_chroma_db(texts):
+    """Load the Chroma database in ``persist_directory``, or create it from ``texts``.
+
+    Args:
+        texts: Strings to embed when no database directory exists yet; unused
+            when an existing database is loaded.
+
+    Returns:
+        The object returned by ``as_retriever()`` on the Chroma store.
+    """
+    if os.path.exists(persist_directory):
+        # Load existing database
+        if debug:
+            print("Loading existing database...")
+        # Use the shared constant instead of a second hard-coded "./db" so the
+        # existence check and the load always refer to the same directory.
+        db = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
+    else:
+        # Create and initialize new database
+        if debug:
+            print("Creating new database...")
+        db = Chroma.from_texts(texts, embedding_function, persist_directory=persist_directory)
+    return db.as_retriever()
+def youtube_rag(prompt: str):
+    """Run the YouTube RAG query for ``prompt`` and format the answer for the agent.
+
+    Args:
+        prompt: The question forwarded unchanged to ``_youtube_rag``.
+
+    Returns:
+        The answer wrapped as ``" {answer}."`` (leading space, trailing period).
+    """
+    answer = _youtube_rag(prompt)
+    return f" {answer}."
+# This is the YouTube RAG tool, which lets the agent query the YouTube vector db.
+# The `description` field is of utmost importance as it is what the LLM "brain" uses to determine
+# which tool to use for a given input.
+# NOTE(review): the doubled braces are kept verbatim in the description text;
+# presumably they escape braces for a downstream prompt template - confirm.
+youtube_rag_format = '{{"prompt": "prompt"}}'
+# The tool is built after youtube_rag is defined (referencing it earlier raised
+# NameError at import) and gets its own name so that it no longer overwrites
+# generate_image_tool.
+youtube_rag_tool = Tool.from_function(
+    func=youtube_rag,
+    name="Youtube_Rag",
+    description=f"Useful to query the vector database containing youtube transcripts about Aaron Lebauer. Input should be a single string strictly in the following JSON format: {youtube_rag_format}",
+    return_direct=True,
+)