Spaces:
Runtime error
Runtime error
import io | |
import os | |
from openai import OpenAI | |
from langchain.tools import StructuredTool, Tool | |
from io import BytesIO | |
import requests | |
import json | |
from io import BytesIO | |
import chainlit as cl | |
def get_image_name():
    """Return the session key for the next generated image.

    A per-user counter is kept in the Chainlit user session under
    ``"image_count"``. The first call yields ``"image-0"``; every later call
    increments the stored counter and yields ``"image-<count>"``.
    """
    previous = cl.user_session.get("image_count")
    # No counter stored yet means this is the first image of the session.
    current = 0 if previous is None else previous + 1
    cl.user_session.set("image_count", current)
    return f"image-{current}"
def _generate_image(prompt: str):
    """Generate an image from a text prompt with DALL-E 3 and cache its bytes.

    The image is requested through the OpenAI images API, downloaded from the
    URL in the response, and stored in the Chainlit user session under a fresh
    name from ``get_image_name()``. That name is also recorded under the
    ``"generated_image"`` session key so it can be looked up later.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The session key (e.g. ``"image-0"``) under which the raw image bytes
        were stored.

    Raises:
        requests.HTTPError: If downloading the generated image fails.
    """
    client = OpenAI()
    response = client.images.generate(
        model="dall-e-3",
        prompt=prompt,
        size="1024x1024",
        quality="standard",
        n=1,
    )

    # Bound the download time and fail loudly on a bad status so an HTTP error
    # page is never stored in the session as if it were image data.
    image_payload = requests.get(response.data[0].url, timeout=60)
    image_payload.raise_for_status()

    name = get_image_name()
    cl.user_session.set(name, image_payload.content)
    cl.user_session.set("generated_image", name)
    return name
def _youtube_rag(prompt: str):
    """Answer a question from the persisted transcript vector database.

    Relevant documents are fetched from the Chroma retriever and passed to a
    "stuff" question-answering chain backed by ``gpt-4-1106-preview``.

    No transcripts are processed here: the retriever is initialised with an
    empty text list, so a database that does not already exist on disk is
    created from that empty list.

    Args:
        prompt: The question to ask.

    Returns:
        The answer produced by the QA chain.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set in the environment.
    """
    openai.api_key = os.environ["OPENAI_API_KEY"]
    transcripts = []

    # Report the database state; the "missing" message is printed regardless
    # of the debug flag.
    if not os.path.exists(persist_directory):
        print("Database does not exist")
    elif debug:
        print("Database exists, skipping transcript processing...")

    if debug:
        print("Initializing database...")

    retriever = initialize_chroma_db(transcripts)
    matching_docs = retriever.get_relevant_documents(prompt)

    llm = ChatOpenAI(model_name="gpt-4-1106-preview")
    qa_chain = load_qa_chain(llm=llm, chain_type="stuff")
    return qa_chain.run(input_documents=matching_docs, question=prompt)
def generate_image(prompt: str):
    """Tool entry point: generate an image and report the name it was stored under.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        A short message of the form ``"Here is <image name>."``.
    """
    return f"Here is {_generate_image(prompt)}."
# This is our tool - which is what allows our agent to generate images in the first place!
# The `description` field is of utmost importance, as it is what the LLM "brain" uses to determine
# which tool to use for a given input.
# NOTE(review): this is a plain string, so the doubled braces are literal and
# appear as-is in the description; presumably they are escapes for a downstream
# prompt template - confirm.
generate_image_format = '{{"prompt": "prompt"}}'

# LangChain tool wrapping `generate_image`; its output is returned directly.
generate_image_tool = Tool.from_function(
    name="GenerateImage",
    func=generate_image,
    description=(
        "Useful to create an image from a text prompt. "
        "Input should be a single string strictly in the following JSON format: "
        f"{generate_image_format}"
    ),
    return_direct=True,
)
def youtube_rag(prompt: str):
    """Tool entry point: answer a question from the transcript vector database.

    Args:
        prompt: The question to ask.

    Returns:
        The answer from ``_youtube_rag``, prefixed with a single space and
        followed by a period.
    """
    return f" {_youtube_rag(prompt)}."
import os | |
import openai | |
from langchain.chat_models import ChatOpenAI | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.vectorstores import Chroma | |
from langchain.chains.question_answering import load_qa_chain | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings | |
# When True, progress messages are printed while the database is loaded or created.
debug = False

# Directory where the Chroma vector database is persisted.
persist_directory = "db"

# Embedding model shared by database creation and loading.
embedding_function = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
)
def initialize_chroma_db(texts):
    """Load the persisted Chroma database, or create it, and return a retriever.

    If ``persist_directory`` exists on disk the database is loaded from it;
    otherwise a new database is built from ``texts`` and persisted there. Both
    paths use the module-level ``embedding_function``.

    Args:
        texts: Texts to index. Only used when no database exists yet.

    Returns:
        The retriever obtained from ``db.as_retriever()``.
    """
    if os.path.exists(persist_directory):
        if debug:
            print("Loading existing database...")
        # Load from the same location the existence check and the creation
        # branch use, rather than a separately hard-coded path.
        db = Chroma(
            persist_directory=persist_directory,
            embedding_function=embedding_function,
        )
    else:
        if debug:
            print("Creating new database...")
        db = Chroma.from_texts(
            texts,
            embedding_function,
            persist_directory=persist_directory,
        )
    return db.as_retriever()
# This is the YouTube RAG tool - which is what allows our agent to query the YouTube vector database.
# The `description` field is of utmost importance, as it is what the LLM "brain" uses to determine
# which tool to use for a given input.
# NOTE(review): this is a plain string, so the doubled braces are literal and
# appear as-is in the description; presumably they are escapes for a downstream
# prompt template - confirm.
youtube_rag_format = '{{"prompt": "prompt"}}'

# LangChain tool wrapping `youtube_rag`; its output is returned directly.
# Bound to its own name: it was previously assigned to `generate_image_tool`,
# which overwrote the image-generation tool defined earlier in this module.
youtube_rag_tool = Tool.from_function(
    func=youtube_rag,
    name="Youtube_Rag",
    description=f"Useful to query the vector database containing youtube transcripts about Aaron Lebauer. Input should be a single string strictly in the following JSON format: {youtube_rag_format}",
    return_direct=True,
)