prepr / app.py
MikeCraBash's picture
new
48ee1eb
raw
history blame
6.65 kB
# AI MAKERSPACE PREPR
# Date: 2024-5-16
# Basic Imports & Setup
import os
from openai import AsyncOpenAI
# Using Chainlit for our UI
import chainlit as cl
from chainlit.prompt import Prompt, PromptMessage
from chainlit.playground.providers import ChatOpenAI
# Getting the API key from the .env file
from dotenv import load_dotenv
load_dotenv()
# RAG pipeline imports and setup code
# Get the DeveloperWeek PDF file by downloading it directly from Google Drive.
# PyMuPDFLoader is imported from langchain_community (already a dependency of
# this file) because the legacy `langchain.document_loaders` path is deprecated.
from langchain_community.document_loaders import PyMuPDFLoader

# Google Drive file id, expanded into the direct-download URL format.
file_id = "1JeA-w4kvbI3GHk9Dh_j19_Q0JUDE7hse"
direct_url = f"https://drive.google.com/uc?export=download&id={file_id}"

# Load the document using the direct URL.
docs = PyMuPDFLoader(direct_url).load()
import tiktoken
def tiktoken_len(text):
    """Return the number of tokens in ``text`` under the gpt-3.5-turbo encoding."""
    encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
    return len(encoder.encode(text))
# Chunk the loaded document; sizes are measured with tiktoken_len, i.e. in
# tokens rather than characters.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,  # tokens per chunk; tune experimentally
    chunk_overlap=50,  # tokens shared by neighbouring chunks; tune experimentally
    length_function=tiktoken_len,
)
split_chunks = text_splitter.split_documents(docs)
# Embedding model used to vectorise the chunks.
from langchain_openai.embeddings import OpenAIEmbeddings

embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Index the chunks in an in-memory Qdrant collection and expose a retriever.
from langchain_community.vectorstores import Qdrant

qdrant_vectorstore = Qdrant.from_documents(
    split_chunks,
    embedding_model,
    location=":memory:",
    collection_name="Prepr",
)
qdrant_retriever = qdrant_vectorstore.as_retriever()

# Chat model that generates the final answer.
# NOTE: this import rebinds the name ChatOpenAI that was imported earlier
# from chainlit.playground.providers.
from langchain_openai import ChatOpenAI

openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
from langchain_core.prompts import ChatPromptTemplate
# Template text for the RAG chain. {context} and {question} are the only
# template variables; the rest is fixed instruction text and worked examples.
RAG_PROMPT = """
CONTEXT:
{context}
QUERY:
{question}
You are a personal assistant for a professional. Your tone is professional and considerate. Before proceeding to answer about which conference sessions the user should attend, be sure to ask them what key topics they are hoping to learn from the conference, and if there are any specific sessions they are keen on attending. Use the provided context to answer the user's query. You are a professional personal assistant for an executive professional in a high tech company. You help them plan for events and meetings. You always review the provided event information. You can look up dates and location where event sessions take place from the document. If you do not know the answer, or cannot answer, please respond with "Insufficient data for further analysis, please try again".
### Examples:
Example 1:
CONTEXT:
- The conference focuses on AI, machine learning, cloud computing, and cybersecurity.
- The user is interested in sessions related to AI and machine learning.
QUERY:
What sessions should I attend?
Response:
To determine the best sessions for you, could you please specify the key topics you are hoping to learn from the conference? Are there any specific sessions you are keen on attending?
Example 2:
CONTEXT:
- The conference includes various tracks on software development, DevOps, and data science.
- The user is a software developer interested in the latest trends in DevOps.
QUERY:
What sessions are best for me?
Response:
Based on your interest in DevOps, here are some sessions you might find valuable:
- **Session Title:** Turbocharged CI/CD Pipelines: Unleashing DevOps Excellence
**Speaker:** Prashant Patil
**Company:** DevOps Experts Inc.
**Topic:** CI/CD best practices and tools
**AI Industry Relevance:** Streamlining development workflows with AI
**Details of their work in AI:** Focuses on integrating AI for predictive analysis in CI/CD pipelines
**Main Point Likely to be Made:** Enhancing productivity through automated pipelines
**Questions to Ask the Speaker:**
1. What are the key metrics for measuring CI/CD performance improvements?
2. How can AI be integrated into existing CI/CD workflows?
3. What are common pitfalls to avoid when implementing CI/CD pipelines?
Example 3:
CONTEXT:
- The conference covers a wide range of topics, including contextualization in AI.
QUERY:
What sessions should I attend?
Response:
Could you please specify what key topics you are hoping to learn from the conference? Are there any specific sessions you are keen on attending?
QUERY:
I am interested in contextualization.
Response:
There is a session on contextualization on Friday, with Dr. TBA. Here are the details:
- **Session Title:** Advanced Contextualization in AI
**Speaker:** Dr. TBA
**Company:** Context AI Research Lab
**Topic:** Deep dive into AI contextualization techniques
**AI Industry Relevance:** Enhancing AI understanding and relevance
**Details of their work in AI:** Focus on contextual algorithms and their applications
**Main Point Likely to be Made:** Improving AI contextual understanding for better user interactions
**Questions to Ask the Speaker:**
1. What are the latest advancements in AI contextualization?
2. How can contextualization improve AI decision-making processes?
3. What are the challenges in implementing contextualization techniques in AI systems?
### End of Examples
Is there anything else that I can help you with?
"""
# Build the chat prompt template object from the raw template string.
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
from operator import itemgetter

# Import from langchain_core (already a dependency of this file) rather than
# the deprecated `langchain.schema.*` re-export paths.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# RAG chain. Input: {"question": str}.
#   1. Retrieve documents for the question and carry the question forward.
#   2. Pass the dict through, re-assigning "context" to itself.
#   3. Format the prompt, call the chat model, and return both the model's
#      message ("response") and the retrieved documents ("context").
retrieval_augmented_qa_chain = (
    {
        "context": itemgetter("question") | qdrant_retriever,
        "question": itemgetter("question"),
    }
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {
        "response": rag_prompt | openai_chat_model,
        "context": itemgetter("context"),
    }
)
# Chainlit App
@cl.on_chat_start
async def start_chat():
    """Store the model and sampling settings under "settings" in the user session."""
    cl.user_session.set(
        "settings",
        dict(
            model="gpt-3.5-turbo",
            temperature=0,
            max_tokens=500,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        ),
    )
@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming chat message with the RAG chain and send the reply.

    Args:
        message: The Chainlit message received from the user; its ``content``
            is passed to the chain as the question.
    """
    # Use the chain's async entry point: the synchronous ``invoke`` would block
    # the event loop for the whole retrieval + LLM round trip.
    response = await retrieval_augmented_qa_chain.ainvoke(
        {"question": message.content}
    )
    # The chain's "response" value is the chat model's message; send its text.
    await cl.Message(content=response["response"].content).send()