Spaces:

MikeCraBash
/

prepr

Runtime error

App Files Files Community

prepr / app.py

MikeCraBash

new

48ee1eb 6 months ago

raw

history blame

6.65 kB

	# AI MAKERSPACE PREPR
	# Date: 2024-5-16

	# Basic Imports & Setup
	import os
	from openai import AsyncOpenAI

	# Using Chainlit for our UI
	import chainlit as cl
	from chainlit.prompt import Prompt, PromptMessage
	from chainlit.playground.providers import ChatOpenAI

	# Getting the API key from the .env file
	from dotenv import load_dotenv
	load_dotenv()

	# RAG pipeline imports and setup code
	# Get the DeveloperWeek PDF file (future implementation: direct download from URL)
	from langchain.document_loaders import PyMuPDFLoader

	# Build a Google Drive "uc?export=download" URL for the PDF identified by file_id
	file_id = "1JeA-w4kvbI3GHk9Dh_j19_Q0JUDE7hse"
	direct_url = f"https://drive.google.com/uc?export=download&id={file_id}"

	# Load the PDF when this module is executed; `docs` is what the text splitter
	# further down receives.
	# NOTE(review): this relies on PyMuPDFLoader accepting a remote URL and fetching
	# it itself - confirm for the installed langchain version.
	docs = PyMuPDFLoader(direct_url).load()

	import tiktoken
	def tiktoken_len(text: str) -> int:
	    """Return the number of gpt-3.5-turbo tokens in ``text``.

	    Used as the ``length_function`` of the text splitter so that chunk sizes
	    are measured in model tokens rather than characters.

	    Args:
	        text: The string to measure.

	    Returns:
	        The length of the token sequence produced by the gpt-3.5-turbo
	        encoding for ``text``.
	    """
	    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
	    # By default encode() raises ValueError when the text contains a
	    # special-token string (e.g. "<|endoftext|>"). Document text is
	    # arbitrary, so disable that check and count such text as ordinary text.
	    tokens = encoding.encode(text, disallowed_special=())
	    return len(tokens)

	# Split the document into chunks
	from langchain.text_splitter import RecursiveCharacterTextSplitter

	# Recursive splitter whose sizes are measured with tiktoken_len, i.e. in
	# gpt-3.5-turbo tokens rather than characters.
	text_splitter = RecursiveCharacterTextSplitter(
	chunk_size = 500, # upper bound of 500 tokens per chunk; tune experimentally
	chunk_overlap = 50, # 50 tokens shared between neighbouring chunks; tune experimentally
	length_function = tiktoken_len,
	)

	# Chunked copies of the loaded PDF documents; these are what gets indexed in Qdrant.
	split_chunks = text_splitter.split_documents(docs)

	# Load the embeddings model
	from langchain_openai.embeddings import OpenAIEmbeddings

	# OpenAI embedding model that is handed to the Qdrant vector store below.
	# NOTE(review): presumably reads OPENAI_API_KEY from the environment populated
	# by load_dotenv() - confirm.
	embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

	# Load the vector store and retriever from Qdrant
	from langchain_community.vectorstores import Qdrant

	# Build the "Prepr" collection from the chunks using embedding_model.
	# location=":memory:" requests a local in-memory Qdrant instance, so the
	# collection is rebuilt every time this module is executed.
	qdrant_vectorstore = Qdrant.from_documents(
	split_chunks,
	embedding_model,
	location=":memory:",
	collection_name="Prepr",
	)

	# Retriever view of the store with default search settings; used as the
	# "context" step of the RAG chain.
	qdrant_retriever = qdrant_vectorstore.as_retriever()

	# NOTE: this import rebinds the name ChatOpenAI, which was imported earlier
	# from chainlit.playground.providers; from here on ChatOpenAI is the
	# LangChain chat model class.
	from langchain_openai import ChatOpenAI
	# Chat model that answers the filled-in RAG prompt (library-default sampling settings).
	openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")

	from langchain_core.prompts import ChatPromptTemplate

	# Prompt template for the RAG chain. It has exactly two placeholders:
	#   {context}  - filled with the output of qdrant_retriever
	#   {question} - filled with the user's chat message
	# Everything between the triple quotes is template text, including the
	# instructions, the few-shot examples and the closing question, so any edit
	# to it changes what the model is asked.
	RAG_PROMPT = """
	CONTEXT:
	{context}

	QUERY:
	{question}

	You are a personal assistant for a professional. Your tone is professional and considerate. Before proceeding to answer about which conference sessions the user should attend, be sure to ask them what key topics they are hoping to learn from the conference, and if there are any specific sessions they are keen on attending. Use the provided context to answer the user's query. You are a professional personal assistant for an executive professional in a high tech company. You help them plan for events and meetings. You always review the provided event information. You can look up dates and location where event sessions take place from the document. If you do not know the answer, or cannot answer, please respond with "Insufficient data for further analysis, please try again".

	### Examples:

	Example 1:
	CONTEXT:
	- The conference focuses on AI, machine learning, cloud computing, and cybersecurity.
	- The user is interested in sessions related to AI and machine learning.

	QUERY:
	What sessions should I attend?

	Response:
	To determine the best sessions for you, could you please specify the key topics you are hoping to learn from the conference? Are there any specific sessions you are keen on attending?

	Example 2:
	CONTEXT:
	- The conference includes various tracks on software development, DevOps, and data science.
	- The user is a software developer interested in the latest trends in DevOps.

	QUERY:
	What sessions are best for me?

	Response:
	Based on your interest in DevOps, here are some sessions you might find valuable:
	- Session Title: Turbocharged CI/CD Pipelines: Unleashing DevOps Excellence
	Speaker: Prashant Patil
	Company: DevOps Experts Inc.
	Topic: CI/CD best practices and tools
	AI Industry Relevance: Streamlining development workflows with AI
	Details of their work in AI: Focuses on integrating AI for predictive analysis in CI/CD pipelines
	Main Point Likely to be Made: Enhancing productivity through automated pipelines
	Questions to Ask the Speaker:
	1. What are the key metrics for measuring CI/CD performance improvements?
	2. How can AI be integrated into existing CI/CD workflows?
	3. What are common pitfalls to avoid when implementing CI/CD pipelines?

	Example 3:
	CONTEXT:
	- The conference covers a wide range of topics, including contextualization in AI.

	QUERY:
	What sessions should I attend?

	Response:
	Could you please specify what key topics you are hoping to learn from the conference? Are there any specific sessions you are keen on attending?

	QUERY:
	I am interested in contextualization.

	Response:
	There is a session on contextualization on Friday, with Dr. TBA. Here are the details:
	- Session Title: Advanced Contextualization in AI
	Speaker: Dr. TBA
	Company: Context AI Research Lab
	Topic: Deep dive into AI contextualization techniques
	AI Industry Relevance: Enhancing AI understanding and relevance
	Details of their work in AI: Focus on contextual algorithms and their applications
	Main Point Likely to be Made: Improving AI contextual understanding for better user interactions
	Questions to Ask the Speaker:
	1. What are the latest advancements in AI contextualization?
	2. How can contextualization improve AI decision-making processes?
	3. What are the challenges in implementing contextualization techniques in AI systems?

	### End of Examples

	Is there anything else that I can help you with?
	"""
	# Wrap the raw template so the chain can fill in {context} and {question}.
	rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

	from operator import itemgetter
	from langchain.schema.output_parser import StrOutputParser
	from langchain.schema.runnable import RunnablePassthrough

	# RAG pipeline composed with the LangChain "|" operator.
	# Input:  {"question": <user text>}
	# Output: {"response": <chat model message>, "context": <retrieved documents>}
	retrieval_augmented_qa_chain = (
	    # Step 1: look up documents for the question and keep the question itself.
	    {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
	    # Step 2: re-assign "context" to itself (kept from the original pipeline).
	    | RunnablePassthrough.assign(context=itemgetter("context"))
	    # Step 3: fill the prompt, call the chat model, and pass the context through
	    # alongside the answer so callers can inspect the sources.
	    | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
	)

	# Chainlit App
	@cl.on_chat_start
	async def start_chat():
	    """Store the default generation settings in the Chainlit user session.

	    Registered through ``cl.on_chat_start``. The settings dictionary is saved
	    under the session key ``"settings"``.
	    """
	    cl.user_session.set(
	        "settings",
	        dict(
	            model="gpt-3.5-turbo",
	            temperature=0,
	            max_tokens=500,
	            top_p=1,
	            frequency_penalty=0,
	            presence_penalty=0,
	        ),
	    )

	@cl.on_message
	async def main(message: cl.Message):
	    """Answer an incoming chat message with the RAG chain and send the reply.

	    Args:
	        message: The Chainlit message from the user; its ``content`` is used
	            as the question for the retrieval chain.
	    """
	    # The chain's invoke() is synchronous (vector search plus an OpenAI
	    # request). Calling it directly here would block the event loop for the
	    # whole request, so run it in a worker thread via cl.make_async.
	    run_chain = cl.make_async(retrieval_augmented_qa_chain.invoke)
	    result = await run_chain({"question": message.content})

	    # "response" holds the chat model's message object; its .content is the text.
	    await cl.Message(content=result["response"].content).send()