from typing import List

from RagPipeline import RetrievalAugmentedQAPipeline
from chainlit.types import AskFileResponse
from chainlit.cli import run_chainlit
from aimakerspace.text_utils import CharacterTextSplitter, PdfFileLoader, TextFileLoader
from aimakerspace.openai_utils.prompts import (
    UserRolePrompt,
    SystemRolePrompt,
)
from aimakerspace.openai_utils.embedding import EmbeddingModel
from aimakerspace.vectordatabase import VectorDatabase, VectorDatabaseOptions
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
import chainlit as cl
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Instrument the OpenAI client
# cl.instrument_openai()

##### Prompt Templates #####

system_template = """\
Use the following context to answer a user's question.
If you cannot find the answer in the context, say you don't know the answer."""

user_prompt_template = """\
Context:
{context}

Question:
{question}
"""

system_role_prompt = SystemRolePrompt(system_template)
user_role_prompt = UserRolePrompt(user_prompt_template)

### Text Chunking ###

# text_splitter = CharacterTextSplitter()
text_splitter = RecursiveCharacterTextSplitter(
    separators=[
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
        "\u200b",  # Zero-width space
        "\uff0c",  # Fullwidth comma
        "\u3001",  # Ideographic comma
        "\uff0e",  # Fullwidth full stop
        "\u3002",  # Ideographic full stop
        "",
    ],
)


def process_text_file(file: AskFileResponse) -> List[str]:
    """Write the uploaded file to a temporary .txt file, load it, and chunk it."""
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="wb", delete=False, suffix=".txt"
    ) as temp_file:
        temp_file_path = temp_file.name
        temp_file.write(file.content)

    text_loader = TextFileLoader(temp_file_path)
    documents = text_loader.load_documents()
    texts = []
    for doc in documents:
        texts += text_splitter.split_text(doc)
    return texts


def process_pdf_file(file: AskFileResponse) -> List[str]:
    """Write the uploaded file to a temporary .pdf file and load it."""
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="wb", delete=False, suffix=".pdf"
    ) as temp_file:
        temp_file_path = temp_file.name
        temp_file.write(file.content)

    pdf_loader = PdfFileLoader(temp_file_path)
    # PdfFileLoader also handles splitting: each page is returned as a chunk.
    texts = pdf_loader.load_documents()
    return texts


async def send_new_message(content, elements=None):
    msg = cl.Message(content=content, elements=elements)
    await msg.send()
    return msg

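
# NOTE: `RetrievalAugmentedQAPipeline` lives in the local `RagPipeline` module.
# For readers without that module, the interface this app relies on is sketched
# below, inferred from the call sites in this file; the signature and return
# shape are assumptions, not the canonical implementation:
#
#     class RetrievalAugmentedQAPipeline:
#         def __init__(self, system_role_prompt, user_role_prompt, *,
#                      vector_db_retriever: VectorDatabase, llm: ChatOpenAI) -> None: ...
#
#         async def arun_pipeline(self, user_query: str) -> dict:
#             """Retrieves context via `vector_db_retriever`, prompts `llm`, and
#             returns a dict whose "response" value is an async generator that
#             yields completion tokens for streaming."""
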
@cl.on_chat_start
async def on_chat_start():
    print("On Chat Start")
    # await send_new_message("Welcome to the Chat with Files app!")
    msg = cl.Message(content="Welcome to the Chat with Files app!")
    await msg.send()
    print("After First message")

    files = None

    # Wait for the user to upload at least one file
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload up to four text or PDF files to begin!",
            accept=["text/plain", "application/pdf"],
            max_size_mb=10,
            max_files=4,
            timeout=180,
        ).send()

    texts: List[str] = []
    for file in files:
        # Announce each file before it is processed
        # await send_new_message(content=f"Processing `{file.name}`...")
        msg = cl.Message(content=f"Processing `{file.name}`...")
        await msg.send()
        if file.type == "application/pdf":
            texts += process_pdf_file(file)
        if file.type == "text/plain":
            texts += process_text_file(file)

    print(f"Processing {len(texts)} text chunks")

    # Build a Qdrant-backed vector store from the chunked texts
    vector_db_options = VectorDatabaseOptions.QDRANT
    embedding_model = EmbeddingModel(
        embeddings_model_name="text-embedding-3-small", dimensions=1000
    )
    vector_db = VectorDatabase(vector_db_options, embedding_model)
    vector_db = await vector_db.abuild_from_list(texts)

    chat_openai = ChatOpenAI()

    # Create the retrieval-augmented QA chain
    retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
        system_role_prompt,
        user_role_prompt,
        vector_db_retriever=vector_db,
        llm=chat_openai,
    )

    # Let the user know that the system is ready
    msg = cl.Message(content="All files processed. You can now ask questions!")
    await msg.send()

    cl.user_session.set("chain", retrieval_augmented_qa_pipeline)


@cl.on_message
async def main(message: cl.Message):
    print("On Message")

    chain: RetrievalAugmentedQAPipeline = cl.user_session.get("chain")

    msg = cl.Message(content="")
    result = await chain.arun_pipeline(message.content)

    # Stream the response tokens into the message as they arrive
    async for stream_resp in result["response"]:
        await msg.stream_token(stream_resp)
    await msg.send()

    cl.user_session.set("chain", chain)


if __name__ == "__main__":
    run_chainlit(__file__)
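
# Usage note: with the Chainlit CLI installed, this app can also be started with
# `chainlit run <path-to-this-file> -w` (the -w flag watches for file changes and
# auto-reloads during development); the __main__ block above is the programmatic
# equivalent.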