# PythonicRAG / app.py
import os
from RagPipeline import RetrievalAugmentedQAPipeline
from typing import List
from chainlit.types import AskFileResponse
from chainlit.cli import run_chainlit
from aimakerspace.text_utils import CharacterTextSplitter, PdfFileLoader, TextFileLoader
from aimakerspace.openai_utils.prompts import (
    UserRolePrompt,
    SystemRolePrompt,
)
from aimakerspace.openai_utils.embedding import EmbeddingModel
from aimakerspace.vectordatabase import VectorDatabase, VectorDatabaseOptions
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
import chainlit as cl
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Instrument the OpenAI client
# cl.instrument_openai()
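# (Uncommenting cl.instrument_openai() would wrap OpenAI calls with Chainlit's
# instrumentation so they show up as steps in the UI; it is optional and left
# disabled here.)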
##### Prompt Templates #####
system_template = """\
Use the following context to answer the user's question. If you cannot find the answer in the context, say you don't know the answer."""
user_prompt_template = """\
Context:
{context}
Question:
{question}
"""
system_role_prompt = SystemRolePrompt(system_template)
user_role_prompt = UserRolePrompt(user_prompt_template)
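# Illustration: with context "Paris is the capital of France." and question
# "What is the capital of France?", the user message rendered from
# user_prompt_template reads:
#
#   Context:
#   Paris is the capital of France.
#   Question:
#   What is the capital of France?
#
# (Assumption: the aimakerspace RolePrompt classes fill the template from keyword
# arguments; only the template text itself comes from this file.)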
### Text Chunking ###
# text_splitter = CharacterTextSplitter()
text_splitter = RecursiveCharacterTextSplitter(
separators=[
"\n\n",
"\n",
" ",
".",
",",
"\u200b", # Zero-width space
"\uff0c", # Fullwidth comma
"\u3001", # Ideographic comma
"\uff0e", # Fullwidth full stop
"\u3002", # Ideographic full stop
"",
],
)
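# The recursive splitter tries the separators above in order: paragraph breaks
# first, then newlines, spaces, and punctuation (including the CJK forms), finally
# falling back to "" so no chunk can exceed the size limit. Chunk size and overlap
# are left at the library defaults here (4000 characters with 200 overlap in
# langchain-text-splitters at the time of writing). For example:
#
#   text_splitter.split_text("para one\n\npara two")
#   # -> ["para one\n\npara two"]  (short input fits in a single chunk)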
def process_text_file(file: AskFileResponse) -> List[str]:
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="wb", delete=False, suffix=".txt"
    ) as temp_file:
        temp_file_path = temp_file.name
        temp_file.write(file.content)

    text_loader = TextFileLoader(temp_file_path)
    documents = text_loader.load_documents()
    texts = []
    for doc in documents:
        texts += text_splitter.split_text(doc)
    os.unlink(temp_file_path)  # remove the temporary file once its contents are loaded
    return texts
def process_pdf_file(file: AskFileResponse) -> List[str]:
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="wb", delete=False, suffix=".pdf"
    ) as temp_file:
        temp_file_path = temp_file.name
        temp_file.write(file.content)

    pdf_loader = PdfFileLoader(temp_file_path)
    texts = pdf_loader.load_documents()  # PdfFileLoader also splits the text, in this case by page
    os.unlink(temp_file_path)  # remove the temporary file once its contents are loaded
    return texts
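# Both helpers spill the uploaded bytes to a named temporary file because the
# aimakerspace loaders take a filesystem path rather than an in-memory buffer.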
async def send_new_message(content, elements=None):
    msg = cl.Message(content, elements=elements)
    await msg.send()
    return msg
@cl.on_chat_start
async def on_chat_start():
print("On Chat Start")
# await send_new_message("Welcome to the Chat with Files app!")
msg = cl.Message(content="Welcome to the Chat with Files app!")
await msg.send()
print("After First message")
    files = None
    # Wait for the user to upload at least one file
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload up to four text or PDF files to begin!",
            accept=["text/plain", "application/pdf"],
            max_size_mb=10,
            max_files=4,
            timeout=180,
        ).send()
    texts: List[str] = []
    for file in files:
        # await send_new_message(content=f"Processing `{file.name}`...")
        msg = cl.Message(content=f"Processing `{file.name}`...")
        await msg.send()
        if file.type == "application/pdf":
            texts += process_pdf_file(file)
        elif file.type == "text/plain":
            texts += process_text_file(file)
print(f"Processing {len(texts)} text chunks")
    # Create a Qdrant-backed vector store
    vector_db_options = VectorDatabaseOptions.QDRANT
    embedding_model = EmbeddingModel(embeddings_model_name="text-embedding-3-small", dimensions=1000)
    vector_db = VectorDatabase(vector_db_options, embedding_model)
    vector_db = await vector_db.abuild_from_list(texts)
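    # Note (assumption about the embedding setup): text-embedding-3-small natively
    # returns 1536-dimensional vectors, and dimensions=1000 asks the API to truncate
    # them (supported by the v3 embedding models); the Qdrant collection built by
    # abuild_from_list is presumed to use this same dimensionality.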
chat_openai = ChatOpenAI()
    # Create the retrieval-augmented QA chain
    retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
        system_role_prompt,
        user_role_prompt,
        vector_db_retriever=vector_db,
        llm=chat_openai,
    )

    # Let the user know that the system is ready
    msg = cl.Message(content=f"Processing done for {len(files)} file(s). You can now ask questions!")
    await msg.send()
cl.user_session.set("chain", retrieval_augmented_qa_pipeline)
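    # The pipeline is stored in cl.user_session, which is scoped to a single
    # Chainlit session: each connected user chats against the vector store built
    # from their own uploads rather than a shared global index.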
@cl.on_message
async def main(message: cl.Message):
msg = cl.Message(content="on message")
await msg.send()
chain :RetrievalAugmentedQAPipeline = cl.user_session.get("chain")
msg = cl.Message(content="")
result = await chain.arun_pipeline(message.content)
async for stream_resp in result.get('response'):
await msg.stream_token(stream_resp)
await msg.send()
cl.user_session.set("chain", chain)
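    # arun_pipeline is consumed above as a dict whose "response" entry is an async
    # generator of tokens, which lets the UI render the answer incrementally
    # instead of waiting for the full completion.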
if __name__ == "__main__":
run_chainlit(__file__)
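    # The app can be started with `python app.py` (via run_chainlit) or, for
    # auto-reload during development, with `chainlit run app.py -w`.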