# Spaces: jet-taekyo / AIE4_MVP
# Build error
# File size: 5,812 Bytes

### Import Section ###
import os
import re
import chainlit as cl
from langchain.storage import LocalFileStore 
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, Runnable, RunnableParallel
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from chainlit.types import AskFileResponse
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance
from langchain.embeddings import CacheBackedEmbeddings
from langchain_qdrant import QdrantVectorStore
from langchain.schema import StrOutputParser
from langchain_core.documents import Document
from typing import cast
from dotenv import load_dotenv
import tempfile

### Environment Variables ###
# Read settings from the local `.env` file before anything below runs.
load_dotenv('.env')

### Global Section ###
# Byte store rooted at ./VECTOR_STORE_CACHE; passed to CacheBackedEmbeddings
# as the document-embedding cache in get_retriever().
VECTOR_STORE_CACHE = LocalFileStore(root_path = "VECTOR_STORE_CACHE")
# Byte store rooted at ./E2E_CACHE; holds whole answers keyed by the cleaned
# question text (see caching_rag_respnse / load_cached_response).
E2E_CACHE = LocalFileStore(root_path = "E2E_CACHE")

#😉 helper functions
def clean_text(text: str) -> str:
    """Return ``text`` reduced to its ASCII letters and digits, in order.

    Everything else (whitespace, punctuation, non-ASCII characters) is dropped,
    so the result may be an empty string.
    """
    kept_runs = re.findall(r'[a-zA-Z0-9]+', text)
    return ''.join(kept_runs)

def caching_rag_respnse(question: str, answer: str) -> None:
    """Store ``answer`` in the end-to-end cache under the cleaned ``question``.

    Args:
        question: The user's question; ``clean_text`` turns it into the key.
        answer: The answer text, stored UTF-8 encoded.

    Nothing is stored when the cleaned question is empty.
    """
    key = clean_text(question)
    if not key:
        # A question with no ASCII letters or digits (e.g. "???" or text in a
        # non-Latin script) cleans down to "", which cannot name a cache
        # entry -- LocalFileStore is expected to reject it as an invalid key.
        # Skip caching instead of failing the request.
        return
    E2E_CACHE.mset([(key, answer.encode('utf-8'))])

def load_cached_response(input):
    """Look up a previously cached answer for ``input['question']``.

    Args:
        input: Mapping with a ``'question'`` entry holding the question text.

    Returns:
        The cached answer as ``str``, or ``False`` when there is no usable
        entry (no key, nothing stored, or an empty stored value).
    """
    key = clean_text(input['question'])
    if not key:
        # The question cleaned down to "" (no ASCII letters or digits).  There
        # can be no entry under an empty key, so report a miss without asking
        # the store, which is expected to reject such a key.
        return False
    cached_answer = E2E_CACHE.mget([key])[0]
    return cached_answer.decode('utf-8') if cached_answer else False


#😉 prompt
# Prompt text with two placeholders, {question} and {context}.  Despite the
# "SYSTEM" in its name it is sent as a single human message (see RAG_PROMPT).
RAG_SYSTEM_MSG_TEMPLATE = """\
You are a helpful assistant that uses the provided context to answer questions. If Context does not coantain any information to answer Question, just say "I don't know".

Question:
{question}
Context:
{context}
"""
# One-message chat prompt built from the template above.
RAG_PROMPT = ChatPromptTemplate.from_messages(
    [
        ('human', RAG_SYSTEM_MSG_TEMPLATE),
    ]
)


#😉 retriever
# Process-wide in-memory Qdrant client, created on first use.  A client built
# with ":memory:" keeps its collections inside that client object, so the
# "collection already exists" check in get_retriever() can only ever succeed
# when every call goes through the same client.
_QDRANT_CLIENT = None


def _get_qdrant_client():
    """Return the shared in-memory Qdrant client, creating it on first use."""
    global _QDRANT_CLIENT
    if _QDRANT_CLIENT is None:
        _QDRANT_CLIENT = QdrantClient(":memory:")
    return _QDRANT_CLIENT


async def _load_pdf_chunks(content: bytes):
    """Parse PDF bytes and split the pages into overlapping text chunks."""
    # PyMuPDFLoader is given a file path, so the bytes go through a temp file.
    # delete=False plus leaving the ``with`` (which closes the handle) lets the
    # loader reopen the file by name; we remove it ourselves afterwards.
    with tempfile.NamedTemporaryFile(mode='wb', delete=False, suffix='.pdf') as temp_file:
        temp_file.write(content)
        temp_file_path = temp_file.name
    try:
        documents = PyMuPDFLoader(temp_file_path).load()
    finally:
        # Previously the temp file was never deleted and piled up on disk.
        os.remove(temp_file_path)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return await text_splitter.atransform_documents(documents)


async def get_retriever(file: AskFileResponse):
    """Build (or reuse) a vector store for an uploaded PDF and return a retriever.

    Args:
        file: The uploaded file; its ``name`` and ``content`` (raw bytes) are used.

    Returns:
        A ``(retriever, already_exist)`` pair.  ``retriever`` is an MMR
        retriever returning 3 documents.  ``already_exist`` is True when a
        collection for this exact file was found in the shared client and
        reused, False when it was created and filled by this call.

    Raises:
        Whatever PDF loading, embedding or Qdrant raise.  If filling a freshly
        created collection fails, that collection is dropped before re-raising.
    """
    import hashlib

    chunks = await _load_pdf_chunks(file.content)

    client = _get_qdrant_client()
    core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
        underlying_embeddings=core_embeddings,
        document_embedding_cache=VECTOR_STORE_CACHE,
        namespace=core_embeddings.model,
    )

    # Key the collection on name *and* content: with a shared client, a
    # different PDF uploaded under the same file name must not be answered
    # from the earlier file's vectors.
    content_digest = hashlib.sha256(file.content).hexdigest()[:16]
    collection_name = f"pdf_to_parse_{clean_text(file.name)}_{content_digest}"

    # No ``await`` between this check and create_collection(), so two sessions
    # on the same event loop cannot both decide to create the collection.
    already_exist = any(
        existing.name == collection_name
        for existing in client.get_collections().collections
    )
    if not already_exist:
        client.create_collection(
            collection_name=collection_name,
            # Vector size configured for the embedding model used above.
            vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
        )

    vectorstore = QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=cached_embedder,
    )

    if not already_exist:
        try:
            vectorstore.add_documents(chunks)
        except Exception:
            # Do not leave an empty/partial collection behind: a later upload
            # of the same file would find it and skip indexing.
            client.delete_collection(collection_name)
            raise

    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
    return retriever, already_exist


def get_rag(retriever):
    """Assemble the RAG chain that answers a question using ``retriever``.

    The chain takes the question as its input.  It is sent to ``retriever`` to
    produce ``context`` and passed through unchanged as ``question``; both fill
    RAG_PROMPT, which goes to the chat model, and the model output is parsed
    into a plain string.  The chain is configured with run name "RAG".
    """
    llm = ChatOpenAI(model="gpt-4o-mini", streaming=True)

    # Fan the single input out into the two variables the prompt expects.
    prompt_inputs = RunnableParallel(
        context=retriever,
        question=lambda q: q,
    )
    pipeline = prompt_inputs | RAG_PROMPT | llm | StrOutputParser()

    return pipeline.with_config({'run_name': 'RAG'})





### On Chat Start (Session Start) Section ###
@cl.on_chat_start
async def on_chat_start():
    """Ask the user for a PDF, build its RAG chain and keep it in the session.

    Prompts until a file is uploaded, shows a progress message while the
    retriever is built, then stores the chain under the session key "chain"
    for the message handler to use.
    """
    files = None

    # Keep prompting until the user actually uploads a file.
    while files is None:
        files = await cl.AskFileMessage(
            content="Hello!! I'm Jet! Please upload a Pdf File file to begin!",
            accept=["application/pdf"],
            max_size_mb=10,
            timeout=180,
        ).send()

    # Only the first uploaded file is used.
    file = files[0]
    msg = cl.Message(content=f"Processing `{file.name}`...", disable_human_feedback=True)
    await msg.send()

    # Build the retriever for this file and wrap it in the RAG chain.
    retriever, already_exist = await get_retriever(file)
    rag_chain = get_rag(retriever)

    # Let the user know that the system is ready.
    if already_exist:
        msg.content = "VectorStore already exist. You can now ask questions!"
    else:
        msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", rag_chain)




### Rename Chains ###
# Display names to substitute for specific message authors.
_AUTHOR_ALIASES = {"Assistant": "Jet"}


@cl.author_rename
def rename(orig_author: str):
    """Return the display name for ``orig_author``.

    "Assistant" is shown as "Jet"; every other author name is returned as is.
    """
    return _AUTHOR_ALIASES.get(orig_author, orig_author)


### On Message Section ###
@cl.on_message
async def main(message):
    """Answer a user message, from the response cache when possible.

    On a cache hit the stored answer is sent as is.  Otherwise the question is
    streamed through the session's RAG chain, the finished message is sent,
    and the answer is then written to the cache.

    Args:
        message: The incoming Chainlit message; its ``content`` is the question.
    """
    # NOTE(review): the cache key is derived from the question text only, so an
    # answer cached while one PDF was loaded is also returned for the same
    # question asked about a different PDF.  Consider adding the document to
    # the key.
    cached_answer = load_cached_response({'question': message.content})
    if cached_answer:
        await cl.Message(content=cached_answer).send()
        return

    chain = cast(Runnable, cl.user_session.get("chain"))

    msg = cl.Message(content="")
    async for stream_resp in chain.astream(message.content):
        await msg.stream_token(stream_resp)

    # Finalize the streamed reply before touching the cache, so that a failing
    # cache write cannot leave the user's message unsent.
    await msg.send()

    # An empty answer would be read back as a cache miss anyway; don't store it.
    if msg.content:
        caching_rag_respnse(question=message.content, answer=msg.content)