# Hugging Face Space: vishwask/rag (status: Sleeping)
# File size: 10,197 Bytes

import gradio as gr
import os

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings 
from langchain.llms import HuggingFacePipeline
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFaceHub

from pathlib import Path
import chromadb

from transformers import AutoTokenizer
import transformers
import torch
import tqdm 
import accelerate


# default_persist_directory = './chroma_HF/'

# Hugging Face Hub repo ids of the LLMs this app can use.
list_llm = ["mistralai/Mistral-7B-Instruct-v0.2"]
# Short display names: each repo id with its "owner/" prefix removed.
list_llm_simple = list(map(os.path.basename, list_llm))

# Load PDF documents and split them into chunks
def load_doc(list_file_path, chunk_size, chunk_overlap):
    """Load every PDF in ``list_file_path`` and split the result into chunks.

    Args:
        list_file_path: Iterable of PDF file paths, one ``PyPDFLoader`` each.
        chunk_size: ``chunk_size`` forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` forwarded to the same splitter.

    Returns:
        Whatever ``split_documents`` returns for the documents loaded from
        all files, in the order the files were given.
    """
    # Build all loaders up front, then pull their documents in file order.
    pdf_loaders = [PyPDFLoader(path) for path in list_file_path]
    all_pages = [page for pdf_loader in pdf_loaders for page in pdf_loader.load()]

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(all_pages)

# Create vector database
def create_db(splits, collection_name):
    """Embed ``splits`` into a new Chroma collection and return the store.

    Args:
        splits: Document chunks to index (as produced by ``load_doc``).
        collection_name: Name given to the Chroma collection.

    Returns:
        The ``Chroma`` vector store built by ``Chroma.from_documents``, using
        default ``HuggingFaceEmbeddings`` and a ``chromadb.EphemeralClient``
        (no persist directory is configured).
    """
    return Chroma.from_documents(
        documents=splits,
        embedding=HuggingFaceEmbeddings(),
        client=chromadb.EphemeralClient(),
        collection_name=collection_name,
    )


# Load vector database
def load_db():
    """Return a ``Chroma`` store wired to default ``HuggingFaceEmbeddings``.

    No persist directory or collection name is passed, so the store is
    created with Chroma's defaults.
    """
    return Chroma(embedding_function=HuggingFaceEmbeddings())


# Initialize langchain LLM chain
def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
    """Build a ``ConversationalRetrievalChain`` on top of ``vector_db``.

    Args:
        llm_model: Hugging Face Hub repo id of the model to query.
        temperature: Sampling temperature forwarded in ``model_kwargs``.
        max_tokens: Value for ``max_new_tokens`` (TinyLlama is fixed at 250).
        top_k: ``top_k`` sampling value forwarded in ``model_kwargs``.
        vector_db: Vector store whose ``as_retriever()`` feeds the chain.
        progress: Gradio progress tracker used to report setup steps.

    Returns:
        The configured chain; it returns source documents alongside the
        answer and keeps the conversation in a ``ConversationBufferMemory``.

    Raises:
        gr.Error: For ``microsoft/phi-2`` and ``meta-llama/Llama-2-7b-chat-hf``,
            which this app does not support.
    """
    progress(0.1, desc="Initializing HF tokenizer...")

    # HuggingFaceHub uses HF inference endpoints
    progress(0.5, desc="Initializing HF Hub...")

    # Unsupported models are rejected before any endpoint object is built.
    # Warning: langchain issue with trust_remote_code as model_kwargs
    # URL: https://github.com/langchain-ai/langchain/issues/6080
    if llm_model == "microsoft/phi-2":
        raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
    if llm_model == "meta-llama/Llama-2-7b-chat-hf":
        raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")

    # Common generation settings, followed by the per-model overrides.
    model_kwargs = {"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
    if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
        model_kwargs["load_in_8bit"] = True
    elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
        model_kwargs["max_new_tokens"] = 250

    llm = HuggingFaceHub(repo_id=llm_model, model_kwargs=model_kwargs)

    progress(0.75, desc="Defining buffer memory...")
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        # The chain returns several keys; tell the memory which one to store.
        output_key='answer',
        return_messages=True
    )
    retriever = vector_db.as_retriever()
    progress(0.8, desc="Defining retrieval chain...")
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        return_source_documents=True,
        verbose=False,
    )
    progress(0.9, desc="Done!")
    return qa_chain

def _make_collection_name(file_path):
    """Derive a vector-store collection name from a file path's stem."""
    # Remove spaces and limit length to 50 characters
    name = Path(file_path).stem.replace(" ", "-")[:50]
    # Chroma rejects names shorter than 3 characters; pad very short stems.
    if len(name) < 3:
        name = name + "xyz"
    # Enforce alphanumeric first and last characters. Strings are immutable,
    # so the name is rebuilt instead of assigned into by index.
    if not name[0].isalnum():
        name = "A" + name[1:]
    if not name[-1].isalnum():
        name = name[:-1] + "Z"
    return name


def _format_chat_history(history):
    """Flatten (user, assistant) pairs into a list of prefixed strings."""
    lines = []
    for user_message, bot_message in history:
        lines.append(f"User: {user_message}")
        lines.append(f"Assistant: {bot_message}")
    return lines


def _first_three_sources(source_documents):
    """Return three (text, page) pairs, padded when fewer sources exist."""
    pairs = []
    for doc in source_documents[:3]:
        # Langchain sources are zero-based; a missing page number shows as 0.
        page = doc.metadata.get("page", -1) + 1
        pairs.append((doc.page_content.strip(), page))
    pairs.extend([("", 0)] * (3 - len(pairs)))
    return pairs


def start(llm_model, temperature, max_tokens, top_k, 
          vector_db, list_file_obj, chunk_size, chunk_overlap,
         qa_chain, message, history):
    """Answer ``message`` against the uploaded PDFs, building state on demand.

    The vector database is created from the uploaded files when ``vector_db``
    is None, and the retrieval chain is created when ``qa_chain`` is None, so
    both can be carried between calls instead of being rebuilt per message.

    Args:
        llm_model: Hugging Face Hub repo id of the model to query.
        temperature: Sampling temperature forwarded in ``model_kwargs``.
        max_tokens: Value for ``max_new_tokens``.
        top_k: ``top_k`` sampling value.
        vector_db: Existing vector store, or None to build one.
        list_file_obj: Uploaded files; each item is either a path string or
            an object exposing the path as ``.name``. None entries are skipped.
        chunk_size: Chunk size passed to ``load_doc``.
        chunk_overlap: Chunk overlap passed to ``load_doc``.
        qa_chain: Existing retrieval chain, or None to build one.
        message: The user's question.
        history: Chat history as a list of (user, assistant) pairs.

    Returns:
        A 10-tuple: ``(qa_chain, vector_db, collection_name, new_history,
        source1, page1, source2, page2, source3, page3)``. Missing sources
        are returned as ``""`` with page ``0``.

    Raises:
        gr.Error: If no document has been uploaded.
    """
    # Create list of documents (when valid)
    list_file_path = [getattr(f, "name", f) for f in (list_file_obj or []) if f is not None]
    if not list_file_path:
        raise gr.Error("Please upload at least one PDF document before asking a question.")

    # Create collection_name for vector database from the first file
    collection_name = _make_collection_name(list_file_path[0])
    print('Collection name: ', collection_name)

    # The database must exist before a retriever can be taken from it.
    if vector_db is None:
        doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
        vector_db = create_db(doc_splits, collection_name)

    if qa_chain is None:
        # HuggingFaceHub uses HF inference endpoints
        llm = HuggingFaceHub(
            repo_id=llm_model,
            model_kwargs={
                "temperature": temperature,
                "max_new_tokens": max_tokens,
                "top_k": top_k,
                "load_in_8bit": True,
            },
        )
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key='answer',
            return_messages=True,
        )
        qa_chain = ConversationalRetrievalChain.from_llm(
            llm,
            retriever=vector_db.as_retriever(),
            chain_type="stuff",
            memory=memory,
            return_source_documents=True,
            verbose=False,
        )

    history = history or []
    formatted_chat_history = _format_chat_history(history)

    # Generate response using QA chain
    response = qa_chain({"question": message, "chat_history": formatted_chat_history})
    response_answer = response["answer"]
    # Keep only the text after the prompt's "Helpful Answer:" marker, if present.
    if "Helpful Answer:" in response_answer:
        response_answer = response_answer.split("Helpful Answer:")[-1]

    (
        (response_source1, response_source1_page),
        (response_source2, response_source2_page),
        (response_source3, response_source3_page),
    ) = _first_three_sources(response["source_documents"])

    # Append user message and response to chat history
    new_history = history + [(message, response_answer)]

    return qa_chain, vector_db, collection_name, new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
    
def demo():
    """Build the Gradio chat UI, wire it to ``start`` and launch the app.

    The layout has an upload/settings panel, the chatbot, a collapsible panel
    showing three document references, and the message box with Submit and
    Clear buttons. Submitting (Enter or the button) calls ``start``.
    """
    with gr.Blocks(theme="base") as app:
        # Per-session state carried between calls to `start`.
        vector_db = gr.State()
        qa_chain = gr.State()
        collection_name = gr.State()

        with gr.Accordion("Documents and model settings", open=True):
            list_file_obj = gr.Files(file_count="multiple", file_types=[".pdf"],
                                     label="Upload your PDF documents")
            with gr.Row():
                chunk_size = gr.Slider(minimum=100, maximum=1000, value=600, step=20, label="Chunk size")
                chunk_overlap = gr.Slider(minimum=10, maximum=200, value=50, step=10, label="Chunk overlap")
            llm_model = gr.Dropdown(choices=list_llm, value=list_llm[0], label="LLM model")
            with gr.Row():
                temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")
                max_tokens = gr.Slider(minimum=224, maximum=4096, value=1024, step=32, label="Max tokens")
                top_k = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k samples")

        chatbot = gr.Chatbot(height=300)
        with gr.Accordion("Advanced - Document references", open=False):
            with gr.Row():
                doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
                source1_page = gr.Number(label="Page", scale=1)
            with gr.Row():
                doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
                source2_page = gr.Number(label="Page", scale=1)
            with gr.Row():
                doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
                source3_page = gr.Number(label="Page", scale=1)
        with gr.Row():
            msg = gr.Textbox(placeholder="Type message", container=True)
        with gr.Row():
            submit_btn = gr.Button("Submit")
            clear_btn = gr.ClearButton([msg, chatbot])

        reference_outputs = [doc_source1, source1_page,
                             doc_source2, source2_page,
                             doc_source3, source3_page]
        # Order matches the positional parameters of `start`.
        chat_inputs = [llm_model, temperature, max_tokens, top_k,
                       vector_db, list_file_obj, chunk_size, chunk_overlap,
                       qa_chain, msg, chatbot]
        # Order matches the 10 values returned by `start`.
        chat_outputs = [qa_chain, vector_db, collection_name, chatbot] + reference_outputs

        # Enter and the Submit button run the same handler, then empty the box.
        for trigger in (msg.submit, submit_btn.click):
            trigger(start,
                    inputs=chat_inputs,
                    outputs=chat_outputs,
                    queue=False).then(lambda: "", inputs=None, outputs=msg, queue=False)

        # A different set of files invalidates the stored database and chain.
        list_file_obj.change(lambda: (None, None),
                             inputs=None,
                             outputs=[vector_db, qa_chain],
                             queue=False)

        # Reset the chat and references; also drop the chain so its
        # conversation memory does not outlive the cleared chat.
        clear_btn.click(lambda: [None, "", 0, "", 0, "", 0, None],
                        inputs=None,
                        outputs=[chatbot] + reference_outputs + [qa_chain],
                        queue=False)

    app.queue().launch(debug=True)

# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo()