""" | |
This is main logic file for the project responsible for the following: | |
1. Read the loaded file using langchains | |
2. Split the loaded data into chunks | |
3. Ingest the data in vector form | |
4. Conversational Retrieval logic on loaded data create conversational response | |
5. Return the response to the user (Output) | |
""" | |
# Importing the required libraries
import sys
sys.path.append('../..')  # To import the langchain package from the parent directory
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import PyPDFLoader
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
# Function to load the data from the file
def load_data(file_path):
    loader = PyPDFLoader(file_path)
    pages = loader.load()
    return pages
# Function to split the loaded data into chunks
def split_data(data):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=150,
    )
    chunks = splitter.split_documents(data)
    return chunks
# # Creating the OpenAI Embeddings
# embeddings = OpenAIEmbeddings()

# Function to ingest the chunks, in vector form, into an in-memory vector store
def ingest_data(chunks, embeddings):
    vector_store = DocArrayInMemorySearch.from_documents(chunks, embeddings)
    return vector_store
# Function to create the conversational retrieval chain
def create_conversational_response(vector_store, chain_type, k):
    # Creating the retriever; this could also be a contextual compression retriever (see the illustrative sketch below)
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": k})  # search_type can be "similarity" or "mmr"
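    # Illustrative sketch (not used here): swapping in a contextual compression retriever,
    # assuming langchain's ContextualCompressionRetriever and LLMChainExtractor. Note the
    # compressor needs an LLM, so the llm defined further below would have to be created first.
    # from langchain.retrievers import ContextualCompressionRetriever
    # from langchain.retrievers.document_compressors import LLMChainExtractor
    # compressor = LLMChainExtractor.from_llm(llm)
    # retriever = ContextualCompressionRetriever(
    #     base_compressor=compressor,
    #     base_retriever=vector_store.as_retriever(search_type="similarity", search_kwargs={"k": k}),
    # )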
    # Creating memory to hold the chat history
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        input_key="question",
        output_key="answer",
        return_messages=True)

    # Creating the LLM
    llm_name = "gpt-3.5-turbo"
    llm = ChatOpenAI(model=llm_name, temperature=0.5)

    # Creating the prompt template
    template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible.
{context}
Question: {question}
Helpful Answer:"""
    PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

    # Creating the conversational retrieval chain
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type=chain_type,  # chain_type can be "stuff", "refine" or "map_reduce"
        retriever=retriever,
        memory=memory,
        return_source_documents=True,  # The output then contains both "answer" and "source_documents",
                                       # so input_key and output_key must be specified in the memory above
        combine_docs_chain_kwargs={"prompt": PROMPT},
    )
    return chain
# ConversationalResponse class that wires all the defined functions together in a single call
class ConversationalResponse:
    def __init__(self, file, api_key):
        self.file = file
        # api_key is used for the embeddings; ChatOpenAI inside create_conversational_response
        # reads the OPENAI_API_KEY environment variable
        embeddings = OpenAIEmbeddings(openai_api_key=api_key)
        self.data = load_data(self.file)
        self.chunks = split_data(self.data)
        self.vector_store = ingest_data(self.chunks, embeddings)
        self.chain_type = "stuff"
        self.k = 5
        self.chain = create_conversational_response(self.vector_store, self.chain_type, self.k)

    def __call__(self, question, callbacks=None):
        response = self.chain(question, callbacks=callbacks)
        return response['answer']
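
# Minimal usage sketch (illustrative, not part of the deployed app): builds the pipeline on a
# PDF and asks one question. The path "docs/sample.pdf" is a hypothetical placeholder, and the
# API key is assumed to be available in the OPENAI_API_KEY environment variable.
if __name__ == "__main__":
    import os
    api_key = os.environ.get("OPENAI_API_KEY", "")
    qa = ConversationalResponse("docs/sample.pdf", api_key)
    print(qa("What is this document about?"))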