# Gradio Space: question answering over internal PDF documents
# (LangChain + Chroma + OpenAI, served through gr.ChatInterface).
# Third-party stack: gradio for the chat UI, langchain for the RAG pipeline.
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers import SVMRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
def load_data():
    """Load all PDFs under ./data, chunk them, and build two retrievers.

    Returns:
        tuple: ``(svm_retriever, vectorstore)`` — an SVM-based retriever and
        a Chroma vector store, both built over the same chunks using OpenAI
        embeddings.
    """
    # Load every PDF found recursively under ./data.
    loader = DirectoryLoader(
        './data', glob="**/*.pdf", show_progress=True, loader_cls=PyPDFLoader
    )
    docs = loader.load()
    # PDF extraction inserts hard line breaks mid-sentence; flatten them so
    # chunking and embedding see continuous prose.
    for doc in docs:
        doc.page_content = doc.page_content.replace("\n", " ")
    # Split into overlapping chunks sized for embedding.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    all_splits = text_splitter.split_documents(docs)
    # Dense vector store used by the QA chain for similarity search.
    vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
    # SVM retriever over the same chunks, cf.
    # https://python.langchain.com/docs/use_cases/question_answering.html#go-deeper-3
    svm_retriever = SVMRetriever.from_documents(all_splits, OpenAIEmbeddings())
    return svm_retriever, vectorstore


# Build the index once at import time so every question reuses it.
svm_retriever, vectorstore = load_data()
def process_question(question, history, svm_retriever=svm_retriever, vectorstore=vectorstore):
    """Answer *question* against the indexed documents and append sources.

    Args:
        question: The user's query string.
        history: Chat history supplied by ``gr.ChatInterface`` (unused).
        svm_retriever: Bound at definition time to the module-level retriever.
        vectorstore: Bound at definition time to the module-level Chroma store.

    Returns:
        str: The LLM's answer followed by the source chunks it drew from.
    """
    # NOTE(review): the SVM retrieval result is only counted, never used by
    # the QA chain below (which queries the Chroma retriever) — presumably a
    # diagnostic left in for comparing retrievers; confirm before removing.
    docs_svm = svm_retriever.get_relevant_documents(question)
    print(len(docs_svm))
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    qa_chain = RetrievalQA.from_chain_type(
        llm, retriever=vectorstore.as_retriever(), return_source_documents=True
    )
    result = qa_chain({"query": question})
    output = f"""============RESULT==============
\n
{result["result"]}
\n
============SOURCES=============
"""
    # Render each supporting chunk with the file it came from.
    lines = []
    source_docs = [(x.metadata["source"], x.page_content) for x in result["source_documents"]]
    for i, (source, content) in enumerate(source_docs):
        lines.append(f"* CHUNK: {i} *")
        lines.append(f"original doc: {source}")
        lines.append(f"{content}")
        lines.append('')  # blank line between chunks
    # Join with newlines to get the multi-line sources section.
    output += '\n'.join(lines)
    return output
# Chat UI wrapping process_question; ChatInterface supplies (message, history).
iface = gr.ChatInterface(
    title="Internal DOC QA",
    # theme expects a Theme *instance* (or name string), not the class itself —
    # passing the bare class is a likely cause of a startup runtime error.
    theme=gr.themes.Soft(),
    fn=process_question,  # the function to wrap
    examples=[
        ["what is the process of raising an incident?"],
        ["What is Cx0 program management?"],
        [
            "What is process for identifying risks that can impact the desired outcomes of a project?"
        ],
        ["What is the release management process?"],
    ],
)

if __name__ == "__main__":
    iface.launch()