Spaces:
Runtime error
Runtime error
File size: 2,759 Bytes
611aebd 9c2548e 13b16b6 611aebd 9c2548e 611aebd 9c2548e 13b16b6 9c2548e 13b16b6 9c2548e 13b16b6 9c2548e 13b16b6 9c2548e 373316d 9c2548e 373316d 13b16b6 9c2548e 611aebd 9c2548e 13b16b6 9c2548e 13b16b6 611aebd 9c2548e 611aebd 13b16b6 611aebd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import gradio as gr
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.retrievers import SVMRetriever
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
def load_data():
    """Load all PDFs under ./data, chunk them, and build two retrievers.

    Returns:
        tuple: ``(svm_retriever, vectorstore)`` — an SVM-based retriever and
        a Chroma vector store, both built over the same document chunks.
    """
    # Recursively load every PDF from the local data directory.
    loader = DirectoryLoader('./data', glob="**/*.pdf", show_progress=True, loader_cls=PyPDFLoader)
    docs = loader.load()
    # Replace newlines with spaces so PDF line breaks don't split sentences.
    # (Plain loop instead of a comprehension used only for its side effects.)
    for doc in docs:
        doc.page_content = doc.page_content.replace("\n", " ")
    # Split the documents into overlapping chunks sized for embedding.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    all_splits = text_splitter.split_documents(docs)
    # Dense vector store for similarity search over the chunks.
    vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
    # SVM retriever over the same chunks.
    # https://python.langchain.com/docs/use_cases/question_answering.html#go-deeper-3
    svm_retriever = SVMRetriever.from_documents(all_splits, OpenAIEmbeddings())
    return svm_retriever, vectorstore
# Build the retrievers once at import time so every request reuses them.
svm_retriever, vectorstore = load_data()
def process_question(question, svm_retriever=svm_retriever, vectorstore=vectorstore):
    """Answer *question* via a RetrievalQA chain and list the source chunks.

    Args:
        question: Natural-language question entered in the UI.
        svm_retriever: Bound at definition time to the module-level retriever.
        vectorstore: Bound at definition time to the module-level Chroma store.

    Returns:
        str: The model's answer followed by the retrieved source chunks.
    """
    # NOTE(review): this SVM retrieval feeds only the debug print below —
    # its results are never used in the answer. Confirm whether it can go.
    docs_svm = svm_retriever.get_relevant_documents(question)
    print(len(docs_svm))
    # temperature=0 keeps the model's answers deterministic.
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    qa_chain = RetrievalQA.from_chain_type(
        llm, retriever=vectorstore.as_retriever(), return_source_documents=True
    )
    result = qa_chain({"query": question})
    output = f"""============RESULT==============
\n
{result["result"]}
\n
============SOURCES=============
"""
    # Append each source chunk with its originating document path.
    lines = []
    for i, (source, content) in enumerate(
        (x.metadata["source"], x.page_content) for x in result["source_documents"]
    ):
        lines.append(f"* CHUNK: {i} *")
        lines.append(f"original doc: {source}")
        lines.append(f"{content}")
        lines.append('')  # blank line between chunks
    output += '\n'.join(lines)
    return output
# Gradio UI: one text box in, the formatted answer (plus sources) out.
iface = gr.Interface(
    fn=process_question,  # the function to wrap
    inputs="text",        # the input type
    outputs="text",       # the output type
    # Example questions shown beneath the input box. Plain string literals:
    # the original f-prefixes had no placeholders to interpolate.
    examples=[
        ["what is the process of raising an incident?"],
        ["What is Cx0 program management?"],
        [
            "What is process for identifying risks that can impact the desired outcomes of a project?"
        ],
        ["What is the release management process?"],
    ],
)
# Launch the Gradio app only when run as a script (not when imported).
if __name__ == "__main__":
    iface.launch()
|