Build error
import os

from langchain.chains.llm import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PDFPlumberLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Key redacted: never hardcode a real API key in a public Space.
# Set it as a secret in the Space settings and it will be injected
# into the environment automatically.
os.environ['OPENAI_API_KEY'] = 'sk-...'

llm = ChatOpenAI(temperature=0, model_name="gpt-4-0125-preview")
def get_summ(path):
    loader = PDFPlumberLoader(path)
    docs = loader.load()

    # Map: summarize each chunk independently
    map_template = """The following is a set of documents
    {docs}
    Based on this list of docs, please identify the main themes and determine which genes are relevant (or irrelevant) to the disease discussed, along with any associated p-values if available.
    Helpful Answer:"""
    map_prompt = PromptTemplate.from_template(map_template)
    map_chain = LLMChain(llm=llm, prompt=map_prompt)
    # Reduce: consolidate the per-chunk summaries
    reduce_template = """The following is a set of summaries:
    {doc_summaries}
    Take these and distill them into a final, consolidated summary of the main themes.
    Helpful Answer:"""
    reduce_prompt = PromptTemplate.from_template(reduce_template)
    reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

    # Takes a list of documents, combines them into a single string,
    # and passes that to an LLMChain
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=reduce_chain, document_variable_name="doc_summaries"
    )
    # Combines and iteratively reduces the mapped documents
    reduce_documents_chain = ReduceDocumentsChain(
        # This is the final chain that is called.
        combine_documents_chain=combine_documents_chain,
        # Used if the documents exceed the context window of `StuffDocumentsChain`
        collapse_documents_chain=combine_documents_chain,
        # The maximum number of tokens to group documents into.
        token_max=100000,
    )

    # Combine documents by mapping a chain over them, then combining the results
    map_reduce_chain = MapReduceDocumentsChain(
        # Map chain
        llm_chain=map_chain,
        # Reduce chain
        reduce_documents_chain=reduce_documents_chain,
        # The variable name in the llm_chain to put the documents in
        document_variable_name="docs",
        # Do not return the results of the map steps in the output
        return_intermediate_steps=False,
    )
    # Split on token counts so each chunk fits within the model's context window
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=100000, chunk_overlap=0
    )
    split_docs = text_splitter.split_documents(docs)
    return map_reduce_chain.run(split_docs)
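
For reference, a minimal way to exercise the function; the file name is a placeholder for any local PDF, not a file from the original post:

if __name__ == "__main__":
    # Hypothetical path, for illustration only
    print(get_summ("example.pdf"))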
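
If the Space fails at build time rather than at runtime, the most common cause is a missing dependency. A sketch of a requirements.txt covering the imports above, assuming the pre-0.2 langchain API this code uses; the exact pins are an assumption:

langchain>=0.1,<0.2   # pre-0.2 import paths used above
openai                # backend for ChatOpenAI
pdfplumber            # required by PDFPlumberLoader
tiktoken              # required by from_tiktoken_encoder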