Spaces:
Running
Running
from typing import Dict, List, Tuple | |
from langchain import OpenAI, PromptTemplate | |
from langchain.chains import LLMChain | |
from langchain.chains.base import Chain | |
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain | |
from langchain.chains.question_answering import load_qa_chain | |
from langchain.prompts import FewShotPromptTemplate | |
from langchain.vectorstores import FAISS | |
from pydantic import BaseModel | |
class CustomChain(Chain, BaseModel): | |
vstore: FAISS | |
chain: BaseCombineDocumentsChain | |
key_word_extractor: Chain | |
def input_keys(self) -> List[str]: | |
return ["question"] | |
def output_keys(self) -> List[str]: | |
return ["answer", "sources"] | |
def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: | |
question = inputs["question"] | |
chat_history_str = _get_chat_history(inputs["chat_history"]) | |
if chat_history_str: | |
new_question = self.key_word_extractor.run( | |
question=question, chat_history=chat_history_str | |
) | |
else: | |
new_question = question | |
docs = self.vstore.similarity_search(new_question, k=4) | |
new_inputs = inputs.copy() | |
new_inputs["question"] = new_question | |
new_inputs["chat_history"] = chat_history_str | |
answer, _ = self.chain.combine_docs(docs, **new_inputs) | |
sources = [] | |
if "SOURCES:" in answer: | |
answer, sources = answer.split("SOURCES:") | |
sources = sources.split(", ") | |
return {"answer": answer.strip(), "sources": sources} | |
def get_chain(vectorstore: FAISS) -> Chain: | |
_eg_template = """## Example: | |
Chat History: | |
{chat_history} | |
Follow Up question: {question} | |
Standalone question: {answer}""" | |
_eg_prompt = PromptTemplate( | |
template=_eg_template, | |
input_variables=["chat_history", "question", "answer"], | |
) | |
_prefix = """Given the following Chat History and a Follow Up Question, rephrase the Follow Up Question to be a new Standalone Question that takes the Chat History and context in to consideration. You should assume that the question is related to the TokCast podcast.""" | |
_suffix = """## Example: | |
Chat History: | |
{chat_history} | |
Follow Up Input: {question} | |
Standalone question:""" | |
examples = [ | |
{ | |
"question": "Who is that?", | |
"chat_history": "Human: What is the TokCast podcast?\nAssistant: TokCast is a podcast about the philosophy of David Deutsch.", | |
"answer": "Who is David Deutsch?", | |
}, | |
{ | |
"question": "What is the worldview presented here?", | |
"chat_history": "Human: What is the TokCast podcast?\nAssistant: TokCast is a podcast about the philosophy of David Deutsch.\nHuman: Who is that?\nAssistant: David Deutsch is a philosopher, physicist, and author. He is the author of The Beginning of Infinity, Fabric of Reality, and one of the pioneers of the field of quantum computing.", | |
"answer": "What is David Deutsch's worldview?", | |
}, | |
] | |
prompt = FewShotPromptTemplate( | |
prefix=_prefix, | |
suffix=_suffix, | |
# example_selector=example_selector, | |
examples=examples, | |
example_prompt=_eg_prompt, | |
input_variables=["question", "chat_history"], | |
) | |
llm = OpenAI(temperature=0, model_name="text-davinci-003") | |
key_word_extractor = LLMChain(llm=llm, prompt=prompt) | |
EXAMPLE_PROMPT = PromptTemplate( | |
template="CONTENT:\n{page_content}\n----------\nSOURCE:\n{source}\n", | |
input_variables=["page_content", "source"], | |
) | |
template = """You are an AI assistant for the TokCast Podcast. You're trained on all the transcripts of the podcast. | |
Given a QUESTION and a series one or more CONTENT and SOURCE sections from a long document provide a conversational answer as "ANSWER" and a "SOURCES" output which lists verbatim the SOURCEs used in generating the response. | |
You should only use SOURCEs that are explicitly listed as a SOURCE in the context. | |
ALWAYS include the "SOURCES" as part of the response. If you don't have any sources, just say "SOURCES:" | |
If you don't know the answer, just say "I'm not sure. Check out Brett's Channel" Don't try to make up an answer. | |
QUESTION: {question} | |
========= | |
{context} | |
========= | |
ANSWER:""" | |
PROMPT = PromptTemplate(template=template, input_variables=["question", "context"]) | |
doc_chain = load_qa_chain( | |
OpenAI(temperature=0, model_name="text-davinci-003", max_tokens=-1), | |
chain_type="stuff", | |
prompt=PROMPT, | |
document_prompt=EXAMPLE_PROMPT, | |
) | |
return CustomChain( | |
chain=doc_chain, vstore=vectorstore, key_word_extractor=key_word_extractor | |
) | |
def _get_chat_history(chat_history: List[Tuple[str, str]]): | |
buffer = "" | |
for human_s, ai_s in chat_history: | |
human = "Human: " + human_s | |
ai = "Assistant: " + ai_s | |
buffer += "\n" + "\n".join([human, ai]) | |
return buffer | |