Spaces:

andrewgleave
/

tokbot

Running

tokbot / chain.py

59b8bcd about 2 years ago

5 kB

	from typing import Dict, List, Tuple

	from langchain import OpenAI, PromptTemplate
	from langchain.chains import LLMChain
	from langchain.chains.base import Chain
	from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
	from langchain.chains.question_answering import load_qa_chain
	from langchain.prompts import FewShotPromptTemplate
	from langchain.vectorstores import FAISS

	from pydantic import BaseModel


	class CustomChain(Chain, BaseModel):

	vstore: FAISS
	chain: BaseCombineDocumentsChain
	key_word_extractor: Chain

	@property
	def input_keys(self) -> List[str]:
	return ["question"]

	@property
	def output_keys(self) -> List[str]:
	return ["answer", "sources"]

	def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
	question = inputs["question"]
	chat_history_str = _get_chat_history(inputs["chat_history"])
	if chat_history_str:
	new_question = self.key_word_extractor.run(
	question=question, chat_history=chat_history_str
	)
	else:
	new_question = question
	docs = self.vstore.similarity_search(new_question, k=4)
	new_inputs = inputs.copy()
	new_inputs["question"] = new_question
	new_inputs["chat_history"] = chat_history_str
	answer, _ = self.chain.combine_docs(docs, **new_inputs)
	sources = []
	if "SOURCES:" in answer:
	answer, sources = answer.split("SOURCES:")
	sources = sources.split(", ")
	return {"answer": answer.strip(), "sources": sources}


	def get_chain(vectorstore: FAISS) -> Chain:
	_eg_template = """## Example:

	Chat History:
	{chat_history}
	Follow Up question: {question}
	Standalone question: {answer}"""
	_eg_prompt = PromptTemplate(
	template=_eg_template,
	input_variables=["chat_history", "question", "answer"],
	)

	_prefix = """Given the following Chat History and a Follow Up Question, rephrase the Follow Up Question to be a new Standalone Question that takes the Chat History and context in to consideration. You should assume that the question is related to the TokCast podcast."""
	_suffix = """## Example:

	Chat History:
	{chat_history}
	Follow Up Input: {question}
	Standalone question:"""

	examples = [
	{
	"question": "Who is that?",
	"chat_history": "Human: What is the TokCast podcast?\nAssistant: TokCast is a podcast about the philosophy of David Deutsch.",
	"answer": "Who is David Deutsch?",
	},
	{
	"question": "What is the worldview presented here?",
	"chat_history": "Human: What is the TokCast podcast?\nAssistant: TokCast is a podcast about the philosophy of David Deutsch.\nHuman: Who is that?\nAssistant: David Deutsch is a philosopher, physicist, and author. He is the author of The Beginning of Infinity, Fabric of Reality, and one of the pioneers of the field of quantum computing.",
	"answer": "What is David Deutsch's worldview?",
	},
	]
	prompt = FewShotPromptTemplate(
	prefix=_prefix,
	suffix=_suffix,
	# example_selector=example_selector,
	examples=examples,
	example_prompt=_eg_prompt,
	input_variables=["question", "chat_history"],
	)
	llm = OpenAI(temperature=0, model_name="text-davinci-003")
	key_word_extractor = LLMChain(llm=llm, prompt=prompt)

	EXAMPLE_PROMPT = PromptTemplate(
	template="CONTENT:\n{page_content}\n----------\nSOURCE:\n{source}\n",
	input_variables=["page_content", "source"],
	)
	template = """You are an AI assistant for the TokCast Podcast. You're trained on all the transcripts of the podcast.
	Given a QUESTION and a series one or more CONTENT and SOURCE sections from a long document provide a conversational answer as "ANSWER" and a "SOURCES" output which lists verbatim the SOURCEs used in generating the response.
	You should only use SOURCEs that are explicitly listed as a SOURCE in the context.
	ALWAYS include the "SOURCES" as part of the response. If you don't have any sources, just say "SOURCES:"
	If you don't know the answer, just say "I'm not sure. Check out Brett's Channel" Don't try to make up an answer.
	QUESTION: {question}
	=========
	{context}
	=========
	ANSWER:"""
	PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
	doc_chain = load_qa_chain(
	OpenAI(temperature=0, model_name="text-davinci-003", max_tokens=-1),
	chain_type="stuff",
	prompt=PROMPT,
	document_prompt=EXAMPLE_PROMPT,
	)
	return CustomChain(
	chain=doc_chain, vstore=vectorstore, key_word_extractor=key_word_extractor
	)


	def _get_chat_history(chat_history: List[Tuple[str, str]]):
	buffer = ""
	for human_s, ai_s in chat_history:
	human = "Human: " + human_s
	ai = "Assistant: " + ai_s
	buffer += "\n" + "\n".join([human, ai])
	return buffer