# Backend/app/prompts.py
#%%
import os
# from dotenv import load_dotenv
# load_dotenv('../../.env')
from langchain_core.prompts import PromptTemplate
from typing import List
from transformers import AutoTokenizer
from huggingface_hub import login
import models
# authenticate with the Hugging Face Hub (the Llama 3 repo is gated) and load
# the model-specific tokenizer so we can apply its chat template
login(os.environ['HF_TOKEN'])
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
def format_prompt(prompt: str) -> PromptTemplate:
    """Prepare a well-formatted prompt template for interacting with a
    large language model, ensuring that the model has a clear role
    (AI assistant) and understands the user's input.

    It 1. formats the input prompt using the model-specific instruction
    template, then 2. returns a LangChain PromptTemplate built from it.
    """
    chat = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": prompt},
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        chat,                        # conversation to wrap in the chat-specific formatting template
        tokenize=False,              # return the formatted chat as a string rather than numerical tokens
        add_generation_prompt=True,  # append the marker after which the model should start generating its response
    )
    return PromptTemplate.from_template(formatted_prompt)
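# Illustration: for prompt = "{question}", the returned template roughly follows
# the Llama 3 instruct format:
#   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#   You are a helpful AI assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
#   {question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
# The literal {question} placeholder survives the templating, so the resulting
# PromptTemplate still exposes it as an input variable.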
def format_chat_history(messages: List[models.Message]) -> str:
    """Format a list of Message objects into a chat-history text, one
    "[timestamp] type: message" line per message, oldest first.
    """
    # sort messages chronologically by timestamp
    ordered_messages = sorted(messages, key=lambda m: m.timestamp)
    return '\n'.join([
        '[{}] {}: {}'.format(
            message.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            message.type,
            message.message
        ) for message in ordered_messages
    ])
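# Example rendering (assumes models.Message carries .timestamp, .type and
# .message, as used above); two turns would come out as:
#   [2024-05-01 10:00:00] human: What is RAG?
#   [2024-05-01 10:00:05] ai: Retrieval-augmented generation is ...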
def format_context(docs: List[str]) -> str:
    """Concatenate the list of texts returned by DataIndexer.search into a
    single string that can fit into rag_prompt_formatted, with the documents
    separated by blank lines.
    """
    return '\n\n'.join(docs)
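# e.g. format_context(["first passage", "second passage"]) returns
# "first passage\n\nsecond passage", i.e. passages separated by one blank line.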
# raw prompt: nothing but the user's question
prompt = "{question}"
# The history_prompt captures the question and the conversation history.
# It needs a {chat_history} placeholder and a {question} placeholder.
history_prompt: str = """
Given the following conversation, provide a helpful answer to the follow up question.
Chat History:
{chat_history}
Follow Up Question: {question}
Helpful Answer:
"""
# The standalone_prompt captures the question and the chat history to generate
# a standalone question. It needs a {chat_history} placeholder and a {question} placeholder.
standalone_prompt: str = """
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""
# The rag_prompt captures the retrieved context and the standalone question to
# generate a final answer. It needs a {context} and a {standalone_question} placeholder.
rag_prompt: str = """
Answer the question based only on the following context:
{context}
Question: {standalone_question}
"""
# i) raw prompt: plain PromptTemplate around the bare question
raw_prompt = PromptTemplate.from_template(prompt)
# ii) raw_prompt_formatted: the same question wrapped in the chat template via format_prompt
raw_prompt_formatted = format_prompt(prompt)
# use format_prompt to create history_prompt_formatted
history_prompt_formatted = format_prompt(history_prompt)
# use format_prompt to create standalone_prompt_formatted
standalone_prompt_formatted = format_prompt(standalone_prompt)
# use format_prompt to create rag_prompt_formatted
rag_prompt_formatted = format_prompt(rag_prompt)
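#%%
# Minimal usage sketch (illustrative only; the real app wires these templates
# to its own LLM client elsewhere). A RunnableLambda stands in for the model
# so the chain can be smoke-tested without calling a real LLM.
if __name__ == "__main__":
    from langchain_core.runnables import RunnableLambda

    # the stub receives the rendered PromptValue and echoes its beginning
    stub_llm = RunnableLambda(lambda p: "stub answer for: " + p.to_string()[:80])
    chain = rag_prompt_formatted | stub_llm
    print(chain.invoke({
        "context": "LangChain is a framework for building LLM applications.",
        "standalone_question": "What is LangChain?",
    }))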