# Backend/app/prompts.py
#%%
import os
# from dotenv import load_dotenv
# load_dotenv('../../.env')
from langchain_core.prompts import PromptTemplate
from typing import List
from transformers import AutoTokenizer
from huggingface_hub import login
import models
# authenticate with the Hugging Face Hub (the Llama 3 repo is gated) and load
# the model-specific tokenizer so we can apply its chat template
login(os.environ['HF_TOKEN'])
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
def format_prompt(prompt: str) -> PromptTemplate:
    """Prepare a well-formatted prompt template for interacting with a
    large language model, ensuring that the model has a clear role
    (AI assistant) and understands the user's input.

    It 1. formats the input prompt using the model-specific instruction
    template, then 2. returns a LangChain PromptTemplate built from it.
    """
    chat = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": prompt},
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        chat,                        # conversation to wrap in the chat-specific formatting template
        tokenize=False,              # return the formatted chat as a string rather than numerical tokens
        add_generation_prompt=True,  # append the marker after which the model should start generating its response
    )
    return PromptTemplate.from_template(formatted_prompt)
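# Illustration: for prompt = "{question}", the returned template roughly follows
# the Llama 3 instruct format:
#   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#   You are a helpful AI assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
#   {question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
# The literal {question} placeholder survives the templating, so the resulting
# PromptTemplate still exposes it as an input variable.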
def format_chat_history(messages: List[models.Message]) -> str:
    """Format a list of Message objects into a chat-history text, one
    "[timestamp] type: message" line per message, oldest first.
    """
    # sort messages chronologically by timestamp
    ordered_messages = sorted(messages, key=lambda m: m.timestamp)
    return '\n'.join([
        '[{}] {}: {}'.format(
            message.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            message.type,
            message.message
        ) for message in ordered_messages
    ])
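# Example rendering (assumes models.Message carries .timestamp, .type and
# .message, as used above); two turns would come out as:
#   [2024-05-01 10:00:00] human: What is RAG?
#   [2024-05-01 10:00:05] ai: Retrieval-augmented generation is ...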
def format_context(docs: List[str]) -> str:
    """Concatenate the list of texts returned by DataIndexer.search into a
    single string that can fit into rag_prompt_formatted, with the documents
    separated by blank lines.
    """
    return '\n\n'.join(docs)
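# e.g. format_context(["first passage", "second passage"]) returns
# "first passage\n\nsecond passage", i.e. passages separated by one blank line.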
# raw prompt: nothing but the user's question
prompt = "{question}"
# The history_prompt captures the question and the conversation history.
# It needs a {chat_history} placeholder and a {question} placeholder.
history_prompt: str = """
Given the following conversation, provide a helpful answer to the follow up question.
Chat History:
{chat_history}
Follow Up Question: {question}
Helpful Answer:
"""
# The standalone_prompt captures the question and the chat history to generate
# a standalone question. It needs a {chat_history} placeholder and a {question} placeholder.
standalone_prompt: str = """
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""
# The rag_prompt captures the retrieved context and the standalone question to
# generate a final answer. It needs a {context} and a {standalone_question} placeholder.
rag_prompt: str = """
Answer the question based only on the following context:
{context}
Question: {standalone_question}
"""
# i) raw prompt: plain PromptTemplate around the bare question
raw_prompt = PromptTemplate.from_template(prompt)
# ii) raw_prompt_formatted: the same question wrapped in the chat template via format_prompt
raw_prompt_formatted = format_prompt(prompt)
# use format_prompt to create history_prompt_formatted
history_prompt_formatted = format_prompt(history_prompt)
# use format_prompt to create standalone_prompt_formatted
standalone_prompt_formatted = format_prompt(standalone_prompt)
# use format_prompt to create rag_prompt_formatted
rag_prompt_formatted = format_prompt(rag_prompt)
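#%%
# Minimal usage sketch (illustrative only; the real app wires these templates
# to its own LLM client elsewhere). A RunnableLambda stands in for the model
# so the chain can be smoke-tested without calling a real LLM.
if __name__ == "__main__":
    from langchain_core.runnables import RunnableLambda

    # the stub receives the rendered PromptValue and echoes its beginning
    stub_llm = RunnableLambda(lambda p: "stub answer for: " + p.to_string()[:80])
    chain = rag_prompt_formatted | stub_llm
    print(chain.invoke({
        "context": "LangChain is a framework for building LLM applications.",
        "standalone_question": "What is LangChain?",
    }))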