#%%
import os
# from dotenv import load_dotenv
# load_dotenv('../../.env')
from langchain_core.prompts import PromptTemplate
from typing import List
from transformers import AutoTokenizer
from huggingface_hub import login

import models

login(os.environ['HF_TOKEN'])

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
def format_prompt(prompt: str) -> PromptTemplate:
    """Prepare a well-formatted prompt template for interacting with a
    large language model, giving the model a clear role (AI assistant)
    and the user's input.

    It 1. formats the input prompt using the model-specific instruction template, and
    2. returns a LangChain PromptTemplate.
    """
    chat = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": prompt},
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        chat,                        # conversation to wrap in the model's chat template
        tokenize=False,              # return the formatted chat as a string rather than token ids
        add_generation_prompt=True,  # append the marker after which the model starts generating its response
    )
    return PromptTemplate.from_template(formatted_prompt)
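
#%%
# Quick sanity check of format_prompt (a sketch; assumes HF_TOKEN grants access
# to the gated Llama-3 repo, so the tokenizer above loaded successfully).
# The {question} placeholder survives the chat-template wrapping, so the
# resulting PromptTemplate still accepts a `question` variable.
demo = format_prompt("{question}")
print(demo.input_variables)               # expected: ['question']
print(demo.format(question="What is RAG?"))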
def format_chat_history(messages: List[models.Message]) -> str:
    """Format a list of Message objects into a chat-history text,
    one line per message, ordered by timestamp."""
    # Sort messages chronologically by timestamp
    ordered_messages = sorted(messages, key=lambda m: m.timestamp)
    return '\n'.join([
        '[{}] {}: {}'.format(
            message.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            message.type,
            message.message
        ) for message in ordered_messages
    ])
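
#%%
# Minimal sketch of format_chat_history in action. models.Message is assumed
# to expose `timestamp`, `type`, and `message` attributes; the SimpleNamespace
# stand-ins below are hypothetical and only mimic that shape for illustration.
from datetime import datetime
from types import SimpleNamespace

_demo_messages = [
    SimpleNamespace(timestamp=datetime(2024, 1, 1, 10, 5), type="ai",
                    message="Hello! How can I help?"),
    SimpleNamespace(timestamp=datetime(2024, 1, 1, 10, 0), type="user",
                    message="Hi there"),
]
print(format_chat_history(_demo_messages))
# [2024-01-01 10:00:00] user: Hi there
# [2024-01-01 10:05:00] ai: Hello! How can I help?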
def format_context(docs: List[str]) -> str:
    """The output of DataIndexer.search is a list of texts, so we
    concatenate that list into a single string that can fit into
    the rag_prompt_formatted template."""
    return '\n\n'.join(docs)
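
#%%
# Sketch: format_context just joins the retrieved chunks with blank lines,
# shown here on a hypothetical two-chunk search result.
print(format_context(["First retrieved chunk.", "Second retrieved chunk."]))
# First retrieved chunk.
#
# Second retrieved chunk.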
# The raw prompt simply passes the user's question through.
prompt = "{question}"
# The history_prompt captures the question and the conversation history.
# It needs a {chat_history} placeholder and a {question} placeholder.
history_prompt: str = """
Given the following conversation, provide a helpful answer to the follow up question.

Chat History:
{chat_history}

Follow Up Question: {question}
Helpful Answer:
"""
# The standalone_prompt captures the question and the chat history
# to generate a standalone question. It needs a {chat_history} placeholder
# and a {question} placeholder.
standalone_prompt: str = """
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}

Follow Up Input: {question}
Standalone question:
"""
# The rag_prompt captures the context and the standalone question to generate
# a final answer to the question.
rag_prompt: str = """
Answer the question based only on the following context:
{context}

Question: {standalone_question}
"""
# i) raw prompt
raw_prompt = PromptTemplate.from_template(prompt)
# ii) raw prompt wrapped in the Llama-3 chat template via format_prompt
raw_prompt_formatted = format_prompt(prompt)
# use format_prompt to create history_prompt_formatted
history_prompt_formatted = format_prompt(history_prompt)
# use format_prompt to create standalone_prompt_formatted
standalone_prompt_formatted = format_prompt(standalone_prompt)
# use format_prompt to create rag_prompt_formatted
rag_prompt_formatted = format_prompt(rag_prompt)
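
#%%
# Sketch of how the formatted templates are consumed downstream (the variable
# names here are illustrative, not part of the original pipeline): each
# PromptTemplate is filled with its placeholders before being sent to the LLM.
example_rag_input = rag_prompt_formatted.format(
    context=format_context(["Retrieval-augmented generation grounds answers in documents."]),
    standalone_question="What does RAG do?",
)
print(example_rag_input)  # full Llama-3 chat-formatted prompt, ready for the model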