#%%
import os
# from dotenv import load_dotenv
# load_dotenv('../../.env')
from langchain_core.prompts import PromptTemplate
from typing import List
from transformers import AutoTokenizer
from huggingface_hub import login

import models

login(os.environ['HF_TOKEN'])

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")


def format_prompt(prompt) -> PromptTemplate:
    """Prepare a well-formatted prompt template for interacting with a large
    language model, ensuring that the model has a clear role (AI assistant)
    and understands the user's input.

    It:
    1. formats the input prompt using the model-specific instruction template
    2. returns a LangChain PromptTemplate
    """
    chat = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": prompt},
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        chat,                        # conversation to wrap in the model's chat-specific formatting
        tokenize=False,              # return the formatted chat as a string rather than numerical tokens
        add_generation_prompt=True   # append the marker after which the model should start generating its reply
    )
    return PromptTemplate.from_template(formatted_prompt)


def format_chat_history(messages: List[models.Message]):
    """Format the list of Message objects into a text rendering of the chat history."""
    # Sort messages chronologically by timestamp using a lambda function
    ordered_messages = sorted(messages, key=lambda m: m.timestamp, reverse=False)
    return '\n'.join([
        '[{}] {}: {}'.format(
            message.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            message.type,
            message.message
        )
        for message in ordered_messages
    ])


def format_context(docs: List[str]):
    """Concatenate a list of strings into a single context string.

    The output of DataIndexer.search is a list of texts, so we need to
    concatenate that list into one text that can fit into rag_prompt_formatted.
    """
    return '\n\n'.join(docs)


prompt = "{question}"

# The history_prompt captures the question and the conversation history.
# It needs a {chat_history} placeholder and a {question} placeholder.
history_prompt: str = """
Given the following conversation, provide a helpful answer to the follow up question.

Chat History:
{chat_history}

Follow Up Question: {question}
Helpful answer:
"""

# The standalone_prompt captures the question and the chat history to generate
# a standalone question. It needs a {chat_history} placeholder and a {question} placeholder.
standalone_prompt: str = """
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}

Follow Up Input: {question}
Standalone question:
"""
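# --- Hedged usage sketch (illustration only, not part of the scaffold above) ---
# A minimal smoke test for format_chat_history and format_context, assuming
# models.Message exposes the .timestamp/.type/.message attributes used above;
# the SimpleNamespace stand-ins below are hypothetical substitutes for real
# Message objects.
if __name__ == "__main__":
    from datetime import datetime
    from types import SimpleNamespace

    demo_messages = [
        SimpleNamespace(timestamp=datetime(2024, 1, 1, 10, 5), type="ai",
                        message="Hi! How can I help?"),
        SimpleNamespace(timestamp=datetime(2024, 1, 1, 10, 0), type="user",
                        message="Hello"),
    ]
    # Messages are re-ordered chronologically before being rendered:
    print(format_chat_history(demo_messages))
    # [2024-01-01 10:00:00] user: Hello
    # [2024-01-01 10:05:00] ai: Hi! How can I help?

    # Retrieved chunks are separated by blank lines so the model sees
    # clear document boundaries:
    print(format_context(["First retrieved chunk.", "Second retrieved chunk."]))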
# The rag_prompt captures the retrieved context and the standalone question to
# generate a final answer to the question.
rag_prompt: str = """
Answer the question based only on the following context:
{context}

Question: {standalone_question}
"""

# create raw_prompt_formatted by using format_prompt

# i) raw prompt
raw_prompt = PromptTemplate.from_template(prompt)

# ii) formatted prompt
raw_prompt_formatted = format_prompt(prompt)

# use format_prompt to create history_prompt_formatted
history_prompt_formatted = format_prompt(history_prompt)

# use format_prompt to create standalone_prompt_formatted
standalone_prompt_formatted = format_prompt(standalone_prompt)

# use format_prompt to create rag_prompt_formatted
rag_prompt_formatted = format_prompt(rag_prompt)
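# --- Hedged usage sketch (illustration only) ---
# How these templates are typically consumed downstream, assuming a LangChain
# runnable LLM (e.g. a HuggingFaceEndpoint) is available; `llm` is a
# hypothetical name, not defined in this module.
if __name__ == "__main__":
    # Render the chat-formatted raw prompt; apply_chat_template wrapped the
    # Llama 3 instruction markers around the {question} placeholder:
    print(raw_prompt_formatted.format(question="What is retrieval-augmented generation?"))

    # In an LCEL pipeline the same template composes with an LLM:
    # chain = raw_prompt_formatted | llm
    # print(chain.invoke({"question": "What is retrieval-augmented generation?"}))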