# NOTE: this file was captured from a Hugging Face Spaces web page; the
# page chrome, git blob hashes, and line-number gutter that preceded the
# code (file size: 4,014 bytes) have been removed so the module parses.
#%%
import os
# from dotenv import load_dotenv
# load_dotenv('../../.env')
from langchain_core.prompts import PromptTemplate
from typing import List
from transformers import AutoTokenizer
from huggingface_hub import login
import models
# Authenticate with the Hugging Face Hub at import time using the HF_TOKEN
# environment variable; raises KeyError if HF_TOKEN is not set.
# (Presumably required because the Llama-3 repo is gated — confirm.)
login(os.environ['HF_TOKEN'])
# Load the Llama-3-8B-Instruct tokenizer once at module load; it provides the
# model-specific chat template consumed by format_prompt below.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
def format_prompt(prompt) -> PromptTemplate:
    """Wrap a raw prompt string in the model's chat-instruction template.

    Builds a two-message conversation (a fixed system role plus the given
    user prompt), renders it with the Llama-3 chat template, and returns the
    result as a LangChain PromptTemplate so that any placeholders inside
    *prompt* (e.g. ``{question}``) survive for later substitution.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": prompt},
    ]
    # tokenize=False keeps the result as a plain string instead of token ids;
    # add_generation_prompt=True appends the marker after which the model is
    # expected to start writing its reply.
    templated_text = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
    return PromptTemplate.from_template(templated_text)
def format_chat_history(messages: List[models.Message]):
    """Render a list of Message objects as a chronological chat transcript.

    Messages are sorted by their ``timestamp`` attribute (oldest first) and
    each becomes one line of the form
    ``[YYYY-MM-DD HH:MM:SS] <type>: <text>``.
    """
    lines = []
    for msg in sorted(messages, key=lambda m: m.timestamp):
        stamp = msg.timestamp.strftime("%Y-%m-%d %H:%M:%S")
        lines.append('[{}] {}: {}'.format(stamp, msg.type, msg.message))
    return '\n'.join(lines)
def format_context(docs: List[str]) -> str:
    """Concatenate retrieved text chunks into one context string.

    The output of DataIndexer.search is a list of text snippets; joining
    them with blank lines yields a single block of text that fits the
    ``{context}`` placeholder of the RAG prompt.

    Args:
        docs: list of document/text chunks (may be empty).

    Returns:
        The chunks separated by blank lines; an empty string for ``[]``.
    """
    return '\n\n'.join(docs)
# The raw prompt simply forwards the user's question verbatim.
prompt = "{question}"

# history_prompt combines the running conversation with the follow-up
# question; it keeps the {chat_history} and {question} placeholders for
# LangChain to fill in. ("follwoing" typo in the model-facing text fixed.)
history_prompt: str = """
Given the following conversation provide a helpful answer to the follow up question.
Chat History:
{chat_history}
Follow Up Question: {question}
helpful answer:
"""
# standalone_prompt condenses the chat history plus the follow-up question
# into a single self-contained question; it exposes {chat_history} and
# {question} placeholders.
standalone_prompt: str = """
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""
# rag_prompt instructs the model to answer strictly from the retrieved
# context; it exposes {context} and {standalone_question} placeholders.
rag_prompt: str = """
Answer the question based only on the following context:
{context}
Question: {standalone_question}
"""
# i) raw prompt: a plain LangChain template around the bare question.
raw_prompt = PromptTemplate.from_template(prompt)

# ii) the same prompt strings wrapped in the Llama-3 chat-instruction
# template via format_prompt. (Dead commented-out code and a trailing " |"
# web-scrape artifact — a syntax error — removed.)
raw_prompt_formatted = format_prompt(prompt)
history_prompt_formatted = format_prompt(history_prompt)
standalone_prompt_formatted = format_prompt(standalone_prompt)
rag_prompt_formatted = format_prompt(rag_prompt)