#%%
import os
# from dotenv import load_dotenv
# load_dotenv('../../.env')

from langchain_core.prompts import PromptTemplate
from typing import List

from transformers import AutoTokenizer
from huggingface_hub import login

import models


login(os.environ['HF_TOKEN'])
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

def format_prompt(prompt: str) -> PromptTemplate:
    """Wrap a prompt in the model-specific instruction template.

    Builds a minimal conversation (a system role plus the user prompt),
    applies the tokenizer's chat template to it, and returns the result as a
    LangChain PromptTemplate, so any {placeholders} inside `prompt` survive
    as template variables.
    """
    chat = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": prompt},
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        chat,
        tokenize=False,  # return the formatted chat as a string rather than token ids
        add_generation_prompt=True,  # append the marker after which the model begins its reply
    )
    return PromptTemplate.from_template(formatted_prompt)
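# For reference, with the Llama 3 instruct tokenizer the returned template looks
# roughly like this (the exact special tokens depend on the tokenizer version):
#
#   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#
#   You are a helpful AI assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
#
#   {question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
#
# Note that the {question} placeholder is preserved verbatim, which is what lets
# PromptTemplate.from_template pick it up as an input variable.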

def format_chat_history(messages: List[models.Message]) -> str:
    """Format a list of Message objects into a chat-history transcript."""
    # Sort messages chronologically (oldest first) by timestamp.
    ordered_messages = sorted(messages, key=lambda m: m.timestamp)
    return '\n'.join([
        '[{}] {}: {}'.format(
            message.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            message.type,
            message.message
        ) for message in ordered_messages
    ])


def format_context(docs: List[str]) -> str:
    """Concatenate the list of texts returned by DataIndexer.search into a
    single context string that can fit into rag_prompt_formatted."""
    # Separate documents with blank lines so chunk boundaries stay visible.
    return '\n\n'.join(docs)
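# e.g. format_context(["chunk one", "chunk two"]) returns:
#   "chunk one\n\nchunk two"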

# Base prompt: passes the user's question straight through.
prompt = "{question}"

# history_prompt captures the question and the conversation history.
# It needs a {chat_history} placeholder and a {question} placeholder.
history_prompt: str = """
Given the following conversation, provide a helpful answer to the follow up question.

Chat History:
{chat_history}

Follow Up Question: {question}

Helpful Answer:
"""

# standalone_prompt captures the question and the chat history to generate
# a standalone question. It needs a {chat_history} placeholder and a
# {question} placeholder.
standalone_prompt: str = """
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}

Follow Up Question: {question}

Standalone Question:
"""

# rag_prompt captures the retrieved context and the standalone question to
# generate a final answer. It needs a {context} placeholder and a
# {standalone_question} placeholder.
rag_prompt: str = """
Answer the question based only on the following context:

{context}

Question: {standalone_question}

Helpful Answer:
"""

# i) raw prompt: the bare {question} template
raw_prompt = PromptTemplate.from_template(prompt)

# ii) the same prompt wrapped in the model's instruction template
raw_prompt_formatted = format_prompt(prompt)


# history_prompt wrapped in the model's instruction template
history_prompt_formatted = format_prompt(history_prompt)

# standalone_prompt wrapped in the model's instruction template
standalone_prompt_formatted: PromptTemplate = format_prompt(standalone_prompt)

# rag_prompt wrapped in the model's instruction template
rag_prompt_formatted: PromptTemplate = format_prompt(rag_prompt)
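
#%%
# Quick sanity check (a minimal sketch; assumes HF_TOKEN is set so the gated
# Llama 3 tokenizer above loads). Prints the raw {question} prompt wrapped in
# the instruct template and confirms the template variable was preserved.
if __name__ == '__main__':
    print(raw_prompt_formatted.template)
    print(raw_prompt_formatted.input_variables)  # expected: ['question']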