# NOTE: this file was captured from a Hugging Face Spaces web page; the
# page chrome, git blob hashes, and line-number gutter that preceded the
# code (file size: 4,014 bytes) have been removed so the module parses.
#%%
import os
# from dotenv import load_dotenv
# load_dotenv('../../.env')
from langchain_core.prompts import PromptTemplate
from typing import List
from transformers import AutoTokenizer
from huggingface_hub import login
import models
# Authenticate with the Hugging Face Hub at import time using the HF_TOKEN
# environment variable; raises KeyError if HF_TOKEN is not set.
# (Presumably required because the Llama-3 repo is gated — confirm.)
login(os.environ['HF_TOKEN'])
# Load the Llama-3-8B-Instruct tokenizer once at module load; it provides the
# model-specific chat template consumed by format_prompt below.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
def format_prompt(prompt) -> PromptTemplate:
    """Wrap a raw prompt string in the model's chat-instruction template.

    Builds a two-message conversation (a fixed system role plus the given
    user prompt), renders it with the Llama-3 chat template, and returns the
    result as a LangChain PromptTemplate so that any placeholders inside
    *prompt* (e.g. ``{question}``) survive for later substitution.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": prompt},
    ]
    # tokenize=False keeps the result as a plain string instead of token ids;
    # add_generation_prompt=True appends the marker after which the model is
    # expected to start writing its reply.
    templated_text = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
    return PromptTemplate.from_template(templated_text)
def format_chat_history(messages: List[models.Message]):
    """Render a list of Message objects as a chronological chat transcript.

    Messages are sorted by their ``timestamp`` attribute (oldest first) and
    each becomes one line of the form
    ``[YYYY-MM-DD HH:MM:SS] <type>: <text>``.
    """
    lines = []
    for msg in sorted(messages, key=lambda m: m.timestamp):
        stamp = msg.timestamp.strftime("%Y-%m-%d %H:%M:%S")
        lines.append('[{}] {}: {}'.format(stamp, msg.type, msg.message))
    return '\n'.join(lines)
def format_context(docs: List[str]) -> str:
    """Concatenate retrieved text chunks into one context string.

    The output of DataIndexer.search is a list of text snippets; joining
    them with blank lines yields a single block of text that fits the
    ``{context}`` placeholder of the RAG prompt.

    Args:
        docs: list of document/text chunks (may be empty).

    Returns:
        The chunks separated by blank lines; an empty string for ``[]``.
    """
    return '\n\n'.join(docs)
# The raw prompt simply forwards the user's question verbatim.
prompt = "{question}"

# history_prompt combines the running conversation with the follow-up
# question; it keeps the {chat_history} and {question} placeholders for
# LangChain to fill in. ("follwoing" typo in the model-facing text fixed.)
history_prompt: str = """
Given the following conversation provide a helpful answer to the follow up question.
Chat History:
{chat_history}
Follow Up Question: {question}
helpful answer:
"""
# standalone_prompt condenses the chat history plus the follow-up question
# into a single self-contained question; it exposes {chat_history} and
# {question} placeholders.
standalone_prompt: str = """
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""
# rag_prompt instructs the model to answer strictly from the retrieved
# context; it exposes {context} and {standalone_question} placeholders.
rag_prompt: str = """
Answer the question based only on the following context:
{context}
Question: {standalone_question}
"""
# i) raw prompt: a plain LangChain template around the bare question.
raw_prompt = PromptTemplate.from_template(prompt)

# ii) the same prompt strings wrapped in the Llama-3 chat-instruction
# template via format_prompt. (Dead commented-out code and a trailing " |"
# web-scrape artifact — a syntax error — removed.)
raw_prompt_formatted = format_prompt(prompt)
history_prompt_formatted = format_prompt(history_prompt)
standalone_prompt_formatted = format_prompt(standalone_prompt)
rag_prompt_formatted = format_prompt(rag_prompt)