Last commit not found
"""Module containing the AlpacaQAPromptTokenizingStrategy class""" | |
from typing import Tuple | |
from axolotl.prompt_tokenizers import ( | |
AlpacaPromptTokenizingStrategy, | |
InstructionPromptTokenizingStrategy, | |
) | |
from axolotl.prompters import AlpacaPrompter, PromptStyle | |
def load(tokenizer, cfg):
    """Build the default Alpaca chat tokenizing strategy.

    Uses a plain ``AlpacaPrompter`` in chat style; masking of inputs and
    maximum sequence length come from ``cfg``.
    """
    prompter = AlpacaPrompter(PromptStyle.CHAT.value)
    strategy = AlpacaPromptTokenizingStrategy(
        prompter,
        tokenizer,
        cfg.train_on_inputs,
        cfg.sequence_len,
    )
    return strategy
class AlpacaConcisePrompter(AlpacaPrompter):
    """
    Alpaca Prompter extending the system prompt to ask for concise answers
    """

    # Overrides of the parent AlpacaPrompter system prompts: identical to the
    # stock Alpaca wording except for the added "concisely" instruction.
    system_prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that concisely and appropriately completes the request.\n\n"
    # Variant used when the example has no input/context field.
    system_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately and concisely completes the request.\n\n"
class AlpacaQAPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
    """
    Tokenizing strategy for AlpacaQA
    """

    def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
        """Map a QA record onto (instruction, input, output).

        The ``question`` field becomes the instruction, the ``answer`` field
        the output; these datasets carry no separate input, so it is empty.
        """
        instruction = prompt["question"]
        output = prompt["answer"]
        return instruction, "", output
class CamelAIPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
    """
    Tokenizing strategy for CamelAI datasets
    """

    def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
        """Map a CamelAI record onto (instruction, input, output).

        ``message_1`` is treated as the instruction and ``message_2`` as the
        output; CamelAI rows have no separate input, so it is empty.
        """
        instruction = prompt["message_1"]
        output = prompt["message_2"]
        return instruction, "", output
def load_concise(tokenizer, cfg):
    """Build the Alpaca chat strategy with the concise-answer system prompt.

    Identical to :func:`load` except it swaps in ``AlpacaConcisePrompter``.
    """
    prompter = AlpacaConcisePrompter(PromptStyle.CHAT.value)
    strategy = AlpacaPromptTokenizingStrategy(
        prompter,
        tokenizer,
        cfg.train_on_inputs,
        cfg.sequence_len,
    )
    return strategy
def load_qa(tokenizer, cfg):
    """Build the tokenizing strategy for question/answer-shaped datasets.

    Same prompter as :func:`load`, but parses ``question``/``answer`` fields
    via ``AlpacaQAPromptTokenizingStrategy``.
    """
    prompter = AlpacaPrompter(PromptStyle.CHAT.value)
    strategy = AlpacaQAPromptTokenizingStrategy(
        prompter,
        tokenizer,
        cfg.train_on_inputs,
        cfg.sequence_len,
    )
    return strategy
def load_camel_ai(tokenizer, cfg):
    """Build the tokenizing strategy for CamelAI-formatted datasets.

    Same prompter as :func:`load`, but parses ``message_1``/``message_2``
    fields via ``CamelAIPromptTokenizingStrategy``.
    """
    prompter = AlpacaPrompter(PromptStyle.CHAT.value)
    strategy = CamelAIPromptTokenizingStrategy(
        prompter,
        tokenizer,
        cfg.train_on_inputs,
        cfg.sequence_len,
    )
    return strategy