import os
import traceback
import re
import gradio as gr
import math
from ...config import Config
from ...utils.prompter import Prompter
from .data_processing import get_data_from_input
def refresh_preview(
    template,
    load_dataset_from,
    dataset_from_data_dir,
    dataset_text,
    dataset_text_format,
    dataset_plain_text_input_variables_separator,
    dataset_plain_text_input_and_output_separator,
    dataset_plain_text_data_separator,
    max_preview_count,
):
    """Build the Gradio updates for the dataset preview table and its messages.

    A ``Prompter`` is created for ``template``, the dataset is loaded through
    ``get_data_from_input`` using the ``load_dataset_from`` / ``dataset_*``
    arguments, and at most ``max_preview_count`` training samples are turned
    into table rows of prompt and completion. When the prompter has no
    ``template_module``, one extra ``Variable: <name>`` column per template
    variable is appended, filled from each sample's ``_var_<name>`` entry.

    Returns:
        A 4-tuple of Gradio update objects:
        ``(dataframe_update, preview_info_markdown, count_markdown,
        count_markdown)`` -- the last two are the same object.
        If anything raises, the tuple instead carries an empty table, a
        placeholder hint, and an ``Error: ...`` message (twice).
    """
    try:
        prompter = Prompter(template)
        # Queried before the data is loaded, so a failure here surfaces first.
        variable_names = prompter.get_variable_names()

        input_options = dict(
            load_dataset_from=load_dataset_from,
            dataset_text=dataset_text,
            dataset_text_format=dataset_text_format,
            dataset_plain_text_input_variables_separator=dataset_plain_text_input_variables_separator,
            dataset_plain_text_input_and_output_separator=dataset_plain_text_input_and_output_separator,
            dataset_plain_text_data_separator=dataset_plain_text_data_separator,
            dataset_from_data_dir=dataset_from_data_dir,
            prompter=prompter,
        )
        data = get_data_from_input(**input_options)

        samples = prompter.get_train_data_from_dataset(data, max_preview_count)
        samples = samples[:max_preview_count]

        # The count is taken from the loaded data, not from the generated
        # training samples, which is why the messages below say "about".
        data_count = len(data)

        headers = ['Prompt', 'Completion']
        rows = [
            [sample.get("prompt", ""), sample.get("completion", "")]
            for sample in samples
        ]

        if not prompter.template_module:
            variable_names = prompter.get_variable_names()
            headers.extend(f"Variable: {name}" for name in variable_names)
            for row, sample in zip(rows, samples):
                row.extend(
                    sample.get(f"_var_{name}", "") for name in variable_names
                )

        preview_info_message = f"The dataset has about {data_count} item(s)."
        if data_count > max_preview_count:
            preview_info_message = (
                preview_info_message
                + f" Previewing the first {max_preview_count}."
            )

        count_text = f"about {data_count} item(s)."
        prefix = (
            "This dataset contains about "
            if load_dataset_from == "Data Dir"
            else ""
        )
        update_message = gr.Markdown.update(prefix + count_text, visible=True)

        table_update = gr.Dataframe.update(
            value={'data': rows, 'headers': headers})
        preview_update = gr.Markdown.update(preview_info_message)
        return table_update, preview_update, update_message, update_message
    except Exception as e:
        error_update = gr.Markdown.update(f"Error: {e}.", visible=True)
        empty_table = gr.Dataframe.update(value={'data': [], 'headers': []})
        hint_update = gr.Markdown.update(
            "Set the dataset in the \"Prepare\" tab, then preview it here.")
        return empty_table, hint_update, error_update, error_update
def refresh_dataset_items_count(
    template,
    load_dataset_from,
    dataset_from_data_dir,
    dataset_text,
    dataset_text_format,
    dataset_plain_text_input_variables_separator,
    dataset_plain_text_input_and_output_separator,
    dataset_plain_text_data_separator,
    max_preview_count,
):
    """Build the Gradio updates that report how many training items exist.

    A ``Prompter`` is created for ``template``, the dataset is loaded through
    ``get_data_from_input``, and the full training data is generated so that
    its length can be reported.

    Returns:
        A 4-tuple of Gradio update objects:
        ``(preview_info_markdown, count_markdown, count_markdown,
        slider_update)`` -- the two count entries are the same object, and the
        slider maximum is set to half the item count, rounded down.
        If anything raises, the tuple instead carries a placeholder hint, an
        ``Error: ...`` message (twice), and a slider maximum of 1. Traceback
        segments that mention the ``templates`` directory under
        ``Config.data_dir`` are appended to that error message.
    """
    try:
        prompter = Prompter(template)
        input_options = dict(
            load_dataset_from=load_dataset_from,
            dataset_text=dataset_text,
            dataset_text_format=dataset_text_format,
            dataset_plain_text_input_variables_separator=dataset_plain_text_input_variables_separator,
            dataset_plain_text_input_and_output_separator=dataset_plain_text_input_and_output_separator,
            dataset_plain_text_data_separator=dataset_plain_text_data_separator,
            dataset_from_data_dir=dataset_from_data_dir,
            prompter=prompter,
        )
        data = get_data_from_input(**input_options)

        train_data = prompter.get_train_data_from_dataset(data)
        data_count = len(train_data)

        preview_info_message = f"The dataset contains {data_count} item(s)."
        if data_count > max_preview_count:
            preview_info_message = (
                preview_info_message
                + f" Previewing the first {max_preview_count}."
            )

        count_text = f"{data_count} item(s)."
        prefix = (
            "This dataset contains "
            if load_dataset_from == "Data Dir"
            else ""
        )
        update_message = gr.Markdown.update(prefix + count_text, visible=True)

        preview_update = gr.Markdown.update(preview_info_message)
        half_count = math.floor(data_count / 2)
        slider_update = gr.Slider.update(maximum=half_count)
        return preview_update, update_message, update_message, slider_update
    except Exception as e:
        error_text = f"Error: {e}."

        # Split the traceback into its "File ..." segments and keep only the
        # ones that mention the templates directory, flattened to one line.
        templates_dir = os.path.join(Config.data_dir, "templates")
        segments = (
            segment.strip()
            for segment in re.split("\n * File ", traceback.format_exc())
        )
        template_segments = [
            re.sub(" *\n *", ": ", segment)
            for segment in segments
            if templates_dir in segment
        ]
        if template_segments:
            error_text = f"Error: {e} ({','.join(template_segments)})."

        error_update = gr.Markdown.update(error_text, visible=True)
        hint_update = gr.Markdown.update(
            "Set the dataset in the \"Prepare\" tab, then preview it here.")
        return (
            hint_update,
            error_update,
            error_update,
            gr.Slider.update(maximum=1),
        )