Spaces:
Runtime error
Runtime error
File size: 5,674 Bytes
a5e11b9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import os
import traceback
import re
import gradio as gr
import math
from ...config import Config
from ...utils.prompter import Prompter
from .data_processing import get_data_from_input
def refresh_preview(
    template,
    load_dataset_from,
    dataset_from_data_dir,
    dataset_text,
    dataset_text_format,
    dataset_plain_text_input_variables_separator,
    dataset_plain_text_input_and_output_separator,
    dataset_plain_text_data_separator,
    max_preview_count,
):
    """Build the Gradio updates that render a preview of the dataset.

    Args:
        template: Passed to ``Prompter`` to select the prompt template.
        load_dataset_from: Forwarded to ``get_data_from_input``; when it
            equals ``"Data Dir"`` the status message gets a
            "This dataset contains about" prefix.
        dataset_from_data_dir, dataset_text, dataset_text_format,
        dataset_plain_text_input_variables_separator,
        dataset_plain_text_input_and_output_separator,
        dataset_plain_text_data_separator: Forwarded unchanged to
            ``get_data_from_input``.
        max_preview_count: Upper bound on the number of previewed rows; it
            is passed to ``get_train_data_from_dataset`` and also used to
            slice the result.

    Returns:
        A 4-tuple of Gradio update payloads: the dataframe update (rows and
        headers), the preview info Markdown update, and the status Markdown
        update repeated twice. If anything raises, the dataframe is emptied,
        the preview info becomes a placeholder hint and the status carries
        the error text.
    """
    try:
        prompter = Prompter(template)
        # Looked up once, before the dataset is parsed, so a template problem
        # surfaces first. The value is only consumed by the variable columns
        # below.
        variable_names = prompter.get_variable_names()

        data = get_data_from_input(
            load_dataset_from=load_dataset_from,
            dataset_text=dataset_text,
            dataset_text_format=dataset_text_format,
            dataset_plain_text_input_variables_separator=dataset_plain_text_input_variables_separator,
            dataset_plain_text_input_and_output_separator=dataset_plain_text_input_and_output_separator,
            dataset_plain_text_data_separator=dataset_plain_text_data_separator,
            dataset_from_data_dir=dataset_from_data_dir,
            prompter=prompter
        )

        train_data = prompter.get_train_data_from_dataset(
            data, max_preview_count)
        train_data = train_data[:max_preview_count]

        # Counted on the raw input rather than on the (truncated) train data,
        # which is why the messages below say "about".
        data_count = len(data)

        headers = ['Prompt', 'Completion']
        preview_data = [
            [item.get("prompt", ""), item.get("completion", "")]
            for item in train_data
        ]

        # Without a template module, append one column per template variable.
        if not prompter.template_module:
            headers += [f"Variable: {name}" for name in variable_names]
            preview_data = [
                row + [item.get(f"_var_{name}", "") for name in variable_names]
                for row, item in zip(preview_data, train_data)
            ]

        preview_info_message = f"The dataset has about {data_count} item(s)."
        if data_count > max_preview_count:
            preview_info_message += f" Previewing the first {max_preview_count}."

        info_message = f"about {data_count} item(s)."
        if load_dataset_from == "Data Dir":
            info_message = "This dataset contains about " + info_message
        update_message = gr.Markdown.update(info_message, visible=True)

        return (
            gr.Dataframe.update(
                value={'data': preview_data, 'headers': headers}),
            gr.Markdown.update(preview_info_message),
            update_message,
            update_message
        )
    except Exception as e:
        # UI boundary: report the failure in the status message instead of
        # letting the callback raise.
        update_message = gr.Markdown.update(
            f"<span class=\"finetune_dataset_error_message\">Error: {e}.</span>",
            visible=True)
        return (
            gr.Dataframe.update(value={'data': [], 'headers': []}),
            gr.Markdown.update(
                "Set the dataset in the \"Prepare\" tab, then preview it here."),
            update_message,
            update_message
        )
def refresh_dataset_items_count(
    template,
    load_dataset_from,
    dataset_from_data_dir,
    dataset_text,
    dataset_text_format,
    dataset_plain_text_input_variables_separator,
    dataset_plain_text_input_and_output_separator,
    dataset_plain_text_data_separator,
    max_preview_count,
):
    """Count the train items of the dataset and build the matching UI updates.

    The dataset is loaded through ``get_data_from_input`` and converted with
    ``Prompter.get_train_data_from_dataset`` (no limit argument); the length
    of that result is the reported count.

    Returns:
        A 4-tuple of Gradio update payloads: the preview info Markdown
        update, the status Markdown update repeated twice, and a slider
        update whose ``maximum`` is half the item count (rounded down).
        If anything raises, the preview info becomes a placeholder hint, the
        status carries the error text (plus any traceback frames located
        under the ``templates`` folder of ``Config.data_dir``), and the
        slider maximum is set to 1.
    """
    try:
        prompter = Prompter(template)
        dataset = get_data_from_input(
            load_dataset_from=load_dataset_from,
            dataset_text=dataset_text,
            dataset_text_format=dataset_text_format,
            dataset_plain_text_input_variables_separator=dataset_plain_text_input_variables_separator,
            dataset_plain_text_input_and_output_separator=dataset_plain_text_input_and_output_separator,
            dataset_plain_text_data_separator=dataset_plain_text_data_separator,
            dataset_from_data_dir=dataset_from_data_dir,
            prompter=prompter
        )
        item_count = len(prompter.get_train_data_from_dataset(dataset))

        preview_note = f"The dataset contains {item_count} item(s)."
        if item_count > max_preview_count:
            preview_note = (
                preview_note + f" Previewing the first {max_preview_count}.")

        summary = f"{item_count} item(s)."
        if load_dataset_from == "Data Dir":
            summary = "This dataset contains " + summary
        status_update = gr.Markdown.update(summary, visible=True)

        return (
            gr.Markdown.update(preview_note),
            status_update,
            status_update,
            gr.Slider.update(maximum=math.floor(item_count / 2))
        )
    except Exception as e:
        # Split the formatted traceback into per-frame chunks and keep only
        # the frames whose text mentions the templates folder.
        frames = [
            chunk.strip()
            for chunk in re.split("\n * File ", traceback.format_exc())
        ]
        templates_dir = os.path.join(Config.data_dir, "templates")
        # Collapse each kept frame onto a single line.
        template_frames = [
            re.sub(" *\n *", ": ", frame)
            for frame in frames
            if templates_dir in frame
        ]

        if len(template_frames) > 0:
            error_markup = f"<span class=\"finetune_dataset_error_message\">Error: {e} ({','.join(template_frames)}).</span>"
        else:
            error_markup = f"<span class=\"finetune_dataset_error_message\">Error: {e}.</span>"
        status_update = gr.Markdown.update(error_markup, visible=True)

        return (
            gr.Markdown.update(
                "Set the dataset in the \"Prepare\" tab, then preview it here."),
            status_update,
            status_update,
            gr.Slider.update(maximum=1)
        )
|