zetavg committed on
Commit
7f924a0
·
unverified ·
1 Parent(s): 4a6324a

show error traces on templates

Browse files
Files changed (1) hide show
  1. llama_lora/ui/finetune_ui.py +76 -7
llama_lora/ui/finetune_ui.py CHANGED
@@ -1,6 +1,8 @@
1
  import os
2
  import json
3
  import time
 
 
4
  from datetime import datetime
5
  import gradio as gr
6
  import math
@@ -123,6 +125,9 @@ def get_data_from_input(load_dataset_from, dataset_text, dataset_text_format,
123
  return data
124
 
125
 
 
 
 
126
  def refresh_preview(
127
  template,
128
  load_dataset_from,
@@ -135,7 +140,6 @@ def refresh_preview(
135
  preview_show_actual_prompt,
136
  ):
137
  try:
138
- max_preview_count = 30
139
  prompter = Prompter(template)
140
  variable_names = prompter.get_variable_names()
141
 
@@ -174,7 +178,7 @@ def refresh_preview(
174
  if data_count > max_preview_count:
175
  preview_info_message += f" Previewing the first {max_preview_count}."
176
 
177
- info_message = f"{data_count} item(s)."
178
  if load_dataset_from == "Data Dir":
179
  info_message = "This dataset contains about " + info_message
180
  update_message = gr.Markdown.update(info_message, visible=True)
@@ -186,6 +190,62 @@ def refresh_preview(
186
  return gr.Dataframe.update(value={'data': [], 'headers': []}), gr.Markdown.update("Set the dataset in the \"Prepare\" tab, then preview it here."), update_message, update_message
187
 
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  def parse_plain_text_input(
190
  value,
191
  variables_separator, input_output_separator, data_separator,
@@ -582,11 +642,20 @@ def finetune_ui():
582
  i.change(
583
  fn=refresh_preview,
584
  inputs=dataset_preview_inputs,
585
- outputs=[finetune_dataset_preview,
586
- finetune_dataset_preview_info_message,
587
- dataset_from_text_message,
588
- dataset_from_data_dir_message
589
- ]
 
 
 
 
 
 
 
 
 
590
  ))
591
 
592
  things_that_might_timeout.append(reload_selections_button.click(
 
1
  import os
2
  import json
3
  import time
4
+ import traceback
5
+ import re
6
  from datetime import datetime
7
  import gradio as gr
8
  import math
 
125
  return data
126
 
127
 
128
+ max_preview_count = 30
129
+
130
+
131
  def refresh_preview(
132
  template,
133
  load_dataset_from,
 
140
  preview_show_actual_prompt,
141
  ):
142
  try:
 
143
  prompter = Prompter(template)
144
  variable_names = prompter.get_variable_names()
145
 
 
178
  if data_count > max_preview_count:
179
  preview_info_message += f" Previewing the first {max_preview_count}."
180
 
181
+ info_message = f"about {data_count} item(s)."
182
  if load_dataset_from == "Data Dir":
183
  info_message = "This dataset contains about " + info_message
184
  update_message = gr.Markdown.update(info_message, visible=True)
 
190
  return gr.Dataframe.update(value={'data': [], 'headers': []}), gr.Markdown.update("Set the dataset in the \"Prepare\" tab, then preview it here."), update_message, update_message
191
 
192
 
193
+ def refresh_dataset_items_count(
194
+ template,
195
+ load_dataset_from,
196
+ dataset_from_data_dir,
197
+ dataset_text,
198
+ dataset_text_format,
199
+ dataset_plain_text_input_variables_separator,
200
+ dataset_plain_text_input_and_output_separator,
201
+ dataset_plain_text_data_separator,
202
+ preview_show_actual_prompt,
203
+ ):
204
+ try:
205
+ prompter = Prompter(template)
206
+ variable_names = prompter.get_variable_names()
207
+
208
+ data = get_data_from_input(
209
+ load_dataset_from=load_dataset_from,
210
+ dataset_text=dataset_text,
211
+ dataset_text_format=dataset_text_format,
212
+ dataset_plain_text_input_variables_separator=dataset_plain_text_input_variables_separator,
213
+ dataset_plain_text_input_and_output_separator=dataset_plain_text_input_and_output_separator,
214
+ dataset_plain_text_data_separator=dataset_plain_text_data_separator,
215
+ dataset_from_data_dir=dataset_from_data_dir,
216
+ prompter=prompter
217
+ )
218
+
219
+ train_data = prompter.get_train_data_from_dataset(
220
+ data)
221
+ data_count = len(train_data)
222
+
223
+ preview_info_message = f"The dataset contains {data_count} item(s)."
224
+ if data_count > max_preview_count:
225
+ preview_info_message += f" Previewing the first {max_preview_count}."
226
+
227
+ info_message = f"{data_count} item(s)."
228
+ if load_dataset_from == "Data Dir":
229
+ info_message = "This dataset contains " + info_message
230
+ update_message = gr.Markdown.update(info_message, visible=True)
231
+
232
+ return gr.Markdown.update(preview_info_message), update_message, update_message
233
+ except Exception as e:
234
+ update_message = gr.Markdown.update(
235
+ f"<span class=\"finetune_dataset_error_message\">Error: {e}.</span>", visible=True)
236
+
237
+ trace = traceback.format_exc()
238
+ traces = [s.strip() for s in re.split("\n * File ", trace)]
239
+ templates_path = os.path.join(Global.data_dir, "templates")
240
+ traces_to_show = [s for s in traces if os.path.join(Global.data_dir, "templates") in s]
241
+ traces_to_show = [re.sub(" *\n *", ": ", s) for s in traces_to_show]
242
+ if len(traces_to_show) > 0:
243
+ update_message = gr.Markdown.update(
244
+ f"<span class=\"finetune_dataset_error_message\">Error: {e} ({','.join(traces_to_show)}).</span>", visible=True)
245
+
246
+ return gr.Markdown.update("Set the dataset in the \"Prepare\" tab, then preview it here."), update_message, update_message
247
+
248
+
249
  def parse_plain_text_input(
250
  value,
251
  variables_separator, input_output_separator, data_separator,
 
642
  i.change(
643
  fn=refresh_preview,
644
  inputs=dataset_preview_inputs,
645
+ outputs=[
646
+ finetune_dataset_preview,
647
+ finetune_dataset_preview_info_message,
648
+ dataset_from_text_message,
649
+ dataset_from_data_dir_message
650
+ ]
651
+ ).then(
652
+ fn=refresh_dataset_items_count,
653
+ inputs=dataset_preview_inputs,
654
+ outputs=[
655
+ finetune_dataset_preview_info_message,
656
+ dataset_from_text_message,
657
+ dataset_from_data_dir_message
658
+ ]
659
  ))
660
 
661
  things_that_might_timeout.append(reload_selections_button.click(