lhoestq HF staff committed on
Commit
d615ba4
·
1 Parent(s): de383a5
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -365,7 +365,7 @@ with gr.Blocks(css=css) as demo:
365
  continue
366
  break
367
  # for debugging
368
- # with open(f"output{indices_to_generate[0]}.txt", "w") as f:
369
  # f.write(generated_text)
370
 
371
 
@@ -399,9 +399,9 @@ with gr.Blocks(css=css) as demo:
399
 
400
  def parse_csv_df(csv: str, csv_header: Optional[str] = None) -> pd.DataFrame:
401
  # Fix generation mistake when providing a list that is not in quotes
402
- for match in re.finditer(r'\[("[\w ]+"[, ]?)+\]', csv):
403
  span = match.string[match.start() : match.end()]
404
- csv = csv.replace(span, '"' + span.replace('"', "'") + '"')
405
  # Add header if missing
406
  if csv_header and csv.strip().split("\n")[0] != csv_header:
407
  csv = csv_header + "\n" + csv
@@ -539,6 +539,8 @@ with gr.Blocks(css=css) as demo:
539
 
540
  @generate_full_dataset_button.click(inputs=[dataset_title, dataset_content, search_bar], outputs=[dataset_dataframe, generate_full_dataset_button, save_dataset_button])
541
  def generate_full_dataset(title, content, search_query):
 
 
542
  csv_header, preview_df = parse_preview_df(content)
543
  # Remove dummy "id" columns
544
  for column_name, values in preview_df.to_dict(orient="series").items():
 
365
  continue
366
  break
367
  # for debugging
368
+ # with open(f".output{indices_to_generate[0]}.txt", "w") as f:
369
  # f.write(generated_text)
370
 
371
 
 
399
 
400
  def parse_csv_df(csv: str, csv_header: Optional[str] = None) -> pd.DataFrame:
401
  # Fix generation mistake when providing a list that is not in quotes
402
+ for match in re.finditer(r'''(?!")\[(["'][\w ]+["'][, ]*)+\](?!")''', csv):
403
  span = match.string[match.start() : match.end()]
404
+ csv = csv.replace(span, '"' + span.replace('"', "'") + '"', 1)
405
  # Add header if missing
406
  if csv_header and csv.strip().split("\n")[0] != csv_header:
407
  csv = csv_header + "\n" + csv
 
539
 
540
  @generate_full_dataset_button.click(inputs=[dataset_title, dataset_content, search_bar], outputs=[dataset_dataframe, generate_full_dataset_button, save_dataset_button])
541
  def generate_full_dataset(title, content, search_query):
542
+ dataset_name, tags = title.strip("# ").split("\ntags:", 1)
543
+ dataset_name, tags = dataset_name.strip(), tags.strip()
544
  csv_header, preview_df = parse_preview_df(content)
545
  # Remove dummy "id" columns
546
  for column_name, values in preview_df.to_dict(orient="series").items():