ramalMr committed on
Commit
14ffea8
·
verified ·
1 Parent(s): fa4d0d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
3
  import PyPDF2
4
  import random
5
  import pandas as pd
6
- from io import StringIO
7
  import csv
8
  import os
9
 
@@ -29,7 +29,9 @@ def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
29
 
30
  # CSV dosyası için başlık
31
  if not os.path.exists("synthetic_data.csv"):
32
- save_to_csv("Original Sentence", "Synthetic Data")
 
 
33
 
34
  for sentence in sentences:
35
  sentence = sentence.strip()
@@ -55,7 +57,9 @@ def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
55
  print(f"Error generating data for sentence '{sentence}': {e}")
56
  save_to_csv(sentence, f"Error: {e}")
57
 
58
- return gr.File(value="synthetic_data.csv", file_name="synthetic_data.csv")
 
 
59
 
60
  gr.Interface(
61
  fn=generate,
@@ -66,7 +70,7 @@ gr.Interface(
66
  gr.Slider(label="Top-p (nucleus sampling)", value=0.95, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
67
  gr.Slider(label="Repetition penalty", value=1.0, minimum=1.0, maximum=2.0, step=0.1, interactive=True, info="Penalize repeated tokens"),
68
  ],
69
- outputs="file",
70
  title="Synthetic Data Generation",
71
  description="This tool generates synthetic data from the sentences in your PDF and saves it to a CSV file.",
72
  allow_flagging="never",
 
3
  import PyPDF2
4
  import random
5
  import pandas as pd
6
+ from io import BytesIO
7
  import csv
8
  import os
9
 
 
29
 
30
  # CSV dosyası için başlık
31
  if not os.path.exists("synthetic_data.csv"):
32
+ with open("synthetic_data.csv", mode='w', newline='', encoding='utf-8') as file:
33
+ writer = csv.writer(file)
34
+ writer.writerow(["Original Sentence", "Synthetic Data"])
35
 
36
  for sentence in sentences:
37
  sentence = sentence.strip()
 
57
  print(f"Error generating data for sentence '{sentence}': {e}")
58
  save_to_csv(sentence, f"Error: {e}")
59
 
60
+ # CSV dosyasını okuyup byte olarak döndür
61
+ with open("synthetic_data.csv", "rb") as file:
62
+ return file.read(), "synthetic_data.csv"
63
 
64
  gr.Interface(
65
  fn=generate,
 
70
  gr.Slider(label="Top-p (nucleus sampling)", value=0.95, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
71
  gr.Slider(label="Repetition penalty", value=1.0, minimum=1.0, maximum=2.0, step=0.1, interactive=True, info="Penalize repeated tokens"),
72
  ],
73
+ outputs=gr.File(label="Download Synthetic Data CSV"),
74
  title="Synthetic Data Generation",
75
  description="This tool generates synthetic data from the sentences in your PDF and saves it to a CSV file.",
76
  allow_flagging="never",