Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
|
|
3 |
import PyPDF2
|
4 |
import random
|
5 |
import pandas as pd
|
6 |
-
from io import
|
7 |
import csv
|
8 |
import os
|
9 |
|
@@ -29,7 +29,9 @@ def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
|
|
29 |
|
30 |
# CSV dosyası için başlık
|
31 |
if not os.path.exists("synthetic_data.csv"):
|
32 |
-
|
|
|
|
|
33 |
|
34 |
for sentence in sentences:
|
35 |
sentence = sentence.strip()
|
@@ -55,7 +57,9 @@ def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
|
|
55 |
print(f"Error generating data for sentence '{sentence}': {e}")
|
56 |
save_to_csv(sentence, f"Error: {e}")
|
57 |
|
58 |
-
|
|
|
|
|
59 |
|
60 |
gr.Interface(
|
61 |
fn=generate,
|
@@ -66,7 +70,7 @@ gr.Interface(
|
|
66 |
gr.Slider(label="Top-p (nucleus sampling)", value=0.95, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
|
67 |
gr.Slider(label="Repetition penalty", value=1.0, minimum=1.0, maximum=2.0, step=0.1, interactive=True, info="Penalize repeated tokens"),
|
68 |
],
|
69 |
-
outputs="
|
70 |
title="Synthetic Data Generation",
|
71 |
description="This tool generates synthetic data from the sentences in your PDF and saves it to a CSV file.",
|
72 |
allow_flagging="never",
|
|
|
3 |
import PyPDF2
|
4 |
import random
|
5 |
import pandas as pd
|
6 |
+
from io import BytesIO
|
7 |
import csv
|
8 |
import os
|
9 |
|
|
|
29 |
|
30 |
# CSV dosyası için başlık
|
31 |
if not os.path.exists("synthetic_data.csv"):
|
32 |
+
with open("synthetic_data.csv", mode='w', newline='', encoding='utf-8') as file:
|
33 |
+
writer = csv.writer(file)
|
34 |
+
writer.writerow(["Original Sentence", "Synthetic Data"])
|
35 |
|
36 |
for sentence in sentences:
|
37 |
sentence = sentence.strip()
|
|
|
57 |
print(f"Error generating data for sentence '{sentence}': {e}")
|
58 |
save_to_csv(sentence, f"Error: {e}")
|
59 |
|
60 |
+
# CSV dosyasını okuyup byte olarak döndür
|
61 |
+
with open("synthetic_data.csv", "rb") as file:
|
62 |
+
return file.read(), "synthetic_data.csv"
|
63 |
|
64 |
gr.Interface(
|
65 |
fn=generate,
|
|
|
70 |
gr.Slider(label="Top-p (nucleus sampling)", value=0.95, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
|
71 |
gr.Slider(label="Repetition penalty", value=1.0, minimum=1.0, maximum=2.0, step=0.1, interactive=True, info="Penalize repeated tokens"),
|
72 |
],
|
73 |
+
outputs=gr.File(label="Download Synthetic Data CSV"),
|
74 |
title="Synthetic Data Generation",
|
75 |
description="This tool generates synthetic data from the sentences in your PDF and saves it to a CSV file.",
|
76 |
allow_flagging="never",
|