Spaces:

ramalMr
/

data_gen

Sleeping

ramalMr committed on Apr 1, 2024

Commit

acf104d

verified ·

1 Parent(s): d53066f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -29,7 +29,7 @@ def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
     sentences = text.split('.')
     random.shuffle(sentences)  # Shuffle sentences
-    # Geçici dosya oluştur ve CSV yazıcısını başlat
     with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
         writer = csv.writer(tmp)
@@ -52,7 +52,7 @@ def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
                 output = ""
                 for response in stream:
                     output += response.token.text
-                writer.writerow([sentence, output])  # Orijinal cümle ve yanıt CSV'ye yazılır
             except Exception as e:
                 print(f"Error generating data for sentence '{sentence}': {e}")
@@ -68,8 +68,8 @@ gr.Interface(
         gr.Slider(label="Top-p (nucleus sampling)", value=0.95, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
         gr.Slider(label="Repetition penalty", value=1.0, minimum=1.0, maximum=2.0, step=0.1, interactive=True, info="Penalize repeated tokens"),
     ],
-    outputs=gr.File(label="Synthetic Data CSV"),
-    title="Synthetic Data Generation",
-    description="This tool generates synthetic data from the sentences in your PDF and saves it to a CSV file.",
     allow_flagging="never",
 ).launch()

     sentences = text.split('.')
     random.shuffle(sentences)  # Shuffle sentences
     with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
         writer = csv.writer(tmp)
                 output = ""
                 for response in stream:
                     output += response.token.text
+                writer.writerow([sentence, output])
             except Exception as e:
                 print(f"Error generating data for sentence '{sentence}': {e}")
         gr.Slider(label="Top-p (nucleus sampling)", value=0.95, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
         gr.Slider(label="Repetition penalty", value=1.0, minimum=1.0, maximum=2.0, step=0.1, interactive=True, info="Penalize repeated tokens"),
     ],
+    outputs=gr.File(label="Synthetic Data "),
+    title="SDG",
+    description="AYE QABIL.",
     allow_flagging="never",
 ).launch()