Spaces:

ramalMr
/

data_gen

Sleeping

ramalMr committed on Apr 2, 2024

Commit

d435c8a

verified ·

1 Parent(s): a1f8d56

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import json
 from huggingface_hub import InferenceClient
 import gradio as gr
 import random
 import pandas as pd
 from io import BytesIO
 import os
 import io
 import tempfile
@@ -15,7 +17,7 @@ def extract_sentences_from_excel(file):
     df = pd.read_excel(file)
     text = ' '.join(df['Unnamed: 1'].astype(str))
     sentences = text.split('.')
-    sentences = [s.strip() for s in sentences if s.strip()]
     return sentences
 def save_to_json(data, filename="synthetic_data.json"):
@@ -42,14 +44,14 @@ def generate(file, prompt, temperature, max_new_tokens, top_p, repetition_penalt
             }
             try:
-                stream = client.text_generation(f"{prompt} Output the response in JSON format.", **generate_kwargs, stream=True, details=True, return_full_text=False)
                 output = ""
                 for response in stream:
                     output += response.token.text
                 try:
                     json_output = json.loads(output)
-                    data.append({"original_sentence": sentence, "generated_sentence": json_output})
                 except json.JSONDecodeError:
                     print(f"Error decoding JSON for sentence '{sentence}': {output}")

 import json
 from huggingface_hub import InferenceClient
 import gradio as gr
+import PyPDF2
 import random
 import pandas as pd
 from io import BytesIO
+import csv
 import os
 import io
 import tempfile
     df = pd.read_excel(file)
     text = ' '.join(df['Unnamed: 1'].astype(str))
     sentences = text.split('.')
+    sentences = [s.strip() for s in sentences if s.strip() and s.strip() != 'nan']
     return sentences
 def save_to_json(data, filename="synthetic_data.json"):
             }
             try:
+                stream = client.text_generation(f"{prompt} Output the response in the following JSON format: {{'generated_sentence': 'The generated sentence text', 'confidence_score': 0.9}}", **generate_kwargs, stream=True, details=True, return_full_text=False)
                 output = ""
                 for response in stream:
                     output += response.token.text
                 try:
                     json_output = json.loads(output)
+                    data.append({"original_sentence": sentence, "generated_data": json_output})
                 except json.JSONDecodeError:
                     print(f"Error decoding JSON for sentence '{sentence}': {output}")