Spaces:

ramalMr
/

data_gen

Running

ramalMr committed on Apr 6, 2024

Commit

5ff454a

verified ·

1 Parent(s): aebf89a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -31,34 +31,29 @@ def generate(file, prompt, temperature, max_new_tokens, top_p, repetition_penalt
         "seed": 42,
     }
-    all_outputs = []
     for sentence in sentences:
         try:
-            stream = client.text_generation(f"{prompt} {sentence}", **generate_kwargs, stream=True, details=True, return_full_text=False)
             output = ""
             for response in stream:
                 output += response.token.text
-            all_outputs.append(output)
             data.append({"original_sentence": sentence, "generated_data": output})
         except Exception as e:
             print(f"Error generating data for sentence '{sentence}': {e}")
-            all_outputs.append("")
-            data.append({"original_sentence": sentence, "generated_data": ""})
     filename = "synthetic_data.json"
     save_to_json(data, filename)
-    with open("model_outputs.txt", "w", encoding="utf-8") as f:
-        for output in all_outputs:
-            f.write(output + "\n")
     return filename
 def save_to_json(data, filename):
     json_data = []
     for item in data:
-        generated_sentences = re.findall(r"{'generated_sentence': '(.+?)'", item['generated_data'])
-        confidence_scores = [0.9] * len(generated_sentences)  # Varsayılan güven skoru
         json_data.append({
             'original_sentence': item['original_sentence'],
             'generated_sentences': generated_sentences,

         "seed": 42,
     }
     for sentence in sentences:
         try:
+            stream = client.text_generation(f"{prompt} Output the response in the following JSON format: {{'generated_sentence': 'The generated sentence text', 'confidence_score': 0.9}} {sentence}", **generate_kwargs, stream=True, details=True, return_full_text=False)
             output = ""
             for response in stream:
                 output += response.token.text
             data.append({"original_sentence": sentence, "generated_data": output})
         except Exception as e:
             print(f"Error generating data for sentence '{sentence}': {e}")
     filename = "synthetic_data.json"
     save_to_json(data, filename)
     return filename
 def save_to_json(data, filename):
     json_data = []
     for item in data:
+        generated_sentences = []
+        confidence_scores = []
+        for match in re.finditer(r"{'generated_sentence': '(.+?)', 'confidence_score': ([\d\.]+)}", item['generated_data']):
+            generated_sentences.append(match.group(1))
+            confidence_scores.append(float(match.group(2)))
         json_data.append({
             'original_sentence': item['original_sentence'],
             'generated_sentences': generated_sentences,