Update app.py
Browse files
app.py
CHANGED
@@ -31,34 +31,29 @@ def generate(file, prompt, temperature, max_new_tokens, top_p, repetition_penalt
|
|
31 |
"seed": 42,
|
32 |
}
|
33 |
|
34 |
-
all_outputs = []
|
35 |
for sentence in sentences:
|
36 |
try:
|
37 |
-
stream = client.text_generation(f"{prompt} {sentence}", **generate_kwargs, stream=True, details=True, return_full_text=False)
|
38 |
output = ""
|
39 |
for response in stream:
|
40 |
output += response.token.text
|
41 |
-
all_outputs.append(output)
|
42 |
data.append({"original_sentence": sentence, "generated_data": output})
|
43 |
except Exception as e:
|
44 |
print(f"Error generating data for sentence '{sentence}': {e}")
|
45 |
-
all_outputs.append("")
|
46 |
-
data.append({"original_sentence": sentence, "generated_data": ""})
|
47 |
|
48 |
filename = "synthetic_data.json"
|
49 |
save_to_json(data, filename)
|
50 |
|
51 |
-
with open("model_outputs.txt", "w", encoding="utf-8") as f:
|
52 |
-
for output in all_outputs:
|
53 |
-
f.write(output + "\n")
|
54 |
-
|
55 |
return filename
|
56 |
|
57 |
def save_to_json(data, filename):
|
58 |
json_data = []
|
59 |
for item in data:
|
60 |
-
generated_sentences =
|
61 |
-
confidence_scores = [
|
|
|
|
|
|
|
62 |
json_data.append({
|
63 |
'original_sentence': item['original_sentence'],
|
64 |
'generated_sentences': generated_sentences,
|
|
|
31 |
"seed": 42,
|
32 |
}
|
33 |
|
|
|
34 |
for sentence in sentences:
|
35 |
try:
|
36 |
+
stream = client.text_generation(f"{prompt} Output the response in the following JSON format: {{'generated_sentence': 'The generated sentence text', 'confidence_score': 0.9}} {sentence}", **generate_kwargs, stream=True, details=True, return_full_text=False)
|
37 |
output = ""
|
38 |
for response in stream:
|
39 |
output += response.token.text
|
|
|
40 |
data.append({"original_sentence": sentence, "generated_data": output})
|
41 |
except Exception as e:
|
42 |
print(f"Error generating data for sentence '{sentence}': {e}")
|
|
|
|
|
43 |
|
44 |
filename = "synthetic_data.json"
|
45 |
save_to_json(data, filename)
|
46 |
|
|
|
|
|
|
|
|
|
47 |
return filename
|
48 |
|
49 |
def save_to_json(data, filename):
|
50 |
json_data = []
|
51 |
for item in data:
|
52 |
+
generated_sentences = []
|
53 |
+
confidence_scores = []
|
54 |
+
for match in re.finditer(r"{'generated_sentence': '(.+?)', 'confidence_score': ([\d\.]+)}", item['generated_data']):
|
55 |
+
generated_sentences.append(match.group(1))
|
56 |
+
confidence_scores.append(float(match.group(2)))
|
57 |
json_data.append({
|
58 |
'original_sentence': item['original_sentence'],
|
59 |
'generated_sentences': generated_sentences,
|