ramalMr committed on
Commit
5ff454a
·
verified ·
1 Parent(s): aebf89a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -11
app.py CHANGED
@@ -31,34 +31,29 @@ def generate(file, prompt, temperature, max_new_tokens, top_p, repetition_penalt
31
  "seed": 42,
32
  }
33
 
34
- all_outputs = []
35
  for sentence in sentences:
36
  try:
37
- stream = client.text_generation(f"{prompt} {sentence}", **generate_kwargs, stream=True, details=True, return_full_text=False)
38
  output = ""
39
  for response in stream:
40
  output += response.token.text
41
- all_outputs.append(output)
42
  data.append({"original_sentence": sentence, "generated_data": output})
43
  except Exception as e:
44
  print(f"Error generating data for sentence '{sentence}': {e}")
45
- all_outputs.append("")
46
- data.append({"original_sentence": sentence, "generated_data": ""})
47
 
48
  filename = "synthetic_data.json"
49
  save_to_json(data, filename)
50
 
51
- with open("model_outputs.txt", "w", encoding="utf-8") as f:
52
- for output in all_outputs:
53
- f.write(output + "\n")
54
-
55
  return filename
56
 
57
  def save_to_json(data, filename):
58
  json_data = []
59
  for item in data:
60
- generated_sentences = re.findall(r"{'generated_sentence': '(.+?)'", item['generated_data'])
61
- confidence_scores = [0.9] * len(generated_sentences) # Varsayılan güven skoru
 
 
 
62
  json_data.append({
63
  'original_sentence': item['original_sentence'],
64
  'generated_sentences': generated_sentences,
 
31
  "seed": 42,
32
  }
33
 
 
34
  for sentence in sentences:
35
  try:
36
+ stream = client.text_generation(f"{prompt} Output the response in the following JSON format: {{'generated_sentence': 'The generated sentence text', 'confidence_score': 0.9}} {sentence}", **generate_kwargs, stream=True, details=True, return_full_text=False)
37
  output = ""
38
  for response in stream:
39
  output += response.token.text
 
40
  data.append({"original_sentence": sentence, "generated_data": output})
41
  except Exception as e:
42
  print(f"Error generating data for sentence '{sentence}': {e}")
 
 
43
 
44
  filename = "synthetic_data.json"
45
  save_to_json(data, filename)
46
 
 
 
 
 
47
  return filename
48
 
49
  def save_to_json(data, filename):
50
  json_data = []
51
  for item in data:
52
+ generated_sentences = []
53
+ confidence_scores = []
54
+ for match in re.finditer(r"{'generated_sentence': '(.+?)', 'confidence_score': ([\d\.]+)}", item['generated_data']):
55
+ generated_sentences.append(match.group(1))
56
+ confidence_scores.append(float(match.group(2)))
57
  json_data.append({
58
  'original_sentence': item['original_sentence'],
59
  'generated_sentences': generated_sentences,