ramalMr committed on
Commit
7b026a2
·
verified ·
1 Parent(s): 9cbb806

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -25
app.py CHANGED
@@ -20,37 +20,42 @@ def generate_synthetic_data(file, temperature, max_new_tokens, top_p, repetition
20
  sentences = extract_sentences_from_excel(file)
21
  random.shuffle(sentences)
22
 
23
- with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
24
- fieldnames = ['Original Sentence', 'Synthetic Data']
25
- writer = csv.DictWriter(tmp, fieldnames=fieldnames)
26
- writer.writeheader()
 
 
27
 
28
- for sentence in sentences:
29
- sentence = sentence.strip()
30
- if not sentence:
31
- continue
 
 
 
 
32
 
33
- generate_kwargs = {
34
- "temperature": temperature,
35
- "max_new_tokens": max_new_tokens,
36
- "top_p": top_p,
37
- "repetition_penalty": repetition_penalty,
38
- "do_sample": True,
39
- "seed": 42,
40
- }
41
 
42
- try:
43
- output = client.generate(sentence, **generate_kwargs, return_full_text=True)
44
- generated_data = output.text.strip()
45
 
46
- generated_sentences = re.split(r'(?<=[\.\!\?:])[\s\n]+', generated_data)
47
- generated_sentences = [s.strip() for s in generated_sentences if s.strip() and s != '.']
48
 
49
- for generated_sentence in generated_sentences:
50
- writer.writerow({'Original Sentence': sentence, 'Synthetic Data': generated_sentence})
 
 
 
 
 
51
 
52
- except Exception as e:
53
- print(f"Error generating data for sentence '{sentence}': {e}")
54
 
55
  tmp_path = tmp.name
56
 
 
20
  sentences = extract_sentences_from_excel(file)
21
  random.shuffle(sentences)
22
 
23
+ generated_data = []
24
+
25
+ for sentence in sentences:
26
+ sentence = sentence.strip()
27
+ if not sentence:
28
+ continue
29
 
30
+ generate_kwargs = {
31
+ "temperature": temperature,
32
+ "max_new_tokens": max_new_tokens,
33
+ "top_p": top_p,
34
+ "repetition_penalty": repetition_penalty,
35
+ "do_sample": True,
36
+ "seed": 42,
37
+ }
38
 
39
+ try:
40
+ output = client.generate(sentence, **generate_kwargs, return_full_text=True)
41
+ synthetic_data = output.text.strip()
 
 
 
 
 
42
 
43
+ generated_sentences = re.split(r'(?<=[\.\!\?:])[\s\n]+', synthetic_data)
44
+ generated_sentences = [s.strip() for s in generated_sentences if s.strip() and s != '.']
 
45
 
46
+ for generated_sentence in generated_sentences:
47
+ generated_data.append({'Original Sentence': sentence, 'Synthetic Data': generated_sentence})
48
 
49
+ except Exception as e:
50
+ print(f"Error generating data for sentence '{sentence}': {e}")
51
+
52
+ with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
53
+ fieldnames = ['Original Sentence', 'Synthetic Data']
54
+ writer = csv.DictWriter(tmp, fieldnames=fieldnames)
55
+ writer.writeheader()
56
 
57
+ for data in generated_data:
58
+ writer.writerow(data)
59
 
60
  tmp_path = tmp.name
61