ramalMr committed
Commit 3c1274a · verified · 1 Parent(s): 1fd65af

Update app.py

Files changed (1):
  1. app.py +16 -6
app.py CHANGED
@@ -18,6 +18,11 @@ def extract_text_from_pdf(file):
 def generate_synthetic_data(sentences, temperature, max_new_tokens, top_p, repetition_penalty):
     synthetic_data = []
     for sentence in sentences:
+        # Trim whitespace and skip if the sentence is empty
+        sentence = sentence.strip()
+        if not sentence:
+            continue
+
         generate_kwargs = {
             "temperature": temperature,
             "max_new_tokens": max_new_tokens,
@@ -26,12 +31,17 @@ def generate_synthetic_data(sentences, temperature, max_new_tokens, top_p, repet
             "do_sample": True,
             "seed": 42,
         }
-        formatted_prompt = sentence  # Using the sentence directly as the prompt
-        stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-        output = ""
-        for response in stream:
-            output += response.token.text
-        synthetic_data.append(output)
+
+        try:
+            stream = client.text_generation(sentence, **generate_kwargs, stream=True, details=True, return_full_text=False)
+            output = ""
+            for response in stream:
+                output += response.token.text
+            synthetic_data.append(output)
+        except Exception as e:
+            print(f"Error generating data for sentence '{sentence}': {e}")
+            # Optionally, append a placeholder or error message to `synthetic_data` to maintain alignment with input sentences
+            synthetic_data.append(f"Error: {e}")
     return synthetic_data
 
 def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
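For context, here is a minimal sketch of the streaming call that the new try/except wraps, assuming `client` is a `huggingface_hub.InferenceClient` as the call signature suggests; the model id and the sample sentence are placeholders and are not taken from this repo.

# Minimal sketch of the streaming generation the patch guards (assumptions:
# app.py defines `client` elsewhere; the model id below is a placeholder).
from huggingface_hub import InferenceClient

client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.2")  # placeholder model id

generate_kwargs = {
    "temperature": 0.7,
    "max_new_tokens": 128,
    "top_p": 0.95,
    "repetition_penalty": 1.1,
    "do_sample": True,
    "seed": 42,
}

sentence = "The cat sat on the mat."  # placeholder input sentence
try:
    # stream=True with details=True yields token-level outputs; each item
    # exposes the generated text fragment as response.token.text.
    stream = client.text_generation(
        sentence, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = "".join(response.token.text for response in stream)
    print(output)
except Exception as e:
    # Mirrors the patch: record the failure instead of aborting the whole loop.
    print(f"Error generating data for sentence '{sentence}': {e}")

With per-sentence error handling, one failed request no longer aborts the entire batch, and appending an "Error: ..." placeholder keeps the output list aligned with the non-empty input sentences, as the added comment in the diff notes.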