Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,11 @@ def extract_text_from_pdf(file):
|
|
18 |
def generate_synthetic_data(sentences, temperature, max_new_tokens, top_p, repetition_penalty):
|
19 |
synthetic_data = []
|
20 |
for sentence in sentences:
|
|
|
|
|
|
|
|
|
|
|
21 |
generate_kwargs = {
|
22 |
"temperature": temperature,
|
23 |
"max_new_tokens": max_new_tokens,
|
@@ -26,12 +31,17 @@ def generate_synthetic_data(sentences, temperature, max_new_tokens, top_p, repet
|
|
26 |
"do_sample": True,
|
27 |
"seed": 42,
|
28 |
}
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
35 |
return synthetic_data
|
36 |
|
37 |
def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
|
|
|
18 |
def generate_synthetic_data(sentences, temperature, max_new_tokens, top_p, repetition_penalty):
|
19 |
synthetic_data = []
|
20 |
for sentence in sentences:
|
21 |
+
# Trim whitespace and skip if the sentence is empty
|
22 |
+
sentence = sentence.strip()
|
23 |
+
if not sentence:
|
24 |
+
continue
|
25 |
+
|
26 |
generate_kwargs = {
|
27 |
"temperature": temperature,
|
28 |
"max_new_tokens": max_new_tokens,
|
|
|
31 |
"do_sample": True,
|
32 |
"seed": 42,
|
33 |
}
|
34 |
+
|
35 |
+
try:
|
36 |
+
stream = client.text_generation(sentence, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
37 |
+
output = ""
|
38 |
+
for response in stream:
|
39 |
+
output += response.token.text
|
40 |
+
synthetic_data.append(output)
|
41 |
+
except Exception as e:
|
42 |
+
print(f"Error generating data for sentence '{sentence}': {e}")
|
43 |
+
# Record the failure in `synthetic_data` so the caller sees which sentence failed; blank sentences are skipped above, so results align with the input only when none are blank
|
44 |
+
synthetic_data.append(f"Error: {e}")
|
45 |
return synthetic_data
|
46 |
|
47 |
def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
|