Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,11 @@
|
|
1 |
import json
|
2 |
from huggingface_hub import InferenceClient
|
3 |
import gradio as gr
|
|
|
4 |
import random
|
5 |
import pandas as pd
|
6 |
from io import BytesIO
|
|
|
7 |
import os
|
8 |
import io
|
9 |
import tempfile
|
@@ -15,7 +17,7 @@ def extract_sentences_from_excel(file):
|
|
15 |
df = pd.read_excel(file)
|
16 |
text = ' '.join(df['Unnamed: 1'].astype(str))
|
17 |
sentences = text.split('.')
|
18 |
-
sentences = [s.strip() for s in sentences if s.strip()]
|
19 |
return sentences
|
20 |
|
21 |
def save_to_json(data, filename="synthetic_data.json"):
|
@@ -42,14 +44,14 @@ def generate(file, prompt, temperature, max_new_tokens, top_p, repetition_penalt
|
|
42 |
}
|
43 |
|
44 |
try:
|
45 |
-
stream = client.text_generation(f"{prompt} Output the response in JSON format.", **generate_kwargs, stream=True, details=True, return_full_text=False)
|
46 |
output = ""
|
47 |
for response in stream:
|
48 |
output += response.token.text
|
49 |
|
50 |
try:
|
51 |
json_output = json.loads(output)
|
52 |
-
data.append({"original_sentence": sentence, "
|
53 |
except json.JSONDecodeError:
|
54 |
print(f"Error decoding JSON for sentence '{sentence}': {output}")
|
55 |
|
|
|
1 |
import json
|
2 |
from huggingface_hub import InferenceClient
|
3 |
import gradio as gr
|
4 |
+
import PyPDF2
|
5 |
import random
|
6 |
import pandas as pd
|
7 |
from io import BytesIO
|
8 |
+
import csv
|
9 |
import os
|
10 |
import io
|
11 |
import tempfile
|
|
|
17 |
df = pd.read_excel(file)
|
18 |
text = ' '.join(df['Unnamed: 1'].astype(str))
|
19 |
sentences = text.split('.')
|
20 |
+
sentences = [s.strip() for s in sentences if s.strip() and s.strip() != 'nan']
|
21 |
return sentences
|
22 |
|
23 |
def save_to_json(data, filename="synthetic_data.json"):
|
|
|
44 |
}
|
45 |
|
46 |
try:
|
47 |
+
stream = client.text_generation(f"{prompt} Output the response in the following JSON format: {{'generated_sentence': 'The generated sentence text', 'confidence_score': 0.9}}", **generate_kwargs, stream=True, details=True, return_full_text=False)
|
48 |
output = ""
|
49 |
for response in stream:
|
50 |
output += response.token.text
|
51 |
|
52 |
try:
|
53 |
json_output = json.loads(output)
|
54 |
+
data.append({"original_sentence": sentence, "generated_data": json_output})
|
55 |
except json.JSONDecodeError:
|
56 |
print(f"Error decoding JSON for sentence '{sentence}': {output}")
|
57 |
|