Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,11 @@
|
|
|
|
1 |
from huggingface_hub import InferenceClient
|
2 |
import gradio as gr
|
3 |
import random
|
4 |
import pandas as pd
|
5 |
-
from io import BytesIO
|
6 |
-
import csv
|
7 |
import os
|
8 |
-
import io
|
9 |
import tempfile
|
10 |
import re
|
11 |
|
@@ -13,24 +13,20 @@ client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
|
13 |
|
14 |
def extract_sentences_from_excel(file):
|
15 |
df = pd.read_excel(file)
|
16 |
-
text = ' '.join(df['
|
17 |
sentences = text.split('.')
|
18 |
sentences = [s.strip() for s in sentences if s.strip()]
|
19 |
return sentences
|
20 |
|
21 |
-
def
|
22 |
-
with open(filename, mode='a',
|
23 |
-
|
24 |
-
writer.writerow([sentence, output])
|
25 |
|
26 |
def generate(file, prompt, temperature, max_new_tokens, top_p, repetition_penalty):
|
27 |
sentences = extract_sentences_from_excel(file)
|
|
|
28 |
|
29 |
-
with tempfile.NamedTemporaryFile(mode='w',
|
30 |
-
fieldnames = ['Original Sentence', 'Generated Sentence']
|
31 |
-
writer = csv.DictWriter(tmp, fieldnames=fieldnames)
|
32 |
-
writer.writeheader()
|
33 |
-
|
34 |
for sentence in sentences:
|
35 |
sentence = sentence.strip()
|
36 |
if not sentence:
|
@@ -46,25 +42,26 @@ def generate(file, prompt, temperature, max_new_tokens, top_p, repetition_penalt
|
|
46 |
}
|
47 |
|
48 |
try:
|
49 |
-
stream = client.text_generation(f"{prompt}
|
50 |
output = ""
|
51 |
for response in stream:
|
52 |
output += response.token.text
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
|
60 |
except Exception as e:
|
61 |
print(f"Error generating data for sentence '{sentence}': {e}")
|
62 |
|
|
|
63 |
tmp_path = tmp.name
|
64 |
|
65 |
return tmp_path
|
66 |
|
67 |
-
gr.Interface(
|
68 |
fn=generate,
|
69 |
inputs=[
|
70 |
gr.File(label="Upload Excel File", file_count="single", file_types=[".xlsx"]),
|
|
|
1 |
+
import json
|
2 |
from huggingface_hub import InferenceClient
|
3 |
import gradio as gr
|
4 |
import random
|
5 |
import pandas as pd
|
6 |
+
from io import BytesIO
|
|
|
7 |
import os
|
8 |
+
import io
|
9 |
import tempfile
|
10 |
import re
|
11 |
|
|
|
13 |
|
14 |
def extract_sentences_from_excel(file, column='Column_Name'):
    """Return the sentences found in one text column of an Excel sheet.

    The cells of ``column`` are joined with single spaces and the result is
    split on ``'.'``; each fragment is stripped and empty fragments are
    discarded.

    Args:
        file: Anything ``pandas.read_excel`` accepts (path or file-like
            object), or an already-loaded ``pandas.DataFrame``.
        column: Name of the column holding the text. Defaults to
            ``'Column_Name'``, the name this script has always read.

    Returns:
        A list of non-empty, whitespace-stripped sentence strings.

    Raises:
        KeyError: If ``column`` is not present in the sheet.
    """
    df = file if isinstance(file, pd.DataFrame) else pd.read_excel(file)
    if column not in df.columns:
        raise KeyError(
            f"Column {column!r} not found; available columns: {list(df.columns)}"
        )

    # Drop empty cells first: astype(str) would otherwise turn each missing
    # value into the literal text "nan"/"None", which then leaks into the
    # output as a bogus sentence.
    cells = df[column].dropna().astype(str)
    text = ' '.join(cells)

    return [part.strip() for part in text.split('.') if part.strip()]
|
20 |
|
21 |
+
def save_to_json(data, filename="synthetic_data.json"):
    """Serialize ``data`` to ``filename`` as a single UTF-8 JSON document.

    Args:
        data: Any JSON-serializable object (here, a list of dicts pairing
            each original sentence with its generated output).
        filename: Destination path. Existing content is replaced.

    Raises:
        TypeError: If ``data`` contains a value ``json`` cannot serialize.
        OSError: If the file cannot be opened for writing.
    """
    # Write mode, not append: appending a second document to an existing
    # file produces concatenated JSON values, which json.load rejects.
    with open(filename, mode='w', encoding='utf-8') as handle:
        # ensure_ascii=False keeps non-ASCII text readable instead of \u-escaped.
        json.dump(data, handle, indent=4, ensure_ascii=False)
|
|
|
24 |
|
25 |
def generate(file, prompt, temperature, max_new_tokens, top_p, repetition_penalty):
|
26 |
sentences = extract_sentences_from_excel(file)
|
27 |
+
data = []
|
28 |
|
29 |
+
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as tmp:
|
|
|
|
|
|
|
|
|
30 |
for sentence in sentences:
|
31 |
sentence = sentence.strip()
|
32 |
if not sentence:
|
|
|
42 |
}
|
43 |
|
44 |
try:
|
45 |
+
stream = client.text_generation(f"{prompt} Output the response in JSON format.", **generate_kwargs, stream=True, details=True, return_full_text=False)
|
46 |
output = ""
|
47 |
for response in stream:
|
48 |
output += response.token.text
|
49 |
|
50 |
+
try:
|
51 |
+
json_output = json.loads(output)
|
52 |
+
data.append({"original_sentence": sentence, "generated_sentence": json_output})
|
53 |
+
except json.JSONDecodeError:
|
54 |
+
print(f"Error decoding JSON for sentence '{sentence}': {output}")
|
55 |
|
56 |
except Exception as e:
|
57 |
print(f"Error generating data for sentence '{sentence}': {e}")
|
58 |
|
59 |
+
save_to_json(data, tmp.name)
|
60 |
tmp_path = tmp.name
|
61 |
|
62 |
return tmp_path
|
63 |
|
64 |
+
gr.Interface(
|
65 |
fn=generate,
|
66 |
inputs=[
|
67 |
gr.File(label="Upload Excel File", file_count="single", file_types=[".xlsx"]),
|