ramalMr committed on
Commit
d435c8a
·
verified ·
1 Parent(s): a1f8d56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -1,9 +1,11 @@
1
  import json
2
  from huggingface_hub import InferenceClient
3
  import gradio as gr
 
4
  import random
5
  import pandas as pd
6
  from io import BytesIO
 
7
  import os
8
  import io
9
  import tempfile
@@ -15,7 +17,7 @@ def extract_sentences_from_excel(file):
15
  df = pd.read_excel(file)
16
  text = ' '.join(df['Unnamed: 1'].astype(str))
17
  sentences = text.split('.')
18
- sentences = [s.strip() for s in sentences if s.strip()]
19
  return sentences
20
 
21
  def save_to_json(data, filename="synthetic_data.json"):
@@ -42,14 +44,14 @@ def generate(file, prompt, temperature, max_new_tokens, top_p, repetition_penalt
42
  }
43
 
44
  try:
45
- stream = client.text_generation(f"{prompt} Output the response in JSON format.", **generate_kwargs, stream=True, details=True, return_full_text=False)
46
  output = ""
47
  for response in stream:
48
  output += response.token.text
49
 
50
  try:
51
  json_output = json.loads(output)
52
- data.append({"original_sentence": sentence, "generated_sentence": json_output})
53
  except json.JSONDecodeError:
54
  print(f"Error decoding JSON for sentence '{sentence}': {output}")
55
 
 
1
  import json
2
  from huggingface_hub import InferenceClient
3
  import gradio as gr
4
+ import PyPDF2
5
  import random
6
  import pandas as pd
7
  from io import BytesIO
8
+ import csv
9
  import os
10
  import io
11
  import tempfile
 
17
  df = pd.read_excel(file)
18
  text = ' '.join(df['Unnamed: 1'].astype(str))
19
  sentences = text.split('.')
20
+ sentences = [s.strip() for s in sentences if s.strip() and s.strip() != 'nan']
21
  return sentences
22
 
23
  def save_to_json(data, filename="synthetic_data.json"):
 
44
  }
45
 
46
  try:
47
+ stream = client.text_generation(f"{prompt} Output the response in the following JSON format: {{'generated_sentence': 'The generated sentence text', 'confidence_score': 0.9}}", **generate_kwargs, stream=True, details=True, return_full_text=False)
48
  output = ""
49
  for response in stream:
50
  output += response.token.text
51
 
52
  try:
53
  json_output = json.loads(output)
54
+ data.append({"original_sentence": sentence, "generated_data": json_output})
55
  except json.JSONDecodeError:
56
  print(f"Error decoding JSON for sentence '{sentence}': {output}")
57