ramalMr committed on
Commit
57e7335
·
verified ·
1 Parent(s): 7242619

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -14
app.py CHANGED
@@ -2,27 +2,19 @@ from huggingface_hub import InferenceClient
2
  import gradio as gr
3
  import random
4
  import pandas as pd
5
- from io import BytesIO
6
  import csv
7
- import os
8
- import io
9
  import tempfile
10
  import re
11
 
12
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
13
 
14
- def extract_text_from_excel(file):
15
  df = pd.read_excel(file)
16
- text = ' '.join(df['Unnamed: 1'].astype(str))
17
  return text
18
 
19
- def save_to_csv(sentence, output, filename="synthetic_data.csv"):
20
- with open(filename, mode='a', newline='', encoding='utf-8') as file:
21
- writer = csv.writer(file)
22
- writer.writerow([sentence, output])
23
-
24
- def generate(file, temperature, max_new_tokens, top_p, repetition_penalty, num_similar_sentences):
25
- text = extract_text_from_excel(file)
26
  sentences = text.split('.')
27
  random.shuffle(sentences) # Shuffle sentences
28
 
@@ -46,10 +38,10 @@ def generate(file, temperature, max_new_tokens, top_p, repetition_penalty, num_s
46
  }
47
 
48
  try:
49
- stream = client.text_generation(sentence, **generate_kwargs, stream=True, details=True, return_full_text=False)
50
  output = ""
51
  for response in stream:
52
- output += response.token.text
53
 
54
  generated_sentences = re.split(r'(?<=[\.\!\?:])[\s\n]+', output)
55
  generated_sentences = [s.strip() for s in generated_sentences if s.strip() and s != '.']
@@ -71,6 +63,7 @@ gr.Interface(
71
  fn=generate,
72
  inputs=[
73
  gr.File(label="Upload Excel File", file_count="single", file_types=[".xlsx"]),
 
74
  gr.Slider(label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs"),
75
  gr.Slider(label="Max new tokens", value=256, minimum=0, maximum=5120, step=64, interactive=True, info="The maximum numbers of new tokens"),
76
  gr.Slider(label="Top-p (nucleus sampling)", value=0.95, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
 
2
  import gradio as gr
3
  import random
4
  import pandas as pd
 
5
  import csv
 
 
6
  import tempfile
7
  import re
8
 
9
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
10
 
11
def extract_text_from_excel(file, column_name):
    """Return the cells of one spreadsheet column joined into a single string.

    Args:
        file: Anything ``pandas.read_excel`` accepts (a path or file-like
            object), or an already-loaded ``pandas.DataFrame``.
        column_name: Header of the column to read. If the exact name is not
            present, the name with surrounding whitespace removed is tried.

    Returns:
        The non-empty cells of the column, converted to ``str`` and joined
        with single spaces.

    Raises:
        KeyError: If the column does not exist; the message lists the
            columns that are available.
    """
    df = file if isinstance(file, pd.DataFrame) else pd.read_excel(file)

    if column_name not in df.columns:
        # Names typed into a form field often carry stray whitespace.
        stripped = column_name.strip() if isinstance(column_name, str) else column_name
        if stripped not in df.columns:
            available = ", ".join(map(str, df.columns))
            raise KeyError(
                f"Column {column_name!r} not found; available columns: {available}"
            )
        column_name = stripped

    # Empty cells are loaded as NaN; drop them so the literal text "nan"
    # does not end up in the joined output.
    return " ".join(df[column_name].dropna().astype(str))
15
 
16
+ def generate(file, column_name, temperature, max_new_tokens, top_p, repetition_penalty, num_similar_sentences):
17
+ text = extract_text_from_excel(file, column_name)
 
 
 
 
 
18
  sentences = text.split('.')
19
  random.shuffle(sentences) # Shuffle sentences
20
 
 
38
  }
39
 
40
  try:
41
+ stream = client.text_generation(sentence, **generate_kwargs, stream=True, return_full_text=False)
42
  output = ""
43
  for response in stream:
44
+ output += response.text
45
 
46
  generated_sentences = re.split(r'(?<=[\.\!\?:])[\s\n]+', output)
47
  generated_sentences = [s.strip() for s in generated_sentences if s.strip() and s != '.']
 
63
  fn=generate,
64
  inputs=[
65
  gr.File(label="Upload Excel File", file_count="single", file_types=[".xlsx"]),
66
+ gr.TextAreaInput(label="Column Name", placeholder="Enter the column name"),
67
  gr.Slider(label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs"),
68
  gr.Slider(label="Max new tokens", value=256, minimum=0, maximum=5120, step=64, interactive=True, info="The maximum numbers of new tokens"),
69
  gr.Slider(label="Top-p (nucleus sampling)", value=0.95, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),