ramalMr committed on
Commit
4a3788c
·
verified ·
1 Parent(s): f5352d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -17
app.py CHANGED
@@ -1,13 +1,10 @@
1
  from huggingface_hub import InferenceClient
2
- import gradio as gr
3
  import pandas as pd
4
  import re
5
  import random
6
  import csv
7
- import os
8
- import io
9
  import tempfile
10
-
11
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
12
 
13
  def extract_sentences_from_excel(file):
@@ -19,21 +16,16 @@ def extract_sentences_from_excel(file):
19
  sentences.extend([s.strip() for s in new_sentences if s.strip()])
20
  return sentences
21
 
22
- def save_to_csv(sentence, output, filename="synthetic_data.csv"):
23
- with open(filename, mode='a', newline='', encoding='utf-8') as file:
24
- writer = csv.writer(file)
25
- writer.writerow([sentence, output])
26
-
27
- def generate(file, temperature, max_new_tokens, top_p, repetition_penalty, num_sentences=10000):
28
  sentences = extract_sentences_from_excel(file)
29
- random.shuffle(sentences) # Shuffle sentences
30
 
31
  with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
32
  fieldnames = ['Original Sentence', 'Generated Sentence']
33
  writer = csv.DictWriter(tmp, fieldnames=fieldnames)
34
  writer.writeheader()
35
 
36
- for sentence in sentences[:num_sentences]: # Process the first num_sentences sentences
37
  sentence = sentence.strip()
38
  if not sentence:
39
  continue
@@ -65,9 +57,8 @@ def generate(file, temperature, max_new_tokens, top_p, repetition_penalty, num_s
65
  tmp_path = tmp.name
66
 
67
  return tmp_path
68
-
69
  gr.Interface(
70
- fn=generate,
71
  inputs=[
72
  gr.File(label="Upload Excel File", file_count="single", file_types=[".xlsx", ".xls"]),
73
  gr.Slider(label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs"),
@@ -76,8 +67,8 @@ gr.Interface(
76
  gr.Slider(label="Repetition penalty", value=1.0, minimum=1.0, maximum=2.0, step=0.1, interactive=True, info="Penalize repeated tokens"),
77
  gr.Slider(label="Number of sentences", value=10000, minimum=1, maximum=100000, step=1000, interactive=True, info="The number of sentences to generate"),
78
  ],
79
- outputs=gr.File(label="Synthetic Data"),
80
- title="SDG",
81
- description="AYE QABIL.",
82
  allow_flagging="never",
83
  ).launch()
 
1
  from huggingface_hub import InferenceClient
 
2
  import pandas as pd
3
  import re
4
  import random
5
  import csv
 
 
6
  import tempfile
7
+ import gradio as gr
8
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
9
 
10
  def extract_sentences_from_excel(file):
 
16
  sentences.extend([s.strip() for s in new_sentences if s.strip()])
17
  return sentences
18
 
19
+ def generate_text(file, temperature, max_new_tokens, top_p, repetition_penalty, num_sentences=10000):
 
 
 
 
 
20
  sentences = extract_sentences_from_excel(file)
21
+ random.shuffle(sentences)
22
 
23
  with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
24
  fieldnames = ['Original Sentence', 'Generated Sentence']
25
  writer = csv.DictWriter(tmp, fieldnames=fieldnames)
26
  writer.writeheader()
27
 
28
+ for sentence in sentences[:num_sentences]:
29
  sentence = sentence.strip()
30
  if not sentence:
31
  continue
 
57
  tmp_path = tmp.name
58
 
59
  return tmp_path
 
60
  gr.Interface(
61
+ fn=generate_text,
62
  inputs=[
63
  gr.File(label="Upload Excel File", file_count="single", file_types=[".xlsx", ".xls"]),
64
  gr.Slider(label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs"),
 
67
  gr.Slider(label="Repetition penalty", value=1.0, minimum=1.0, maximum=2.0, step=0.1, interactive=True, info="Penalize repeated tokens"),
68
  gr.Slider(label="Number of sentences", value=10000, minimum=1, maximum=100000, step=1000, interactive=True, info="The number of sentences to generate"),
69
  ],
70
+ outputs=gr.File(label="Generated CSV"),
71
+ title="Text Generation from Excel",
72
+ description="Generate text from sentences in an Excel file and save it to a CSV file.",
73
  allow_flagging="never",
74
  ).launch()