ramalMr committed on
Commit
f5352d5
·
verified ·
1 Parent(s): e4726fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -1,19 +1,23 @@
1
  from huggingface_hub import InferenceClient
2
  import gradio as gr
3
  import pandas as pd
 
4
  import random
5
  import csv
6
  import os
7
  import io
8
  import tempfile
9
- import re
10
- import openpyxl
11
 
12
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
13
 
14
- def extract_data_from_excel(file):
15
  df = pd.read_excel(file)
16
- return df.values.tolist()
 
 
 
 
 
17
 
18
  def save_to_csv(sentence, output, filename="synthetic_data.csv"):
19
  with open(filename, mode='a', newline='', encoding='utf-8') as file:
@@ -21,8 +25,7 @@ def save_to_csv(sentence, output, filename="synthetic_data.csv"):
21
  writer.writerow([sentence, output])
22
 
23
  def generate(file, temperature, max_new_tokens, top_p, repetition_penalty, num_sentences=10000):
24
- data = extract_data_from_excel(file)
25
- sentences = [str(row) for row in data] # Convert each row to a string
26
  random.shuffle(sentences) # Shuffle sentences
27
 
28
  with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
@@ -74,7 +77,7 @@ gr.Interface(
74
  gr.Slider(label="Number of sentences", value=10000, minimum=1, maximum=100000, step=1000, interactive=True, info="The number of sentences to generate"),
75
  ],
76
  outputs=gr.File(label="Synthetic Data"),
77
- title="SDG",
78
  description="AYE QABIL.",
79
  allow_flagging="never",
80
  ).launch()
 
1
  from huggingface_hub import InferenceClient
2
  import gradio as gr
3
  import pandas as pd
4
+ import re
5
  import random
6
  import csv
7
  import os
8
  import io
9
  import tempfile
 
 
10
 
11
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
12
 
13
def extract_sentences_from_excel(file):
    """Read an Excel workbook and return its cell text split into sentences.

    Every row of the sheet is flattened into one string (cells joined by a
    single space) and that string is split into sentences at whitespace
    that follows a '.', '!' or '?'.

    Args:
        file: Anything accepted by ``pandas.read_excel`` (path or
            file-like object).

    Returns:
        A flat list of non-empty, stripped sentence strings, in row order.
        Each sentence keeps its own terminating punctuation.

    Note:
        The split is purely punctuation based, so abbreviations such as
        "e.g. this" are still treated as a sentence boundary.
    """
    df = pd.read_excel(file)
    sentences = []
    for row in df.values.tolist():
        # Empty cells are reported by pandas as NaN/NaT; skip them so the
        # literal word "nan" does not leak into the generated sentences.
        text = ' '.join(str(cell) for cell in row if not pd.isna(cell))
        # Split on the whitespace *after* a terminator. Splitting before it
        # would glue the punctuation onto the start of the next sentence,
        # and requiring whitespace keeps decimals such as "3.14" intact.
        for piece in re.split(r'(?<=[.!?])\s+', text):
            piece = piece.strip()
            if piece:
                sentences.append(piece)
    return sentences
21
 
22
  def save_to_csv(sentence, output, filename="synthetic_data.csv"):
23
  with open(filename, mode='a', newline='', encoding='utf-8') as file:
 
25
  writer.writerow([sentence, output])
26
 
27
  def generate(file, temperature, max_new_tokens, top_p, repetition_penalty, num_sentences=10000):
28
+ sentences = extract_sentences_from_excel(file)
 
29
  random.shuffle(sentences) # Shuffle sentences
30
 
31
  with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
 
77
  gr.Slider(label="Number of sentences", value=10000, minimum=1, maximum=100000, step=1000, interactive=True, info="The number of sentences to generate"),
78
  ],
79
  outputs=gr.File(label="Synthetic Data"),
80
+ title="SDG",
81
  description="AYE QABIL.",
82
  allow_flagging="never",
83
  ).launch()