ramalMr committed on
Commit
09b14bf
·
verified ·
1 Parent(s): 4e49a48

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -17
app.py CHANGED
@@ -1,5 +1,6 @@
1
  from huggingface_hub import InferenceClient
2
  import gradio as gr
 
3
  import random
4
  import pandas as pd
5
  from io import BytesIO
@@ -8,12 +9,12 @@ import os
8
  import io
9
  import tempfile
10
  import re
11
- from transformers import MarianMTModel, MarianTokenizer
12
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
13
 
14
  def extract_text_from_excel(file):
15
  df = pd.read_excel(file)
16
- text = ' '.join(df['Unnamed: 1'].astype(str))
17
  return text
18
 
19
  def save_to_csv(sentence, output, filename="synthetic_data.csv"):
@@ -21,16 +22,6 @@ def save_to_csv(sentence, output, filename="synthetic_data.csv"):
21
  writer = csv.writer(file)
22
  writer.writerow([sentence, output])
23
 
24
-
25
-
26
- def translate_english_to_azerbaijani(text):
27
- model_name = 'Helsinki-NLP/opus-mt-en-az'
28
- tokenizer = MarianTokenizer.from_pretrained(model_name)
29
- model = MarianMTModel.from_pretrained(model_name)
30
- translated = model.generate(**tokenizer.prepare_translation_batch([text]))
31
- translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
32
- return translated_text
33
-
34
  def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
35
  text = extract_text_from_excel(file)
36
  sentences = text.split('.')
@@ -39,7 +30,7 @@ def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
39
  with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
40
  fieldnames = ['Original Sentence', 'Generated Sentence']
41
  writer = csv.DictWriter(tmp, fieldnames=fieldnames)
42
- writer.writeheader()
43
 
44
  for sentence in sentences:
45
  sentence = sentence.strip()
@@ -65,9 +56,7 @@ def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
65
  generated_sentences = [s.strip() for s in generated_sentences if s.strip() and s != '.']
66
 
67
  for generated_sentence in generated_sentences:
68
- translated_original = translate_english_to_azerbaijani(sentence)
69
- translated_generated = translate_english_to_azerbaijani(generated_sentence)
70
- writer.writerow({'Original Sentence': translated_original, 'Generated Sentence': translated_generated})
71
 
72
  except Exception as e:
73
  print(f"Error generating data for sentence '{sentence}': {e}")
@@ -89,4 +78,4 @@ gr.Interface(
89
  title="SDG",
90
  description="AYE QABIL.",
91
  allow_flagging="never",
92
- ).launch()
 
1
  from huggingface_hub import InferenceClient
2
  import gradio as gr
3
+ import PyPDF2
4
  import random
5
  import pandas as pd
6
  from io import BytesIO
 
9
  import io
10
  import tempfile
11
  import re
12
+
13
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
14
 
15
  def extract_text_from_excel(file):
16
  df = pd.read_excel(file)
17
+ text = ' '.join(df['Column_Name'].astype(str))
18
  return text
19
 
20
  def save_to_csv(sentence, output, filename="synthetic_data.csv"):
 
22
  writer = csv.writer(file)
23
  writer.writerow([sentence, output])
24
 
 
 
 
 
 
 
 
 
 
 
25
  def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
26
  text = extract_text_from_excel(file)
27
  sentences = text.split('.')
 
30
  with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
31
  fieldnames = ['Original Sentence', 'Generated Sentence']
32
  writer = csv.DictWriter(tmp, fieldnames=fieldnames)
33
+ writer.writeheader()
34
 
35
  for sentence in sentences:
36
  sentence = sentence.strip()
 
56
  generated_sentences = [s.strip() for s in generated_sentences if s.strip() and s != '.']
57
 
58
  for generated_sentence in generated_sentences:
59
+ writer.writerow({'Original Sentence': sentence, 'Generated Sentence': generated_sentence})
 
 
60
 
61
  except Exception as e:
62
  print(f"Error generating data for sentence '{sentence}': {e}")
 
78
  title="SDG",
79
  description="AYE QABIL.",
80
  allow_flagging="never",
81
+ ).launch()