ramalMr committed on
Commit
beea405
·
verified ·
1 Parent(s): eb1c35b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -22
app.py CHANGED
@@ -2,43 +2,24 @@ from huggingface_hub import InferenceClient
2
  import gradio as gr
3
  import random
4
  import pandas as pd
5
- from io import BytesIO
6
  import csv
7
  import os
8
  import io
9
  import tempfile
10
  import re
11
- from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
12
 
13
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
14
 
15
- tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
16
- model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
17
-
18
- def translate_to_english(text, source_lang):
19
- encoded_input = tokenizer(text, return_tensors="pt")
20
- generated_tokens = model.generate(**encoded_input, forced_bos_token_id=tokenizer.get_lang_id("en"))
21
- translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
22
- return translated_text
23
-
24
- def translate_to_azerbaijani(text):
25
- encoded_input = tokenizer(text, return_tensors="pt")
26
- generated_tokens = model.generate(**encoded_input, forced_bos_token_id=tokenizer.get_lang_id("az"))
27
- translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
28
- return translated_text
29
-
30
  def extract_text_from_excel(file):
31
  df = pd.read_excel(file)
32
  text = ' '.join(df['Unnamed: 1'].astype(str))
33
- source_lang = "az" # Azerbaijani
34
- english_text = translate_to_english(text, source_lang)
35
- return english_text
36
 
37
  def save_to_csv(sentence, output, filename="synthetic_data.csv"):
38
- azerbaijani_output = translate_to_azerbaijani(output)
39
  with open(filename, mode='a', newline='', encoding='utf-8') as file:
40
  writer = csv.writer(file)
41
- writer.writerow([sentence, azerbaijani_output])
42
 
43
  def generate(file, temperature, max_new_tokens, top_p, repetition_penalty, num_similar_sentences):
44
  text = extract_text_from_excel(file)
 
2
  import gradio as gr
3
  import random
4
  import pandas as pd
5
+ from io import BytesIO
6
  import csv
7
  import os
8
  import io
9
  import tempfile
10
  import re
 
11
 
12
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def extract_text_from_excel(file):
15
  df = pd.read_excel(file)
16
  text = ' '.join(df['Unnamed: 1'].astype(str))
17
+ return text
 
 
18
 
19
  def save_to_csv(sentence, output, filename="synthetic_data.csv"):
 
20
  with open(filename, mode='a', newline='', encoding='utf-8') as file:
21
  writer = csv.writer(file)
22
+ writer.writerow([sentence, output])
23
 
24
  def generate(file, temperature, max_new_tokens, top_p, repetition_penalty, num_similar_sentences):
25
  text = extract_text_from_excel(file)