grpathak22 committed on
Commit
e4f9fa8
·
verified ·
1 Parent(s): eaca639

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -57
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from transformers import AutoTokenizer, MT5ForConditionalGeneration
2
  from transformers import T5Tokenizer
3
  import streamlit as st
@@ -8,63 +9,6 @@ from datasets import Dataset, DatasetDict
8
  from transformers import Trainer, TrainingArguments
9
 
10
 
11
- tokenizer = T5Tokenizer.from_pretrained('google/mt5-base')
12
- model = MT5ForConditionalGeneration.from_pretrained("google/mt5-base")
13
- #st.write(model)
14
-
15
- df = pd.read_csv('proverbs.csv')
16
- df
17
- dataset = Dataset.from_pandas(df)
18
-
19
- def preprocess_function(examples):
20
- inputs = examples['Proverb']
21
- targets = examples['Meaning']
22
- model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
23
- with tokenizer.as_target_tokenizer():
24
- labels = tokenizer(targets, max_length=128, truncation=True, padding="max_length")
25
- model_inputs["labels"] = labels["input_ids"]
26
- return model_inputs
27
-
28
-
29
- tokenized_dataset = dataset.map(preprocess_function, batched=True)
30
-
31
-
32
- dataset_split = tokenized_dataset.train_test_split(test_size=0.2)
33
-
34
-
35
- train_dataset = dataset_split['train']
36
- test_dataset = dataset_split['test']
37
-
38
-
39
- print(f"Training dataset size: {len(train_dataset)}")
40
- print(f"Testing dataset size: {len(test_dataset)}")
41
-
42
- training_args = TrainingArguments(
43
- output_dir="./results",
44
- evaluation_strategy="epoch",
45
- learning_rate=2e-5,
46
- per_device_train_batch_size=4,
47
- per_device_eval_batch_size=4,
48
- num_train_epochs=3,
49
- weight_decay=0.01,
50
- save_total_limit=2,
51
- save_steps=500,
52
- )
53
-
54
- # Initialize Trainer
55
- trainer = Trainer(
56
- model=model,
57
- args=training_args,
58
- train_dataset=tokenized_dataset,
59
- eval_dataset=tokenized_dataset, # Typically you'd have a separate eval dataset
60
- )
61
-
62
- # Fine-tune the model
63
- trainer.train()
64
-
65
- model.save_pretrained("./fine-tuned-mt5-marathi-proverbs")
66
- tokenizer.save_pretrained("./fine-tuned-mt5-marathi-proverbs")
67
-
68
  prompt = st.text_input("Enter your proverb: ")
69
 
70
  # Tokenize the input prompt
 
1
+ import modelrun.py
2
  from transformers import AutoTokenizer, MT5ForConditionalGeneration
3
  from transformers import T5Tokenizer
4
  import streamlit as st
 
9
  from transformers import Trainer, TrainingArguments
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  prompt = st.text_input("Enter your proverb: ")
13
 
14
  # Tokenize the input prompt