gagan3012 committed
Commit 0ee5810 · 1 Parent(s): c015c4c
src/data/make_dataset.py CHANGED
@@ -10,5 +10,6 @@ def make_dataset(dataset='cnn_dailymail', split='train', version="3.0.0"):
     df['output_text'] = dataset['target']
     return df
 
+
 if __name__ == '__main__':
-    make_dataset(dataset='cnn_dailymail', split='train', version="3.0.0")
+    make_dataset(dataset='cnn_dailymail', split='train', version="3.0.0")
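The hunk above shows only the tail of make_dataset. For orientation, here is a minimal sketch of what a function with this signature typically does, assuming it loads the requested split with the Hugging Face datasets library and returns a pandas DataFrame with input_text/output_text columns; the repo's actual body (which goes through a 'target' column) is not shown in full, so treat this as an approximation.

# Hedged sketch of a make_dataset with this signature; not the repo's exact code.
import pandas as pd
from datasets import load_dataset


def make_dataset(dataset='cnn_dailymail', split='train', version="3.0.0"):
    # Load one split of the given dataset/config from the Hub.
    data = load_dataset(dataset, version, split=split)
    df = pd.DataFrame()
    # cnn_dailymail ships 'article' (source text) and 'highlights' (reference summary).
    df['input_text'] = data['article']
    df['output_text'] = data['highlights']
    return df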
src/models/model.py CHANGED
@@ -302,16 +302,7 @@ class Summarization:
             tokenizer=self.tokenizer, model=self.model, output=outputdir
         )
 
-        # checkpoint_callback = ModelCheckpoint(
-        #     dirpath="checkpoints",
-        #     filename="best-checkpoint-{epoch}-{train_loss:.2f}",
-        #     save_top_k=-1,
-        #     verbose=True,
-        #     monitor="train_loss",
-        #     mode="min",
-        # )
-
-        logger = MLFlowLogger(experiment_name="Summarization")
+        logger = MLFlowLogger(experiment_name="Summarization",tracking_uri="https://dagshub.com/gagan3012/summarization.mlflow")
 
         early_stop_callback = (
             [
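The replaced logger now points MLflow tracking at a DagsHub-hosted server. Below is a minimal sketch of how such an MLFlowLogger is typically handed to a PyTorch Lightning Trainer; the Trainer construction and the credential handling are assumptions for illustration, not code from this repo.

# Hedged sketch: wiring a remote-tracking MLFlowLogger into a Lightning Trainer.
import os
import pytorch_lightning as pl
from pytorch_lightning.loggers import MLFlowLogger

# Remote MLflow servers such as DagsHub's generally require credentials; MLflow
# reads these standard environment variables (placeholder values shown).
os.environ.setdefault("MLFLOW_TRACKING_USERNAME", "<dagshub-user>")
os.environ.setdefault("MLFLOW_TRACKING_PASSWORD", "<dagshub-token>")

logger = MLFlowLogger(
    experiment_name="Summarization",
    tracking_uri="https://dagshub.com/gagan3012/summarization.mlflow",
)

trainer = pl.Trainer(max_epochs=3, logger=logger)
# A trainer.fit(...) call on the Lightning module would follow in the surrounding method.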
src/models/predict_model.py CHANGED
@@ -1,4 +1,5 @@
 from .model import Summarization
+from .make_dataset import make_dataset
 
 def predict_model(text):
     """
@@ -8,4 +9,9 @@ def predict_model(text):
     model.load_model()
     pre_summary = model.predict(text)
     return pre_summary
-
+
+
+if __name__ == '__main__':
+    text = make_dataset(split="test")['input_text']
+    pre_summary = predict_model(text)
+    print(pre_summary)
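One note on the new entry point: make_dataset(split="test")['input_text'] is an entire pandas column. If Summarization.predict expects a single string (its signature is not shown here), a per-example loop would look roughly like the hedged sketch below.

# Hedged per-example variant, assuming predict_model takes one input string.
if __name__ == '__main__':
    test_df = make_dataset(split="test")
    for text in test_df['input_text'][:3]:  # first few test articles only
        print(predict_model(text))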
src/models/train_model.py CHANGED
@@ -7,9 +7,12 @@ def train_model():
     """
     # Load the data
     train_df = make_dataset(split = 'train')
-    eval_df = make_dataset(split = 'test')
+    eval_df = make_dataset(split = 'val')
 
     model = Summarization()
     model.from_pretrained('t5-base')
     model.train(train_df=train_df, eval_df=eval_df, batch_size=4, max_epochs=3, use_gpu=True)
-    model.save_model()
+    model.save_model()
+
+if __name__ == '__main__':
+    train_model()
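On the split rename from 'test' to 'val': cnn_dailymail on the Hugging Face Hub exposes 'train', 'validation' and 'test' splits, so make_dataset presumably maps the short name before calling load_dataset. The helper below is a hedged sketch of that mapping, not the repo's code.

from datasets import load_dataset

# Hypothetical alias table; the repo may resolve 'val' differently.
SPLIT_ALIASES = {"val": "validation"}


def load_split(split="val", version="3.0.0"):
    # Resolve the short split name before hitting the Hub.
    return load_dataset("cnn_dailymail", version, split=SPLIT_ALIASES.get(split, split))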