Spaces:
Runtime error
Runtime error
Bug fixes
Browse files
- src/data/make_dataset.py +2 -1
- src/models/model.py +1 -10
- src/models/predict_model.py +7 -1
- src/models/train_model.py +5 -2
src/data/make_dataset.py
CHANGED
|
@@ -10,5 +10,6 @@ def make_dataset(dataset='cnn_dailymail', split='train', version="3.0.0"):
|
|
| 10 |
df['output_text'] = dataset['target']
|
| 11 |
return df
|
| 12 |
|
|
|
|
| 13 |
if __name__ == '__main__':
|
| 14 |
-
make_dataset(dataset='cnn_dailymail', split='train', version="3.0.0")
|
|
|
|
| 10 |
df['output_text'] = dataset['target']
|
| 11 |
return df
|
| 12 |
|
| 13 |
+
|
| 14 |
if __name__ == '__main__':
|
| 15 |
+
make_dataset(dataset='cnn_dailymail', split='train', version="3.0.0")
|
src/models/model.py
CHANGED
|
@@ -302,16 +302,7 @@ class Summarization:
|
|
| 302 |
tokenizer=self.tokenizer, model=self.model, output=outputdir
|
| 303 |
)
|
| 304 |
|
| 305 |
-
|
| 306 |
-
# dirpath="checkpoints",
|
| 307 |
-
# filename="best-checkpoint-{epoch}-{train_loss:.2f}",
|
| 308 |
-
# save_top_k=-1,
|
| 309 |
-
# verbose=True,
|
| 310 |
-
# monitor="train_loss",
|
| 311 |
-
# mode="min",
|
| 312 |
-
# )
|
| 313 |
-
|
| 314 |
-
logger = MLFlowLogger(experiment_name="Summarization")
|
| 315 |
|
| 316 |
early_stop_callback = (
|
| 317 |
[
|
|
|
|
| 302 |
tokenizer=self.tokenizer, model=self.model, output=outputdir
|
| 303 |
)
|
| 304 |
|
| 305 |
+
logger = MLFlowLogger(experiment_name="Summarization",tracking_uri="https://dagshub.com/gagan3012/summarization.mlflow")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
|
| 307 |
early_stop_callback = (
|
| 308 |
[
|
src/models/predict_model.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
from .model import Summarization
|
|
|
|
| 2 |
|
| 3 |
def predict_model(text):
|
| 4 |
"""
|
|
@@ -8,4 +9,9 @@ def predict_model(text):
|
|
| 8 |
model.load_model()
|
| 9 |
pre_summary = model.predict(text)
|
| 10 |
return pre_summary
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from .model import Summarization
|
| 2 |
+
from .make_dataset import make_dataset
|
| 3 |
|
| 4 |
def predict_model(text):
|
| 5 |
"""
|
|
|
|
| 9 |
model.load_model()
|
| 10 |
pre_summary = model.predict(text)
|
| 11 |
return pre_summary
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
if __name__ == '__main__':
|
| 15 |
+
text = make_dataset(split="test")['input_text']
|
| 16 |
+
pre_summary = predict_model(text)
|
| 17 |
+
print(pre_summary)
|
src/models/train_model.py
CHANGED
|
@@ -7,9 +7,12 @@ def train_model():
|
|
| 7 |
"""
|
| 8 |
# Load the data
|
| 9 |
train_df = make_dataset(split = 'train')
|
| 10 |
-
eval_df = make_dataset(split = '
|
| 11 |
|
| 12 |
model = Summarization()
|
| 13 |
model.from_pretrained('t5-base')
|
| 14 |
model.train(train_df=train_df, eval_df=eval_df, batch_size=4, max_epochs=3, use_gpu=True)
|
| 15 |
-
model.save_model()
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
"""
|
| 8 |
# Load the data
|
| 9 |
train_df = make_dataset(split = 'train')
|
| 10 |
+
eval_df = make_dataset(split = 'val')
|
| 11 |
|
| 12 |
model = Summarization()
|
| 13 |
model.from_pretrained('t5-base')
|
| 14 |
model.train(train_df=train_df, eval_df=eval_df, batch_size=4, max_epochs=3, use_gpu=True)
|
| 15 |
+
model.save_model()
|
| 16 |
+
|
| 17 |
+
if __name__ == '__main__':
|
| 18 |
+
train_model()
|