Spaces:
Runtime error
Runtime error
File size: 5,630 Bytes
df9a42c cb54dbf 12f1cb0 c218419 df9a42c c218419 df9a42c c218419 df9a42c f7fc638 704ef72 65363f9 bd401d6 92639dd bd401d6 20e6100 572a1b8 eced0dd 8c37686 d147f19 965337e e172fe0 5c7f8ce c218419 dbb915b c218419 df9a42c c218419 df9a42c c218419 df9a42c c218419 f1f2430 c218419 df9a42c c218419 df9a42c c218419 df9a42c c218419 df9a42c c218419 df9a42c c218419 a598c32 d846f4a 38cf30c 79390b9 d4ff594 f0505a0 c218419 d483016 763abdb ba6e3c5 763abdb df9a42c f2606f8 a94f1fc 73dd1a5 279dcba 067d38d b82f5d2 2ec127a 346a532 ab59105 2fca2c2 778a5b6 e97ef55 198680c ff37c40 df9a42c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
import argparse
import os
import subprocess
import sys
import yaml
# Command-line interface definition for the ``t5s`` tool.
#
# One sub-parser is registered per supported command; the chosen command name
# is stored under ``command`` in the parsed namespace.
arg_parser = argparse.ArgumentParser(
    description="T5 Summarisation Using Pytorch Lightning", prog="t5s"
)

# Command choice
command_subparser = arg_parser.add_subparsers(
    dest="command", help="command (refer commands section in documentation)"
)

parser_req = command_subparser.add_parser(
    "requirements", help="Install Python Dependencies."
)

# ``start`` collects the data/model parameters. Numeric options carry an
# explicit ``type`` so that values given on the command line have the same
# Python type as the defaults (argparse otherwise leaves them as strings).
parser_start = command_subparser.add_parser("start", help="Define parameters")
parser_start.add_argument(
    "-d",
    "--dataset",
    default="cnn_dailymail",
    help="Enter the name of the dataset to be used",
)
parser_start.add_argument(
    "-s", "--split", type=float, default=0.001, help="Enter the split required"
)
# NOTE(review): the default is spelled "summarsiation"; kept unchanged because
# existing runs may already depend on that model name.
parser_start.add_argument(
    "-n", "--name", default="summarsiation", help="Enter the name of the model"
)
parser_start.add_argument(
    "-mt", "--model_type", default="t5", help="Enter the model type"
)
parser_start.add_argument(
    "-m",
    "--model_name",
    default="t5-base",
    help="Enter the model to be used eg t5-base",
)
parser_start.add_argument(
    "-e", "--epochs", type=int, default=5, help="Enter the number of epochs"
)
parser_start.add_argument(
    "-lr",
    "--learning-rate",
    type=float,
    default=0.0001,
    help="Enter the learning rate",
)
parser_start.add_argument(
    "-b", "--batch-size", type=int, default=2, help="Enter the batch size"
)

parser_dirs = command_subparser.add_parser(
    "dirs",
    help="Create directories that are ignored by git but required for the project",
)
parser_push = command_subparser.add_parser(
    "push", help="Upload Data to default DVC remote"
)
parser_pull = command_subparser.add_parser(
    "pull", help="Download Data from default DVC remote"
)
parser_run = command_subparser.add_parser(
    "run",
    help="run the DVC pipeline - recompute any modified outputs such as "
    "processed data or trained models",
)
parser_visualize = command_subparser.add_parser(
    "visualize", help="run the visualization using Streamlit"
)
parser_upload = command_subparser.add_parser(
    "upload", help="push the trained model to HF model hub"
)
parser_lint = command_subparser.add_parser("lint", help=" Lint using flake8")
parser_clone = command_subparser.add_parser(
    "clone", help="Clone the T5 summarisation repo"
)
class Run(object):
    """Execute the sub-command selected on the ``t5s`` command line.

    Args:
        arguments: Mapping of parsed options; must contain a ``"command"``
            key naming the sub-command to run.
    """

    def __init__(self, arguments: dict):
        self.arguments = arguments

    def execute(self):
        """Run the selected command and return its exit status.

        Returns:
            The return code of the spawned process for ``clone`` and the
            ``make``-backed commands, or ``0`` once ``start`` has finished.

        Raises:
            ValueError: If the command is not one of the supported names.
        """
        arguments = self.arguments
        command = arguments["command"]
        print(f"arguments passed: {command}")

        # Commands that are forwarded unchanged as ``make <command>``.
        make_targets = (
            "requirements",
            "dirs",
            "push",
            "pull",
            "run",
            "visualize",
            "upload",
            "lint",
        )

        if command == "clone":
            completed = subprocess.run(
                ["git", "clone", "https://dagshub.com/gagan3012/summarization.git"]
            )
            if completed.returncode != 0:
                # The checkout directory may not exist after a failed clone;
                # report git's own status instead of failing inside chdir.
                return completed.returncode
            os.chdir("./summarization/")
            print(os.getcwd())
            return completed.returncode

        if command == "start":
            os.chdir("./summarization/")
            print("""
usage: t5s start [-h] [-d DATASET] [-s SPLIT] [-n NAME] [-mt MODEL_TYPE]
[-m MODEL_NAME] [-e EPOCHS] [-lr LEARNING_RATE]
[-b BATCH_SIZE]
-h, --help show this help message and exit
-d DATASET, --dataset DATASET
Enter the name of the dataset to be used
-s SPLIT, --split SPLIT
Enter the split required
-n NAME, --name NAME Enter the name of the model
-mt MODEL_TYPE, --model_type MODEL_TYPE
Enter the model type
-m MODEL_NAME, --model_name MODEL_NAME
Enter the model to be used eg t5-base
-e EPOCHS, --epochs EPOCHS
Enter the number of epochs
-lr LEARNING_RATE, --learning-rate LEARNING_RATE
Enter the learning rate
-b BATCH_SIZE, --batch-size BATCH_SIZE
Enter the number of batches
""")
            start(arguments=arguments)
            return 0

        if command in make_targets:
            completed = subprocess.run(["make", command])
            return completed.returncode

        # Carry the message on the exception itself so that a caller printing
        # ``str(exc)`` shows something meaningful rather than an empty line.
        raise ValueError("Command not supported")
def start(arguments):
    """Write the run parameters to the YAML parameter files.

    ``data_params.yml`` is overwritten with the dataset settings, and the
    model settings are merged into the existing ``model_params.yml``. Both
    files are resolved relative to the current working directory. The merged
    parameters are printed at the end.

    Args:
        arguments: Mapping with the keys ``dataset``, ``split``, ``name``,
            ``model_type``, ``model_name``, ``epochs``, ``learning_rate``
            and ``batch_size``.

    Raises:
        KeyError: If one of the expected keys is missing from ``arguments``.
        FileNotFoundError: If ``model_params.yml`` does not exist.
    """
    data_params = {"data": arguments["dataset"], "split": arguments["split"]}
    model_params = {
        "name": arguments["name"],
        "model_type": arguments["model_type"],
        "model_name": arguments["model_name"],
        "epochs": arguments["epochs"],
        "learning_rate": arguments["learning_rate"],
        "batch_size": arguments["batch_size"],
    }

    with open("data_params.yml", "w") as f:
        yaml.dump(data_params, f)

    with open("model_params.yml") as f:
        # safe_load returns None for an empty document; fall back to an empty
        # mapping so the update below does not fail on a blank file.
        newdct = yaml.safe_load(f) or {}
    newdct.update(model_params)

    with open("model_params.yml", "w") as f:
        yaml.dump(newdct, f)

    # Data parameters are applied last, so they win on any key collision.
    dicts = {**newdct, **data_params}
    print("Final parameters for the run are: {}".format(dicts))
def parse_args(args):
    """Parse *args* with the module-level parser and return a plain dict.

    When *args* is empty (or otherwise falsy), ``--help`` is parsed instead.
    """
    effective_args = args if args else ["--help"]
    namespace = arg_parser.parse_args(args=effective_args)
    return vars(namespace)
def main(args=None):
    """Entry point: parse the arguments, run the command, then exit.

    Args:
        args: Argument list to parse; when ``None``, ``sys.argv[1:]`` is used.

    Any exception raised while executing the command is printed and turned
    into exit status 1. The function always ends by calling ``sys.exit``.
    """
    cli_args = sys.argv[1:] if args is None else args
    options = parse_args(args=cli_args)

    try:
        exit_status = Run(arguments=options).execute()
    except Exception as exc:
        print(str(exc))
        exit_status = 1

    sys.exit(exit_status)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|