Spaces:
Runtime error
Runtime error
File size: 6,019 Bytes
df9a42c 33eec4d df9a42c cb54dbf 12f1cb0 c218419 df9a42c c218419 df9a42c c218419 df9a42c f7fc638 704ef72 65363f9 5d7d48b 65363f9 bd401d6 5d7d48b bd401d6 20e6100 572a1b8 f49c162 eced0dd 8c37686 5d7d48b d147f19 01dffc6 965337e 5d7d48b e172fe0 01dffc6 5c7f8ce c218419 dbb915b c218419 df9a42c c218419 df9a42c c218419 df9a42c c218419 5d7d48b c218419 df9a42c c218419 df9a42c c218419 df9a42c c218419 df9a42c c218419 5d7d48b df9a42c c218419 5d7d48b c218419 5d7d48b c218419 a598c32 d846f4a 38cf30c 79390b9 d4ff594 5d7d48b f49c162 d4ff594 5d7d48b f49c162 d4ff594 f49c162 d4ff594 f49c162 d4ff594 f49c162 d4ff594 f49c162 d4ff594 f49c162 d4ff594 f49c162 d4ff594 5d7d48b f0505a0 c218419 6a640f3 d483016 763abdb ba6e3c5 763abdb df9a42c f2606f8 a94f1fc 73dd1a5 279dcba 067d38d b82f5d2 2ec127a 346a532 ab59105 2fca2c2 778a5b6 e97ef55 d2a76b6 198680c ff37c40 df9a42c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
import argparse
import os
import pprint
import subprocess
import sys
import yaml
# Top-level parser for the ``t5s`` command-line tool.  Each sub-command below
# is registered on ``command_subparser`` and the chosen one is stored under
# the ``command`` key of the parsed namespace.
arg_parser = argparse.ArgumentParser(
    description="T5 Summarisation Using Pytorch Lightning", prog="t5s"
)

# Command choice
command_subparser = arg_parser.add_subparsers(
    dest="command", help="command (refer commands section in documentation)"
)

parser_req = command_subparser.add_parser(
    "requirements", help="Install Python Dependencies."
)

# ``start`` collects the dataset and model hyper-parameters for a run.
parser_start = command_subparser.add_parser("start", help="Define parameters")
parser_start.add_argument(
    "-d",
    "--dataset",
    default="cnn_dailymail",
    help="Enter the name of the dataset to be used",
    type=str,
)
parser_start.add_argument(
    "-s", "--split", default=0.001, help="Enter the split required", type=float
)
# NOTE(review): the default is spelled "summarsiation"; it is kept as-is
# because changing a default value would change runtime behaviour.
parser_start.add_argument(
    "-n", "--name", default="summarsiation", help="Enter the name of the model"
)
parser_start.add_argument(
    "-mt", "--model_type", default="t5", help="Enter the model type", type=str
)
parser_start.add_argument(
    "-m",
    "--model_name",
    default="t5-base",
    help="Enter the model to be used eg t5-base",
    type=str,
)
parser_start.add_argument(
    "-e", "--epochs", default=5, help="Enter the number of epochs", type=int
)
parser_start.add_argument(
    "-lr",
    "--learning-rate",
    default=0.0001,
    # Previously a copy-paste of the --epochs help text.
    help="Enter the learning rate",
    type=float,
)
parser_start.add_argument(
    "-b", "--batch-size", default=2, help="Enter the batch size", type=int
)

parser_dirs = command_subparser.add_parser(
    "dirs",
    help="Create directories that are ignored by git but required for the project",
)
parser_push = command_subparser.add_parser(
    "push", help="Upload Data to default DVC remote"
)
parser_pull = command_subparser.add_parser(
    "pull", help="Download Data from default DVC remote"
)
parser_run = command_subparser.add_parser(
    "run",
    help="run the DVC pipeline - recompute any modified outputs such as "
    "processed data or trained models",
)
parser_visualize = command_subparser.add_parser(
    "visualize", help="run the visualization using Streamlit"
)
parser_upload = command_subparser.add_parser(
    "upload", help="push the trained model to HF model hub"
)
parser_lint = command_subparser.add_parser("lint", help="Lint using flake8")

# ``clone`` fetches the project repository from the given DAGsHub account.
parser_clone = command_subparser.add_parser(
    "clone", help="Clone the T5 summarisation repo"
)
parser_clone.add_argument(
    "-u",
    "--username",
    help="Enter the DAGsHub username under which you forked the main repo",
    default="gagan3012",
    type=str,
)
class Run:
    """Execute one parsed ``t5s`` sub-command.

    ``arguments`` is the dictionary of parsed command-line options; its
    ``"command"`` entry selects what :meth:`execute` does.
    """

    # Sub-commands that are forwarded unchanged to ``make <command>``.
    _MAKE_COMMANDS = (
        "requirements",
        "dirs",
        "push",
        "pull",
        "run",
        "visualize",
        "upload",
        "lint",
    )

    # Directory (relative to the current working directory) that the
    # ``clone`` command creates and every other command runs inside.
    _REPO_DIR = "./summarization/"

    # Text printed before the ``start`` command writes its parameter files.
    _START_USAGE = """
usage: t5s start [-h] [-d DATASET] [-s SPLIT] [-n NAME] [-mt MODEL_TYPE]
[-m MODEL_NAME] [-e EPOCHS] [-lr LEARNING_RATE]
[-b BATCH_SIZE]
-h, --help show this help message and exit
-d DATASET, --dataset DATASET
Enter the name of the dataset to be used
-s SPLIT, --split SPLIT
Enter the split required
-n NAME, --name NAME Enter the name of the model
-mt MODEL_TYPE, --model_type MODEL_TYPE
Enter the model type
-m MODEL_NAME, --model_name MODEL_NAME
Enter the model to be used eg t5-base
-e EPOCHS, --epochs EPOCHS
Enter the number of epochs
-lr LEARNING_RATE, --learning-rate LEARNING_RATE
Enter the learning rate
-b BATCH_SIZE, --batch-size BATCH_SIZE
Enter the batch size
"""

    def __init__(self, arguments: dict):
        self.arguments = arguments

    def execute(self):
        """Run the selected sub-command and return a process exit status.

        * ``clone`` runs ``git clone`` for the user's repository, then changes
          into the new checkout and prints the working directory.
        * ``start`` changes into the checkout, prints the usage text and calls
          ``start`` to write the parameter files.
        * Any command listed in ``_MAKE_COMMANDS`` changes into the checkout
          and runs ``make <command>``.

        Returns:
            The return code of the spawned process, or ``0`` for ``start``.

        Raises:
            ValueError: if the command is none of the above.
            OSError: if the checkout directory cannot be entered.
        """
        arguments = self.arguments
        command = arguments["command"]
        print(f"arguments passed: {command}")

        if command == "clone":
            username = arguments["username"]
            completed = subprocess.run(
                ["git", "clone", f"https://dagshub.com/{username}/summarization.git"]
            )
            if completed.returncode != 0:
                # Report git's own failure instead of masking it with a
                # FileNotFoundError from chdir into a missing checkout.
                return completed.returncode
            os.chdir(self._REPO_DIR)
            print(os.getcwd())
            return completed.returncode

        if command == "start":
            os.chdir(self._REPO_DIR)
            print(self._START_USAGE)
            start(arguments=arguments)
            # Explicit success status so every branch returns an int.
            return 0

        if command in self._MAKE_COMMANDS:
            os.chdir(self._REPO_DIR)
            completed = subprocess.run(["make", command])
            return completed.returncode

        # Carry the message on the exception so callers that print or log
        # it see what went wrong.
        raise ValueError(f"Command not supported: {command!r}")
def start(arguments):
    """Write the run parameters to ``data_params.yml`` and ``model_params.yml``.

    Both files are opened relative to the current working directory.
    ``data_params.yml`` is overwritten with the dataset name and split.
    ``model_params.yml`` is read, its entries are updated with the model
    options from ``arguments``, and the merged mapping is written back.
    The combined parameters are then printed.

    Args:
        arguments: mapping with the keys ``dataset``, ``split``, ``name``,
            ``model_type``, ``model_name``, ``epochs``, ``learning_rate``
            and ``batch_size``.

    Raises:
        KeyError: if one of the keys above is missing.
        FileNotFoundError: if ``model_params.yml`` does not exist.
    """
    data_params = {"data": arguments["dataset"], "split": arguments["split"]}
    model_params = {
        "name": arguments["name"],
        "model_type": arguments["model_type"],
        "model_name": arguments["model_name"],
        "epochs": arguments["epochs"],
        "learning_rate": arguments["learning_rate"],
        "batch_size": arguments["batch_size"],
    }

    with open("data_params.yml", "w") as f:
        yaml.dump(data_params, f)

    with open("model_params.yml") as f:
        # safe_load returns None for an empty document; fall back to an
        # empty mapping so the update below does not fail.
        merged_model_params = yaml.safe_load(f) or {}
    merged_model_params.update(model_params)

    with open("model_params.yml", "w") as f:
        yaml.dump(merged_model_params, f)

    # Data parameters take precedence on a key clash, as before.
    final_params = {**merged_model_params, **data_params}
    # Pretty-print the mapping itself; pretty-printing a pre-formatted
    # string only emits its quoted repr.
    print("Final parameters for the run are:")
    pprint.pprint(final_params)
def parse_args(args):
    """Parse ``args`` with the module-level parser and return them as a dict.

    When ``args`` is empty or ``None`` the parser is given ``--help``
    instead, so it prints the help text and exits.
    """
    effective_args = args if args else ["--help"]
    namespace = arg_parser.parse_args(args=effective_args)
    return vars(namespace)
def main(args=None):
    """Command-line entry point: parse arguments, run the command, exit.

    Args:
        args: argument list to parse; defaults to ``sys.argv[1:]``.

    The process always terminates through ``sys.exit``: with the value
    returned by ``Run.execute`` on success, or with ``1`` when executing
    the command raised an exception.
    """
    if args is None:
        args = sys.argv[1:]
    parsed_args = parse_args(args=args)
    try:
        result = Run(arguments=parsed_args).execute()
    except Exception as e:
        # Diagnostics go to stderr so they do not mix with command output.
        # Fall back to repr() for exceptions raised without a message,
        # which would otherwise print an empty line.
        print(str(e) or repr(e), file=sys.stderr)
        result = 1
    sys.exit(result)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|