|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
A collection of utilities for comparing `examples/complete_*_example.py` scripts with the capabilities inside of each |
|
`examples/by_feature` example. `compare_against_test` is the main function that should be used when testing, while the |
|
others are used to either get the code that matters, or to preprocess them (such as stripping comments) |
|
""" |
|
|
|
import os |
|
from typing import List |
|
|
|
|
|
def get_function_contents_by_name(lines: List[str], name: str):
    """
    Extracts a function from `lines` of segmented source code with the name `name`.

    The function is taken to start at the first line containing `def {name}`. `training_function` ends right before
    the first following line containing `def main`, and `main` ends right before the first following line containing
    `if __name__`. If that end marker never shows up, everything up to the end of `lines` is returned.

    Args:
        lines (`List[str]`):
            Source code of a script separated by line.
        name (`str`):
            The name of the function to extract. Should be either `training_function` or `main`

    Returns:
        `List[str]`: The lines of the function, starting with its `def` line, or `None` when no line containing
        `def {name}` exists in `lines`.

    Raises:
        `ValueError`: If `name` is neither `training_function` nor `main`.
    """
    if name not in ("training_function", "main"):
        raise ValueError(f"Incorrect function name passed: {name}, choose either 'main' or 'training_function'")

    # The marker is a plain substring test, mirroring how the start of the function is located
    end_marker = "def main" if name == "training_function" else "if __name__"

    good_lines, found_start = [], False
    for line in lines:
        if not found_start:
            if f"def {name}" in line:
                found_start = True
                good_lines.append(line)
            continue
        if end_marker in line:
            return good_lines
        good_lines.append(line)

    # No end marker: the function ran to the end of the input, so hand back what was collected instead of
    # silently dropping it. A missing definition still yields `None` so callers fail loudly rather than
    # comparing against an empty body.
    return good_lines if found_start else None
|
|
|
|
|
def clean_lines(lines: List[str]):
    """
    Returns the entries of `lines` that are neither comment lines nor bare newlines.

    A comment line is one whose first non-whitespace character is `#`. Only an entry that is exactly `"\\n"` counts
    as a bare newline; whitespace-only lines and trailing comments after code are left untouched.

    Args:
        lines (`List[str]`):
            Source code of a script separated by line.
    """
    kept = []
    for entry in lines:
        # Full-line comment, possibly indented
        if entry.lstrip().startswith("#"):
            continue
        # Bare line break
        if entry == "\n":
            continue
        kept.append(entry)
    return kept
|
|
|
|
|
def compare_against_test(base_filename: str, feature_filename: str, parser_only: bool, secondary_filename: str = None):
    """
    Tests whether the additional code inside of `feature_filename` was implemented in `base_filename`. This should be
    used when testing to see if `complete_*_example.py` examples have all of the implementations from each of the
    `examples/by_feature/*` scripts.

    It utilizes `nlp_example.py` to extract out all of the repeated training code, so that only the new additional code
    is examined and checked. If something *other* than `nlp_example.py` should be used, such as `cv_example.py` for the
    `complete_cv_example.py` script, it should be passed in for the `secondary_filename` parameter.

    The template is read from `examples/nlp_example.py`, resolved against the current working directory. All files
    are read as UTF-8.

    Args:
        base_filename (`str` or `os.PathLike`):
            The filepath of a single "complete" example script to test, such as `examples/complete_cv_example.py`
        feature_filename (`str` or `os.PathLike`):
            The filepath of a single feature example script. The contents of this script are checked to see if they
            exist in `base_filename`
        parser_only (`bool`):
            Whether to compare only the `main()` sections in both files, or to compare the contents of
            `training_function()`
        secondary_filename (`str`, *optional*):
            A potential secondary filepath that should be included in the check. This function extracts the base
            functionalities off of "examples/nlp_example.py", so if `base_filename` is a script other than
            `complete_nlp_example.py`, the template script should be included here. Such as `examples/cv_example.py`

    Returns:
        `List[str]`: The lines that are new in `feature_filename` (relative to the template) but were not found among
        the new lines of `base_filename`. An empty list means nothing is missing.
    """

    def read_lines(path):
        # Python sources default to UTF-8, so do not depend on the locale's preferred encoding
        with open(path, "r", encoding="utf-8") as f:
            return f.readlines()

    base_file_contents = read_lines(base_filename)
    full_file_contents = read_lines(os.path.abspath(os.path.join("examples", "nlp_example.py")))
    feature_file_contents = read_lines(feature_filename)
    if secondary_filename is not None:
        secondary_file_contents = read_lines(secondary_filename)

    # Every file goes through the same extraction; only the function of interest depends on `parser_only`
    function_name = "main" if parser_only else "training_function"

    def extract(file_lines):
        return clean_lines(get_function_contents_by_name(file_lines, function_name))

    base_file_func = extract(base_file_contents)
    full_file_func = extract(full_file_contents)
    feature_file_func = extract(feature_file_contents)
    if secondary_filename is not None:
        secondary_file_func = extract(secondary_file_contents)

    _dl_line = "train_dataloader, eval_dataloader = get_dataloaders(accelerator, batch_size)\n"
    _mock_marker = "TESTING_MOCKED_DATALOADERS"
    template_lines = set(full_file_func)

    def is_new(line):
        # "New" means absent from the template function and not the dataloader creation statement
        return line not in template_lines and line.lstrip() != _dl_line

    # Specific code in the feature script that differs from the template, aka what is new
    new_feature_code = []
    it = iter(feature_file_func)
    # NOTE(review): the loop makes one pass fewer than there are lines, presumably to leave room for the extra
    # line consumed after a mock marker. As a consequence the last line is not examined when no marker is
    # skipped. Kept as-is so results stay unchanged -- confirm whether the final line should be checked too.
    for _ in range(len(feature_file_func) - 1):
        # A default keeps several skipped markers from exhausting the iterator and raising `StopIteration`
        line = next(it, None)
        if line is None:
            break
        if not is_new(line):
            continue
        if _mock_marker in line:
            # The statement right after a mock marker is skipped as well
            next(it, None)
        else:
            new_feature_code.append(line)

    # Specific code in the complete example that differs from the template
    new_full_example_parts = [line for line in base_file_func if is_new(line) and _mock_marker not in line]

    # Whatever the feature script adds that the complete example does not contain
    implemented_lines = set(new_full_example_parts)
    diff_from_example = [line for line in new_feature_code if line not in implemented_lines]
    if secondary_filename is not None:
        secondary_lines = set(secondary_file_func)
        # NOTE(review): this walks the raw template file (`full_file_contents`), not its cleaned function body;
        # behaviour is preserved here -- confirm that this is intended.
        diff_from_two = {line for line in full_file_contents if line not in secondary_lines}
        diff_from_example = [line for line in diff_from_example if line not in diff_from_two]

    return diff_from_example
|
|