Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -34,29 +34,29 @@ with open('config.json', 'r') as config_file:
|
|
34 |
num_args = len(config)
|
35 |
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
if num_args ==
|
45 |
# cmd args
|
46 |
# sys.argv[0] is the script name, sys.argv[1] is the first argument, etc.
|
47 |
-
should_train_model =
|
48 |
-
train_file =
|
49 |
-
test_file =
|
50 |
-
batch_size_for_trainer = int(
|
51 |
-
should_produce_eval_matrix = int(
|
52 |
-
path_to_save_trained_model_to =
|
53 |
-
|
54 |
-
print(f"should train model? : {
|
55 |
-
print (f"file to train on : {
|
56 |
-
print (f"file to evaluate on : {
|
57 |
-
print (f"batch size : {
|
58 |
-
print (f"should produce eval matrix : {
|
59 |
-
print (f"path to save trained model : {
|
60 |
|
61 |
print(f"should train model? : {should_train_model}")
|
62 |
print (f"file to train on : {train_file}")
|
@@ -331,27 +331,27 @@ if (should_train_model=='1'): #train model
|
|
331 |
# Upload the model and tokenizer to the Hugging Face repository
|
332 |
|
333 |
upload_folder(
|
334 |
-
folder_path=f"{model_save_path}",
|
335 |
-
path_in_repo=f"{model_save_path}",
|
336 |
repo_id=repo_name,
|
337 |
token=api_token,
|
338 |
-
commit_message="Push
|
339 |
#overwrite=True # Force overwrite existing files
|
340 |
)
|
341 |
|
342 |
upload_folder(
|
343 |
-
folder_path="
|
344 |
-
path_in_repo="
|
345 |
repo_id=repo_name,
|
346 |
token=api_token,
|
347 |
-
commit_message="Push
|
348 |
#overwrite=True # Force overwrite existing files
|
349 |
)
|
350 |
|
351 |
else:
|
352 |
print('Load Pre-trained')
|
353 |
-
model_save_path = "./
|
354 |
-
tokenizer_save_path = "./
|
355 |
# RobertaTokenizer.from_pretrained(model_save_path)
|
356 |
model = AutoModelForSequenceClassification.from_pretrained(model_save_path).to('cpu')
|
357 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_save_path)
|
|
|
34 |
num_args = len(config)
|
35 |
|
36 |
|
37 |
+
arg1 = config.get('arg1', '1')
|
38 |
+
arg2 = config.get('arg2', 'train_fleet')
|
39 |
+
arg3 = config.get('arg3', 'test_fleet')
|
40 |
+
arg4 = config.get('arg4', '1')
|
41 |
+
arg5 = config.get('arg5', '1')
|
42 |
+
arg6 = config.get('arg6', 'saved_fleet_model')
|
43 |
+
|
44 |
+
if num_args == 6:
|
45 |
# cmd args
|
46 |
# sys.argv[0] is the script name, sys.argv[1] is the first argument, etc.
|
47 |
+
should_train_model = arg1 # should train model?
|
48 |
+
train_file = arg2 # training file name
|
49 |
+
test_file = arg3 # eval file name
|
50 |
+
batch_size_for_trainer = int(arg4) # batch sizes to send to trainer
|
51 |
+
should_produce_eval_matrix = int(arg5) # should produce matrix?
|
52 |
+
path_to_save_trained_model_to = arg6
|
53 |
+
|
54 |
+
print(f"should train model? : {arg1}")
|
55 |
+
print (f"file to train on : {arg2}")
|
56 |
+
print (f"file to evaluate on : {arg3}")
|
57 |
+
print (f"batch size : {arg4}")
|
58 |
+
print (f"should produce eval matrix : {arg5}")
|
59 |
+
print (f"path to save trained model : {arg6}")
|
60 |
|
61 |
print(f"should train model? : {should_train_model}")
|
62 |
print (f"file to train on : {train_file}")
|
|
|
331 |
# Upload the model and tokenizer to the Hugging Face repository
|
332 |
|
333 |
upload_folder(
|
334 |
+
folder_path=f"{model_save_path}_model",
|
335 |
+
path_in_repo=f"{model_save_path}_model",
|
336 |
repo_id=repo_name,
|
337 |
token=api_token,
|
338 |
+
commit_message="Push model",
|
339 |
#overwrite=True # Force overwrite existing files
|
340 |
)
|
341 |
|
342 |
upload_folder(
|
343 |
+
folder_path=f"{model_save_path}_tokenizer",
|
344 |
+
path_in_repo=f"{model_save_path}_tokenizer",
|
345 |
repo_id=repo_name,
|
346 |
token=api_token,
|
347 |
+
commit_message="Push tokenizer",
|
348 |
#overwrite=True # Force overwrite existing files
|
349 |
)
|
350 |
|
351 |
else:
|
352 |
print('Load Pre-trained')
|
353 |
+
model_save_path = f"./{model_save_path}_model"
|
354 |
+
tokenizer_save_path = f"./{model_save_path}_tokenizer"
|
355 |
# RobertaTokenizer.from_pretrained(model_save_path)
|
356 |
model = AutoModelForSequenceClassification.from_pretrained(model_save_path).to('cpu')
|
357 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_save_path)
|