Use environment variables with os.environ
- app.py +1 -0
- spanish_medica_llm.py +3 -4
app.py
CHANGED
@@ -40,6 +40,7 @@ def evaluate_model():
     return(f"Evaluate Model {os.environ.get('HF_LLM_MODEL_ID')} from dataset {os.environ.get('HF_LLM_DATASET_ID')}")
 
 
+
 def train_model(*inputs):
     if "IS_SHARED_UI" in os.environ:
         raise gr.Error("This Space only works in duplicated instances")
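The handler above reads its model and dataset ids from the environment instead of hard-coding them, and train_model refuses to run on the shared demo Space. A minimal, self-contained sketch of that pattern, assuming the ids are set as Space variables or secrets (the fallback values are illustrative placeholders, not taken from the commit):

import os

import gradio as gr

def evaluate_model():
    # Ids come from Space variables/secrets; the fallbacks here are hypothetical.
    model_id = os.environ.get("HF_LLM_MODEL_ID", "your-user/your-model")
    dataset_id = os.environ.get("HF_LLM_DATASET_ID", "your-user/your-dataset")
    return f"Evaluate Model {model_id} from dataset {dataset_id}"

def train_model(*inputs):
    # Same guard as in the diff: the shared demo exports IS_SHARED_UI,
    # while a duplicated Space does not, so training only runs in copies.
    if "IS_SHARED_UI" in os.environ:
        raise gr.Error("This Space only works in duplicated instances")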
spanish_medica_llm.py
CHANGED
@@ -25,7 +25,7 @@ from transformers import (
 from accelerate import FullyShardedDataParallelPlugin, Accelerator
 from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig
 from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
-import wandb
+#import wandb
 from trl import SFTTrainer
 
 from huggingface_hub import login
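Commenting out the import removes the hard dependency on wandb, but the Hugging Face Trainer can still try to initialise Weights & Biases through its reporting integration. A hedged companion sketch (report_to and WANDB_DISABLED are real transformers knobs; output_dir is a placeholder):

import os

from transformers import TrainingArguments

os.environ["WANDB_DISABLED"] = "true"  # belt and braces: disable the wandb integration

training_args = TrainingArguments(
    output_dir="./output",  # placeholder
    report_to="none",       # register no wandb/tensorboard logging callbacks
)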
@@ -504,7 +504,7 @@ def configAndRunTraining(basemodel, dataset, eval_dataset, tokenizer):
     push_to_hub = True,
     hub_private_repo = False,
     hub_model_id = HUB_MODEL_ID,
-    warmup_steps
+    warmup_steps = 5,
     per_device_train_batch_size = MICRO_BATCH_SIZE,
     per_device_eval_batch_size=1,
     #gradient_checkpointing=True,
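Note on the hunk above: the removed line was a bare name inside the TrainingArguments call, and a positional argument after keyword arguments is a SyntaxError in Python, so the module would not even import. The fix supplies a value:

# Before (SyntaxError: positional argument follows keyword argument):
#     TrainingArguments(..., hub_model_id = HUB_MODEL_ID, warmup_steps, ...)
# After:
#     TrainingArguments(..., hub_model_id = HUB_MODEL_ID, warmup_steps = 5, ...)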
@@ -518,8 +518,7 @@ def configAndRunTraining(basemodel, dataset, eval_dataset, tokenizer):
     save_steps = 50, # Save checkpoints every 50 steps
     evaluation_strategy = "steps", # Evaluate the model every logging step
     eval_steps = 50, # Evaluate and save checkpoints every 50 steps
-    do_eval = True, # Perform evaluation at the end of training
-    #report_to="wandb", # Comment this out if you don't want to use Weights & Biases
+    do_eval = True, # Perform evaluation at the end of training
     run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}", # Name of the W&B run (optional)
     fp16=True, # Set for a T4 GPU; on a more powerful GPU such as an A100, set this to False and use bf16
     bf16=False
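For reference, the repaired configuration block would read roughly as follows. This is a hedged reconstruction from the hunks above, assuming a transformers version of the same era (evaluation_strategy was later renamed eval_strategy); MICRO_BATCH_SIZE, HUB_MODEL_ID, run_name, and output_dir are defined elsewhere in spanish_medica_llm.py, so the values here are placeholders:

from datetime import datetime

from transformers import TrainingArguments

MICRO_BATCH_SIZE = 1                   # placeholder: defined elsewhere in the file
HUB_MODEL_ID = "your-user/your-model"  # placeholder
run_name = "spanish-medica-llm"        # placeholder

training_args = TrainingArguments(
    output_dir="./output",             # placeholder: not shown in the diff
    push_to_hub=True,
    hub_private_repo=False,
    hub_model_id=HUB_MODEL_ID,
    warmup_steps=5,                    # the value this commit supplies
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    per_device_eval_batch_size=1,
    save_steps=50,                     # save a checkpoint every 50 steps
    evaluation_strategy="steps",       # evaluate every eval_steps steps
    eval_steps=50,
    do_eval=True,
    run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",
    fp16=True,                         # fits a T4; on A100-class GPUs use bf16=True, fp16=False
    bf16=False,
)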