Spaces:

tejash300
/

docanalyzer

Runtime error

tejash300 committed on Mar 31

Commit

d629e1d

verified ·

1 Parent(s): 64af888

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -70,7 +70,7 @@ def fine_tune_cuad_model():
     """
     Fine tunes a QA model on the CUAD dataset for clause extraction.
     For testing, we use only 50 training examples (and 10 for validation)
-    and restrict training to 10 steps.
     """
     from datasets import load_dataset
     import numpy as np
@@ -149,20 +149,19 @@ def fine_tune_cuad_model():
     train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "start_positions", "end_positions"])
     val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "start_positions", "end_positions"])
-    # Set max_steps to 10 for fast testing.
     training_args = TrainingArguments(
         output_dir="./fine_tuned_legal_qa",
-        max_steps=10,
-        evaluation_strategy="steps",
-        eval_steps=5,
         learning_rate=2e-5,
         per_device_train_batch_size=4,
         per_device_eval_batch_size=4,
         num_train_epochs=1,
         weight_decay=0.01,
         logging_steps=1,
-        save_steps=5,
-        load_best_model_at_end=True,
         report_to=[]  # Disable wandb logging
     )
@@ -191,7 +190,7 @@ def fine_tune_cuad_model():
 try:
     try:
         nlp = spacy.load("en_core_web_sm")
-    except:
         spacy.cli.download("en_core_web_sm")
         nlp = spacy.load("en_core_web_sm")
     print("✅ Loading NLP models...")

     """
     Fine tunes a QA model on the CUAD dataset for clause extraction.
     For testing, we use only 50 training examples (and 10 for validation)
+    and restrict training to 1 step with evaluation disabled.
     """
     from datasets import load_dataset
     import numpy as np
     train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "start_positions", "end_positions"])
     val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "start_positions", "end_positions"])
+    # Set max_steps to 1 for very fast testing and disable evaluation
     training_args = TrainingArguments(
         output_dir="./fine_tuned_legal_qa",
+        max_steps=1,                   # Only one training step
+        evaluation_strategy="no",      # Disable evaluation during training
         learning_rate=2e-5,
         per_device_train_batch_size=4,
         per_device_eval_batch_size=4,
         num_train_epochs=1,
         weight_decay=0.01,
         logging_steps=1,
+        save_steps=1,
+        load_best_model_at_end=False,
         report_to=[]  # Disable wandb logging
     )
 try:
     try:
         nlp = spacy.load("en_core_web_sm")
+    except Exception:
         spacy.cli.download("en_core_web_sm")
         nlp = spacy.load("en_core_web_sm")
     print("✅ Loading NLP models...")