submission-template

Sleeping

App Files Files Community

Zen0 committed on Jan 10

Commit

731e8c7

verified ·

1 Parent(s): f1edb98

Update tasks/text.py

Browse files

Files changed (1) hide show

tasks/text.py +10 -29

tasks/text.py CHANGED Viewed

@@ -9,7 +9,7 @@ from .utils.emissions import tracker, clean_emissions_data, get_space_info
 router = APIRouter()
-DESCRIPTION = "Random Baseline"
 ROUTE = "/text"
 @router.post(ROUTE, tags=["Text Task"],
@@ -18,9 +18,7 @@ async def evaluate_text(request: TextEvaluationRequest):
     """
     Evaluate text classification for climate disinformation detection.
-    Current Model: Random Baseline
-    - Makes random predictions from the label space (0-7)
-    - Used as a baseline for comparison
     """
     # Get space info
     username, space_url = get_space_info()
@@ -55,46 +53,29 @@ async def evaluate_text(request: TextEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
-    #--------------------------------------------------------------------------------------------
-    # Load your model and tokenizer from Hugging Face
-    #--------------------------------------------------------------------------------------------
     model_name = "Zen0/FrugalDisinfoHunter"  # Model identifier from Hugging Face
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForSequenceClassification.from_pretrained(model_name)
-    #--------------------------------------------------------------------------------------------
-    # Load the dataset
-    #--------------------------------------------------------------------------------------------
-    # Assuming 'quotaclimat/frugalaichallenge-text-train' is the dataset you are working with
-    dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")
-    # Access the test dataset (you can change this if you want to use a different split)
-    test_dataset = dataset['test']  # Assuming you have a 'test' split available
-    #--------------------------------------------------------------------------------------------
-    # Tokenize the text data
-    #--------------------------------------------------------------------------------------------
-    # Tokenize the test data (the text field contains the quotes)
     test_texts = test_dataset["text"]  # The field 'text' contains the climate quotes
     inputs = tokenizer(test_texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
-    #--------------------------------------------------------------------------------------------
-    # Inference
-    #--------------------------------------------------------------------------------------------
-    # Run inference on the dataset using the model
     with torch.no_grad():  # Disable gradient calculations
         outputs = model(**inputs)
         logits = outputs.logits
-    # Get predictions from the logits (choose the class with the highest logit)
     predictions = torch.argmax(logits, dim=-1).cpu().numpy()  # Convert to numpy array for use
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------

 router = APIRouter()
+DESCRIPTION = "FrugalDisinfoHunter Model"
 ROUTE = "/text"
 @router.post(ROUTE, tags=["Text Task"],
     """
     Evaluate text classification for climate disinformation detection.
+    Current Model: FrugalDisinfoHunter
     """
     # Get space info
     username, space_url = get_space_info()
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
     model_name = "Zen0/FrugalDisinfoHunter"  # Model identifier from Hugging Face
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForSequenceClassification.from_pretrained(model_name)
     test_texts = test_dataset["text"]  # The field 'text' contains the climate quotes
     inputs = tokenizer(test_texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
+    dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")
+    # Access the test dataset
+    test_dataset = dataset['test']
     with torch.no_grad():  # Disable gradient calculations
         outputs = model(**inputs)
         logits = outputs.logits
+    # Get predictions from the logits
     predictions = torch.argmax(logits, dim=-1).cpu().numpy()  # Convert to numpy array for use
+    # Get true labels for accuracy calculation
+    true_labels = test_dataset["label"]  # Extract true labels from the dataset
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------