Zen0 committed
Commit b133152 · verified · 1 Parent(s): 839f2b3

Update tasks/text.py

Files changed (1): tasks/text.py (+28 −30)
tasks/text.py CHANGED
@@ -7,6 +7,12 @@ import random
 from .utils.evaluation import TextEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
 
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+
+import numpy as np
+
+
 router = APIRouter()
 
 DESCRIPTION = "FrugalDisinfoHunter Model"
@@ -53,48 +59,40 @@ async def evaluate_text(request: TextEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
-    #--------------------------------------------------------------------------------------------
-    # Load your model and tokenizer from Hugging Face or local path
-    #--------------------------------------------------------------------------------------------
 
-    from transformers import AutoTokenizer, AutoModelForSequenceClassification
-    import torch
 
-    # Load model from Hugging Face (adjust if you uploaded it there)
-    model_name = "Zen0/FrugalDisinfoHunter"  # Replace with your model identifier if different
+    # Model and Tokenizer
+    model_name = "Zen0/FrugalDisinfoHunter"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
-    #--------------------------------------------------------------------------------------------
     # Load the dataset
-    #--------------------------------------------------------------------------------------------
-
-    # Assuming 'quotaclimat/frugalaichallenge-text-train' is the dataset you're working with
-    dataset = load_dataset(request.dataset_name)
-
-    # Access the test dataset (you can change this if you want to use a different split)
-    test_dataset = dataset['test']  # Assuming you have a 'test' split available
-
-    #--------------------------------------------------------------------------------------------
-    # Tokenize the text data
-    #--------------------------------------------------------------------------------------------
-
-    # Tokenize the test data (the text field contains the quotes)
-    test_texts = test_dataset["text"]  # The field 'text' contains the climate quotes
-
+    dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")
+    print(dataset.keys())  # Debugging: check available splits
+
+    # Assuming 'test' split is available
+    test_dataset = dataset['test']
+
+    # Convert the label strings to integers
+    test_dataset = test_dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
+
+    # Tokenize the test data
+    test_texts = test_dataset["text"]  # Extracting the 'text' column (quotes)
     inputs = tokenizer(test_texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
-
-    #--------------------------------------------------------------------------------------------
-    # Inference
-    #--------------------------------------------------------------------------------------------
-
+
+    # Move model and inputs to GPU if available
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    inputs = {key: val.to(device) for key, val in inputs.items()}
+
     # Run inference on the dataset using the model
     with torch.no_grad():  # Disable gradient calculations
         outputs = model(**inputs)
         logits = outputs.logits
-
+
     # Get predictions from the logits (choose the class with the highest logit)
-    predictions = torch.argmax(logits, dim=-1).cpu().numpy()  # Convert to numpy array for use
+    predictions = torch.argmax(logits, dim=-1).cpu().numpy()
+
 
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
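
One thing worth flagging for reviewers: the added `test_dataset.map(...)` call depends on a `LABEL_MAPPING` dict that this diff neither introduces nor shows in context, so it presumably is defined elsewhere in tasks/text.py. If it were missing, a minimal sketch of deriving such a mapping from the split itself could look like this (the derivation is an illustration, not code from this commit):

```python
from datasets import load_dataset

dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")
test_dataset = dataset["test"]

# Assumption: labels arrive as strings. Build a stable string -> integer-id
# mapping from the labels present in the split (sorted for reproducibility).
LABEL_MAPPING = {label: i for i, label in enumerate(sorted(set(test_dataset["label"])))}
test_dataset = test_dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
```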
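Separately, the updated code tokenizes the whole test split in one call and runs a single forward pass over it, so peak memory grows with the size of the split. A batched variant of the same inference pass bounds that cost; this is a sketch under the commit's own model name, with the helper name and batch size chosen for illustration:

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = "Zen0/FrugalDisinfoHunter"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # explicit, though from_pretrained() already returns eval mode

def predict(texts, batch_size=32):
    """Classify texts in fixed-size batches to bound peak memory."""
    predictions = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(batch, padding=True, truncation=True,
                           max_length=512, return_tensors="pt")
        inputs = {key: val.to(device) for key, val in inputs.items()}
        with torch.no_grad():
            logits = model(**inputs).logits
        predictions.extend(torch.argmax(logits, dim=-1).cpu().tolist())
    return predictions
```

Batching also keeps each batch padded only to its own longest quote, rather than padding every example to the longest quote in the entire split.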