Zen0 committed
Commit b133152 · verified · 1 Parent(s): 839f2b3

Update tasks/text.py

Files changed (1): tasks/text.py (+28 −30)
tasks/text.py CHANGED
@@ -7,6 +7,12 @@ import random
 from .utils.evaluation import TextEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
 
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+
+import numpy as np
+
+
 router = APIRouter()
 
 DESCRIPTION = "FrugalDisinfoHunter Model"
@@ -53,48 +59,40 @@ async def evaluate_text(request: TextEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
-    #--------------------------------------------------------------------------------------------
-    # Load your model and tokenizer from Hugging Face or local path
-    #--------------------------------------------------------------------------------------------
 
-    from transformers import AutoTokenizer, AutoModelForSequenceClassification
-    import torch
 
-    # Load model from Hugging Face (adjust if you uploaded it there)
-    model_name = "Zen0/FrugalDisinfoHunter"  # Replace with your model identifier if different
+    # Model and Tokenizer
+    model_name = "Zen0/FrugalDisinfoHunter"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
-    #--------------------------------------------------------------------------------------------
     # Load the dataset
-    #--------------------------------------------------------------------------------------------
-
-    # Assuming 'quotaclimat/frugalaichallenge-text-train' is the dataset you're working with
-    dataset = load_dataset(request.dataset_name)
-
-    # Access the test dataset (you can change this if you want to use a different split)
-    test_dataset = dataset['test']  # Assuming you have a 'test' split available
-
-    #--------------------------------------------------------------------------------------------
-    # Tokenize the text data
-    #--------------------------------------------------------------------------------------------
-
-    # Tokenize the test data (the text field contains the quotes)
-    test_texts = test_dataset["text"]  # The field 'text' contains the climate quotes
-
+    dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")
+    print(dataset.keys())  # Debugging: check available splits
+
+    # Assuming 'test' split is available
+    test_dataset = dataset['test']
+
+    # Convert the label strings to integers
+    test_dataset = test_dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
+
+    # Tokenize the test data
+    test_texts = test_dataset["text"]  # Extracting the 'text' column (quotes)
     inputs = tokenizer(test_texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
-
-    #--------------------------------------------------------------------------------------------
-    # Inference
-    #--------------------------------------------------------------------------------------------
-
+
+    # Move model and inputs to GPU if available
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    inputs = {key: val.to(device) for key, val in inputs.items()}
+
     # Run inference on the dataset using the model
     with torch.no_grad():  # Disable gradient calculations
         outputs = model(**inputs)
         logits = outputs.logits
-
+
     # Get predictions from the logits (choose the class with the highest logit)
-    predictions = torch.argmax(logits, dim=-1).cpu().numpy()  # Convert to numpy array for use
+    predictions = torch.argmax(logits, dim=-1).cpu().numpy()
+
 
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
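
One thing worth flagging for reviewers: the added `test_dataset.map(...)` call depends on a `LABEL_MAPPING` dict that this diff neither introduces nor shows in context, so it presumably is defined elsewhere in tasks/text.py. If it were missing, a minimal sketch of deriving such a mapping from the split itself could look like this (the derivation is an illustration, not code from this commit):

```python
from datasets import load_dataset

dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")
test_dataset = dataset["test"]

# Assumption: labels arrive as strings. Build a stable string -> integer-id
# mapping from the labels present in the split (sorted for reproducibility).
LABEL_MAPPING = {label: i for i, label in enumerate(sorted(set(test_dataset["label"])))}
test_dataset = test_dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
```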
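Separately, the updated code tokenizes the whole test split in one call and runs a single forward pass over it, so peak memory grows with the size of the split. A batched variant of the same inference pass bounds that cost; this is a sketch under the commit's own model name, with the helper name and batch size chosen for illustration:

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = "Zen0/FrugalDisinfoHunter"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # explicit, though from_pretrained() already returns eval mode

def predict(texts, batch_size=32):
    """Classify texts in fixed-size batches to bound peak memory."""
    predictions = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(batch, padding=True, truncation=True,
                           max_length=512, return_tensors="pt")
        inputs = {key: val.to(device) for key, val in inputs.items()}
        with torch.no_grad():
            logits = model(**inputs).logits
        predictions.extend(torch.argmax(logits, dim=-1).cpu().tolist())
    return predictions
```

Batching also keeps each batch padded only to its own longest quote, rather than padding every example to the longest quote in the entire split.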