Merge branch 'refs/heads/main' into aysu
# Conflicts:
# src/deepeval/base_task.py
# src/deepeval/reading_comp_mc.py
- src/deepeval/base_task.py +19 -13
- src/deepeval/commonsense_reasoning_task.py +6 -6
- src/deepeval/complex_reasoning.py +5 -5
- src/deepeval/deepeval_task_manager.py +7 -0
- src/deepeval/nli.py +6 -6
- src/deepeval/reading_comp_mc.py +7 -7
- src/deepeval/sentiment_analysis_task.py +1 -1
- src/deepeval/summarization_task.py +4 -4
- src/deepeval/turkish_general_knowledge_task.py +5 -5
- svc/router.py +14 -10
src/deepeval/base_task.py
CHANGED
@@ -6,6 +6,7 @@ import openai
 from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessorList, LogitsProcessor
 import torch
 from typing import List
+from datetime import datetime
 load_dotenv()
 HF_TOKEN=os.getenv("HF_TOKEN")
 OPENAI_KEY = os.getenv("OPENAI_API_KEY")
@@ -16,7 +17,7 @@ class BaseTask(ABC):
     def __init__(self, dataset_repo, model_name):
         self.dataset_repo = dataset_repo
         self.dataset = self.load_dataset_from_hf()
-        self.device = "
+        self.device = "auto" if torch.cuda.is_available() else "cpu"
         self.model, self.tokenizer = self.get_cached_model(model_name, self.device)
         openai.api_key = OPENAI_KEY

@@ -32,12 +33,15 @@ class BaseTask(ABC):
     def load_model(model_name: str, device):
         """Loads model and tokenizer once and caches it."""
         print(f"Loading model: {model_name}")
+        start_time = datetime.now()
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
             torch_dtype=torch.float16,
             device_map=device,
             token=HF_TOKEN,  # Replace with actual token
         )
+        end_time = datetime.now()
+        print(f"Model loaded in {(end_time - start_time).seconds} seconds.")
         print("Model loaded.")
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         return model, tokenizer
@@ -49,8 +53,8 @@ class BaseTask(ABC):
         self.tokenizer.pad_token = self.tokenizer.eos_token  # Use EOS token as PAD token

         inputs = self.tokenizer(msg, return_tensors="pt", padding=True, truncation=True)
-        input_ids = inputs.input_ids
-        attention_mask = inputs.attention_mask
+        input_ids = inputs.input_ids
+        attention_mask = inputs.attention_mask

         if self.model.config.pad_token_id is None:
             self.model.config.pad_token_id = self.tokenizer.eos_token_id
@@ -94,16 +98,16 @@ class BaseTask(ABC):
             {"role": "user", "content": f"{msg}"},
         ]
         formatted_chat = self.tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-        print(formatted_chat)
+        #print(formatted_chat)
         inputs = self.tokenizer(formatted_chat, return_tensors="pt", padding=True, truncation=True)
-        input_ids = inputs.input_ids
-        attention_mask = inputs.attention_mask
+        input_ids = inputs.input_ids
+        attention_mask = inputs.attention_mask

         # Generate the sequence of letters starting from 'A'
         letters = [chr(ord('A') + i) for i in range(len(choices))]  # Create option letters A, B, C, D, E, ...
         encoded_choices = [self.tokenizer.encode(letter, add_special_tokens=False) for letter in letters]
         flattened_encoded_choices = [item for sublist in encoded_choices for item in sublist]  # Flatten the list
-        print(flattened_encoded_choices)
+        #print(flattened_encoded_choices)

         allowed_tokens = flattened_encoded_choices
         allowed_tokens += self.get_chat_template_tokens()  # Get the special chat tokens
@@ -154,8 +158,8 @@ class BaseTask(ABC):
         )

         inputs = self.tokenizer(formatted_chat, return_tensors="pt", padding=True, truncation=True)
-        input_ids = inputs.input_ids
-        attention_mask = inputs.attention_mask
+        input_ids = inputs.input_ids
+        attention_mask = inputs.attention_mask

         output = self.model.generate(
             input_ids,
@@ -186,13 +190,15 @@ class BaseTask(ABC):
         :return: Dataset
         """
         print("Loading dataset from Hugging Face.")
+        start_time = datetime.now()
         dataset= load_dataset(self.dataset_repo, token=HF_TOKEN, split="train")
         print("Dataset loaded.")

-        # Load
-
-
-
+        # Load 50 from each dataset
+        if len(dataset) > 50:
+            dataset = dataset.shuffle(seed=42).select(range(50))
+        end_time = datetime.now()
+        print(f"Dataset loaded in {(end_time - start_time).seconds} seconds.")
         return dataset

    @abstractmethod
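The hunks above feed flattened_encoded_choices into allowed_tokens so that generation is restricted to the option letters. For reference only, a minimal sketch of how such a restriction can be enforced with a custom LogitsProcessor; the class name and wiring below are illustrative assumptions, not the repo's actual implementation:

import torch
from transformers import LogitsProcessor, LogitsProcessorList

class RestrictToTokens(LogitsProcessor):
    # Illustrative only: mask every logit except an allowed set of token ids.
    def __init__(self, allowed_token_ids):
        self.allowed_token_ids = allowed_token_ids

    def __call__(self, input_ids, scores):
        masked = torch.full_like(scores, float("-inf"))
        masked[:, self.allowed_token_ids] = scores[:, self.allowed_token_ids]
        return masked

# Hypothetical usage with the names from the hunks above:
# processors = LogitsProcessorList([RestrictToTokens(allowed_tokens)])
# output = self.model.generate(input_ids, attention_mask=attention_mask,
#                              logits_processor=processors, max_new_tokens=2)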
src/deepeval/commonsense_reasoning_task.py
CHANGED
@@ -32,9 +32,9 @@ class CommonsenseReasoningTask(BaseTask):
             context = row["context"]

             # Prints for debugging
-            print(f"Choices: {choices}")
-            print("Type of choices:", type(choices))
-            print("Type of answer:", type(answer))
+            # print(f"Choices: {choices}")
+            # print("Type of choices:", type(choices))
+            # print("Type of answer:", type(answer))

             # Get answer index (starting from 0)
             if type(answer) == int:
@@ -62,9 +62,9 @@ class CommonsenseReasoningTask(BaseTask):
             model_answer_cleaned = model_answer.strip().replace('\n', '').replace(' ', '').upper()

             # Print answers
-            print(f"Correct Answer: {correct_answer_letter}")
-            print(f"Model Answer: {model_answer}")
-            print(f"Model Answer Cleaned: {model_answer_cleaned}")
+            # print(f"Correct Answer: {correct_answer_letter}")
+            # print(f"Model Answer: {model_answer}")
+            # print(f"Model Answer Cleaned: {model_answer_cleaned}")

             # Check if correct based on metric
             if correct_answer_letter == model_answer_cleaned:
|
src/deepeval/complex_reasoning.py
CHANGED
@@ -33,8 +33,8 @@ class ComplexReasoningTask(BaseTask):
|
|
33 |
correct_answers.append(correct_answer_letter)
|
34 |
|
35 |
# Prints for debugging
|
36 |
-
print(f"Choices: {choices}")
|
37 |
-
print("Type of choices:", type(choices))
|
38 |
|
39 |
|
40 |
# Construct the prompt/message
|
@@ -50,9 +50,9 @@ class ComplexReasoningTask(BaseTask):
|
|
50 |
if correct_answer_letter == model_answer_cleaned:
|
51 |
true += 1
|
52 |
# Print answers
|
53 |
-
print(f"Correct Answer: {correct_answer_letter}")
|
54 |
-
print(f"Model Answer: {model_answer}")
|
55 |
-
print(f"Model Answer Cleaned: {model_answer_cleaned}")
|
56 |
|
57 |
print("Answers:", correct_answers)
|
58 |
print("Results:", responses)
|
|
|
33 |
correct_answers.append(correct_answer_letter)
|
34 |
|
35 |
# Prints for debugging
|
36 |
+
# print(f"Choices: {choices}")
|
37 |
+
# print("Type of choices:", type(choices))
|
38 |
|
39 |
|
40 |
# Construct the prompt/message
|
|
|
50 |
if correct_answer_letter == model_answer_cleaned:
|
51 |
true += 1
|
52 |
# Print answers
|
53 |
+
# print(f"Correct Answer: {correct_answer_letter}")
|
54 |
+
# print(f"Model Answer: {model_answer}")
|
55 |
+
# print(f"Model Answer Cleaned: {model_answer_cleaned}")
|
56 |
|
57 |
print("Answers:", correct_answers)
|
58 |
print("Results:", responses)
|
src/deepeval/deepeval_task_manager.py
CHANGED
@@ -22,6 +22,7 @@ from src.deepeval.sts import STSTask
 from src.deepeval.mmlu import MMLUTask
 from src.deepeval.bias import BiasTask
 from typing import List
+from datetime import datetime
 load_dotenv()
 HF_TOKEN=os.getenv("HF_TOKEN")

@@ -73,15 +74,21 @@ class DeepEvalTaskManager:
     def run_tasks(self):
         """Execute validated tasks in order."""
         results = {}
+        total_start_time = datetime.now()
         for task_name, task_method in self.tasks_to_run.items():
             try:
+                start_time = datetime.now()
                 print("Running task: ", task_name)
                 task_enum = getattr(Task, task_name)
                 task_value = task_enum.value
                 results[task_value] = task_method() # Call the stored method reference
+                end_time = datetime.now()
+                print(f"Task {task_name} completed in {(end_time - start_time).seconds} seconds.")
             except Exception as e:
                 print(f"Error At Task: {task_name} - {e}")
                 continue
+        total_end_time = datetime.now()
+        print(f"All tasks completed in {(total_end_time - total_start_time).seconds} seconds.")
         print("All tasks completed.")
         return results

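The same start/end datetime.now() pattern now appears in both base_task.py and deepeval_task_manager.py. If it keeps spreading, it could be pulled into a small helper; a hedged sketch, where the timed name is hypothetical and not part of the repo:

from datetime import datetime

def timed(label, fn):
    # Run fn(), report elapsed wall-clock seconds, and return its result.
    start = datetime.now()
    result = fn()
    print(f"{label} completed in {(datetime.now() - start).seconds} seconds.")
    return result

# e.g. results[task_value] = timed(task_name, task_method)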
src/deepeval/nli.py
CHANGED
@@ -36,9 +36,9 @@ class NLITask(BaseTask):


             # Prints for debugging
-            print(f"Choices: {choices}")
-            print("Type of choices:", type(choices))
-            print("Label:", label)
+            # print(f"Choices: {choices}")
+            # print("Type of choices:", type(choices))
+            # print("Label:", label)

             # Construct the prompt/message
             instruction = ""
@@ -53,9 +53,9 @@ class NLITask(BaseTask):
             model_answer_cleaned = model_answer.strip().replace('\n', '').replace(' ', '').upper()

             # Print answers
-            print(f"Correct Answer: {correct_answer_letter}")
-            print(f"Model Answer: {model_answer}")
-            print(f"Model Answer Cleaned: {model_answer_cleaned}")
+            # print(f"Correct Answer: {correct_answer_letter}")
+            # print(f"Model Answer: {model_answer}")
+            # print(f"Model Answer Cleaned: {model_answer_cleaned}")

             # Check if correct based on metric
             if correct_answer_letter == model_answer_cleaned:
src/deepeval/reading_comp_mc.py
CHANGED
@@ -32,9 +32,9 @@ class ReadingComprehensionMCTask(BaseTask):
             question_about_the_text = row["question_about_the_text"]

             # Prints for debugging
-            print(f"Choices: {choices}")
-            print("Type of choices:", type(choices))
-            print("Type of answer:", type(answer))
+            # print(f"Choices: {choices}")
+            # print("Type of choices:", type(choices))
+            # print("Type of answer:", type(answer))

             # Get answer index (starting from 0)
             if type(answer) == int:
@@ -45,6 +45,7 @@ class ReadingComprehensionMCTask(BaseTask):
                 answer_index = answer_index - 1 # Because the answer is 1-indexed
             correct_answer_letter = chr(65 + answer_index)

+
             # Construct the prompt/message
             instruction = ""
             prompt = f"Paragraf:\n{text}\nSoru:{question_about_the_text}\nSeçenekler:\n{formatted_choices}\n{instruction}\n"
@@ -56,10 +57,9 @@ class ReadingComprehensionMCTask(BaseTask):
             model_answer_cleaned = model_answer.strip().replace('\n', '').replace(' ', '').upper().replace(':','')

             # Print answers
-            print(f"Correct Answer: {correct_answer_letter}")
-            print(f"Model Answer: {model_answer}")
-            print(f"Model Answer Cleaned: {model_answer_cleaned}")
-            print(f"Result: {correct_answer_letter == model_answer_cleaned}")
+            # print(f"Correct Answer: {correct_answer_letter}")
+            # print(f"Model Answer: {model_answer}")
+            # print(f"Model Answer Cleaned: {model_answer_cleaned}")

             # Check if correct based on metric
             if correct_answer_letter == model_answer_cleaned:
src/deepeval/sentiment_analysis_task.py
CHANGED
@@ -23,7 +23,7 @@ class SentimentAnalysisTask(BaseTask):
             prompt = f"Verilen metin hangi duyguyu ifade ediyor? {sentence}\n {formatted_choices}"
             messages = prompt
             answer = self.generate_response_mcqa_multi_token(messages, choices=choices)
-            print("Answer:", answer)
+            #print("Answer:", answer)
             responses.append(answer)
             correct_answer_letter = "A" if row["sentiment"] == "positive" else "B" if row["sentiment"] == "negative" else "C" if row["sentiment"] == "neutral" else None
             model_answer_cleaned = answer.strip().replace('\n', '').replace(' ', '').upper()
src/deepeval/summarization_task.py
CHANGED
@@ -23,8 +23,8 @@ class SummarizationTask(BaseTask):
             )

             generated_summary = self.generate_response(prompt, max_new_tokens=200)
-            print(f"Text: {text_data}\n")
-            print(f"Summary: {generated_summary}\n")
+            # print(f"Text: {text_data}\n")
+            # print(f"Summary: {generated_summary}\n")
             test_case = LLMTestCase(input=text_data, actual_output=generated_summary)

             metric = SummarizationMetric(
@@ -33,8 +33,8 @@ class SummarizationTask(BaseTask):
             )
             metric.measure(test_case)

-            print(f"Reason: {metric.reason}")
-            print(f"Score Breakdown: {metric.score_breakdown}")
+            # print(f"Reason: {metric.reason}")
+            # print(f"Score Breakdown: {metric.score_breakdown}")
             results.append({
                 "index": i,
                 "score": metric.score,
src/deepeval/turkish_general_knowledge_task.py
CHANGED
@@ -24,8 +24,8 @@ class TurkishGeneralKnowledgeTask(BaseTask):
             answer_index = row["answer"] # Assuming it's zero-based index
             difficulty = row["difficulty"]

-            print(f"Choices: {choices}")
-            print("Type of choices:", type(choices))
+            # print(f"Choices: {choices}")
+            # print("Type of choices:", type(choices))
             # Categorize difficulty
             if difficulty <= 3:
                 category = 'easy'
@@ -44,15 +44,15 @@ class TurkishGeneralKnowledgeTask(BaseTask):
             #"""
             model_answer = self.generate_response_mcqa_multi_token(message, choices=choices, max_new_tokens=2)
             responses.append(model_answer)
-            print(f"Correct Answer: {choices[answer_index]}")
-            print(f"Model Answer: {model_answer}")
+            # print(f"Correct Answer: {choices[answer_index]}")
+            # print(f"Model Answer: {model_answer}")

             #TODO: Make the cleaning in the mcqa function
             model_answer_cleaned = model_answer.strip().replace('\n', '').replace(' ', '').upper()

             # Check if the answer is correct
             correct_answer_letter = chr(65 + answer_index)
-            print("Correct Answer Letter:", correct_answer_letter)
+            # print("Correct Answer Letter:", correct_answer_letter)

             if correct_answer_letter == model_answer_cleaned:
                 true += 1
svc/router.py
CHANGED
@@ -53,6 +53,7 @@ async def deep_eval_status():
 @router.get("/deepeval/hardware")
 def hardware_status():
     info = get_gpu_tier()
+    print("Hardware Response:", info)
     return info

 @router.post("/chat", response_model=TaskResponse)
@@ -155,17 +156,20 @@ def get_gpu_tier():
     if not torch.cuda.is_available():
         return {"gpu": "CPU", "tier": "cpu"}

-
+    device_count = torch.cuda.device_count()
+    gpu_names = [torch.cuda.get_device_name(i).lower() for i in range(device_count)]

-    #
-
-
+    # Count how many of each GPU type we care about
+    l4_count = sum("l4" in name and "l40s" not in name for name in gpu_names)
+    l40s_count = sum("l40s" in name for name in gpu_names)
+
+    if l4_count == device_count:
+        return {"gpu": "NVIDIA L4", "tier": f"l4x{l4_count}"}
+    elif l40s_count == device_count:
+        return {"gpu": "NVIDIA L40S", "tier": f"l40sx{l40s_count}"}
+    elif "t4" in gpu_names[0]:
         return {"gpu": "Tesla T4", "tier": "t4-medium"}
-    elif "
-        return {"gpu": "NVIDIA L4", "tier": "l4x1"}
-    elif "l40s" in gpu_name:
-        return {"gpu": "NVIDIA L40S", "tier": "l40sx1"}
-    elif "a10g" in gpu_name:
+    elif "a10g" in gpu_names[0]:
         return {"gpu": "NVIDIA A10G", "tier": "a10g"}
     else:
-        return {"gpu":
+        return {"gpu": gpu_names[0], "tier": "unknown"}
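Pieced together from the added lines, the new GPU detection reads roughly as follows; lines the diff truncates on the old side are omitted, so surrounding context may differ slightly:

import torch

def get_gpu_tier():
    if not torch.cuda.is_available():
        return {"gpu": "CPU", "tier": "cpu"}

    device_count = torch.cuda.device_count()
    gpu_names = [torch.cuda.get_device_name(i).lower() for i in range(device_count)]

    # Count how many of each GPU type we care about
    l4_count = sum("l4" in name and "l40s" not in name for name in gpu_names)
    l40s_count = sum("l40s" in name for name in gpu_names)

    if l4_count == device_count:
        return {"gpu": "NVIDIA L4", "tier": f"l4x{l4_count}"}
    elif l40s_count == device_count:
        return {"gpu": "NVIDIA L40S", "tier": f"l40sx{l40s_count}"}
    elif "t4" in gpu_names[0]:
        return {"gpu": "Tesla T4", "tier": "t4-medium"}
    elif "a10g" in gpu_names[0]:
        return {"gpu": "NVIDIA A10G", "tier": "a10g"}
    else:
        return {"gpu": gpu_names[0], "tier": "unknown"}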