LucasAguetai committed · Commit 2974ccd · 1 Parent(s): f234dc4

add score to squeezeBert
Files changed: modeles.py (+25 −5)
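In short, squeezebert() now softmaxes the start/end logits, takes the argmax positions, and returns the answer together with a confidence score: the geometric mean of the two peak probabilities. A toy check of that formula (numbers invented for illustration, not taken from the commit):

# Geometric mean of the start/end peak probabilities, as in the diff below.
start_score, end_score = 0.9, 0.4   # hypothetical peak softmax probabilities
combined_score = (start_score * end_score) ** 0.5
print(combined_score)               # 0.6; an arithmetic mean would give 0.65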
modeles.py CHANGED

@@ -7,8 +7,7 @@ def loadSqueeze():
     return tokenizer, model
 
 def squeezebert(context, question, model, tokenizer):
-    #
-    # Tokenize the input question-context pair
+    # Tokenize the input question-context pair
     inputs = tokenizer.encode_plus(question, context, max_length=512, truncation=True, padding=True, return_tensors='pt')
 
     # Send inputs to the same device as your model
@@ -20,13 +19,34 @@ def squeezebert(context, question, model, tokenizer):
 
     # Extract the start and end positions of the answer in the tokens
     answer_start_scores, answer_end_scores = outputs.start_logits, outputs.end_logits
-
-
+
+    # Calculate probabilities from logits
+    answer_start_prob = torch.softmax(answer_start_scores, dim=-1)
+    answer_end_prob = torch.softmax(answer_end_scores, dim=-1)
+
+    # Find the most likely start and end positions
+    answer_start_index = torch.argmax(answer_start_prob)  # Most likely start of answer
+    answer_end_index = torch.argmax(answer_end_prob) + 1  # Most likely end of answer; +1 for inclusive slicing
+
+    # Extract the highest probability scores
+    start_score = answer_start_prob.max().item()  # Highest probability of start
+    end_score = answer_end_prob.max().item()  # Highest probability of end
+
+    # Combine the scores into a singular score
+    combined_score = (start_score * end_score) ** 0.5  # Geometric mean of start and end scores
 
     # Convert token indices to the actual answer text
     answer_tokens = inputs['input_ids'][0, answer_start_index:answer_end_index]
     answer = tokenizer.decode(answer_tokens, skip_special_tokens=True)
-
+
+    # Return the answer, its positions, and the combined score
+    return {
+        "answer": answer,
+        "start": answer_start_index.item(),
+        "end": answer_end_index.item(),
+        "score": combined_score
+    }
+
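For reference, a minimal sketch of how the updated function might be driven. Everything here is an assumption outside the commit: modeles.py is importable from the working directory, torch is imported at the top of modeles.py, and the model/tokenizer are loaded directly with transformers rather than through the file's loadSqueeze() helper. squeezebert/squeezebert-uncased is the base checkpoint, so its question-answering head is untrained; a fine-tuned checkpoint would be needed for meaningful answers.

# Usage sketch, not part of the commit. Assumes `pip install transformers torch`
# and that modeles.py sits on the Python path.
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from modeles import squeezebert

name = "squeezebert/squeezebert-uncased"  # base model; QA head randomly initialized
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForQuestionAnswering.from_pretrained(name)
model.eval()

context = "SqueezeBERT is an efficient BERT variant that uses grouped convolutions."
question = "What kind of convolutions does SqueezeBERT use?"

result = squeezebert(context, question, model, tokenizer)
print(result["answer"], result["start"], result["end"], result["score"])

Returning a dict keeps the call site readable, and the added "score" field makes it easy for callers to threshold away low-confidence answers.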