File size: 14,886 Bytes
d742fde e05ebfd c0f38e6 e05ebfd c0f38e6 e05ebfd c0f38e6 e05ebfd c0f38e6 e05ebfd c0f38e6 e05ebfd c0f38e6 e05ebfd d742fde |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 |
---
license: apache-2.0
---
import functools
import json
import time
from typing import Dict, List

from gliner import GLiNER
from utca.core import RenameAttribute
from utca.implementation.predictors import GLiNERPredictor, GLiNERPredictorConfig
from utca.implementation.tasks import (
    GLiNER as UTCAGLiNER,
    GLiNERPreprocessor,
    GLiNERRelationExtraction,
    GLiNERRelationExtractionPreprocessor,
)
def measure_time(func):
    """Decorate *func* so that each call prints how long it took.

    The wrapper forwards all positional and keyword arguments unchanged,
    prints one line of the form
    ``Execution time of <name>: <seconds> seconds`` and returns whatever
    *func* returned. If *func* raises, the exception propagates and no
    timing line is printed.

    Args:
        func: The callable to time. It must expose ``__name__``.

    Returns:
        A wrapper with the same call signature and metadata as *func*.
    """

    # functools.wraps keeps __name__/__doc__ of the decorated callable, so
    # introspection and tracebacks still show the real function.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments.
        started = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - started
        print(f"Execution time of {func.__name__}: {elapsed:.6f} seconds")
        return result

    return wrapper
class GLiNERTester:
    """Run a fixed set of demo tasks against one GLiNER checkpoint.

    Each ``test_*`` method feeds a hard-coded sample text to the model (or
    to the relation-extraction pipeline), measures how long the call took
    and returns a plain dict describing the outcome. ``run_all_tests``
    executes every task and writes the combined results to
    ``gliner_test_results.json``.
    """

    def __init__(
        self,
        # NOTE(review): the default is a machine-specific checkpoint path;
        # callers on other machines must pass model_name explicitly.
        model_name: str = r"C:\Users\doren\PycharmProjects\GlinerFineTuning\data\checkpoint-100000",
        device: str = "cuda:0",
    ):
        """Load the model and build the relation-extraction pipeline.

        Args:
            model_name: Checkpoint identifier or path handed to both
                ``GLiNER.from_pretrained`` and ``GLiNERPredictorConfig``.
            device: Device string forwarded to ``GLiNERPredictorConfig``.
                It is not passed to ``GLiNER.from_pretrained``, so it only
                affects the relation-extraction predictor.
        """
        # Plain GLiNER model used by every task except relation extraction.
        self.model = GLiNER.from_pretrained(model_name)

        # Predictor shared by both stages of the relation pipeline.
        self.predictor = GLiNERPredictor(
            GLiNERPredictorConfig(model_name=model_name, device=device)
        )

        # Entity stage -> rename "output" to "entities" -> relation stage.
        self.relation_pipe = (
            UTCAGLiNER(
                predictor=self.predictor,
                preprocess=GLiNERPreprocessor(threshold=0.5),
            )
            | RenameAttribute("output", "entities")
            | GLiNERRelationExtraction(
                predictor=self.predictor,
                preprocess=(
                    GLiNERPreprocessor(threshold=0.5)
                    | GLiNERRelationExtractionPreprocessor()
                ),
            )
        )
        self.results = {}

    def _timed_predict(self, text, labels, **predict_kwargs):
        """Call ``self.model.predict_entities`` and time it.

        Returns:
            A ``(spans, seconds)`` tuple: the model's return value and the
            elapsed time measured with ``time.perf_counter``.
        """
        started = time.perf_counter()
        spans = self.model.predict_entities(text, labels, **predict_kwargs)
        return spans, time.perf_counter() - started

    @staticmethod
    def _pick(spans, keys):
        """Reduce each span to a new dict holding only *keys*, in that order."""
        return [{key: span[key] for key in keys} for span in spans]

    @measure_time
    def test_ner(self) -> Dict:
        """Test Named Entity Recognition capabilities.

        Returns:
            Dict with ``task``, ``entities`` (text/label/score per span)
            and ``duration`` in seconds.
        """
        print("\nTesting NER...")
        text = (
            "\n"
            "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters\n"
            "for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,\n"
            "chief executive officer, president and chief software architect, while also being the largest\n"
            "individual shareholder until May 2014.\n"
        )
        labels = ["founder", "computer", "software", "position", "date"]
        entities, duration = self._timed_predict(text, labels)
        return {
            "task": "ner",
            "entities": self._pick(entities, ("text", "label", "score")),
            "duration": duration,
        }

    @measure_time
    def test_relation_extraction(self) -> Dict:
        """Test Relation Extraction capabilities.

        Returns:
            Dict with ``task``, ``relations`` (the pipeline's ``output``
            entry, passed through unchanged) and ``duration`` in seconds.
        """
        print("\nTesting Relation Extraction...")
        text = (
            "\n"
            "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters\n"
            "for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,\n"
            "chief executive officer, president and chief software architect.\n"
        )
        request = {
            "text": text,
            "labels": ["organisation", "founder", "position", "date"],
            "relations": [
                {
                    "relation": "founder",
                    "pairs_filter": [("organisation", "founder")],
                    "distance_threshold": 100,
                },
                {
                    "relation": "inception date",
                    "pairs_filter": [("organisation", "date")],
                },
                {
                    "relation": "held position",
                    "pairs_filter": [("founder", "position")],
                },
            ],
        }
        started = time.perf_counter()
        result = self.relation_pipe.run(request)
        duration = time.perf_counter() - started
        return {
            "task": "relation_extraction",
            "relations": result["output"],
            "duration": duration,
        }

    @measure_time
    def test_qa(self) -> Dict:
        """Test Question Answering capabilities.

        The question is prepended to the passage and the model is asked for
        spans labelled ``answer``.

        Returns:
            Dict with ``task``, ``answers`` (text/score per span) and
            ``duration`` in seconds.
        """
        print("\nTesting Question Answering...")
        question = "Who was the CEO of Microsoft?"
        text = (
            "\n"
            "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters\n"
            "for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,\n"
            "chief executive officer, president and chief software architect, while also being the largest\n"
            "individual shareholder until May 2014.\n"
        )
        answers, duration = self._timed_predict(question + text, ["answer"])
        return {
            "task": "question_answering",
            "answers": self._pick(answers, ("text", "score")),
            "duration": duration,
        }

    @measure_time
    def test_summarization(self) -> Dict:
        """Test Summarization capabilities.

        An instruction prompt is prepended to the passage and the model is
        asked for spans labelled ``summary`` with ``threshold=0.1``.

        Returns:
            Dict with ``task``, ``summaries`` (text/score per span) and
            ``duration`` in seconds.
        """
        print("\nTesting Summarization...")
        text = (
            "\n"
            "Several studies have reported its pharmacological activities, including anti-inflammatory,\n"
            "antimicrobial, and antitumoral effects. The effect of E-anethole was studied in the osteosarcoma\n"
            "MG-63 cell line, and the antiproliferative activity was evaluated by an MTT assay. It showed\n"
            "a GI50 value of 60.25 μM with apoptosis induction through the mitochondrial-mediated pathway.\n"
        )
        prompt = "Summarize the given text, highlighting the most important information:\n"
        summaries, duration = self._timed_predict(
            prompt + text, ["summary"], threshold=0.1
        )
        return {
            "task": "summarization",
            "summaries": self._pick(summaries, ("text", "score")),
            "duration": duration,
        }

    @measure_time
    def test_sentiment_extraction(self) -> Dict:
        """Test Sentiment Extraction capabilities.

        Returns:
            Dict with ``task``, ``sentiments`` (text/label/score per span)
            and ``duration`` in seconds.
        """
        print("\nTesting Sentiment Extraction...")
        text = (
            "\n"
            "I recently purchased the Sony WH-1000XM4 headphones and I'm thoroughly impressed.\n"
            "The noise-canceling is excellent, though the price is a bit high. The sound quality is amazing\n"
            "but the app could use some improvements.\n"
        )
        labels = ["positive sentiment", "negative sentiment"]
        sentiments, duration = self._timed_predict(text, labels)
        return {
            "task": "sentiment_extraction",
            "sentiments": self._pick(sentiments, ("text", "label", "score")),
            "duration": duration,
        }

    @measure_time
    def test_entity_disambiguation(self) -> Dict:
        """Test Entity Disambiguation capabilities.

        Returns:
            Dict with ``task``, ``entities`` (text/label/score per span)
            and ``duration`` in seconds.
        """
        print("\nTesting Entity Disambiguation...")
        text = (
            "\n"
            "Paris is the capital of France. Paris Hilton is an American media personality.\n"
            "Mercury is a planet in our solar system. Mercury is also a chemical element.\n"
        )
        labels = ["location Paris", "person Paris", "planet Mercury", "element Mercury"]
        entities, duration = self._timed_predict(text, labels)
        return {
            "task": "entity_disambiguation",
            "entities": self._pick(entities, ("text", "label", "score")),
            "duration": duration,
        }

    def run_all_tests(self) -> Dict:
        """Run all available tests, store the results and save them as JSON.

        The results are kept on ``self.results`` and written to
        ``gliner_test_results.json`` in the current working directory.

        Returns:
            Dict mapping a short task key to that task's result dict.
        """
        print("Starting GLiNER comprehensive test suite...")
        self.results = {
            "ner": self.test_ner(),
            "relation_extraction": self.test_relation_extraction(),
            "qa": self.test_qa(),
            "summarization": self.test_summarization(),
            "sentiment_extraction": self.test_sentiment_extraction(),
            "entity_disambiguation": self.test_entity_disambiguation(),
        }

        # Explicit encoding so the file does not depend on the platform's
        # default text encoding.
        with open('gliner_test_results.json', 'w', encoding="utf-8") as f:
            json.dump(self.results, f, indent=4)

        print("\nAll tests completed. Results saved to 'gliner_test_results.json'")
        return self.results
def main():
    """Build a tester, run every task and print a per-task summary.

    Construction is first attempted with ``device="cuda:0"``; if that
    raises, a second tester is built with ``device="cpu"``. For each task
    the duration and the raw result dict are printed, followed by an item
    count when the result holds one of the known list keys.
    """
    # Only ordinary errors trigger the CPU fallback; KeyboardInterrupt and
    # SystemExit are allowed to propagate.
    try:
        tester = GLiNERTester(device="cuda:0")
    except Exception:
        tester = GLiNERTester(device="cpu")
        print("Using CPU for testing")
    else:
        print("Using GPU for testing")

    results = tester.run_all_tests()

    # Result key -> message template; the first key present in a result wins.
    count_messages = (
        ("entities", "Found {} entities"),
        ("answers", "Found {} answers"),
        ("summaries", "Generated {} summary segments"),
        ("sentiments", "Found {} sentiment expressions"),
    )

    print("\nTest Summary:")
    for task, result in results.items():
        print(f"\n{task.upper()}:")
        print(f"Duration: {result['duration']:.2f} seconds")
        print("Results: ", result)
        for key, template in count_messages:
            if key in result:
                print(template.format(len(result[key])))
                break
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    main()
Test Summary:
NER:
Duration: 0.41 seconds
Results: {'task': 'ner', 'entities': [{'text': 'Bill Gates', 'label': 'founder', 'score': 0.999995768070221}, {'text': 'Paul Allen', 'label': 'founder', 'score': 0.9999948740005493}, {'text': 'April 4, 1975', 'label': 'date', 'score': 0.9999996423721313}, {'text': 'BASIC interpreters', 'label': 'software', 'score': 0.9999961853027344}, {'text': 'Altair 8800', 'label': 'computer', 'score': 0.9999923706054688}, {'text': 'chairman', 'label': 'position', 'score': 0.9999326467514038}, {'text': 'chief executive officer', 'label': 'position', 'score': 0.9999247193336487}, {'text': 'president', 'label': 'position', 'score': 0.9999806880950928}, {'text': 'chief software architect', 'label': 'position', 'score': 0.9999625086784363}, {'text': 'largest \n individual shareholder', 'label': 'position', 'score': 0.9741785526275635}], 'duration': 0.4105691909790039}
Found 10 entities
RELATION_EXTRACTION:
Duration: 0.31 seconds
Results: {'task': 'relation_extraction', 'relations': [{'source': {'start': 9, 'end': 18, 'span': 'Microsoft', 'score': 0.9999996423721313, 'entity': 'organisation'}, 'relation': 'founder', 'target': {'start': 34, 'end': 44, 'span': 'Bill Gates', 'score': 0.9999998211860657, 'entity': 'founder'}, 'score': 0.9999523162841797}, {'source': {'start': 9, 'end': 18, 'span': 'Microsoft', 'score': 0.9999996423721313, 'entity': 'organisation'}, 'relation': 'founder', 'target': {'start': 49, 'end': 59, 'span': 'Paul Allen', 'score': 0.9999998807907104, 'entity': 'founder'}, 'score': 0.999999463558197}, {'source': {'start': 9, 'end': 18, 'span': 'Microsoft', 'score': 0.9999996423721313, 'entity': 'organisation'}, 'relation': 'inception date', 'target': {'start': 63, 'end': 76, 'span': 'April 4, 1975', 'score': 1.0, 'entity': 'date'}, 'score': 0.9999998807907104}, {'source': {'start': 167, 'end': 176, 'span': 'Microsoft', 'score': 0.9999998807907104, 'entity': 'organisation'}, 'relation': 'inception date', 'target': {'start': 63, 'end': 76, 'span': 'April 4, 1975', 'score': 1.0, 'entity': 'date'}, 'score': 0.9999998807907104}, {'source': {'start': 34, 'end': 44, 'span': 'Bill Gates', 'score': 0.9999998211860657, 'entity': 'founder'}, 'relation': 'held position', 'target': {'start': 206, 'end': 214, 'span': 'chairman', 'score': 0.9999998807907104, 'entity': 'position'}, 'score': 0.999997615814209}, {'source': {'start': 34, 'end': 44, 'span': 'Bill Gates', 'score': 0.9999998211860657, 'entity': 'founder'}, 'relation': 'held position', 'target': {'start': 225, 'end': 248, 'span': 'chief executive officer', 'score': 0.9999997019767761, 'entity': 'position'}, 'score': 0.9999843835830688}, {'source': {'start': 34, 'end': 44, 'span': 'Bill Gates', 'score': 0.9999998211860657, 'entity': 'founder'}, 'relation': 'held position', 'target': {'start': 250, 'end': 259, 'span': 'president', 'score': 0.9999998807907104, 'entity': 'position'}, 'score': 0.9999969005584717}, {'source': {'start': 
34, 'end': 44, 'span': 'Bill Gates', 'score': 0.9999998211860657, 'entity': 'founder'}, 'relation': 'held position', 'target': {'start': 264, 'end': 288, 'span': 'chief software architect', 'score': 0.9999998807907104, 'entity': 'position'}, 'score': 0.9999908208847046}], 'duration': 0.30675745010375977}
QA:
Duration: 0.48 seconds
Results: {'task': 'question_answering', 'answers': [{'text': 'Bill Gates', 'score': 0.9978553056716919}], 'duration': 0.4841444492340088}
Found 1 answers
SUMMARIZATION:
Duration: 0.42 seconds
Results: {'task': 'summarization', 'summaries': [{'text': 'Several studies have reported its pharmacological activities, including anti-inflammatory, \n antimicrobial, and antitumoral effects.', 'score': 0.8983121514320374}, {'text': 'The effect of E-anethole was studied in the osteosarcoma \n MG-63 cell line, and the antiproliferative activity was evaluated by an MTT assay.', 'score': 0.7457365393638611}, {'text': '25 μM with apoptosis induction through the mitochondrial-mediated pathway.', 'score': 0.8508360981941223}], 'duration': 0.41564154624938965}
Generated 3 summary segments
SENTIMENT_EXTRACTION:
Duration: 0.36 seconds
Results: {'task': 'sentiment_extraction', 'sentiments': [{'text': 'impressed', 'label': 'positive sentiment', 'score': 0.7771905660629272}, {'text': 'excellent', 'label': 'positive sentiment', 'score': 0.6963109374046326}, {'text': 'price is a bit high', 'label': 'negative sentiment', 'score': 0.8551780581474304}, {'text': 'amazing', 'label': 'positive sentiment', 'score': 0.6874173879623413}, {'text': 'app could use some improvements', 'label': 'negative sentiment', 'score': 0.7845857739448547}], 'duration': 0.358095645904541}
Found 5 sentiment expressions
ENTITY_DISAMBIGUATION:
Duration: 0.32 seconds
Results: {'task': 'entity_disambiguation', 'entities': [{'text': 'capital of France', 'label': 'location Paris', 'score': 0.8064324855804443}, {'text': 'Paris Hilton', 'label': 'person Paris', 'score': 0.9987842440605164}, {'text': 'Mercury', 'label': 'planet Mercury', 'score': 0.9934960603713989}, {'text': 'Mercury', 'label': 'planet Mercury', 'score': 0.9940248131752014}, {'text': 'chemical element', 'label': 'element Mercury', 'score': 0.9640767574310303}], 'duration': 0.32335710525512695}
Found 5 entities
|