|
--- |
|
license: apache-2.0 |
|
--- |
|
|
|
import json
import time
from functools import wraps
from typing import Dict, List

from gliner import GLiNER

from utca.core import RenameAttribute
from utca.implementation.predictors import GLiNERPredictor, GLiNERPredictorConfig
from utca.implementation.tasks import (
    GLiNER as UTCAGLiNER,
    GLiNERPreprocessor,
    GLiNERRelationExtraction,
    GLiNERRelationExtractionPreprocessor,
)
|
|
|
def measure_time(func):
    """Decorator that prints the wall-clock execution time of *func*.

    The wrapped function's return value is passed through unchanged.
    Uses ``functools.wraps`` so the wrapper keeps the original
    ``__name__``/``__doc__`` (the printed label relies on ``__name__``).
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        elapsed = time.time() - start_time
        print(f"Execution time of {func.__name__}: {elapsed:.6f} seconds")
        return result

    return wrapper
|
|
|
|
|
|
|
class GLiNERTester:
    """Exercises a GLiNER checkpoint across several zero-shot tasks.

    Tasks covered: NER, relation extraction (via a utca pipeline), open
    question answering, summarization, sentiment extraction, and entity
    disambiguation.  Each ``test_*`` method returns a JSON-serializable
    dict containing the task name, its predictions, and a ``duration``
    in seconds; ``run_all_tests`` aggregates them and writes the result
    to ``gliner_test_results.json``.
    """

    def __init__(self, model_name: str = r"C:\Users\doren\PycharmProjects\GlinerFineTuning\data\checkpoint-100000", device: str = "cuda:0"):
        """Load the model and build the relation-extraction pipeline.

        Args:
            model_name: HF model id or local checkpoint path.
                NOTE(review): default is a machine-specific Windows path —
                callers on other machines must pass their own checkpoint.
            device: torch device string used by the utca predictor
                (e.g. ``"cuda:0"`` or ``"cpu"``).
        """
        # Plain GLiNER model used for every task except relation extraction.
        self.model = GLiNER.from_pretrained(model_name)

        # Predictor backing the utca relation-extraction pipeline.
        self.predictor = GLiNERPredictor(
            GLiNERPredictorConfig(
                model_name=model_name,
                device=device,
            )
        )

        # Pipeline: entity detection -> rename "output" to "entities"
        # (the key the relation stage expects) -> relation extraction.
        self.relation_pipe = (
            UTCAGLiNER(
                predictor=self.predictor,
                preprocess=GLiNERPreprocessor(threshold=0.5),
            )
            | RenameAttribute("output", "entities")
            | GLiNERRelationExtraction(
                predictor=self.predictor,
                preprocess=(
                    GLiNERPreprocessor(threshold=0.5)
                    | GLiNERRelationExtractionPreprocessor()
                ),
            )
        )

        # Populated by run_all_tests().
        self.results: Dict = {}

    @staticmethod
    def _format_spans(spans: List[Dict], fields: tuple) -> List[Dict]:
        """Project each predicted span dict onto *fields*.

        Shared by the test methods so the output shape is defined in one
        place (previously the same comprehension was repeated per task).
        """
        return [{key: span[key] for key in fields} for span in spans]

    @measure_time
    def test_ner(self) -> Dict:
        """Test Named Entity Recognition capabilities."""
        print("\nTesting NER...")

        text = """
        Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters
        for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,
        chief executive officer, president and chief software architect, while also being the largest
        individual shareholder until May 2014.
        """

        labels = ["founder", "computer", "software", "position", "date"]

        start_time = time.time()
        entities = self.model.predict_entities(text, labels)
        duration = time.time() - start_time

        return {
            "task": "ner",
            "entities": self._format_spans(entities, ("text", "label", "score")),
            "duration": duration,
        }

    @measure_time
    def test_relation_extraction(self) -> Dict:
        """Test Relation Extraction capabilities."""
        print("\nTesting Relation Extraction...")

        text = """
        Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters
        for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,
        chief executive officer, president and chief software architect.
        """

        start_time = time.time()
        result = self.relation_pipe.run({
            "text": text,
            "labels": ["organisation", "founder", "position", "date"],
            "relations": [{
                "relation": "founder",
                "pairs_filter": [("organisation", "founder")],
                # Only pair entities whose spans lie within 100 characters.
                "distance_threshold": 100,
            }, {
                "relation": "inception date",
                "pairs_filter": [("organisation", "date")],
            }, {
                "relation": "held position",
                "pairs_filter": [("founder", "position")],
            }],
        })
        duration = time.time() - start_time

        return {
            "task": "relation_extraction",
            "relations": result["output"],
            "duration": duration,
        }

    @measure_time
    def test_qa(self) -> Dict:
        """Test Question Answering capabilities."""
        print("\nTesting Question Answering...")

        question = "Who was the CEO of Microsoft?"
        text = """
        Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters
        for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,
        chief executive officer, president and chief software architect, while also being the largest
        individual shareholder until May 2014.
        """

        # GLiNER open-QA idiom: prepend the question and extract "answer" spans.
        input_ = question + text
        labels = ["answer"]

        start_time = time.time()
        answers = self.model.predict_entities(input_, labels)
        duration = time.time() - start_time

        return {
            "task": "question_answering",
            "answers": self._format_spans(answers, ("text", "score")),
            "duration": duration,
        }

    @measure_time
    def test_summarization(self) -> Dict:
        """Test Summarization capabilities."""
        print("\nTesting Summarization...")

        text = """
        Several studies have reported its pharmacological activities, including anti-inflammatory,
        antimicrobial, and antitumoral effects. The effect of E-anethole was studied in the osteosarcoma
        MG-63 cell line, and the antiproliferative activity was evaluated by an MTT assay. It showed
        a GI50 value of 60.25 μM with apoptosis induction through the mitochondrial-mediated pathway.
        """

        prompt = "Summarize the given text, highlighting the most important information:\n"
        input_ = prompt + text
        labels = ["summary"]

        start_time = time.time()
        # Low threshold: summary spans typically score lower than entities.
        summaries = self.model.predict_entities(input_, labels, threshold=0.1)
        duration = time.time() - start_time

        return {
            "task": "summarization",
            "summaries": self._format_spans(summaries, ("text", "score")),
            "duration": duration,
        }

    @measure_time
    def test_sentiment_extraction(self) -> Dict:
        """Test Sentiment Extraction capabilities."""
        print("\nTesting Sentiment Extraction...")

        text = """
        I recently purchased the Sony WH-1000XM4 headphones and I'm thoroughly impressed.
        The noise-canceling is excellent, though the price is a bit high. The sound quality is amazing
        but the app could use some improvements.
        """

        labels = ["positive sentiment", "negative sentiment"]

        start_time = time.time()
        sentiments = self.model.predict_entities(text, labels)
        duration = time.time() - start_time

        return {
            "task": "sentiment_extraction",
            "sentiments": self._format_spans(sentiments, ("text", "label", "score")),
            "duration": duration,
        }

    @measure_time
    def test_entity_disambiguation(self) -> Dict:
        """Test Entity Disambiguation capabilities."""
        print("\nTesting Entity Disambiguation...")

        text = """
        Paris is the capital of France. Paris Hilton is an American media personality.
        Mercury is a planet in our solar system. Mercury is also a chemical element.
        """

        # Qualified labels ("location Paris" vs "person Paris") let the model
        # disambiguate identical surface forms.
        labels = ["location Paris", "person Paris", "planet Mercury", "element Mercury"]

        start_time = time.time()
        entities = self.model.predict_entities(text, labels)
        duration = time.time() - start_time

        return {
            "task": "entity_disambiguation",
            "entities": self._format_spans(entities, ("text", "label", "score")),
            "duration": duration,
        }

    def run_all_tests(self) -> Dict:
        """Run all available tests, cache them on ``self.results``, and
        persist them to ``gliner_test_results.json``."""
        print("Starting GLiNER comprehensive test suite...")

        self.results = {
            "ner": self.test_ner(),
            "relation_extraction": self.test_relation_extraction(),
            "qa": self.test_qa(),
            "summarization": self.test_summarization(),
            "sentiment_extraction": self.test_sentiment_extraction(),
            "entity_disambiguation": self.test_entity_disambiguation(),
        }

        # Explicit UTF-8: predictions may contain non-ASCII text (e.g. "μM"),
        # which can fail under a locale-dependent default encoding.
        with open('gliner_test_results.json', 'w', encoding='utf-8') as f:
            json.dump(self.results, f, indent=4)

        print("\nAll tests completed. Results saved to 'gliner_test_results.json'")
        return self.results
|
|
|
|
|
def main():
    """Run the full GLiNER test suite and print a per-task summary."""
    # Prefer GPU; fall back to CPU if model/CUDA initialization fails.
    # `except Exception` (not bare `except:`) so Ctrl-C / SystemExit
    # still interrupt instead of silently retrying on CPU.
    try:
        tester = GLiNERTester(device="cuda:0")
        print("Using GPU for testing")
    except Exception:
        tester = GLiNERTester(device="cpu")
        print("Using CPU for testing")

    # Run all tests
    results = tester.run_all_tests()

    # Print summary of results
    print("\nTest Summary:")
    for task, result in results.items():
        print(f"\n{task.upper()}:")
        print(f"Duration: {result['duration']:.2f} seconds")
        print("Results: ", result)
        if 'entities' in result:
            print(f"Found {len(result['entities'])} entities")
        elif 'answers' in result:
            print(f"Found {len(result['answers'])} answers")
        elif 'summaries' in result:
            print(f"Generated {len(result['summaries'])} summary segments")
        elif 'sentiments' in result:
            print(f"Found {len(result['sentiments'])} sentiment expressions")


if __name__ == "__main__":
    main()
|
|
|
|
|
Test Summary: |
|
|
|
NER: |
|
Duration: 0.41 seconds |
|
Results: {'task': 'ner', 'entities': [{'text': 'Bill Gates', 'label': 'founder', 'score': 0.999995768070221}, {'text': 'Paul Allen', 'label': 'founder', 'score': 0.9999948740005493}, {'text': 'April 4, 1975', 'label': 'date', 'score': 0.9999996423721313}, {'text': 'BASIC interpreters', 'label': 'software', 'score': 0.9999961853027344}, {'text': 'Altair 8800', 'label': 'computer', 'score': 0.9999923706054688}, {'text': 'chairman', 'label': 'position', 'score': 0.9999326467514038}, {'text': 'chief executive officer', 'label': 'position', 'score': 0.9999247193336487}, {'text': 'president', 'label': 'position', 'score': 0.9999806880950928}, {'text': 'chief software architect', 'label': 'position', 'score': 0.9999625086784363}, {'text': 'largest \n individual shareholder', 'label': 'position', 'score': 0.9741785526275635}], 'duration': 0.4105691909790039} |
|
Found 10 entities |
|
|
|
RELATION_EXTRACTION: |
|
Duration: 0.31 seconds |
|
Results: {'task': 'relation_extraction', 'relations': [{'source': {'start': 9, 'end': 18, 'span': 'Microsoft', 'score': 0.9999996423721313, 'entity': 'organisation'}, 'relation': 'founder', 'target': {'start': 34, 'end': 44, 'span': 'Bill Gates', 'score': 0.9999998211860657, 'entity': 'founder'}, 'score': 0.9999523162841797}, {'source': {'start': 9, 'end': 18, 'span': 'Microsoft', 'score': 0.9999996423721313, 'entity': 'organisation'}, 'relation': 'founder', 'target': {'start': 49, 'end': 59, 'span': 'Paul Allen', 'score': 0.9999998807907104, 'entity': 'founder'}, 'score': 0.999999463558197}, {'source': {'start': 9, 'end': 18, 'span': 'Microsoft', 'score': 0.9999996423721313, 'entity': 'organisation'}, 'relation': 'inception date', 'target': {'start': 63, 'end': 76, 'span': 'April 4, 1975', 'score': 1.0, 'entity': 'date'}, 'score': 0.9999998807907104}, {'source': {'start': 167, 'end': 176, 'span': 'Microsoft', 'score': 0.9999998807907104, 'entity': 'organisation'}, 'relation': 'inception date', 'target': {'start': 63, 'end': 76, 'span': 'April 4, 1975', 'score': 1.0, 'entity': 'date'}, 'score': 0.9999998807907104}, {'source': {'start': 34, 'end': 44, 'span': 'Bill Gates', 'score': 0.9999998211860657, 'entity': 'founder'}, 'relation': 'held position', 'target': {'start': 206, 'end': 214, 'span': 'chairman', 'score': 0.9999998807907104, 'entity': 'position'}, 'score': 0.999997615814209}, {'source': {'start': 34, 'end': 44, 'span': 'Bill Gates', 'score': 0.9999998211860657, 'entity': 'founder'}, 'relation': 'held position', 'target': {'start': 225, 'end': 248, 'span': 'chief executive officer', 'score': 0.9999997019767761, 'entity': 'position'}, 'score': 0.9999843835830688}, {'source': {'start': 34, 'end': 44, 'span': 'Bill Gates', 'score': 0.9999998211860657, 'entity': 'founder'}, 'relation': 'held position', 'target': {'start': 250, 'end': 259, 'span': 'president', 'score': 0.9999998807907104, 'entity': 'position'}, 'score': 0.9999969005584717}, {'source': {'start': 
34, 'end': 44, 'span': 'Bill Gates', 'score': 0.9999998211860657, 'entity': 'founder'}, 'relation': 'held position', 'target': {'start': 264, 'end': 288, 'span': 'chief software architect', 'score': 0.9999998807907104, 'entity': 'position'}, 'score': 0.9999908208847046}], 'duration': 0.30675745010375977} |
|
|
|
QA: |
|
Duration: 0.48 seconds |
|
Results: {'task': 'question_answering', 'answers': [{'text': 'Bill Gates', 'score': 0.9978553056716919}], 'duration': 0.4841444492340088} |
|
Found 1 answers |
|
|
|
SUMMARIZATION: |
|
Duration: 0.42 seconds |
|
Results: {'task': 'summarization', 'summaries': [{'text': 'Several studies have reported its pharmacological activities, including anti-inflammatory, \n antimicrobial, and antitumoral effects.', 'score': 0.8983121514320374}, {'text': 'The effect of E-anethole was studied in the osteosarcoma \n MG-63 cell line, and the antiproliferative activity was evaluated by an MTT assay.', 'score': 0.7457365393638611}, {'text': '25 μM with apoptosis induction through the mitochondrial-mediated pathway.', 'score': 0.8508360981941223}], 'duration': 0.41564154624938965} |
|
Generated 3 summary segments |
|
|
|
SENTIMENT_EXTRACTION: |
|
Duration: 0.36 seconds |
|
Results: {'task': 'sentiment_extraction', 'sentiments': [{'text': 'impressed', 'label': 'positive sentiment', 'score': 0.7771905660629272}, {'text': 'excellent', 'label': 'positive sentiment', 'score': 0.6963109374046326}, {'text': 'price is a bit high', 'label': 'negative sentiment', 'score': 0.8551780581474304}, {'text': 'amazing', 'label': 'positive sentiment', 'score': 0.6874173879623413}, {'text': 'app could use some improvements', 'label': 'negative sentiment', 'score': 0.7845857739448547}], 'duration': 0.358095645904541} |
|
Found 5 sentiment expressions |
|
|
|
ENTITY_DISAMBIGUATION: |
|
Duration: 0.32 seconds |
|
Results: {'task': 'entity_disambiguation', 'entities': [{'text': 'capital of France', 'label': 'location Paris', 'score': 0.8064324855804443}, {'text': 'Paris Hilton', 'label': 'person Paris', 'score': 0.9987842440605164}, {'text': 'Mercury', 'label': 'planet Mercury', 'score': 0.9934960603713989}, {'text': 'Mercury', 'label': 'planet Mercury', 'score': 0.9940248131752014}, {'text': 'chemical element', 'label': 'element Mercury', 'score': 0.9640767574310303}], 'duration': 0.32335710525512695} |
|
Found 5 entities |
|
|