Update README.md
Browse files
README.md
CHANGED
@@ -2,6 +2,268 @@
|
|
2 |
license: apache-2.0
|
3 |
---
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
Test Summary:
|
6 |
|
7 |
NER:
|
|
|
2 |
license: apache-2.0
|
3 |
---
|
4 |
|
5 |
+
from gliner import GLiNER
|
6 |
+
from utca.core import RenameAttribute
|
7 |
+
from utca.implementation.predictors import GLiNERPredictor, GLiNERPredictorConfig
|
8 |
+
from utca.implementation.tasks import (
|
9 |
+
GLiNER as UTCAGLiNER,
|
10 |
+
GLiNERPreprocessor,
|
11 |
+
GLiNERRelationExtraction,
|
12 |
+
GLiNERRelationExtractionPreprocessor,
|
13 |
+
)
|
14 |
+
import time
|
15 |
+
from typing import Dict, List
|
16 |
+
import json
|
17 |
+
|
18 |
+
def measure_time(func):
    """Decorate ``func`` so that every call prints its wall-clock duration.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func``, prints ``Execution time of <name>: <seconds> seconds`` once
        the call returns, and passes ``func``'s return value through
        unchanged. Nothing is printed when ``func`` raises.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    import functools

    @functools.wraps(func)  # preserve the wrapped callable's __name__ / __doc__
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"Execution time of {func.__name__}: {execution_time:.6f} seconds")
        return result

    return wrapper
|
28 |
+
|
29 |
+
|
30 |
+
|
31 |
+
class GLiNERTester:
    """Run one GLiNER checkpoint through a fixed set of example tasks.

    Every ``test_*`` method feeds a hard-coded example to the model, times the
    call with ``time.time()`` and returns a plain dict holding the task name,
    the predictions and the elapsed seconds. ``run_all_tests`` executes all of
    them and writes the combined results to ``gliner_test_results.json``.
    """

    def __init__(self, model_name: str = r"C:\Users\doren\PycharmProjects\GlinerFineTuning\data\checkpoint-100000", device: str = "cuda:0"):
        """Load the model and assemble the relation extraction pipeline.

        Args:
            model_name: Checkpoint identifier or path handed to both
                ``GLiNER.from_pretrained`` and ``GLiNERPredictorConfig``.
            device: Device string handed to ``GLiNERPredictorConfig``.
        """
        # Model used directly by every task except relation extraction.
        self.model = GLiNER.from_pretrained(model_name)

        # Predictor shared by both predictor-backed stages of the pipeline.
        predictor_config = GLiNERPredictorConfig(model_name=model_name, device=device)
        self.predictor = GLiNERPredictor(predictor_config)

        # Three stages chained with "|", in this order.
        entity_stage = UTCAGLiNER(
            predictor=self.predictor,
            preprocess=GLiNERPreprocessor(threshold=0.5),
        )
        rename_stage = RenameAttribute("output", "entities")
        relation_preprocess = (
            GLiNERPreprocessor(threshold=0.5)
            | GLiNERRelationExtractionPreprocessor()
        )
        relation_stage = GLiNERRelationExtraction(
            predictor=self.predictor,
            preprocess=relation_preprocess,
        )
        self.relation_pipe = entity_stage | rename_stage | relation_stage

        self.results = {}

    @measure_time
    def test_ner(self) -> Dict:
        """Predict entities in a short passage about Microsoft's founding.

        Returns:
            Dict with ``task``, ``entities`` (text, label and score of each
            prediction) and ``duration`` (seconds spent in the model call).
        """
        print("\nTesting NER...")

        passage = (
            "\n"
            "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters\n"
            "for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,\n"
            "chief executive officer, president and chief software architect, while also being the largest\n"
            "individual shareholder until May 2014.\n"
        )
        wanted = ["founder", "computer", "software", "position", "date"]

        t0 = time.time()
        hits = self.model.predict_entities(passage, wanted)
        elapsed = time.time() - t0

        picked = [
            {field: hit[field] for field in ("text", "label", "score")}
            for hit in hits
        ]
        return {"task": "ner", "entities": picked, "duration": elapsed}

    @measure_time
    def test_relation_extraction(self) -> Dict:
        """Run the relation extraction pipeline on the Microsoft passage.

        Returns:
            Dict with ``task``, ``relations`` (the pipeline result's
            ``"output"`` entry) and ``duration`` (seconds spent in the run).
        """
        print("\nTesting Relation Extraction...")

        passage = (
            "\n"
            "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters\n"
            "for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,\n"
            "chief executive officer, president and chief software architect.\n"
        )
        relation_specs = [
            {
                "relation": "founder",
                "pairs_filter": [("organisation", "founder")],
                "distance_threshold": 100,
            },
            {
                "relation": "inception date",
                "pairs_filter": [("organisation", "date")],
            },
            {
                "relation": "held position",
                "pairs_filter": [("founder", "position")],
            },
        ]
        payload = {
            "text": passage,
            "labels": ["organisation", "founder", "position", "date"],
            "relations": relation_specs,
        }

        t0 = time.time()
        outcome = self.relation_pipe.run(payload)
        elapsed = time.time() - t0

        return {
            "task": "relation_extraction",
            "relations": outcome["output"],
            "duration": elapsed,
        }

    @measure_time
    def test_qa(self) -> Dict:
        """Pose a question by prepending it to the passage and asking for "answer" spans.

        Returns:
            Dict with ``task``, ``answers`` (text and score of each
            prediction) and ``duration`` (seconds spent in the model call).
        """
        print("\nTesting Question Answering...")

        question = "Who was the CEO of Microsoft?"
        passage = (
            "\n"
            "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters\n"
            "for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,\n"
            "chief executive officer, president and chief software architect, while also being the largest\n"
            "individual shareholder until May 2014.\n"
        )
        model_input = question + passage
        wanted = ["answer"]

        t0 = time.time()
        hits = self.model.predict_entities(model_input, wanted)
        elapsed = time.time() - t0

        picked = [{field: hit[field] for field in ("text", "score")} for hit in hits]
        return {"task": "question_answering", "answers": picked, "duration": elapsed}

    @measure_time
    def test_summarization(self) -> Dict:
        """Request "summary" spans for a prompt-prefixed passage at threshold 0.1.

        Returns:
            Dict with ``task``, ``summaries`` (text and score of each
            prediction) and ``duration`` (seconds spent in the model call).
        """
        print("\nTesting Summarization...")

        passage = (
            "\n"
            "Several studies have reported its pharmacological activities, including anti-inflammatory,\n"
            "antimicrobial, and antitumoral effects. The effect of E-anethole was studied in the osteosarcoma\n"
            "MG-63 cell line, and the antiproliferative activity was evaluated by an MTT assay. It showed\n"
            "a GI50 value of 60.25 μM with apoptosis induction through the mitochondrial-mediated pathway.\n"
        )
        instruction = "Summarize the given text, highlighting the most important information:\n"
        model_input = instruction + passage
        wanted = ["summary"]

        t0 = time.time()
        hits = self.model.predict_entities(model_input, wanted, threshold=0.1)
        elapsed = time.time() - t0

        picked = [{field: hit[field] for field in ("text", "score")} for hit in hits]
        return {"task": "summarization", "summaries": picked, "duration": elapsed}

    @measure_time
    def test_sentiment_extraction(self) -> Dict:
        """Predict positive/negative sentiment spans in a product review.

        Returns:
            Dict with ``task``, ``sentiments`` (text, label and score of each
            prediction) and ``duration`` (seconds spent in the model call).
        """
        print("\nTesting Sentiment Extraction...")

        review = (
            "\n"
            "I recently purchased the Sony WH-1000XM4 headphones and I'm thoroughly impressed.\n"
            "The noise-canceling is excellent, though the price is a bit high. The sound quality is amazing\n"
            "but the app could use some improvements.\n"
        )
        wanted = ["positive sentiment", "negative sentiment"]

        t0 = time.time()
        hits = self.model.predict_entities(review, wanted)
        elapsed = time.time() - t0

        picked = [
            {field: hit[field] for field in ("text", "label", "score")}
            for hit in hits
        ]
        return {"task": "sentiment_extraction", "sentiments": picked, "duration": elapsed}

    @measure_time
    def test_entity_disambiguation(self) -> Dict:
        """Predict entities using labels that separate ambiguous names.

        Returns:
            Dict with ``task``, ``entities`` (text, label and score of each
            prediction) and ``duration`` (seconds spent in the model call).
        """
        print("\nTesting Entity Disambiguation...")

        passage = (
            "\n"
            "Paris is the capital of France. Paris Hilton is an American media personality.\n"
            "Mercury is a planet in our solar system. Mercury is also a chemical element.\n"
        )
        wanted = ["location Paris", "person Paris", "planet Mercury", "element Mercury"]

        t0 = time.time()
        hits = self.model.predict_entities(passage, wanted)
        elapsed = time.time() - t0

        picked = [
            {field: hit[field] for field in ("text", "label", "score")}
            for hit in hits
        ]
        return {"task": "entity_disambiguation", "entities": picked, "duration": elapsed}

    def run_all_tests(self) -> Dict:
        """Run every task in a fixed order, store and save the results.

        The combined dict is assigned to ``self.results`` only after all
        tasks have finished, then written to ``gliner_test_results.json``.

        Returns:
            The dict stored in ``self.results``, keyed by task name.
        """
        print("Starting GLiNER comprehensive test suite...")

        collected = {}
        collected["ner"] = self.test_ner()
        collected["relation_extraction"] = self.test_relation_extraction()
        collected["qa"] = self.test_qa()
        collected["summarization"] = self.test_summarization()
        collected["sentiment_extraction"] = self.test_sentiment_extraction()
        collected["entity_disambiguation"] = self.test_entity_disambiguation()
        self.results = collected

        # Persist the combined results as indented JSON.
        with open('gliner_test_results.json', 'w') as out_file:
            json.dump(self.results, out_file, indent=4)

        print("\nAll tests completed. Results saved to 'gliner_test_results.json'")
        return self.results
|
233 |
+
|
234 |
+
|
235 |
+
def main():
    """Run the GLiNER test suite and print a per-task summary.

    Builds the tester on ``cuda:0`` first; if construction raises, the reason
    is printed and the tester is rebuilt on the CPU. After running all tests,
    prints each task's duration, its raw result dict and, where the result
    has one of the known list keys, the number of items found.
    """
    try:
        tester = GLiNERTester(device="cuda:0")
    except Exception as exc:
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit,
        # turning Ctrl-C during model loading into a second (CPU) load.
        print(f"GPU initialisation failed ({exc!r}); falling back to CPU")
        tester = GLiNERTester(device="cpu")
        print("Using CPU for testing")
    else:
        print("Using GPU for testing")

    # Run all tests
    results = tester.run_all_tests()

    # Print summary of results
    print("\nTest Summary:")
    for task, result in results.items():
        print(f"\n{task.upper()}:")
        print(f"Duration: {result['duration']:.2f} seconds")
        print("Results: ", result)
        # Only these four keys get a count line; results stored under any
        # other key print no count.
        if 'entities' in result:
            print(f"Found {len(result['entities'])} entities")
        elif 'answers' in result:
            print(f"Found {len(result['answers'])} answers")
        elif 'summaries' in result:
            print(f"Generated {len(result['summaries'])} summary segments")
        elif 'sentiments' in result:
            print(f"Found {len(result['sentiments'])} sentiment expressions")
|
261 |
+
|
262 |
+
|
263 |
+
# Script entry point: run the suite only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
265 |
+
|
266 |
+
|
267 |
Test Summary:
|
268 |
|
269 |
NER:
|