ClovenDoug committed on
Commit
e05ebfd
·
verified ·
1 Parent(s): d742fde

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +262 -0
README.md CHANGED
@@ -2,6 +2,268 @@
2
  license: apache-2.0
3
  ---
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  Test Summary:
6
 
7
  NER:
 
2
  license: apache-2.0
3
  ---
4
 
5
import functools
import json
import time
from typing import Dict, List

from gliner import GLiNER
from utca.core import RenameAttribute
from utca.implementation.predictors import GLiNERPredictor, GLiNERPredictorConfig
from utca.implementation.tasks import (
    GLiNER as UTCAGLiNER,
    GLiNERPreprocessor,
    GLiNERRelationExtraction,
    GLiNERRelationExtractionPreprocessor,
)
17
+
18
def measure_time(func):
    """Decorate ``func`` so every call prints its execution time.

    Args:
        func: Any callable. Positional and keyword arguments are forwarded
            unchanged.

    Returns:
        A wrapper that calls ``func``, prints
        ``Execution time of <name>: <seconds> seconds`` and returns whatever
        ``func`` returned. Nothing is printed if ``func`` raises.
    """

    # functools.wraps keeps the decorated callable's __name__/__doc__ intact,
    # so introspection and tracebacks still show the real function.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"Execution time of {func.__name__}: {execution_time:.6f} seconds")
        return result

    return wrapper
28
+
29
+
30
+
31
class GLiNERTester:
    """Smoke-test harness that runs one GLiNER checkpoint through several tasks.

    Every ``test_*`` method returns a dict holding a ``"task"`` name, a
    task-specific list of predictions and a ``"duration"`` in seconds.
    ``run_all_tests`` runs them all and writes the combined results to JSON.
    """

    def __init__(self, model_name: str = r"C:\Users\doren\PycharmProjects\GlinerFineTuning\data\checkpoint-100000", device: str = "cuda:0"):
        """Load the model and build the relation-extraction pipeline.

        Args:
            model_name: Checkpoint path or model id handed to
                ``GLiNER.from_pretrained`` and to the UTCA predictor.
                NOTE: the default is a machine-specific local path; pass an
                explicit value when running anywhere else.
            device: Device string used for both the directly loaded model and
                the UTCA predictor.
        """
        # Move the directly loaded model to the requested device as well;
        # previously only the UTCA predictor honoured ``device``.
        # NOTE(review): assumes GLiNER exposes torch's ``Module.to`` - confirm.
        self.model = GLiNER.from_pretrained(model_name).to(device)

        # Predictor shared by both stages of the relation-extraction pipeline.
        # NOTE(review): presumably this loads its own copy of the checkpoint.
        self.predictor = GLiNERPredictor(
            GLiNERPredictorConfig(
                model_name=model_name,
                device=device
            )
        )

        # Stage 1 produces "output", which is renamed to "entities" before
        # being piped into the relation-extraction stage.
        self.relation_pipe = (
            UTCAGLiNER(
                predictor=self.predictor,
                preprocess=GLiNERPreprocessor(threshold=0.5)
            )
            | RenameAttribute("output", "entities")
            | GLiNERRelationExtraction(
                predictor=self.predictor,
                preprocess=(
                    GLiNERPreprocessor(threshold=0.5)
                    | GLiNERRelationExtractionPreprocessor()
                )
            )
        )

        self.results = {}

    def _timed_predict(self, text: str, labels: List[str], **predict_kwargs):
        """Run ``predict_entities`` and return ``(predictions, seconds)``."""
        start_time = time.perf_counter()
        predictions = self.model.predict_entities(text, labels, **predict_kwargs)
        return predictions, time.perf_counter() - start_time

    @staticmethod
    def _select_fields(predictions, fields) -> List[Dict]:
        """Reduce each prediction dict to ``fields``, keeping that key order."""
        return [{name: item[name] for name in fields} for item in predictions]

    @measure_time
    def test_ner(self) -> Dict:
        """Test Named Entity Recognition capabilities"""
        print("\nTesting NER...")

        text = """
        Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters
        for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,
        chief executive officer, president and chief software architect, while also being the largest
        individual shareholder until May 2014.
        """

        labels = ["founder", "computer", "software", "position", "date"]

        entities, duration = self._timed_predict(text, labels)

        return {
            "task": "ner",
            "entities": self._select_fields(entities, ("text", "label", "score")),
            "duration": duration
        }

    @measure_time
    def test_relation_extraction(self) -> Dict:
        """Test Relation Extraction capabilities"""
        print("\nTesting Relation Extraction...")

        text = """
        Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters
        for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,
        chief executive officer, president and chief software architect.
        """

        # Each relation is restricted to the listed (source label, target
        # label) pairs; only "founder" also sets a distance threshold.
        pipeline_input = {
            "text": text,
            "labels": ["organisation", "founder", "position", "date"],
            "relations": [{
                "relation": "founder",
                "pairs_filter": [("organisation", "founder")],
                "distance_threshold": 100,
            }, {
                "relation": "inception date",
                "pairs_filter": [("organisation", "date")],
            }, {
                "relation": "held position",
                "pairs_filter": [("founder", "position")],
            }]
        }

        start_time = time.perf_counter()
        result = self.relation_pipe.run(pipeline_input)
        duration = time.perf_counter() - start_time

        return {
            "task": "relation_extraction",
            "relations": result["output"],
            "duration": duration
        }

    @measure_time
    def test_qa(self) -> Dict:
        """Test Question Answering capabilities"""
        print("\nTesting Question Answering...")

        question = "Who was the CEO of Microsoft?"
        text = """
        Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters
        for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,
        chief executive officer, president and chief software architect, while also being the largest
        individual shareholder until May 2014.
        """

        # The question is prepended to the passage; the leading newline of
        # ``text`` is the only separator between the two.
        input_ = question + text
        labels = ["answer"]

        answers, duration = self._timed_predict(input_, labels)

        return {
            "task": "question_answering",
            "answers": self._select_fields(answers, ("text", "score")),
            "duration": duration
        }

    @measure_time
    def test_summarization(self) -> Dict:
        """Test Summarization capabilities"""
        print("\nTesting Summarization...")

        text = """
        Several studies have reported its pharmacological activities, including anti-inflammatory,
        antimicrobial, and antitumoral effects. The effect of E-anethole was studied in the osteosarcoma
        MG-63 cell line, and the antiproliferative activity was evaluated by an MTT assay. It showed
        a GI50 value of 60.25 μM with apoptosis induction through the mitochondrial-mediated pathway.
        """

        prompt = "Summarize the given text, highlighting the most important information:\n"
        input_ = prompt + text
        labels = ["summary"]

        # This is the only task that overrides the prediction threshold.
        summaries, duration = self._timed_predict(input_, labels, threshold=0.1)

        return {
            "task": "summarization",
            "summaries": self._select_fields(summaries, ("text", "score")),
            "duration": duration
        }

    @measure_time
    def test_sentiment_extraction(self) -> Dict:
        """Test Sentiment Extraction capabilities"""
        print("\nTesting Sentiment Extraction...")

        text = """
        I recently purchased the Sony WH-1000XM4 headphones and I'm thoroughly impressed.
        The noise-canceling is excellent, though the price is a bit high. The sound quality is amazing
        but the app could use some improvements.
        """

        labels = ["positive sentiment", "negative sentiment"]

        sentiments, duration = self._timed_predict(text, labels)

        return {
            "task": "sentiment_extraction",
            "sentiments": self._select_fields(sentiments, ("text", "label", "score")),
            "duration": duration
        }

    @measure_time
    def test_entity_disambiguation(self) -> Dict:
        """Test Entity Disambiguation capabilities"""
        print("\nTesting Entity Disambiguation...")

        text = """
        Paris is the capital of France. Paris Hilton is an American media personality.
        Mercury is a planet in our solar system. Mercury is also a chemical element.
        """

        labels = ["location Paris", "person Paris", "planet Mercury", "element Mercury"]

        entities, duration = self._timed_predict(text, labels)

        return {
            "task": "entity_disambiguation",
            "entities": self._select_fields(entities, ("text", "label", "score")),
            "duration": duration
        }

    def run_all_tests(self, output_path: str = 'gliner_test_results.json') -> Dict:
        """Run all available tests, store the results and save them as JSON.

        Args:
            output_path: File the combined results are written to. Defaults
                to ``gliner_test_results.json`` in the working directory.

        Returns:
            Dict mapping a short task key to that test's result dict; the
            same dict is kept on ``self.results``.
        """
        print("Starting GLiNER comprehensive test suite...")

        self.results = {
            "ner": self.test_ner(),
            "relation_extraction": self.test_relation_extraction(),
            "qa": self.test_qa(),
            "summarization": self.test_summarization(),
            "sentiment_extraction": self.test_sentiment_extraction(),
            "entity_disambiguation": self.test_entity_disambiguation()
        }

        # Explicit encoding so the file does not depend on the platform's
        # default locale.
        with open(output_path, 'w', encoding="utf-8") as f:
            json.dump(self.results, f, indent=4)

        print(f"\nAll tests completed. Results saved to '{output_path}'")
        return self.results
233
+
234
+
235
def main():
    """Run the full GLiNER test suite and print a per-task summary.

    Tries to build the tester on ``cuda:0`` first and falls back to ``cpu``
    if that construction raises.
    """
    # Initialize tester with GPU if available
    try:
        tester = GLiNERTester(device="cuda:0")
        print("Using GPU for testing")
    except Exception as exc:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C / SystemExit
        # still propagate; the reason is printed instead of being hidden.
        print(f"GPU initialisation failed ({exc}); falling back to CPU")
        tester = GLiNERTester(device="cpu")
        print("Using CPU for testing")

    # Run all tests
    results = tester.run_all_tests()

    # (result key, message template) pairs, checked in order; only the first
    # key present in a result is reported.
    count_messages = (
        ('entities', "Found {} entities"),
        ('answers', "Found {} answers"),
        ('summaries', "Generated {} summary segments"),
        ('sentiments', "Found {} sentiment expressions"),
    )

    # Print summary of results
    print("\nTest Summary:")
    for task, result in results.items():
        print(f"\n{task.upper()}:")
        print(f"Duration: {result['duration']:.2f} seconds")
        print("Results: ", result)
        for key, template in count_messages:
            if key in result:
                print(template.format(len(result[key])))
                break


if __name__ == "__main__":
    main()
265
+
266
+
267
  Test Summary:
268
 
269
  NER: