ClovenDoug
/

gliner-finetuned

PyTorch

Model card Files Files and versions Community

ClovenDoug committed on Dec 15, 2024

Commit

e05ebfd

verified ·

1 Parent(s): d742fde

Update README.md

Browse files

Files changed (1) hide show

README.md +262 -0

README.md CHANGED Viewed

@@ -2,6 +2,268 @@
 license: apache-2.0
 ---
 Test Summary:
 NER:

 license: apache-2.0
 ---
+from gliner import GLiNER
+from utca.core import RenameAttribute
+from utca.implementation.predictors import GLiNERPredictor, GLiNERPredictorConfig
+from utca.implementation.tasks import (
+    GLiNER as UTCAGLiNER,
+    GLiNERPreprocessor,
+    GLiNERRelationExtraction,
+    GLiNERRelationExtractionPreprocessor,
+)
+import time
+from typing import Dict, List
+import json
def measure_time(func):
    """Decorate *func* so that each call prints how long it took.

    The wrapped callable forwards all positional and keyword arguments to
    *func* and returns its result unchanged. After a successful call it prints
    ``Execution time of <name>: <seconds> seconds``. If *func* raises, the
    exception propagates and nothing is printed.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same call signature and metadata as *func*.
    """
    # Imported locally so this block does not depend on a file-level import.
    import functools

    # wraps() keeps __name__/__doc__ of the decorated callable intact, so
    # introspection and tracebacks do not all report "wrapper".
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments.
        started = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - started
        print(f"Execution time of {func.__name__}: {execution_time:.6f} seconds")
        return result

    return wrapper
class GLiNERTester:
    """Run one GLiNER checkpoint through a fixed set of example tasks.

    Each ``test_*`` method feeds a hard-coded sample text to the model and
    returns a dict containing the task name, the predictions, and the time
    spent in the prediction call (``"duration"``, in seconds).
    ``run_all_tests`` executes every task and writes the combined results to
    a JSON file.
    """

    def __init__(self, model_name: str = r"C:\Users\doren\PycharmProjects\GlinerFineTuning\data\checkpoint-100000", device: str = "cuda:0"):
        """Load the model and build the relation extraction pipeline.

        Args:
            model_name: Checkpoint identifier or path passed to both
                ``GLiNER.from_pretrained`` and ``GLiNERPredictorConfig``.
            device: Device string handed to ``GLiNERPredictorConfig``.
        """
        # Initialize the basic model for most tasks.
        # NOTE(review): `device` is only forwarded to the predictor below;
        # this model is loaded without an explicit device - confirm intended.
        self.model = GLiNER.from_pretrained(model_name)
        # Initialize the predictor shared by both relation pipeline stages.
        self.predictor = GLiNERPredictor(
            GLiNERPredictorConfig(
                model_name=model_name,
                device=device,
            )
        )
        # Build the relation extraction pipeline: entity stage, then a rename
        # of "output" to "entities", then the relation stage (presumably so
        # the relation stage can read the entities - verify against utca).
        self.relation_pipe = (
            UTCAGLiNER(
                predictor=self.predictor,
                preprocess=GLiNERPreprocessor(threshold=0.5),
            )
            | RenameAttribute("output", "entities")
            | GLiNERRelationExtraction(
                predictor=self.predictor,
                preprocess=(
                    GLiNERPreprocessor(threshold=0.5)
                    | GLiNERRelationExtractionPreprocessor()
                ),
            )
        )
        self.results = {}

    def _timed_predict(self, text: str, labels: List[str], **kwargs):
        """Call ``self.model.predict_entities`` and time it.

        Args:
            text: Input text for the model.
            labels: Labels to extract.
            **kwargs: Extra keyword arguments forwarded to
                ``predict_entities`` (for example ``threshold``).

        Returns:
            A ``(predictions, seconds)`` tuple.
        """
        # Monotonic clock: the interval is immune to wall-clock adjustments.
        started = time.perf_counter()
        predictions = self.model.predict_entities(text, labels, **kwargs)
        return predictions, time.perf_counter() - started

    @measure_time
    def test_ner(self) -> Dict:
        """Test Named Entity Recognition capabilities.

        Returns:
            Dict with ``"task"``, ``"entities"`` (text/label/score dicts) and
            ``"duration"``.
        """
        print("\nTesting NER...")
        text = """
        Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters
        for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,
        chief executive officer, president and chief software architect, while also being the largest
        individual shareholder until May 2014.
        """
        labels = ["founder", "computer", "software", "position", "date"]
        entities, duration = self._timed_predict(text, labels)
        return {
            "task": "ner",
            "entities": [{"text": e["text"], "label": e["label"], "score": e["score"]} for e in entities],
            "duration": duration,
        }

    @measure_time
    def test_relation_extraction(self) -> Dict:
        """Test Relation Extraction capabilities.

        Returns:
            Dict with ``"task"``, ``"relations"`` (the pipeline's ``"output"``
            value) and ``"duration"``.
        """
        print("\nTesting Relation Extraction...")
        text = """
        Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters
        for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,
        chief executive officer, president and chief software architect.
        """
        pipeline_input = {
            "text": text,
            "labels": ["organisation", "founder", "position", "date"],
            "relations": [{
                "relation": "founder",
                "pairs_filter": [("organisation", "founder")],
                "distance_threshold": 100,
            }, {
                "relation": "inception date",
                "pairs_filter": [("organisation", "date")],
            }, {
                "relation": "held position",
                "pairs_filter": [("founder", "position")],
            }],
        }
        started = time.perf_counter()
        result = self.relation_pipe.run(pipeline_input)
        duration = time.perf_counter() - started
        return {
            "task": "relation_extraction",
            "relations": result["output"],
            "duration": duration,
        }

    @measure_time
    def test_qa(self) -> Dict:
        """Test Question Answering capabilities.

        The question is prepended to the passage and the model is asked for
        spans labelled ``"answer"``.

        Returns:
            Dict with ``"task"``, ``"answers"`` (text/score dicts) and
            ``"duration"``.
        """
        print("\nTesting Question Answering...")
        question = "Who was the CEO of Microsoft?"
        text = """
        Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters
        for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman,
        chief executive officer, president and chief software architect, while also being the largest
        individual shareholder until May 2014.
        """
        answers, duration = self._timed_predict(question + text, ["answer"])
        return {
            "task": "question_answering",
            "answers": [{"text": a["text"], "score": a["score"]} for a in answers],
            "duration": duration,
        }

    @measure_time
    def test_summarization(self) -> Dict:
        """Test Summarization capabilities.

        An instruction prompt is prepended to the passage and spans labelled
        ``"summary"`` are requested with ``threshold=0.1``.

        Returns:
            Dict with ``"task"``, ``"summaries"`` (text/score dicts) and
            ``"duration"``.
        """
        print("\nTesting Summarization...")
        text = """
        Several studies have reported its pharmacological activities, including anti-inflammatory,
        antimicrobial, and antitumoral effects. The effect of E-anethole was studied in the osteosarcoma
        MG-63 cell line, and the antiproliferative activity was evaluated by an MTT assay. It showed
        a GI50 value of 60.25 μM with apoptosis induction through the mitochondrial-mediated pathway.
        """
        prompt = "Summarize the given text, highlighting the most important information:\n"
        summaries, duration = self._timed_predict(prompt + text, ["summary"], threshold=0.1)
        return {
            "task": "summarization",
            "summaries": [{"text": s["text"], "score": s["score"]} for s in summaries],
            "duration": duration,
        }

    @measure_time
    def test_sentiment_extraction(self) -> Dict:
        """Test Sentiment Extraction capabilities.

        Returns:
            Dict with ``"task"``, ``"sentiments"`` (text/label/score dicts)
            and ``"duration"``.
        """
        print("\nTesting Sentiment Extraction...")
        text = """
        I recently purchased the Sony WH-1000XM4 headphones and I'm thoroughly impressed.
        The noise-canceling is excellent, though the price is a bit high. The sound quality is amazing
        but the app could use some improvements.
        """
        labels = ["positive sentiment", "negative sentiment"]
        sentiments, duration = self._timed_predict(text, labels)
        return {
            "task": "sentiment_extraction",
            "sentiments": [{"text": s["text"], "label": s["label"], "score": s["score"]} for s in sentiments],
            "duration": duration,
        }

    @measure_time
    def test_entity_disambiguation(self) -> Dict:
        """Test Entity Disambiguation capabilities.

        Returns:
            Dict with ``"task"``, ``"entities"`` (text/label/score dicts) and
            ``"duration"``.
        """
        print("\nTesting Entity Disambiguation...")
        text = """
        Paris is the capital of France. Paris Hilton is an American media personality.
        Mercury is a planet in our solar system. Mercury is also a chemical element.
        """
        labels = ["location Paris", "person Paris", "planet Mercury", "element Mercury"]
        entities, duration = self._timed_predict(text, labels)
        return {
            "task": "entity_disambiguation",
            "entities": [{"text": e["text"], "label": e["label"], "score": e["score"]} for e in entities],
            "duration": duration,
        }

    def run_all_tests(self, output_path: str = 'gliner_test_results.json') -> Dict:
        """Run all available tests, store the results and save them as JSON.

        Args:
            output_path: File the combined results are written to. Defaults
                to ``gliner_test_results.json`` in the working directory.

        Returns:
            Dict mapping each task key to that task's result dict; the same
            object is kept in ``self.results``.
        """
        print("Starting GLiNER comprehensive test suite...")
        self.results = {
            "ner": self.test_ner(),
            "relation_extraction": self.test_relation_extraction(),
            "qa": self.test_qa(),
            "summarization": self.test_summarization(),
            "sentiment_extraction": self.test_sentiment_extraction(),
            "entity_disambiguation": self.test_entity_disambiguation(),
        }
        # Save results to JSON file; explicit encoding keeps the output
        # independent of the platform's default locale.
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(self.results, f, indent=4)
        print(f"\nAll tests completed. Results saved to '{output_path}'")
        return self.results
def main():
    """Run the full GLiNER test suite and print a per-task summary.

    A tester is first created on ``cuda:0``; if that raises, a second attempt
    is made on ``cpu``. For every task the duration, the raw result dict and,
    where the result has a known list key, the number of items are printed.
    """
    # Initialize tester with GPU if available. Only ordinary errors trigger
    # the CPU fallback: a bare ``except`` would also swallow KeyboardInterrupt
    # and SystemExit and hide why the GPU attempt failed.
    try:
        tester = GLiNERTester(device="cuda:0")
    except Exception as exc:
        print(f"GPU initialisation failed ({exc!r}); falling back to CPU")
        tester = GLiNERTester(device="cpu")
        print("Using CPU for testing")
    else:
        print("Using GPU for testing")

    # Run all tests
    results = tester.run_all_tests()

    # Result key -> message template; the first key present in a result wins,
    # mirroring an if/elif chain.
    count_messages = (
        ("entities", "Found {} entities"),
        ("answers", "Found {} answers"),
        ("summaries", "Generated {} summary segments"),
        ("sentiments", "Found {} sentiment expressions"),
    )

    # Print summary of results
    print("\nTest Summary:")
    for task, result in results.items():
        print(f"\n{task.upper()}:")
        print(f"Duration: {result['duration']:.2f} seconds")
        print("Results: ", result)
        for key, template in count_messages:
            if key in result:
                print(template.format(len(result[key])))
                break


if __name__ == "__main__":
    main()
 Test Summary:
 NER: