File size: 2,057 Bytes
7fc87fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import logging
from datetime import datetime

class HeuristicManager:
    """Heuristics for deciding whether an embedding model should be retrained.

    The manager inspects three signals:

    * similarity of probe terms to their nearest neighbour in the model,
    * keywords in the metadata of the incoming data,
    * the amount of new data available.

    The first two are *triggers* (reasons to retrain); the third is a
    *blocker* (retraining is not worthwhile without enough new data).
    """

    # Lower-case substrings that flag a metadata record in `check_metadata`.
    _BIAS_KEYWORDS = ("haplogroup b", "eastasia", "asian")

    def __init__(self, model, log_file="heuristic_log.txt", min_similarity_threshold=0.5, min_new_data_len=50):
        """Store the thresholds and point the root logger at *log_file*.

        Args:
            model: Object exposing ``model.wv.most_similar(term)`` that returns
                a sequence of ``(neighbour, similarity)`` pairs and raises
                ``KeyError`` for unknown terms (presumably a gensim model;
                confirm against the caller).
            log_file: Path handed to ``logging.basicConfig``.
            min_similarity_threshold: Top-neighbour similarity below this
                value counts as a retraining trigger.
            min_new_data_len: Minimum ``len(new_data)`` required before
                retraining is considered worthwhile.
        """
        self.model = model
        self.min_similarity_threshold = min_similarity_threshold
        self.min_new_data_len = min_new_data_len
        self.log_file = log_file
        # NOTE: basicConfig configures the *root* logger and does nothing if
        # the root logger already has handlers, so `log_file` only takes
        # effect for the first configuration in the process.
        logging.basicConfig(filename=self.log_file, level=logging.INFO)

    def log(self, message: str) -> None:
        """Write *message*, prefixed with a local timestamp, to the log and stdout."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logging.info("[%s] %s", timestamp, message)
        print(f"[{timestamp}] {message}")

    def check_similarity(self, test_terms) -> list:
        """Return one trigger message per probe term the model handles poorly.

        A term produces a message when it is missing from the vocabulary,
        when the model returns no neighbours for it, or when its best
        neighbour's similarity is below ``min_similarity_threshold``.
        ``None`` is treated as an empty list of terms.
        """
        triggers = []
        for term in test_terms or ():
            # Keep the try body to the single call that can raise KeyError.
            try:
                neighbours = self.model.wv.most_similar(term)
            except KeyError:
                triggers.append(f"'{term}' not in vocabulary")
                continue

            # An empty result used to crash with IndexError on `[0][1]`.
            if not neighbours:
                triggers.append(f"No similar terms found for '{term}'")
                continue

            sim = neighbours[0][1]
            if sim < self.min_similarity_threshold:
                triggers.append(f"Low similarity for '{term}': {sim}")
        return triggers

    def check_metadata(self, metadata) -> list:
        """Return a trigger message if *metadata* mentions a flagged keyword.

        The metadata is converted with ``str()`` and lower-cased, then tested
        for each entry of ``_BIAS_KEYWORDS`` as a plain substring.
        """
        # NOTE(review): substring matching means "asian" also matches inside
        # words such as "caucasian"; tighten if that is not intended.
        text = str(metadata).lower()
        if any(keyword in text for keyword in self._BIAS_KEYWORDS):
            return ["Detected new haplogroup or regional bias: 'Asian' or 'B'"]
        return []

    def check_new_data_volume(self, new_data) -> list:
        """Return a message when *new_data* is too small to justify retraining.

        The returned list is empty when ``len(new_data)`` is at least
        ``min_new_data_len``. ``None`` is treated as zero items.
        """
        available = 0 if new_data is None else len(new_data)
        if available < self.min_new_data_len:
            return ["Not enough new data to justify retraining"]
        return []

    def should_retrain(self, test_terms, new_data, metadata) -> bool:
        """Decide whether retraining should run, logging the reasoning.

        Retraining is recommended only when at least one similarity or
        metadata trigger fired *and* there is enough new data. Previously the
        "not enough new data" message was counted as a trigger, so a lack of
        data forced retraining instead of preventing it.

        Returns:
            ``True`` if retraining should run, ``False`` otherwise.
        """
        triggers = self.check_similarity(test_terms) + self.check_metadata(metadata)
        blockers = self.check_new_data_volume(new_data)

        if not triggers:
            self.log("No retraining needed.")
            return False

        if blockers:
            # Reasons to retrain exist, but the data volume vetoes it.
            self.log("Retraining skipped due to:")
            for blocker in blockers:
                self.log(f" - {blocker}")
            return False

        self.log("Retraining triggered due to:")
        for trigger in triggers:
            self.log(f" - {trigger}")
        return True