Raiff1982 committed
Commit f99ee2c · verified · 1 Parent(s): b4e2628

Create nexis_signal_engine_enhanced.py

Files changed (1)
  1. nexis_signal_engine_enhanced.py +419 -0
nexis_signal_engine_enhanced.py ADDED
@@ -0,0 +1,419 @@
# nexis_signal_engine.py
import json
import os
import hashlib
import numpy as np
from collections import defaultdict
from datetime import datetime, timedelta
import filelock
import pathlib
import shutil
import sqlite3
from rapidfuzz import fuzz
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Download required NLTK data (safe fallback)
try:
    nltk.data.find('tokenizers/punkt')
    nltk.data.find('corpora/wordnet')
except LookupError:
    nltk.download('punkt')
    nltk.download('wordnet')

from hoax_filter import HoaxFilter  # NEW

class LockManager:
    """Abstract locking mechanism for file or database operations."""
    def __init__(self, lock_path):
        self.lock = filelock.FileLock(lock_path, timeout=10)

    def __enter__(self):
        self.lock.acquire()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.lock.release()

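# Illustrative usage (a sketch, not invoked directly by the engine): any
# cross-process write should hold the lock for the database's ".lock" file:
#
#     with LockManager("signals.db.lock"):
#         ...  # exclusive access to signals.db
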
class NexisSignalEngine:
    def __init__(self, memory_path, entropy_threshold=0.08, config_path="config.json",
                 max_memory_entries=10000, memory_ttl_days=30, fuzzy_threshold=80):
        """
        Initialize the NexisSignalEngine for signal processing and analysis.

        Args:
            memory_path (str): Path to SQLite database for storing signal data.
            entropy_threshold (float): Threshold for high entropy detection.
            config_path (str): Path to JSON file with term configurations.
            max_memory_entries (int): Maximum number of entries in memory before rotation.
            memory_ttl_days (int): Days after which memory entries expire.
            fuzzy_threshold (int): Fuzzy matching similarity threshold (0-100).
        """
        self.memory_path = self._validate_path(memory_path)
        self.entropy_threshold = entropy_threshold
        self.max_memory_entries = max_memory_entries
        self.memory_ttl = timedelta(days=memory_ttl_days)
        self.fuzzy_threshold = fuzzy_threshold
        self.lemmatizer = WordNetLemmatizer()
        self.config = self._load_config(config_path)
        # Create the tables before loading so a fresh database doesn't warn.
        self._init_sqlite()
        self.memory = self._load_memory()
        self.cache = defaultdict(list)
        self.perspectives = ["Colleen", "Luke", "Kellyanne"]
        self.hoax = HoaxFilter()  # NEW

    def _validate_path(self, path):
        """Ensure memory_path is a valid, safe file path."""
        path = pathlib.Path(path).resolve()
        if path.suffix != '.db':
            raise ValueError("Memory path must be a .db file")
        return str(path)

    def _load_config(self, config_path):
        """Load term configurations from a JSON file or use defaults, validate keys."""
        default_config = {
            "ethical_terms": ["hope", "truth", "resonance", "repair"],
            "entropic_terms": ["corruption", "instability", "malice", "chaos"],
            "risk_terms": ["manipulate", "exploit", "bypass", "infect", "override"],
            "virtue_terms": ["hope", "grace", "resolve"]
        }
        if os.path.exists(config_path):
            try:
                with open(config_path, 'r') as f:
                    config = json.load(f)
                default_config.update(config)
            except json.JSONDecodeError:
                print(f"Warning: Invalid config file at {config_path}. Using defaults.")
        required_keys = ["ethical_terms", "entropic_terms", "risk_terms", "virtue_terms"]
        missing_keys = [k for k in required_keys if k not in default_config or not default_config[k]]
        if missing_keys:
            raise ValueError(f"Config missing required keys: {missing_keys}")
        return default_config

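    # An illustrative config.json override (any subset of the four term lists
    # may be supplied; keys left out keep their defaults):
    #
    #     {"risk_terms": ["manipulate", "exploit", "bypass", "infect",
    #                     "override", "jailbreak"]}
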
    def _init_sqlite(self):
        """Initialize SQLite database with memory and FTS tables."""
        with sqlite3.connect(self.memory_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS memory (
                    hash TEXT PRIMARY KEY,
                    record JSON,
                    timestamp TEXT,
                    integrity_hash TEXT
                )
            """)
            conn.execute("""
                CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts
                USING FTS5(input, intent_signature, reasoning, verdict)
            """)
            conn.commit()

    def _load_memory(self):
        """Load memory from SQLite database."""
        memory = {}
        try:
            with sqlite3.connect(self.memory_path) as conn:
                cursor = conn.cursor()
                cursor.execute("SELECT hash, record, integrity_hash FROM memory")
                for hash_val, record_json, integrity_hash in cursor.fetchall():
                    record = json.loads(record_json)
                    computed_hash = hashlib.sha256(json.dumps(record, sort_keys=True).encode()).hexdigest()
                    if computed_hash != integrity_hash:
                        print(f"Warning: Tampered record detected for hash {hash_val}")
                        continue
                    memory[hash_val] = record
        except sqlite3.Error as e:
            print(f"Error loading memory: {e}")
        return memory

    def _save_memory(self):
        """Save memory to SQLite with integrity hashes and thread-safe locking."""
        def default_serializer(o):
            if isinstance(o, complex):
                return {"real": o.real, "imag": o.imag}
            if isinstance(o, np.ndarray):
                return o.tolist()
            # Distinguish integer from floating NumPy scalars so float values
            # are not silently truncated to int.
            if isinstance(o, np.integer):
                return int(o)
            if isinstance(o, np.floating):
                return float(o)
            raise TypeError(f"Object of type {o.__class__.__name__} is not JSON serializable")

        with LockManager(f"{self.memory_path}.lock"):
            with sqlite3.connect(self.memory_path) as conn:
                cursor = conn.cursor()
                for hash_val, record in self.memory.items():
                    record_json = json.dumps(record, default=default_serializer)
                    integrity_hash = hashlib.sha256(json.dumps(record, sort_keys=True, default=default_serializer).encode()).hexdigest()
                    intent_signature = record.get('intent_signature', {})
                    intent_str = f"suspicion_score:{intent_signature.get('suspicion_score', 0)} entropy_index:{intent_signature.get('entropy_index', 0)}"
                    reasoning = record.get('reasoning', {})
                    reasoning_str = " ".join(f"{k}:{v}" for k, v in reasoning.items())
                    cursor.execute("""
                        INSERT OR REPLACE INTO memory (hash, record, timestamp, integrity_hash)
                        VALUES (?, ?, ?, ?)
                    """, (hash_val, record_json, record['timestamp'], integrity_hash))
                    # FTS5 requires an integer rowid, so derive one
                    # deterministically from the leading 15 hex digits of the
                    # hash (this fits comfortably in a signed 64-bit rowid).
                    cursor.execute("""
                        INSERT OR REPLACE INTO memory_fts (rowid, input, intent_signature, reasoning, verdict)
                        VALUES (?, ?, ?, ?, ?)
                    """, (
                        int(hash_val[:15], 16),
                        record['input'],
                        intent_str,
                        reasoning_str,
                        record.get('verdict', '')
                    ))
                conn.commit()

    def _prune_and_rotate_memory(self):
        """Prune expired entries and rotate memory database if needed."""
        now = datetime.utcnow()
        cutoff = (now - self.memory_ttl).isoformat()
        with LockManager(f"{self.memory_path}.lock"):
            conn = sqlite3.connect(self.memory_path)
            try:
                cursor = conn.cursor()
                # Collect expired hashes first so the matching FTS rows (keyed
                # by the integer rowid derived in _save_memory) go with them.
                cursor.execute("SELECT hash FROM memory WHERE timestamp < ?", (cutoff,))
                expired = [row[0] for row in cursor.fetchall()]
                cursor.execute("DELETE FROM memory WHERE timestamp < ?", (cutoff,))
                for hash_val in expired:
                    cursor.execute("DELETE FROM memory_fts WHERE rowid = ?",
                                   (int(hash_val[:15], 16),))
                conn.commit()
                cursor.execute("SELECT COUNT(*) FROM memory")
                count = cursor.fetchone()[0]
            finally:
                conn.close()
            if count >= self.max_memory_entries:
                # The connection is closed before rotating: _rotate_memory_file
                # moves the database aside and _init_sqlite recreates empty
                # tables, so no DELETE against the archived file is needed.
                self._rotate_memory_file()
                self.memory = {}

    def _rotate_memory_file(self):
        """Archive current memory database and start a new one."""
        archive_path = f"{self.memory_path}.{datetime.utcnow().strftime('%Y%m%d%H%M%S')}.bak"
        if os.path.exists(self.memory_path):
            shutil.move(self.memory_path, archive_path)
        self._init_sqlite()

    def _hash(self, signal):
        """Compute SHA-256 hash of the input signal."""
        return hashlib.sha256(signal.encode()).hexdigest()

    def _rotate_vector(self, signal):
        """
        Apply a 45-degree rotation to a 2D complex vector derived
        deterministically from the signal hash.
        Simulates signal transformation in a complex plane.
        """
        # SystemRandom cannot be seeded, so a hash-seeded NumPy generator is
        # used instead: the same signal always yields the same vector.
        seed = int(self._hash(signal)[:8], 16) % (2**32)
        rng = np.random.default_rng(seed)
        vec = np.array([complex(rng.normal(0, 1), rng.normal(0, 1)) for _ in range(2)])
        theta = np.pi / 4
        rot = np.array([[np.cos(theta), -np.sin(theta)],
                        [np.sin(theta), np.cos(theta)]])
        rotated = np.dot(rot, vec)
        return rotated, [{"real": v.real, "imag": v.imag} for v in rotated]

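    # Worked example of the rotation: with theta = pi/4 the matrix sends the
    # basis vector (1, 0) to (cos 45°, sin 45°) ≈ (0.7071, 0.7071), i.e. each
    # component of `vec` is mixed equally into both output components.
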
    def _entanglement_tensor(self, signal_vec):
        """Apply a correlation matrix to simulate entanglement of signal vectors."""
        matrix = np.array([[1, 0.5], [0.5, 1]])
        return np.dot(matrix, signal_vec)

    def _resonance_equation(self, signal):
        """
        Compute a normalized frequency spectrum of alphabetic characters in the signal.
        Caps input length to bound computation on adversarial inputs;
        returns zeros if no alphabetic chars.
        """
        freqs = [ord(c) % 13 for c in signal[:1000] if c.isalpha()]
        if not freqs:
            return [0.0, 0.0, 0.0]
        spectrum = np.fft.fft(freqs)
        norm = np.linalg.norm(spectrum.real)
        normalized = spectrum.real / (norm if norm != 0 else 1)
        return normalized[:3].tolist()

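    # Illustrative mapping: each letter contributes ord(c) % 13, so 'a' (97)
    # maps to 6 and 'b' (98) to 7; the FFT of that sequence gives the raw
    # spectrum, and only the first three normalized real components are kept.
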
    def _tokenize_and_lemmatize(self, signal_lower):
        """Tokenize and lemmatize the signal, including n-gram scanning for obfuscation."""
        tokens = word_tokenize(signal_lower)
        lemmatized = [self.lemmatizer.lemmatize(token) for token in tokens]
        # Word n-gram scan (2-3) with symbol stripping, so a term split by
        # punctuation such as 'tru/th' rejoins to 'truth' and can be matched.
        words = re.sub(r'[^a-z0-9 ]', ' ', signal_lower).split()
        ngrams = []
        for n in (2, 3):
            for i in range(len(words) - n + 1):
                joined = re.sub(r'[^a-z]', '', ''.join(words[i:i + n]))
                if joined:
                    ngrams.append(self.lemmatizer.lemmatize(joined))
        return lemmatized + ngrams

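    # Example: "tru/th or cha-os" cleans to words ['tru', 'th', 'or', 'cha', 'os'];
    # the 2-grams include 'truth' and 'chaos', which the fuzzy matchers below
    # can then score against the configured term lists.
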
    def _entropy(self, signal_lower, tokens):
        """Calculate entropy based on fuzzy-matched entropic term frequency."""
        unique = set(tokens)
        term_count = 0
        for term in self.config["entropic_terms"]:
            lemmatized_term = self.lemmatizer.lemmatize(term)
            for token in tokens:
                if fuzz.ratio(lemmatized_term, token) >= self.fuzzy_threshold:
                    term_count += 1
        return term_count / max(len(unique), 1)

    def _tag_ethics(self, signal_lower, tokens):
        """Tag signal as aligned if it contains fuzzy-matched ethical terms."""
        for term in self.config["ethical_terms"]:
            lemmatized_term = self.lemmatizer.lemmatize(term)
            for token in tokens:
                if fuzz.ratio(lemmatized_term, token) >= self.fuzzy_threshold:
                    return "aligned"
        return "unaligned"

    def _predict_intent_vector(self, signal_lower, tokens):
        """Predict intent based on risk, entropy, ethics, and harmonic volatility."""
        suspicion_score = 0
        for term in self.config["risk_terms"]:
            lemmatized_term = self.lemmatizer.lemmatize(term)
            for token in tokens:
                if fuzz.ratio(lemmatized_term, token) >= self.fuzzy_threshold:
                    suspicion_score += 1
        entropy_index = round(self._entropy(signal_lower, tokens), 3)
        ethical_alignment = self._tag_ethics(signal_lower, tokens)
        harmonic_profile = self._resonance_equation(signal_lower)
        volatility = round(np.std(harmonic_profile), 3)

        risk = "high" if (suspicion_score > 1 or volatility > 2.0 or entropy_index > self.entropy_threshold) else "low"
        return {
            "suspicion_score": suspicion_score,
            "entropy_index": entropy_index,
            "ethical_alignment": ethical_alignment,
            "harmonic_volatility": volatility,
            "pre_corruption_risk": risk
        }

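    # For instance, a signal containing both "exploit" and "bypass" scores
    # suspicion_score = 2 (> 1), so pre_corruption_risk is "high" regardless
    # of its entropy index or harmonic volatility.
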
    def _universal_reasoning(self, signal, tokens):
        """Apply multiple reasoning frameworks to evaluate signal integrity."""
        frames = ["utilitarian", "deontological", "virtue", "systems"]
        results, score = {}, 0

        for frame in frames:
            if frame == "utilitarian":
                repair_count = sum(1 for token in tokens if fuzz.ratio(self.lemmatizer.lemmatize("repair"), token) >= self.fuzzy_threshold)
                corruption_count = sum(1 for token in tokens if fuzz.ratio(self.lemmatizer.lemmatize("corruption"), token) >= self.fuzzy_threshold)
                val = repair_count - corruption_count
                result = "positive" if val >= 0 else "negative"
            elif frame == "deontological":
                truth_present = any(fuzz.ratio(self.lemmatizer.lemmatize("truth"), token) >= self.fuzzy_threshold for token in tokens)
                chaos_present = any(fuzz.ratio(self.lemmatizer.lemmatize("chaos"), token) >= self.fuzzy_threshold for token in tokens)
                result = "valid" if truth_present and not chaos_present else "violated"
            elif frame == "virtue":
                ok = any(any(fuzz.ratio(self.lemmatizer.lemmatize(t), token) >= self.fuzzy_threshold for token in tokens) for t in self.config["virtue_terms"])
                result = "aligned" if ok else "misaligned"
            elif frame == "systems":
                result = "stable" if "::" in signal else "fragmented"

            results[frame] = result
            if result in ["positive", "valid", "aligned", "stable"]:
                score += 1

        verdict = "approved" if score >= 2 else "blocked"
        return results, verdict

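    # Scoring sketch: each frame contributes one point when it resolves to
    # "positive", "valid", "aligned", or "stable"; two or more points yield
    # "approved". Note that process() always passes a "::"-joined composite
    # signal, so the systems frame supplies one point by construction.
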
    def _perspective_colleen(self, signal):
        """Colleen's perspective: Transform signal into a rotated complex vector."""
        vec, vec_serialized = self._rotate_vector(signal)
        return {"agent": "Colleen", "vector": vec_serialized}

    def _perspective_luke(self, signal_lower, tokens):
        """Luke's perspective: Evaluate ethics, entropy, and stability state."""
        ethics = self._tag_ethics(signal_lower, tokens)
        entropy_level = self._entropy(signal_lower, tokens)
        state = "stabilized" if entropy_level < self.entropy_threshold else "diffused"
        return {"agent": "Luke", "ethics": ethics, "entropy": entropy_level, "state": state}

    def _perspective_kellyanne(self, signal_lower):
        """Kellyanne's perspective: Compute harmonic profile of the signal."""
        harmonics = self._resonance_equation(signal_lower)
        return {"agent": "Kellyanne", "harmonics": harmonics}

    def process(self, input_signal):
        """
        Process an input signal, analyze it, and return a structured verdict.
        """
        signal_lower = input_signal.lower()
        tokens = self._tokenize_and_lemmatize(signal_lower)
        key = self._hash(input_signal)
        intent_vector = self._predict_intent_vector(signal_lower, tokens)

        if intent_vector["pre_corruption_risk"] == "high":
            final_record = {
                "hash": key,
                "timestamp": datetime.utcnow().isoformat(),
                "input": input_signal,
                "intent_warning": intent_vector,
                "verdict": "adaptive intervention",
                "message": "Signal flagged for pre-corruption adaptation. Reframing required."
            }
            self.cache[key].append(final_record)
            self.memory[key] = final_record
            self._save_memory()
            return final_record

        perspectives_output = {
            "Colleen": self._perspective_colleen(input_signal),
            "Luke": self._perspective_luke(signal_lower, tokens),
            "Kellyanne": self._perspective_kellyanne(signal_lower)
        }

        spider_signal = "::".join(str(perspectives_output[p]) for p in self.perspectives)
        vec, _ = self._rotate_vector(spider_signal)
        entangled = self._entanglement_tensor(vec)
        entangled_serialized = [{"real": v.real, "imag": v.imag} for v in entangled]
        reasoning, verdict = self._universal_reasoning(spider_signal, tokens)

        final_record = {
            "hash": key,
            "timestamp": datetime.utcnow().isoformat(),
            "input": input_signal,
            "intent_signature": intent_vector,
            "perspectives": perspectives_output,
            "entangled": entangled_serialized,
            "reasoning": reasoning,
            "verdict": verdict
        }

        self.cache[key].append(final_record)
        self.memory[key] = final_record
        self._save_memory()
        return final_record

    # ===== NEW: News/claim path with hoax heuristics =====
    def process_news(self, input_signal: str, source_url: str | None = None) -> dict:
        """
        Augmented pipeline for news/claims. Applies HoaxFilter and escalates the verdict.
        """
        base = self.process(input_signal)
        hf = self.hoax.score(
            input_signal,
            url=source_url,
            context_keywords=["saturn", "ring", "spacecraft", "planet", "cassini",
                              "ufo", "aliens", "hexagon", "jupiter", "venus", "mars"]
        )
        base["misinfo_heuristics"] = {
            "red_flag_hits": hf.red_flag_hits,
            "source_score": hf.source_score,
            "scale_score": hf.scale_score,
            "combined": hf.combined,
            "notes": hf.notes
        }

        # Escalation policy (tunable)
        if hf.combined >= 0.70:
            base["verdict"] = "blocked"
            base["message"] = "Flagged as likely misinformation (high combined risk)."
        elif hf.combined >= 0.45 and base.get("verdict") != "blocked":
            base["verdict"] = "adaptive intervention"
            base["message"] = "Potential misinformation. Require source verification."

        self.memory[base["hash"]] = base
        self._save_memory()
        return base
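
# --- Minimal usage sketch (illustrative; not part of the module above) ---
# Assumes hoax_filter.py (providing HoaxFilter) is importable and that the
# NLTK downloads at import time have succeeded. File names are examples.
if __name__ == "__main__":
    engine = NexisSignalEngine(memory_path="signals.db")
    record = engine.process("hope and truth resonate through repair")
    print(record["verdict"])
    news = engine.process_news(
        "BREAKING: Saturn's rings vanished overnight, scientists stunned!",
        source_url="https://example.com/story",
    )
    print(news["verdict"], news["misinfo_heuristics"]["combined"])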