# HIM-self/src/core/foundation_layer.py
import torch
import torch.nn as nn
from transformers import AutoModel, AutoConfig
class FoundationLayer(nn.Module):
    """Foundation transformer whose hidden states are routed through a
    sparse mixture-of-experts layer."""

    def __init__(self, model_name: str = "gpt2-xl"):
        super().__init__()
        self.config = AutoConfig.from_pretrained(model_name)
        self.transformer = AutoModel.from_pretrained(model_name)
        # MixtureOfExperts is defined below; the forward reference is
        # resolved at call time, so the ordering is valid Python.
        self.sparse_router = MixtureOfExperts(
            num_experts=128,
            input_size=self.config.hidden_size,
        )

    def forward(self, input_ids, attention_mask=None):
        transformer_output = self.transformer(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )
        # Route the final hidden states (batch, seq_len, hidden_size)
        # through the expert mixture before post-processing.
        routed_output = self.sparse_router(transformer_output.last_hidden_state)
        return self._process_consciousness_emergence(routed_output)
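
    def _process_consciousness_emergence(self, hidden_states):
        # The original file calls this method but never defines it, which
        # would raise AttributeError at runtime. An identity pass-through is
        # assumed here purely to keep the module runnable; the intended
        # post-processing is not specified anywhere in this file.
        return hidden_states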
class MixtureOfExperts(nn.Module):
    """Mixture of transformer-encoder-layer experts with a linear gate."""

    def __init__(self, num_experts: int, input_size: int):
        super().__init__()
        self.num_experts = num_experts
        # The gate scores each token against every expert.
        self.gate = nn.Linear(input_size, num_experts)
        self.experts = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=input_size,
                nhead=8,
                batch_first=True,  # hidden states arrive as (batch, seq, hidden)
            ) for _ in range(num_experts)
        ])