import torch
import torch.nn as nn
from transformers import AutoModel, AutoConfig


class FoundationLayer(nn.Module):
    """Pretrained transformer backbone followed by an expert-routing layer."""

    def __init__(self, model_name: str = "gpt2-xl"):
        super().__init__()
        self.config = AutoConfig.from_pretrained(model_name)
        self.transformer = AutoModel.from_pretrained(model_name)
        self.sparse_router = MixtureOfExperts(
            num_experts=128,
            input_size=self.config.hidden_size,
        )

    def forward(self, input_ids, attention_mask=None):
        transformer_output = self.transformer(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )
        routed_output = self.sparse_router(transformer_output.last_hidden_state)
        return self._process_consciousness_emergence(routed_output)

    def _process_consciousness_emergence(self, hidden_states):
        # Placeholder for downstream processing of the routed representation;
        # currently passes the hidden states through unchanged.
        return hidden_states


class MixtureOfExperts(nn.Module):
    def __init__(self, num_experts: int, input_size: int):
        super().__init__()
        self.num_experts = num_experts
        self.gate = nn.Linear(input_size, num_experts)
        self.experts = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=input_size,
                nhead=8,
                batch_first=True,  # hidden states arrive as (batch, seq, hidden)
            )
            for _ in range(num_experts)
        ])

    def forward(self, hidden_states):
        # Soft mixture: every expert runs and their outputs are combined per
        # token using the gate probabilities. A truly sparse router would
        # dispatch each token to only its top-k experts.
        gate_weights = torch.softmax(self.gate(hidden_states), dim=-1)
        expert_outputs = torch.stack(
            [expert(hidden_states) for expert in self.experts], dim=-1
        )  # (batch, seq, hidden, num_experts)
        return torch.einsum("bshe,bse->bsh", expert_outputs, gate_weights)
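

# A minimal usage sketch under stated assumptions: it runs the classes above
# unchanged, swapping "gpt2-xl" for the smaller "gpt2" checkpoint only so the
# example is cheap to run; the tokenizer calls are the standard transformers
# API.
if __name__ == "__main__":
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = FoundationLayer(model_name="gpt2").eval()
    batch = tokenizer("Hello, world!", return_tensors="pt")
    with torch.no_grad():
        out = model(batch["input_ids"], attention_mask=batch["attention_mask"])
    print(out.shape)  # torch.Size([1, seq_len, hidden_size])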