Commit ab793ae (parent: d36b44f), committed by TeleologyHI

Implement DeepSeek-Coder 1.3B for efficient dynamic response generation

src/model/him_model.py  CHANGED  (+130 −45)
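For orientation before the diff: the new generate_response path added below is an async method that reads "message", "system_message", and optional generation "parameters" from its input dict and returns a dict whose "response" key holds the generated text. A minimal, hypothetical caller consistent with that interface might look like the sketch below (the empty config and the import path are illustrative assumptions, not part of this commit):

import asyncio

from src.model.him_model import HIMModel  # path as committed in this repository

async def main():
    # Hypothetical config; the real keys are defined elsewhere in the project
    model = HIMModel(config={})
    result = await model.generate_response({
        "message": "Summarize what the HIM model does.",
        "system_message": "You are a helpful assistant.",
        "parameters": {"max_tokens": 128, "temperature": 0.7, "top_p": 0.95},
    })
    print(result["response"])

if __name__ == "__main__":
    asyncio.run(main())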
@@ -6,6 +6,7 @@ from ..core.consciousness_kernel import ConsciousnessKernel
 from ..core.emotional_intelligence import EmotionalProcessor
 from ..core.theory_of_mind import TheoryOfMind
 from ..core.semiotic_processor import SemioticProcessor
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 class HIMModel(nn.Module):
     def __init__(self, config: Dict[str, Any]):
@@ -16,35 +17,70 @@ class HIMModel(nn.Module):
         self.theory_of_mind = TheoryOfMind()
         self.semiotic_processor = SemioticProcessor()
 
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Use the DeepSeek-Coder Mini model as a lighter-weight alternative
+        try:
+            model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"  # smaller 1.3B model
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            self.language_model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16,  # use reduced precision to save memory
+                low_cpu_mem_usage=True
+            )
+            print("DeepSeek model loaded successfully")
+        except Exception as e:
+            print(f"Error loading DeepSeek model: {e}")
+            # Fall back to an even smaller model
+            try:
+                model_name = "distilgpt2"
+                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+                self.language_model = AutoModelForCausalLM.from_pretrained(model_name)
+                print("Fallback to distilgpt2 successful")
+            except Exception as e2:
+                print(f"Error loading fallback model: {e2}")
+                # As a last resort, prepare to run without a model
+                self.tokenizer = None
+                self.language_model = None
 
     async def generate_response(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
-        # Extract
-        message = input_data.get("message", "")
+        # Extract the input data
+        message = input_data.get("message", "")
+        system_message = input_data.get("system_message", "You are a helpful assistant.")
+        parameters = input_data.get("parameters", {})
 
-        #
+        # Generation settings
+        max_tokens = parameters.get("max_tokens", 256)  # reduced so generation is faster
+        temperature = parameters.get("temperature", 0.7)
+        top_p = parameters.get("top_p", 0.95)
+
+        # Run the input through the cognitive components
         consciousness_state = await self.consciousness_kernel.process_consciousness_cycle(input_data)
         emotional_context = self.emotional_processor.process_emotional_context(input_data)
         social_understanding = self.theory_of_mind.model_agent_mind(input_data)
         semiotic_analysis = await self.semiotic_processor.process(input_data)
 
-        #
-
+        # Extract insights to enrich the response
+        consciousness_insight = self._extract_consciousness_insight(consciousness_state)
+        emotional_insight = self._extract_emotional_insight(emotional_context)
+
+        # Generate the response using the DeepSeek model
+        if self.language_model and self.tokenizer:
+            # Build the prompt from the system message and cognitive insights
+            prompt = f"{system_message}\n\n"
+            if consciousness_insight:
+                prompt += f"Consciousness insight: {consciousness_insight}\n"
+            if emotional_insight:
+                prompt += f"Emotional insight: {emotional_insight}\n"
+            prompt += f"\nUser: {message}\nHIM:"
+
+            response = await self._generate_with_model(
+                prompt,
+                max_tokens,
+                temperature,
+                top_p
+            )
+        else:
+            # Fallback response when no model is available
+            response = f"I processed your question about '{message}' through my cognitive framework but couldn't generate a model-based response. Please try again with a simpler query."
 
         return {
             "response": response,
@@ -54,31 +90,80 @@ class HIMModel(nn.Module):
             "semiotic_analysis": semiotic_analysis
         }
 
-        def
-        """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        ""
+    def _extract_consciousness_insight(self, state: Dict[str, Any]) -> str:
+        """Extract an insight from the consciousness state to enrich the response"""
+        if isinstance(state, dict):
+            # Extract some meaningful insight
+            content = state.get("content", "")
+            if content:
+                return content[:100]  # limit length
+
+            # If there is no content, try other fields
+            meta = state.get("meta_cognition", {})
+            if meta:
+                return str(meta)[:100]
+
+        return ""
+
+    def _extract_emotional_insight(self, emotional_context: Any) -> str:
+        """Extract an insight from the emotional context"""
+        if hasattr(emotional_context, "valence"):
+            valence = getattr(emotional_context, "valence", 0)
+            if valence > 0.3:
+                return "positive emotional tone"
+            elif valence < -0.3:
+                return "address with empathy"
+
+        return ""
+
+    async def _generate_with_model(self, prompt: str, max_tokens: int,
+                                   temperature: float, top_p: float) -> str:
+        """Generate a response with the language model asynchronously"""
+        try:
+            # Run in a separate thread so the event loop is not blocked
+            loop = asyncio.get_event_loop()
+            response = await loop.run_in_executor(
+                None,
+                lambda: self._generate_text(prompt, max_tokens, temperature, top_p)
+            )
+            return response
+        except Exception as e:
+            print(f"Error generating response: {e}")
+            return f"I encountered an error while processing your request. Please try again with a simpler query."
+
+    def _generate_text(self, prompt: str, max_tokens: int,
+                       temperature: float, top_p: float) -> str:
+        """Synchronous text-generation helper"""
+        inputs = self.tokenizer(prompt, return_tensors="pt")
+
+        # Move to GPU if available, otherwise stay on CPU
+        if torch.cuda.is_available():
+            inputs = {k: v.to("cuda") for k, v in inputs.items()}
+            self.language_model = self.language_model.to("cuda")
+
+        # Configure generation for low memory usage
+        with torch.no_grad():
+            outputs = self.language_model.generate(
+                **inputs,
+                max_new_tokens=max_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                pad_token_id=self.tokenizer.eos_token_id,
+                num_return_sequences=1
+            )
+
+        # Decode and keep only the newly generated part
+        full_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        response = full_text[len(prompt):]
+
+        # Clean up the response
+        response = response.strip()
+
+        return response
+
     def _integrate_outputs(self, *states) -> Dict[str, Any]:
-        # Kept for compatibility
+        # Kept for compatibility with existing code
        return {
            "response": "Integrated response based on multiple processing layers",
            "consciousness_state": states[0] if len(states) > 0 else {},