JairoDanielMT committed on
Commit
f942f85
·
verified ·
1 Parent(s): 3643ab8

Update core/pipeline/edullm_rag_pipeline.py

Browse files
Files changed (1) hide show
  1. core/pipeline/edullm_rag_pipeline.py +83 -83
core/pipeline/edullm_rag_pipeline.py CHANGED
@@ -1,83 +1,83 @@
1
- # edullm_rag_pipeline.py
2
-
3
- # =========================
4
- # 📦 IMPORTACIONES
5
- # =========================
6
- import os
7
- from typing import Union
8
-
9
- from dotenv import load_dotenv
10
- from loguru import logger
11
- from pydantic import BaseModel
12
-
13
- from core.pipeline.utils import limitar_contexto, limpiar_contexto_bruto, validar_input
14
- from vectorstore.embeddings import EmbeddingManager
15
- from vectorstore.distance_strategy import DistanceStrategyManager
16
- from vectorstore.vectorstore_manager import VectorStoreManager
17
- from llm.llm_manager import LLMManager
18
-
19
- # =========================
20
- # ⚙️ CONFIGURACIÓN INICIAL
21
- # =========================
22
- load_dotenv(dotenv_path="config/.env")
23
- VECTORSTORE_PATH = os.getenv("VECTORSTORE_PATH", "docs/")
24
- VECTORSTORE_NAME = os.getenv("VECTORSTORE_NAME", "edullm_store")
25
-
26
- # =========================
27
- # 🚀 INICIALIZACIÓN DE COMPONENTES
28
- # =========================
29
- embeddings = EmbeddingManager.get_embeddings()
30
- strategy_mgr = DistanceStrategyManager()
31
- vector_mgr = VectorStoreManager(path=VECTORSTORE_PATH, name=VECTORSTORE_NAME)
32
- llm_manager = LLMManager()
33
-
34
-
35
- # =========================
36
- # 📄 MODELOS
37
- # =========================
38
- class Documento(BaseModel):
39
- contenido: str
40
- fuente: str
41
- puntaje: float
42
-
43
-
44
- # =========================
45
- # 🛠️ FUNCIONES UTILITARIAS
46
- # =========================
47
-
48
-
49
- def init_vectorstore(force_rebuild: bool = False):
50
- """Inicializa o reconstruye el vectorstore si es necesario."""
51
- if force_rebuild or not vector_mgr.exist_vectorstore():
52
- vector_mgr.create_vectorstore()
53
-
54
-
55
- # =========================
56
- # 🎯 PIPELINE PRINCIPAL
57
- # =========================
58
- def edullm_rag_pipeline(
59
- input_data: Union[str, bytes], top_k: int = 4, search_type: str = "similarity"
60
- ) -> str:
61
- """Pipeline RAG para procesamiento multimodal y generación de respuesta educativa."""
62
- if not validar_input(input_data):
63
- logger.error("❌ Entrada inválida. Debes proporcionar texto o imagen válida.")
64
- return "Error: Entrada no válida."
65
-
66
- if isinstance(input_data, bytes):
67
- return llm_manager.generate_response(
68
- user_query="Procesa la imagen adjunta y responde según el contexto educativo.",
69
- image=input_data,
70
- )
71
-
72
- retriever = vector_mgr.as_retriever(search_type=search_type, k=top_k)
73
- docs = retriever.invoke(input_data)
74
-
75
- if not docs:
76
- contexto_final = "No se encontró contexto relevante."
77
- logger.warning("⚠️ Sin resultados en FAISS para la consulta.")
78
- else:
79
- contexto_bruto = "\n\n".join(d.page_content for d in docs)
80
- contexto_limpio = limpiar_contexto_bruto(contexto_bruto)
81
- contexto_final = limitar_contexto(contexto_limpio)
82
-
83
- return llm_manager.generate_response(user_query=input_data, context=contexto_final)
 
1
+ # edullm_rag_pipeline.py
2
+
3
+ # =========================
4
+ # 📦 IMPORTACIONES
5
+ # =========================
6
+ import os
7
+ from typing import Union
8
+
9
+ from dotenv import load_dotenv
10
+ from loguru import logger
11
+ from pydantic import BaseModel
12
+
13
+ from core.pipeline.utils import limitar_contexto, limpiar_contexto_bruto, validar_input
14
+ from core.vectorstore.embeddings import EmbeddingManager
15
+ from core.vectorstore.distance_strategy import DistanceStrategyManager
16
+ from core.vectorstore.vectorstore_manager import VectorStoreManager
17
+ from core.llm.llm_manager import LLMManager
18
+
19
# =========================
# ⚙️ INITIAL CONFIGURATION
# =========================
# Load environment variables from the project's config file; when a variable
# is absent, the defaults given below are used instead.
load_dotenv(dotenv_path="config/.env")
VECTORSTORE_PATH = os.getenv("VECTORSTORE_PATH", "docs/")  # directory holding the vectorstore
VECTORSTORE_NAME = os.getenv("VECTORSTORE_NAME", "edullm_store")  # vectorstore name

# =========================
# 🚀 COMPONENT INITIALIZATION
# =========================
# Module-level singletons created at import time and shared by the pipeline
# functions below.
embeddings = EmbeddingManager.get_embeddings()  # NOTE(review): not referenced in this chunk — confirm it is used elsewhere
strategy_mgr = DistanceStrategyManager()  # NOTE(review): not referenced in this chunk — confirm it is used elsewhere
vector_mgr = VectorStoreManager(path=VECTORSTORE_PATH, name=VECTORSTORE_NAME)
llm_manager = LLMManager()
33
+
34
+
35
# =========================
# 📄 MODELS
# =========================
class Documento(BaseModel):
    """A retrieved document with its provenance and relevance score.

    NOTE(review): not instantiated anywhere in this chunk — presumably part of
    the module's public API; confirm against callers.
    """

    # Document body text.
    contenido: str
    # Source identifier — presumably the originating file/document; confirm.
    fuente: str
    # Relevance score; scale/direction not shown in this chunk.
    puntaje: float
42
+
43
+
44
+ # =========================
45
+ # 🛠️ FUNCIONES UTILITARIAS
46
+ # =========================
47
+
48
+
49
def init_vectorstore(force_rebuild: bool = False):
    """Create the vectorstore when it is missing or a rebuild is requested.

    Args:
        force_rebuild: When True, rebuild even if a store already exists.
    """
    if vector_mgr.exist_vectorstore() and not force_rebuild:
        # A store is already present and no rebuild was requested.
        return
    vector_mgr.create_vectorstore()
53
+
54
+
55
+ # =========================
56
+ # 🎯 PIPELINE PRINCIPAL
57
+ # =========================
58
def edullm_rag_pipeline(
    input_data: Union[str, bytes], top_k: int = 4, search_type: str = "similarity"
) -> str:
    """RAG pipeline for multimodal input and educational answer generation.

    Text queries are answered with context retrieved from the vectorstore;
    bytes input is forwarded to the LLM as an image. Returns the generated
    response, or an error string when the input fails validation.

    Args:
        input_data: The user query (str) or raw image bytes.
        top_k: Number of documents to retrieve for text queries.
        search_type: Retrieval strategy passed to the vectorstore retriever.
    """
    # Guard: reject anything the shared validator does not accept.
    if not validar_input(input_data):
        logger.error("❌ Entrada inválida. Debes proporcionar texto o imagen válida.")
        return "Error: Entrada no válida."

    # Image path: skip retrieval and hand the raw bytes straight to the LLM.
    if isinstance(input_data, bytes):
        return llm_manager.generate_response(
            user_query="Procesa la imagen adjunta y responde según el contexto educativo.",
            image=input_data,
        )

    # Text path: fetch the top-k candidate documents for the query.
    documentos = vector_mgr.as_retriever(search_type=search_type, k=top_k).invoke(
        input_data
    )

    if documentos:
        # Concatenate the retrieved passages, then clean and truncate them.
        crudo = "\n\n".join(doc.page_content for doc in documentos)
        contexto = limitar_contexto(limpiar_contexto_bruto(crudo))
    else:
        logger.warning("⚠️ Sin resultados en FAISS para la consulta.")
        contexto = "No se encontró contexto relevante."

    return llm_manager.generate_response(user_query=input_data, context=contexto)