JairoDanielMT committed on
Commit
ec2d469
·
verified ·
1 Parent(s): 3ffbfe0

Update core/integrations/telegram_bot.py

Browse files
Files changed (1) hide show
  1. core/integrations/telegram_bot.py +238 -237
core/integrations/telegram_bot.py CHANGED
@@ -1,237 +1,238 @@
1
- # core/integrations/telegram_bot.py
2
- import os
3
- import re
4
- import tempfile
5
- import time
6
-
7
- import fitz # PyMuPDF
8
- from docx import Document
9
- from dotenv import load_dotenv
10
- from telegram import InlineKeyboardButton, InlineKeyboardMarkup, InputFile, Update
11
- from telegram.ext import (
12
- ApplicationBuilder,
13
- CallbackQueryHandler,
14
- CommandHandler,
15
- ContextTypes,
16
- MessageHandler,
17
- filters,
18
- )
19
-
20
- from core.integrations.doc_converter import gestionar_descarga, procesar_markdown
21
- from core.logging.usage_logger import registrar_uso
22
- from core.pipeline.edullm_rag_pipeline import edullm_rag_pipeline
23
-
24
- # ==== CONFIGURACIÓN GENERAL ====
25
- load_dotenv(dotenv_path="config/.env")
26
- TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN")
27
- DOCX_FILENAME = "material_educativo.docx"
28
- FORMAT_WARNING_IMAGE = "assets/formatos_soportados.png"
29
-
30
- if not TELEGRAM_TOKEN:
31
- raise ValueError("❌ TELEGRAM_TOKEN no está definido en las variables de entorno.")
32
-
33
-
34
- # ==== FUNCIONES AUXILIARES ====
35
- def extract_text_from_pdf(file_path):
36
- text = ""
37
- with fitz.open(file_path) as pdf:
38
- for page in pdf:
39
- text += page.get_text()
40
- return text.strip()
41
-
42
-
43
- def extract_text_from_docx(file_path):
44
- doc = Document(file_path)
45
- return "\n".join(para.text for para in doc.paragraphs if para.text.strip())
46
-
47
-
48
- def extract_text_from_txt(file_path):
49
- with open(file_path, "r", encoding="utf-8") as f:
50
- return f.read().strip()
51
-
52
-
53
- def escape_markdown(text: str) -> str:
54
- """
55
- Escapa caracteres especiales para MarkdownV2 de Telegram.
56
- """
57
- escape_chars = r"_*[]()~`>#+-=|{}.!"
58
- return re.sub(f"([{re.escape(escape_chars)}])", r"\\\1", text)
59
-
60
-
61
- def detectar_tipo_entrada(user_input) -> str:
62
- if isinstance(user_input, str):
63
- return "Texto"
64
- elif isinstance(user_input, bytes):
65
- return "Imagen"
66
- else:
67
- return "Otro"
68
-
69
-
70
- # ==== COMANDO /start ====
71
- async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
72
- await update.message.reply_text(
73
- "👋 Bienvenido a *EduLLM Bot*.\n\n"
74
- "Acepto: *Texto*, *Imagen*, *PDF*, *DOCX* o *TXT*.\n"
75
- "Generaré material educativo listo para descargar en DOCX.",
76
- parse_mode="Markdown",
77
- )
78
-
79
-
80
- # ==== MANEJO DE MENSAJES ====
81
- async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
82
- user_input = ""
83
-
84
- try:
85
- if update.message.text:
86
- user_input = update.message.text
87
-
88
- elif update.message.photo:
89
- photo = update.message.photo[-1]
90
- file = await photo.get_file()
91
- with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_img:
92
- await file.download_to_drive(temp_img.name)
93
- with open(temp_img.name, "rb") as img_file:
94
- user_input = img_file.read()
95
-
96
- elif update.message.document:
97
- file = await update.message.document.get_file()
98
- ext = update.message.document.file_name.split(".")[-1].lower()
99
-
100
- with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as tmp_doc:
101
- await file.download_to_drive(tmp_doc.name)
102
-
103
- if ext == "pdf":
104
- extracted_text = extract_text_from_pdf(tmp_doc.name)
105
- elif ext == "docx":
106
- extracted_text = extract_text_from_docx(tmp_doc.name)
107
- elif ext == "txt":
108
- extracted_text = extract_text_from_txt(tmp_doc.name)
109
- else:
110
- await enviar_mensaje_formato_no_soportado(update)
111
- return
112
-
113
- mensaje_texto = update.message.caption or ""
114
- user_input = f"{mensaje_texto}\n\n{extracted_text}".strip()
115
-
116
- elif update.message.audio or update.message.voice or update.message.video:
117
- await update.message.reply_text(
118
- "🎙️🎥 *Audios y videos no son compatibles.* Solo acepto texto, imágenes o documentos (PDF, DOCX, TXT).",
119
- parse_mode="Markdown",
120
- )
121
- return
122
-
123
- elif update.message.sticker:
124
- await update.message.reply_text(
125
- "🟢 Gracias por el sticker, pero necesito texto, imagen o documento educativo."
126
- )
127
- return
128
-
129
- elif update.message.location:
130
- await update.message.reply_text(
131
- "📍 He recibido tu ubicación, pero solo trabajo con contenido educativo."
132
- )
133
- return
134
-
135
- elif update.message.contact:
136
- await update.message.reply_text(
137
- "📞 Recibí un contacto, pero por favor envíame contenido académico (texto, imagen o documento)."
138
- )
139
- return
140
-
141
- elif update.message.animation:
142
- await update.message.reply_text(
143
- "🎞️ Los GIFs no son compatibles. Por favor envía texto, imagen o documentos."
144
- )
145
- return
146
-
147
- else:
148
- await enviar_mensaje_formato_no_soportado(update)
149
- return
150
-
151
- finally:
152
- for temp_var in ["temp_img", "tmp_doc"]:
153
- if temp_var in locals() and os.path.exists(locals()[temp_var].name):
154
- os.remove(locals()[temp_var].name)
155
-
156
- if not user_input:
157
- await update.message.reply_text("⚠️ No se pudo obtener contenido válido.")
158
- return
159
-
160
- await update.message.reply_text("⏳ Generando tu material educativo...")
161
- start_time = time.time()
162
- try:
163
- resultado_md = edullm_rag_pipeline(user_input)
164
- exito = True
165
- except Exception as e:
166
- resultado_md = f"❌ Error: {str(e)}"
167
- exito = False
168
- duracion = time.time() - start_time
169
- registrar_uso(
170
- user_id=update.effective_user.id,
171
- username=update.effective_user.username,
172
- tipo_entrada=detectar_tipo_entrada(user_input),
173
- duracion_segundos=duracion,
174
- exito=exito,
175
- )
176
- context.user_data["ultimo_markdown"] = resultado_md
177
-
178
- preview = resultado_md[:1000] + ("\n..." if len(resultado_md) > 1000 else "")
179
- preview_safe = escape_markdown(preview)
180
- await update.message.reply_text(
181
- f"✅ *Material generado*:\n\n```\n{preview_safe}\n```", parse_mode="MarkdownV2"
182
- )
183
-
184
- botones = [[InlineKeyboardButton("📄 Descargar DOCX", callback_data="descargar_docx")]]
185
- await update.message.reply_text(
186
- "¿Deseas descargar el material?", reply_markup=InlineKeyboardMarkup(botones)
187
- )
188
-
189
-
190
- # ==== MENSAJE DE FORMATO NO SOPORTADO ====
191
- async def enviar_mensaje_formato_no_soportado(update: Update):
192
- await update.message.reply_photo(
193
- photo=InputFile(FORMAT_WARNING_IMAGE),
194
- caption="⚠️ *Formato no soportado.*\n\nAcepto:\n- Texto\n- Imagen\n- PDF (.pdf)\n- Word (.docx)\n- Texto plano (.txt)",
195
- parse_mode=None,
196
- )
197
-
198
-
199
- # ==== CALLBACK BOTONES ====
200
- async def button_handler(update: Update, context: ContextTypes.DEFAULT_TYPE):
201
- query = update.callback_query
202
- await query.answer()
203
-
204
- if query.data == "descargar_docx":
205
- markdown_content = context.user_data.get("ultimo_markdown")
206
- if not markdown_content:
207
- await query.edit_message_text("⚠️ No hay material disponible para convertir.")
208
- return
209
-
210
- resultado = procesar_markdown(markdown_content)
211
- if "error" in resultado:
212
- await query.edit_message_text("❌ Error al generar el archivo DOCX.")
213
- return
214
-
215
- file_id = resultado["file_id"]
216
- file_response = gestionar_descarga(file_id)
217
-
218
- if isinstance(file_response, dict):
219
- await query.edit_message_text(f"⚠️ {file_response.get('error')}")
220
- else:
221
- await query.edit_message_text("📥 Aquí tienes tu archivo DOCX:")
222
- await context.bot.send_document(
223
- chat_id=query.message.chat_id,
224
- document=file_response.path,
225
- filename=DOCX_FILENAME,
226
- )
227
-
228
-
229
- # ==== INICIAR BOT ====
230
- def start_bot():
231
- app = ApplicationBuilder().token(TELEGRAM_TOKEN).build()
232
- app.add_handler(CommandHandler("start", start))
233
- app.add_handler(MessageHandler(filters.ALL, handle_message))
234
- app.add_handler(CallbackQueryHandler(button_handler))
235
-
236
- print("🤖 EduLLM Bot en ejecución...")
237
- app.run_polling()
 
 
1
+ # core/integrations/telegram_bot.py
2
+ import os
3
+ import re
4
+ import tempfile
5
+ import time
6
+
7
+ import fitz # PyMuPDF
8
+ from docx import Document
9
+ from dotenv import load_dotenv
10
+ from telegram import InlineKeyboardButton, InlineKeyboardMarkup, InputFile, Update
11
+ from telegram.ext import (
12
+ ApplicationBuilder,
13
+ CallbackQueryHandler,
14
+ CommandHandler,
15
+ ContextTypes,
16
+ MessageHandler,
17
+ filters,
18
+ )
19
+
20
+ from core.integrations.doc_converter import gestionar_descarga, procesar_markdown
21
+ from core.logging.usage_logger import registrar_uso
22
+ from core.pipeline.edullm_rag_pipeline import edullm_rag_pipeline
23
+
24
+ # ==== CONFIGURACIÓN GENERAL ====
25
# Load the project's env file before any variable is read from the environment.
load_dotenv(dotenv_path="config/.env")

# File name given to the DOCX sent back to the user, and the image shown when
# an unsupported format is received.
DOCX_FILENAME = "material_educativo.docx"
FORMAT_WARNING_IMAGE = "assets/formatos_soportados.png"

# The bot cannot be built without its token, so fail as soon as the module
# is imported instead of at the first API call.
TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN")
if not TELEGRAM_TOKEN:
    raise ValueError("❌ TELEGRAM_TOKEN no está definido en las variables de entorno.")
32
+
33
+
34
+ # ==== FUNCIONES AUXILIARES ====
35
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*.

    The document is opened with PyMuPDF (``fitz``); the text of each page is
    concatenated in page order with no separator, and surrounding whitespace
    is stripped from the final result.
    """
    with fitz.open(file_path) as pdf:
        page_texts = [page.get_text() for page in pdf]
    return "".join(page_texts).strip()
41
+
42
+
43
def extract_text_from_docx(file_path):
    """Return the non-blank paragraphs of a DOCX file joined by newlines.

    Paragraphs whose text is empty or whitespace-only are skipped; the
    remaining paragraph texts are kept verbatim.
    """
    kept = []
    for para in Document(file_path).paragraphs:
        if para.text.strip():
            kept.append(para.text)
    return "\n".join(kept)
46
+
47
+
48
def extract_text_from_txt(file_path):
    """Return the stripped contents of a plain-text file.

    The file is decoded as UTF-8 first. ``utf-8-sig`` is used so that a
    leading byte-order mark is dropped instead of surviving ``strip()`` (a BOM
    is not whitespace). Files that are not valid UTF-8 are re-read as
    Windows-1252 with undecodable bytes replaced, so an arbitrary user upload
    cannot abort the caller with ``UnicodeDecodeError``.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded text with leading and trailing whitespace removed.
    """
    try:
        with open(file_path, "r", encoding="utf-8-sig") as f:
            return f.read().strip()
    except UnicodeDecodeError:
        # Legacy single-byte encoding fallback; "replace" guarantees success.
        with open(file_path, "r", encoding="cp1252", errors="replace") as f:
            return f.read().strip()
51
+
52
+
53
def escape_markdown(text: str) -> str:
    """Escape *text* for Telegram's MarkdownV2 parse mode.

    Every MarkdownV2 special character is prefixed with a backslash. The
    backslash itself is escaped as well: otherwise a backslash already present
    in the text (for example LaTeX such as ``\\frac``) would be read by
    Telegram as the start of an escape sequence and be dropped or make the
    message unparseable.

    Args:
        text: Raw text to embed in a MarkdownV2 message.

    Returns:
        The text with each special character preceded by a backslash.
    """
    escape_chars = "\\_*[]()~`>#+-=|{}.!"
    # Single pass over the text; each special character maps to "\" + itself.
    table = str.maketrans({ch: "\\" + ch for ch in escape_chars})
    return text.translate(table)
59
+
60
+
61
def detectar_tipo_entrada(user_input) -> str:
    """Classify the user input by its Python type for the usage log.

    Returns ``"Texto"`` for ``str``, ``"Imagen"`` for ``bytes`` and ``"Otro"``
    for anything else.
    """
    etiquetas = ((str, "Texto"), (bytes, "Imagen"))
    for tipo, etiqueta in etiquetas:
        if isinstance(user_input, tipo):
            return etiqueta
    return "Otro"
68
+
69
+
70
+ # ==== COMANDO /start ====
71
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Answer the /start command with the welcome text.

    The reply lists the accepted input kinds and is sent with the legacy
    ``Markdown`` parse mode so the ``*...*`` spans render as bold.
    """
    bienvenida = "".join(
        (
            "👋 Bienvenido a *EduLLM Bot*.\n\n",
            "Acepto: *Texto*, *Imagen*, *PDF*, *DOCX* o *TXT*.\n",
            "Generaré material educativo listo para descargar en DOCX.",
        )
    )
    await update.message.reply_text(bienvenida, parse_mode="Markdown")
78
+
79
+
80
+ # ==== MANEJO DE MENSAJES ====
81
def _crear_temporal(suffix, registro):
    """Create an empty, closed temp file, record its path in *registro*, return the path."""
    fd, path = tempfile.mkstemp(suffix=suffix)
    # Close our handle right away so the download can (re)open the path freely.
    os.close(fd)
    registro.append(path)
    return path


async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Turn an incoming message into educational material and offer a DOCX.

    Flow:
      1. Build ``user_input`` from the message: plain text is used as is, a
         photo is downloaded and passed on as raw bytes, and a PDF/DOCX/TXT
         document is downloaded and its extracted text is appended to the
         caption. Every other kind of message gets an explanatory reply and
         the handler returns.
      2. Run ``edullm_rag_pipeline`` on the input and record the attempt with
         ``registrar_uso`` (duration and success flag).
      3. On success, store the Markdown in ``context.user_data`` under
         ``"ultimo_markdown"``, send a preview of the first 1000 characters
         and an inline button to download the DOCX. On failure, only the
         error text is sent: nothing is stored and no download is offered.

    Temporary files created for downloads are always removed before the
    pipeline runs.
    """
    message = update.message
    if message is None:
        # update.message is optional in python-telegram-bot (e.g. edited
        # messages); the handler is registered for all updates, so ignore
        # those instead of failing on attribute access.
        return

    user_input = ""
    temp_paths = []
    extractores = {
        "pdf": extract_text_from_pdf,
        "docx": extract_text_from_docx,
        "txt": extract_text_from_txt,
    }

    try:
        if message.text:
            user_input = message.text

        elif message.photo:
            # Use the last entry of the photo list, as the original code did.
            photo = message.photo[-1]
            tg_file = await photo.get_file()
            img_path = _crear_temporal(".jpg", temp_paths)
            await tg_file.download_to_drive(img_path)
            with open(img_path, "rb") as img_file:
                user_input = img_file.read()

        elif message.document:
            # file_name may be missing; treat that as an unsupported format.
            file_name = message.document.file_name or ""
            ext = file_name.split(".")[-1].lower()
            extractor = extractores.get(ext)
            if extractor is None:
                # Checked before downloading: nothing is fetched for formats
                # we cannot read, and the user-supplied extension never
                # reaches the temp-file name unless it is a known one.
                await enviar_mensaje_formato_no_soportado(update)
                return

            tg_file = await message.document.get_file()
            doc_path = _crear_temporal(f".{ext}", temp_paths)
            await tg_file.download_to_drive(doc_path)
            extracted_text = extractor(doc_path)

            mensaje_texto = message.caption or ""
            user_input = f"{mensaje_texto}\n\n{extracted_text}".strip()

        elif message.audio or message.voice or message.video:
            await message.reply_text(
                "🎙️🎥 *Audios y videos no son compatibles.* Solo acepto texto, imágenes o documentos (PDF, DOCX, TXT).",
                parse_mode="Markdown",
            )
            return

        elif message.sticker:
            await message.reply_text(
                "🟢 Gracias por el sticker, pero necesito texto, imagen o documento educativo."
            )
            return

        elif message.location:
            await message.reply_text(
                "📍 He recibido tu ubicación, pero solo trabajo con contenido educativo."
            )
            return

        elif message.contact:
            await message.reply_text(
                "📞 Recibí un contacto, pero por favor envíame contenido académico (texto, imagen o documento)."
            )
            return

        elif message.animation:
            await message.reply_text(
                "🎞️ Los GIFs no son compatibles. Por favor envía texto, imagen o documentos."
            )
            return

        else:
            await enviar_mensaje_formato_no_soportado(update)
            return

    finally:
        # Runs on every exit path above, including the early returns.
        for path in temp_paths:
            if os.path.exists(path):
                os.remove(path)

    if not user_input:
        await message.reply_text("⚠️ No se pudo obtener contenido válido.")
        return

    await message.reply_text("⏳ Generando tu material educativo...")
    start_time = time.time()
    resultado_md = None
    error_texto = None
    try:
        # NOTE(review): this call is synchronous and runs inside the async
        # handler, so it holds the event loop while it works — confirm
        # whether the pipeline is safe to move to a worker thread.
        resultado_md = edullm_rag_pipeline(user_input)
    except Exception as e:  # handler boundary: report instead of crashing
        error_texto = f"❌ Error: {str(e)}"
    duracion = time.time() - start_time
    registrar_uso(
        user_id=update.effective_user.id,
        username=update.effective_user.username,
        tipo_entrada=detectar_tipo_entrada(user_input),
        duracion_segundos=duracion,
        exito=error_texto is None,
    )

    if error_texto is not None:
        # Do not present the failure as generated material, and do not store
        # it for the DOCX download.
        await message.reply_text(error_texto)
        return

    context.user_data["ultimo_markdown"] = resultado_md

    preview = resultado_md[:1000] + ("\n..." if len(resultado_md) > 1000 else "")
    preview_safe = escape_markdown(preview)
    await message.reply_text(
        f"✅ *Material generado*:\n\n```\n{preview_safe}\n```", parse_mode="MarkdownV2"
    )

    botones = [[InlineKeyboardButton("📄 Descargar DOCX", callback_data="descargar_docx")]]
    await message.reply_text(
        "¿Deseas descargar el material?", reply_markup=InlineKeyboardMarkup(botones)
    )
188
+
189
+
190
+ # ==== MENSAJE DE FORMATO NO SOPORTADO ====
191
async def enviar_mensaje_formato_no_soportado(update: Update):
    """Tell the user the received format is not supported.

    Sends the image at ``FORMAT_WARNING_IMAGE`` with a caption listing the
    accepted formats. The image is opened in binary mode and its handle is
    given to ``InputFile``: when ``InputFile`` receives a plain string it
    uploads that string as the file content, not the file at that path.
    The caption is sent with the ``Markdown`` parse mode, like the other
    ``*...*`` messages in this module, so the heading renders as bold. If the
    image file is missing, the same caption is sent as a text message.
    """
    caption = "⚠️ *Formato no soportado.*\n\nAcepto:\n- Texto\n- Imagen\n- PDF (.pdf)\n- Word (.docx)\n- Texto plano (.txt)"
    try:
        with open(FORMAT_WARNING_IMAGE, "rb") as imagen:
            await update.message.reply_photo(
                photo=InputFile(imagen),
                caption=caption,
                parse_mode="Markdown",
            )
    except FileNotFoundError:
        # The path is relative to the working directory; still inform the user.
        await update.message.reply_text(caption, parse_mode="Markdown")
197
+
198
+
199
+ # ==== CALLBACK BOTONES ====
200
async def button_handler(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle inline-button presses; only ``descargar_docx`` is acted upon.

    The Markdown saved under ``"ultimo_markdown"`` in ``context.user_data`` is
    converted with ``procesar_markdown`` and fetched with
    ``gestionar_descarga``. Each failure replaces the button message with a
    warning; on success the message is edited and the DOCX is sent to the
    chat under the name ``DOCX_FILENAME``.
    """
    query = update.callback_query
    # Acknowledge the callback before doing any work.
    await query.answer()

    if query.data != "descargar_docx":
        return

    markdown_content = context.user_data.get("ultimo_markdown")
    if not markdown_content:
        await query.edit_message_text("⚠️ No hay material disponible para convertir.")
        return

    resultado = procesar_markdown(markdown_content)
    if "error" in resultado:
        await query.edit_message_text("❌ Error al generar el archivo DOCX.")
        return

    file_response = gestionar_descarga(resultado["file_id"])

    # A dict result is how gestionar_descarga reports a failure here.
    if isinstance(file_response, dict):
        await query.edit_message_text(f"⚠️ {file_response.get('error')}")
        return

    await query.edit_message_text("📥 Aquí tienes tu archivo DOCX:")
    await context.bot.send_document(
        chat_id=query.message.chat_id,
        document=file_response.path,
        filename=DOCX_FILENAME,
    )
227
+
228
+
229
+ # ==== INICIAR BOT ====
230
async def start_bot():
    """Build the bot, register its handlers and poll until cancelled.

    ``Application.run_polling()`` is a blocking, non-awaitable call that
    drives the event loop itself, so it cannot be awaited from a coroutine.
    Because this function is a coroutine, the application lifecycle is run
    explicitly instead: initialize (via ``async with``), start, start polling,
    then wait until the surrounding task is cancelled and shut down in
    reverse order.
    """
    import asyncio  # local import: only this coroutine needs it

    app = ApplicationBuilder().token(TELEGRAM_TOKEN).build()
    app.add_handler(CommandHandler("start", start))
    app.add_handler(MessageHandler(filters.ALL, handle_message))
    app.add_handler(CallbackQueryHandler(button_handler))

    print("🤖 EduLLM Bot en ejecución...")
    # Entering the context initializes the application; leaving it shuts it down.
    async with app:
        await app.start()
        await app.updater.start_polling()
        try:
            # Park here until the caller cancels the task.
            await asyncio.Event().wait()
        finally:
            await app.updater.stop()
            await app.stop()
238
+