Spaces:

JairoDanielMT
/

edullm

Running

App Files Files Community

JairoDanielMT committed on Jun 3

Commit

ec2d469

verified ·

1 Parent(s): 3ffbfe0

Update core/integrations/telegram_bot.py

Browse files

Files changed (1) hide show

core/integrations/telegram_bot.py +238 -237

core/integrations/telegram_bot.py CHANGED Viewed

@@ -1,237 +1,238 @@
-# core/integrations/telegram_bot.py
-import os
-import re
-import tempfile
-import time
-import fitz  # PyMuPDF
-from docx import Document
-from dotenv import load_dotenv
-from telegram import InlineKeyboardButton, InlineKeyboardMarkup, InputFile, Update
-from telegram.ext import (
-    ApplicationBuilder,
-    CallbackQueryHandler,
-    CommandHandler,
-    ContextTypes,
-    MessageHandler,
-    filters,
-)
-from core.integrations.doc_converter import gestionar_descarga, procesar_markdown
-from core.logging.usage_logger import registrar_uso
-from core.pipeline.edullm_rag_pipeline import edullm_rag_pipeline
-# ==== CONFIGURACIÓN GENERAL ====
-load_dotenv(dotenv_path="config/.env")
-TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN")
-DOCX_FILENAME = "material_educativo.docx"
-FORMAT_WARNING_IMAGE = "assets/formatos_soportados.png"
-if not TELEGRAM_TOKEN:
-    raise ValueError("❌ TELEGRAM_TOKEN no está definido en las variables de entorno.")
-# ==== FUNCIONES AUXILIARES ====
-def extract_text_from_pdf(file_path):
-    text = ""
-    with fitz.open(file_path) as pdf:
-        for page in pdf:
-            text += page.get_text()
-    return text.strip()
-def extract_text_from_docx(file_path):
-    doc = Document(file_path)
-    return "\n".join(para.text for para in doc.paragraphs if para.text.strip())
-def extract_text_from_txt(file_path):
-    with open(file_path, "r", encoding="utf-8") as f:
-        return f.read().strip()
-def escape_markdown(text: str) -> str:
-    """
-    Escapa caracteres especiales para MarkdownV2 de Telegram.
-    """
-    escape_chars = r"_*[]()~`>#+-=|{}.!"
-    return re.sub(f"([{re.escape(escape_chars)}])", r"\\\1", text)
-def detectar_tipo_entrada(user_input) -> str:
-    if isinstance(user_input, str):
-        return "Texto"
-    elif isinstance(user_input, bytes):
-        return "Imagen"
-    else:
-        return "Otro"
-# ==== COMANDO /start ====
-async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
-    await update.message.reply_text(
-        "👋 Bienvenido a *EduLLM Bot*.\n\n"
-        "Acepto: *Texto*, *Imagen*, *PDF*, *DOCX* o *TXT*.\n"
-        "Generaré material educativo listo para descargar en DOCX.",
-        parse_mode="Markdown",
-    )
-# ==== MANEJO DE MENSAJES ====
-async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
-    user_input = ""
-    try:
-        if update.message.text:
-            user_input = update.message.text
-        elif update.message.photo:
-            photo = update.message.photo[-1]
-            file = await photo.get_file()
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_img:
-                await file.download_to_drive(temp_img.name)
-                with open(temp_img.name, "rb") as img_file:
-                    user_input = img_file.read()
-        elif update.message.document:
-            file = await update.message.document.get_file()
-            ext = update.message.document.file_name.split(".")[-1].lower()
-            with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as tmp_doc:
-                await file.download_to_drive(tmp_doc.name)
-                if ext == "pdf":
-                    extracted_text = extract_text_from_pdf(tmp_doc.name)
-                elif ext == "docx":
-                    extracted_text = extract_text_from_docx(tmp_doc.name)
-                elif ext == "txt":
-                    extracted_text = extract_text_from_txt(tmp_doc.name)
-                else:
-                    await enviar_mensaje_formato_no_soportado(update)
-                    return
-                mensaje_texto = update.message.caption or ""
-                user_input = f"{mensaje_texto}\n\n{extracted_text}".strip()
-        elif update.message.audio or update.message.voice or update.message.video:
-            await update.message.reply_text(
-                "🎙️🎥 *Audios y videos no son compatibles.* Solo acepto texto, imágenes o documentos (PDF, DOCX, TXT).",
-                parse_mode="Markdown",
-            )
-            return
-        elif update.message.sticker:
-            await update.message.reply_text(
-                "🟢 Gracias por el sticker, pero necesito texto, imagen o documento educativo."
-            )
-            return
-        elif update.message.location:
-            await update.message.reply_text(
-                "📍 He recibido tu ubicación, pero solo trabajo con contenido educativo."
-            )
-            return
-        elif update.message.contact:
-            await update.message.reply_text(
-                "📞 Recibí un contacto, pero por favor envíame contenido académico (texto, imagen o documento)."
-            )
-            return
-        elif update.message.animation:
-            await update.message.reply_text(
-                "🎞️ Los GIFs no son compatibles. Por favor envía texto, imagen o documentos."
-            )
-            return
-        else:
-            await enviar_mensaje_formato_no_soportado(update)
-            return
-    finally:
-        for temp_var in ["temp_img", "tmp_doc"]:
-            if temp_var in locals() and os.path.exists(locals()[temp_var].name):
-                os.remove(locals()[temp_var].name)
-    if not user_input:
-        await update.message.reply_text("⚠️ No se pudo obtener contenido válido.")
-        return
-    await update.message.reply_text("⏳ Generando tu material educativo...")
-    start_time = time.time()
-    try:
-        resultado_md = edullm_rag_pipeline(user_input)
-        exito = True
-    except Exception as e:
-        resultado_md = f"❌ Error: {str(e)}"
-        exito = False
-    duracion = time.time() - start_time
-    registrar_uso(
-        user_id=update.effective_user.id,
-        username=update.effective_user.username,
-        tipo_entrada=detectar_tipo_entrada(user_input),
-        duracion_segundos=duracion,
-        exito=exito,
-    )
-    context.user_data["ultimo_markdown"] = resultado_md
-    preview = resultado_md[:1000] + ("\n..." if len(resultado_md) > 1000 else "")
-    preview_safe = escape_markdown(preview)
-    await update.message.reply_text(
-        f"✅ *Material generado*:\n\n```\n{preview_safe}\n```", parse_mode="MarkdownV2"
-    )
-    botones = [[InlineKeyboardButton("📄 Descargar DOCX", callback_data="descargar_docx")]]
-    await update.message.reply_text(
-        "¿Deseas descargar el material?", reply_markup=InlineKeyboardMarkup(botones)
-    )
-# ==== MENSAJE DE FORMATO NO SOPORTADO ====
-async def enviar_mensaje_formato_no_soportado(update: Update):
-    await update.message.reply_photo(
-        photo=InputFile(FORMAT_WARNING_IMAGE),
-        caption="⚠️ *Formato no soportado.*\n\nAcepto:\n- Texto\n- Imagen\n- PDF (.pdf)\n- Word (.docx)\n- Texto plano (.txt)",
-        parse_mode=None,
-    )
-# ==== CALLBACK BOTONES ====
-async def button_handler(update: Update, context: ContextTypes.DEFAULT_TYPE):
-    query = update.callback_query
-    await query.answer()
-    if query.data == "descargar_docx":
-        markdown_content = context.user_data.get("ultimo_markdown")
-        if not markdown_content:
-            await query.edit_message_text("⚠️ No hay material disponible para convertir.")
-            return
-        resultado = procesar_markdown(markdown_content)
-        if "error" in resultado:
-            await query.edit_message_text("❌ Error al generar el archivo DOCX.")
-            return
-        file_id = resultado["file_id"]
-        file_response = gestionar_descarga(file_id)
-        if isinstance(file_response, dict):
-            await query.edit_message_text(f"⚠️ {file_response.get('error')}")
-        else:
-            await query.edit_message_text("📥 Aquí tienes tu archivo DOCX:")
-            await context.bot.send_document(
-                chat_id=query.message.chat_id,
-                document=file_response.path,
-                filename=DOCX_FILENAME,
-            )
-# ==== INICIAR BOT ====
-def start_bot():
-    app = ApplicationBuilder().token(TELEGRAM_TOKEN).build()
-    app.add_handler(CommandHandler("start", start))
-    app.add_handler(MessageHandler(filters.ALL, handle_message))
-    app.add_handler(CallbackQueryHandler(button_handler))
-    print("🤖 EduLLM Bot en ejecución...")
-    app.run_polling()

+# core/integrations/telegram_bot.py
+import os
+import re
+import tempfile
+import time
+import fitz  # PyMuPDF
+from docx import Document
+from dotenv import load_dotenv
+from telegram import InlineKeyboardButton, InlineKeyboardMarkup, InputFile, Update
+from telegram.ext import (
+    ApplicationBuilder,
+    CallbackQueryHandler,
+    CommandHandler,
+    ContextTypes,
+    MessageHandler,
+    filters,
+)
+from core.integrations.doc_converter import gestionar_descarga, procesar_markdown
+from core.logging.usage_logger import registrar_uso
+from core.pipeline.edullm_rag_pipeline import edullm_rag_pipeline
+# ==== CONFIGURACIÓN GENERAL ====
+# Load settings from the project's env file before any variable is read.
+load_dotenv(dotenv_path="config/.env")
+# File name shown to the user when the generated DOCX is sent.
+DOCX_FILENAME = "material_educativo.docx"
+# Image sent together with the "unsupported format" notice.
+FORMAT_WARNING_IMAGE = "assets/formatos_soportados.png"
+TELEGRAM_TOKEN = os.environ.get("TELEGRAM_TOKEN")
+# Fail at import time: the bot cannot be built without a token.
+if TELEGRAM_TOKEN is None or TELEGRAM_TOKEN == "":
+    raise ValueError("❌ TELEGRAM_TOKEN no está definido en las variables de entorno.")
+# ==== FUNCIONES AUXILIARES ====
+def extract_text_from_pdf(file_path):
+    """Return the text of every page of the PDF at ``file_path``.
+    Page texts are concatenated in page order with no separator and the
+    result is stripped of leading/trailing whitespace.
+    """
+    with fitz.open(file_path) as document:
+        page_texts = [page.get_text() for page in document]
+    return "".join(page_texts).strip()
+def extract_text_from_docx(file_path):
+    """Return the non-blank paragraphs of the DOCX at ``file_path``, one per line."""
+    kept = []
+    for paragraph in Document(file_path).paragraphs:
+        content = paragraph.text
+        # Paragraphs that are empty or whitespace-only are dropped.
+        if content.strip():
+            kept.append(content)
+    return "\n".join(kept)
+def extract_text_from_txt(file_path):
+    """Return the stripped contents of the UTF-8 text file at ``file_path``.
+    The file is decoded with ``utf-8-sig`` so that a leading byte-order mark
+    is discarded. With plain ``utf-8`` the BOM survives as ``\\ufeff``, which
+    ``str.strip()`` does not treat as whitespace. Files without a BOM decode
+    exactly as before.
+    Raises:
+        UnicodeDecodeError: if the file is not valid UTF-8.
+        OSError: if the file cannot be opened.
+    """
+    with open(file_path, "r", encoding="utf-8-sig") as f:
+        return f.read().strip()
+def escape_markdown(text: str) -> str:
+    """Escape the characters that are special in Telegram's MarkdownV2.
+    Every occurrence of one of ``\\ _ * [ ] ( ) ~ ` > # + - = | { } . !`` is
+    prefixed with a backslash. The backslash itself is part of the set:
+    left unescaped, a literal backslash in ``text`` would act as an escape
+    for whatever character follows it.
+    Args:
+        text: Raw text to embed in a MarkdownV2 message.
+    Returns:
+        The text with each special character preceded by a backslash.
+    """
+    escape_chars = "\\_*[]()~`>#+-=|{}.!"
+    # re.escape makes every listed character literal inside the class.
+    return re.sub(f"([{re.escape(escape_chars)}])", r"\\\1", text)
+def detectar_tipo_entrada(user_input) -> str:
+    """Classify the input for usage logging.
+    Returns "Texto" for ``str``, "Imagen" for ``bytes`` and "Otro" for
+    anything else.
+    """
+    if isinstance(user_input, str):
+        return "Texto"
+    return "Imagen" if isinstance(user_input, bytes) else "Otro"
+# ==== COMANDO /start ====
+async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Answer the /start command with the welcome text (legacy Markdown)."""
+    welcome_parts = (
+        "👋 Bienvenido a *EduLLM Bot*.\n\n",
+        "Acepto: *Texto*, *Imagen*, *PDF*, *DOCX* o *TXT*.\n",
+        "Generaré material educativo listo para descargar en DOCX.",
+    )
+    await update.message.reply_text("".join(welcome_parts), parse_mode="Markdown")
+# ==== MANEJO DE MENSAJES ====
+async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Turn an incoming message into educational material and offer it as DOCX.
+    Text is used as-is, a photo is handed to the pipeline as raw bytes, and
+    PDF/DOCX/TXT documents are downloaded and reduced to text (preceded by the
+    caption, if any). Any other message type gets a short rejection reply.
+    Behaviour worth knowing:
+    - Updates without ``update.message`` are ignored. The handler is
+      registered with ``filters.ALL``, which also delivers edited messages
+      and channel posts, and those carry no ``message``.
+    - A document with no file name or an unsupported extension is rejected
+      before anything is downloaded.
+    - Temporary files are closed before being written to or parsed and are
+      always removed, including on early return or error.
+    - If the pipeline raises, usage is still logged with ``exito=False``, the
+      error text is sent as a plain reply, and nothing is stored for download.
+    """
+    message = update.message
+    if message is None:
+        return
+    user_input = ""
+    temp_paths = []  # every temp file created below; removed in ``finally``
+    try:
+        if message.text:
+            user_input = message.text
+        elif message.photo:
+            # The last entry is used (presumably the largest size — confirm
+            # against the Bot API description of ``Message.photo``).
+            file = await message.photo[-1].get_file()
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_img:
+                temp_paths.append(temp_img.name)
+            # The handle is closed first so the download owns the path.
+            await file.download_to_drive(temp_img.name)
+            with open(temp_img.name, "rb") as img_file:
+                user_input = img_file.read()
+        elif message.document:
+            file_name = message.document.file_name or ""
+            ext = file_name.split(".")[-1].lower()
+            extractors = {
+                "pdf": extract_text_from_pdf,
+                "docx": extract_text_from_docx,
+                "txt": extract_text_from_txt,
+            }
+            extractor = extractors.get(ext)
+            if extractor is None:
+                await enviar_mensaje_formato_no_soportado(update)
+                return
+            file = await message.document.get_file()
+            with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as tmp_doc:
+                temp_paths.append(tmp_doc.name)
+            await file.download_to_drive(tmp_doc.name)
+            extracted_text = extractor(tmp_doc.name)
+            mensaje_texto = message.caption or ""
+            user_input = f"{mensaje_texto}\n\n{extracted_text}".strip()
+        elif message.audio or message.voice or message.video:
+            await message.reply_text(
+                "🎙️🎥 *Audios y videos no son compatibles.* Solo acepto texto, imágenes o documentos (PDF, DOCX, TXT).",
+                parse_mode="Markdown",
+            )
+            return
+        elif message.sticker:
+            await message.reply_text(
+                "🟢 Gracias por el sticker, pero necesito texto, imagen o documento educativo."
+            )
+            return
+        elif message.location:
+            await message.reply_text(
+                "📍 He recibido tu ubicación, pero solo trabajo con contenido educativo."
+            )
+            return
+        elif message.contact:
+            await message.reply_text(
+                "📞 Recibí un contacto, pero por favor envíame contenido académico (texto, imagen o documento)."
+            )
+            return
+        elif message.animation:
+            await message.reply_text(
+                "🎞️ Los GIFs no son compatibles. Por favor envía texto, imagen o documentos."
+            )
+            return
+        else:
+            await enviar_mensaje_formato_no_soportado(update)
+            return
+    finally:
+        for path in temp_paths:
+            if os.path.exists(path):
+                os.remove(path)
+    if not user_input:
+        await message.reply_text("⚠️ No se pudo obtener contenido válido.")
+        return
+    await message.reply_text("⏳ Generando tu material educativo...")
+    # Monotonic clock: the measured duration cannot be skewed by clock changes.
+    start_time = time.monotonic()
+    # NOTE(review): the pipeline is called synchronously inside this coroutine,
+    # so the event loop is blocked while it runs — confirm this is acceptable.
+    try:
+        resultado_md = edullm_rag_pipeline(user_input)
+        exito = True
+    except Exception as e:
+        resultado_md = f"❌ Error: {str(e)}"
+        exito = False
+    duracion = time.monotonic() - start_time
+    registrar_uso(
+        user_id=update.effective_user.id,
+        username=update.effective_user.username,
+        tipo_entrada=detectar_tipo_entrada(user_input),
+        duracion_segundos=duracion,
+        exito=exito,
+    )
+    if not exito:
+        # Report the failure as such; keep any previously stored material.
+        await message.reply_text(resultado_md)
+        return
+    context.user_data["ultimo_markdown"] = resultado_md
+    # Truncate before escaping so an escape sequence is never cut in half.
+    preview = resultado_md[:1000] + ("\n..." if len(resultado_md) > 1000 else "")
+    preview_safe = escape_markdown(preview)
+    await message.reply_text(
+        f"✅ *Material generado*:\n\n```\n{preview_safe}\n```", parse_mode="MarkdownV2"
+    )
+    botones = [[InlineKeyboardButton("📄 Descargar DOCX", callback_data="descargar_docx")]]
+    await message.reply_text(
+        "¿Deseas descargar el material?", reply_markup=InlineKeyboardMarkup(botones)
+    )
+# ==== MENSAJE DE FORMATO NO SOPORTADO ====
+async def enviar_mensaje_formato_no_soportado(update: Update):
+    """Tell the user which input formats are accepted.
+    Sends the image at ``FORMAT_WARNING_IMAGE`` with the notice as caption.
+    The image is passed as an open binary file: ``InputFile`` given a ``str``
+    treats that string as the file's content, so passing the path would
+    upload the path text instead of the picture. If the image cannot be
+    opened, the notice is sent as a plain text reply instead.
+    The caption is sent without a parse mode, i.e. as plain text.
+    """
+    caption = "⚠️ *Formato no soportado.*\n\nAcepto:\n- Texto\n- Imagen\n- PDF (.pdf)\n- Word (.docx)\n- Texto plano (.txt)"
+    try:
+        image = open(FORMAT_WARNING_IMAGE, "rb")
+    except OSError:
+        # Missing/unreadable asset: the user still gets the explanation.
+        await update.message.reply_text(caption)
+        return
+    with image:
+        await update.message.reply_photo(
+            photo=image,
+            caption=caption,
+            parse_mode=None,
+        )
+# ==== CALLBACK BOTONES ====
+async def button_handler(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Handle inline-button presses; only "descargar_docx" triggers work.
+    The callback is always acknowledged. For "descargar_docx", the Markdown
+    stored under ``user_data["ultimo_markdown"]`` is converted with
+    ``procesar_markdown``, the file is looked up with ``gestionar_descarga``
+    and sent as a document. Each failure edits the button message with the
+    matching notice and stops.
+    """
+    query = update.callback_query
+    await query.answer()
+    if query.data != "descargar_docx":
+        return
+    stored_markdown = context.user_data.get("ultimo_markdown")
+    if not stored_markdown:
+        await query.edit_message_text("⚠️ No hay material disponible para convertir.")
+        return
+    conversion = procesar_markdown(stored_markdown)
+    if "error" in conversion:
+        await query.edit_message_text("❌ Error al generar el archivo DOCX.")
+        return
+    download = gestionar_descarga(conversion["file_id"])
+    # A dict result is treated as a failure carrying an "error" entry.
+    if isinstance(download, dict):
+        await query.edit_message_text(f"⚠️ {download.get('error')}")
+        return
+    await query.edit_message_text("📥 Aquí tienes tu archivo DOCX:")
+    await context.bot.send_document(
+        chat_id=query.message.chat_id,
+        document=download.path,
+        filename=DOCX_FILENAME,
+    )
+# ==== INICIAR BOT ====
+async def start_bot():
+    """Build the bot, register its handlers and poll until cancelled.
+    ``Application.run_polling()`` is a blocking, synchronous call that drives
+    its own event loop, so it cannot be awaited from inside a coroutine.
+    Here the application is started step by step on the already running
+    loop: initialize (via ``async with``), ``start()``, then
+    ``updater.start_polling()``.
+    The coroutine then waits indefinitely. It ends only when its task is
+    cancelled (for example on shutdown of the surrounding program), at which
+    point polling and the application are stopped in order.
+    """
+    import asyncio  # local import: only this function needs it
+    app = ApplicationBuilder().token(TELEGRAM_TOKEN).build()
+    app.add_handler(CommandHandler("start", start))
+    app.add_handler(MessageHandler(filters.ALL, handle_message))
+    app.add_handler(CallbackQueryHandler(button_handler))
+    print("🤖 EduLLM Bot en ejecución...")
+    async with app:  # initialize() on entry, shutdown() on exit
+        await app.start()
+        await app.updater.start_polling()
+        try:
+            # Never set: keeps the coroutine alive until it is cancelled.
+            await asyncio.Event().wait()
+        finally:
+            await app.updater.stop()
+            await app.stop()