"""Modal deployment of the bioprocess custom-equation agent.

Defines the container image, a remote GPU function that runs the LLM analysis,
and an ASGI app that serves the Gradio UI.
"""

import os
import sys
import traceback
from pathlib import Path

import modal

PYTHON_VERSION = "3.10"
APP_NAME = "bioprocess-custom-eq-agent-modal"
LOCAL_APP_DIR = Path(__file__).parent
REMOTE_APP_DIR = "/app"

stub = modal.Stub(APP_NAME)
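
# Container image: Python dependencies from requirements.txt, the local app code
# copied to REMOTE_APP_DIR, Hugging Face / matplotlib cache locations, and
# git/git-lfs for pulling model files.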
app_image = (
    modal.Image.debian_slim(python_version=PYTHON_VERSION)
    .pip_install_from_requirements(LOCAL_APP_DIR / "requirements.txt")
    .copy_mount(
        modal.Mount.from_local_dir(LOCAL_APP_DIR, remote_path=REMOTE_APP_DIR)
    )
    .env({
        "PYTHONPATH": REMOTE_APP_DIR,
        "HF_HOME": "/cache/huggingface",
        "HF_HUB_CACHE": "/cache/huggingface/hub",
        "TRANSFORMERS_CACHE": "/cache/huggingface/hub",
        "MPLCONFIGDIR": "/tmp/matplotlib_cache",
    })
    .run_commands(
        "apt-get update && apt-get install -y git git-lfs && rm -rf /var/lib/apt/lists/*",
        "mkdir -p /cache/huggingface/hub /tmp/matplotlib_cache",
    )
)
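

# Remote GPU function: loads the configured Hugging Face model and generates the
# analysis text. Model weights are cached in a persisted Modal Volume so they are
# downloaded only once across containers.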
@stub.function(
    image=app_image,
    gpu="any",
    secrets=[modal.Secret.from_name("huggingface-read-token", optional=True)],
    timeout=600,
    volumes={"/cache/huggingface": modal.Volume.persisted(f"{APP_NAME}-hf-cache-vol")},
)
def generate_analysis_llm_modal_remote(prompt: str, model_path_config: str, max_new_tokens_config: int) -> str:
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    hf_token = os.environ.get("HUGGING_FACE_TOKEN")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"LLM Modal Func: using device: {device}")
    print(f"LLM Modal Func: loading model: {model_path_config} (token provided: {'yes' if hf_token else 'no'})")

    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_path_config,
            cache_dir="/cache/huggingface/hub",
            token=hf_token,
            trust_remote_code=True,
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_path_config,
            torch_dtype="auto",
            device_map="auto",
            cache_dir="/cache/huggingface/hub",
            token=hf_token,
            trust_remote_code=True,
        )

        # Determine the model's context window, falling back to 4096 tokens.
        model_context_window = getattr(
            model.config, "max_position_embeddings", getattr(model.config, "sliding_window", 4096)
        )
        if model_context_window is None:
            model_context_window = 4096

        # Truncate the prompt so it fits alongside the requested new tokens,
        # keeping a small safety margin.
        max_prompt_len = model_context_window - max_new_tokens_config - 50
        if max_prompt_len <= 0:
            max_prompt_len = model_context_window // 2

        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_prompt_len).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens_config,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.6,
                top_p=0.9,
            )

        # Decode only the newly generated tokens, not the echoed prompt.
        input_length = inputs.input_ids.shape[1]
        generated_ids = outputs[0][input_length:]
        analysis = tokenizer.decode(generated_ids, skip_special_tokens=True)

        print(f"LLM Modal Func: generated analysis length: {len(analysis)} characters.")
        return analysis.strip()
    except Exception as e:
        error_traceback = traceback.format_exc()
        print(f"Error in generate_analysis_llm_modal_remote: {e}\n{error_traceback}")
        return f"Error generating analysis with the LLM model: {str(e)}"
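

# ASGI app: serves the Gradio UI. The app's own modules (UI, interface, config,
# decorators) are imported from REMOTE_APP_DIR inside the container.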
@stub.function(image=app_image)
@modal.asgi_app()
def serve_gradio_app_asgi():
    import gradio as gr

    if REMOTE_APP_DIR not in sys.path:
        sys.path.insert(0, REMOTE_APP_DIR)
        print(f"INFO (modal_app.py @asgi): added {REMOTE_APP_DIR} to sys.path")

    # Neutralize the 'spaces' GPU decorators used by the shared app code
    # (written for Hugging Face Spaces): replace them with no-ops, since GPU
    # scheduling is handled by Modal here.
    try:
        import decorators

        class _GPUNeutralizerInModal:
            def __init__(self, *args, **kwargs): pass
            def __call__(self, func): return func

        decorators.ActualSpacesGPU = _GPUNeutralizerInModal
        decorators._GPU_decorator_target = _GPUNeutralizerInModal
        decorators.gpu_decorator = lambda duration=0: lambda func: func
        print("INFO (modal_app.py @asgi): 'spaces' GPU decorators neutralized for the Modal environment.")
    except ImportError:
        print("WARNING (modal_app.py @asgi): 'decorators' module not found during neutralization. This may be fine.")
    except Exception as e_neut_modal:
        print(f"WARNING (modal_app.py @asgi): error neutralizing decorators in Modal: {e_neut_modal}")

    from UI import create_interface
    import interface as app_interface_module
    from config import MODEL_PATH as cfg_MODEL_PATH, MAX_LENGTH as cfg_MAX_LENGTH

    # Wrapper that forwards LLM analysis requests to the remote Modal GPU function.
    def analysis_func_wrapper_for_interface_modal(prompt: str) -> str:
        print("Gradio Backend (Modal): calling generate_analysis_llm_modal_remote.remote...")
        return generate_analysis_llm_modal_remote.remote(prompt, cfg_MODEL_PATH, cfg_MAX_LENGTH)

    # Inject the remote runner into the 'interface' module so its analysis path
    # uses Modal instead of a locally loaded model.
    app_interface_module.generate_analysis_from_modal = analysis_func_wrapper_for_interface_modal
    app_interface_module.USE_MODAL_FOR_LLM_ANALYSIS = True
    print("INFO (modal_app.py @asgi): Modal LLM runner injected into the 'interface' module.")

    gradio_ui_instance = create_interface(process_function_for_button=app_interface_module.process_and_plot)

    print("INFO (modal_app.py @asgi): Gradio interface created and ready to be served by Modal.")
    return gr.routes.App.create_app(gradio_ui_instance)
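

# Local entrypoint: quick smoke test of the remote LLM function
# (run with `modal run` from the project directory).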
@stub.local_entrypoint()
def test_llm_local_entry():
    print("Testing LLM generation with Modal (local_entrypoint)...")
    if str(LOCAL_APP_DIR) not in sys.path:
        sys.path.insert(0, str(LOCAL_APP_DIR))
    from config import MODEL_PATH, MAX_LENGTH

    sample_prompt = "Briefly explain the concept of R squared (R²) in model fitting."
    try:
        analysis = generate_analysis_llm_modal_remote.remote(sample_prompt, MODEL_PATH, MAX_LENGTH)
        print("\nLLM response:")
        print(analysis)
    except Exception as e:
        print(f"Error during test_llm_local_entry: {e}")
        traceback.print_exc()
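

# Typical usage, assuming this file is saved as modal_app.py:
#   modal run modal_app.py      # executes test_llm_local_entry against the remote LLM function
#   modal serve modal_app.py    # serves the Gradio ASGI app with live reload during development
#   modal deploy modal_app.py   # deploys the app under APP_NAME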