import os
import sys
import traceback
from pathlib import Path

import modal

PYTHON_VERSION = "3.10"
APP_NAME = "bioprocess-custom-eq-agent-modal"

LOCAL_APP_DIR = Path(__file__).parent
REMOTE_APP_DIR = "/app"

stub = modal.Stub(APP_NAME)

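# Container image: Debian slim with the Python deps from requirements.txt, the local
# app code copied into /app, and the Hugging Face / matplotlib caches pointed at
# writable paths inside the container.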
app_image = (
    modal.Image.debian_slim(python_version=PYTHON_VERSION)
    .pip_install_from_requirements(LOCAL_APP_DIR / "requirements.txt")
    .copy_mount(
        modal.Mount.from_local_dir(LOCAL_APP_DIR, remote_path=REMOTE_APP_DIR)
    )
    .env({
        "PYTHONPATH": REMOTE_APP_DIR,
        "HF_HOME": "/cache/huggingface",
        "HF_HUB_CACHE": "/cache/huggingface/hub",
        "TRANSFORMERS_CACHE": "/cache/huggingface/hub",
        "MPLCONFIGDIR": "/tmp/matplotlib_cache",
    })
    .run_commands(
        "apt-get update && apt-get install -y git git-lfs && rm -rf /var/lib/apt/lists/*",
        "mkdir -p /cache/huggingface/hub /tmp/matplotlib_cache",
    )
)


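# Remote GPU function: loads the configured Hugging Face causal LM and generates the
# analysis text for a single prompt. The HF cache directory is backed by a persisted
# modal.Volume so model weights are only downloaded on the first call.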
@stub.function(
    image=app_image,
    gpu="any",
    secrets=[
        modal.Secret.from_name("huggingface-read-token", optional=True)
    ],
    timeout=600,
    # Persist the Hugging Face cache between runs so the model is not re-downloaded.
    volumes={"/cache/huggingface": modal.Volume.persisted(f"{APP_NAME}-hf-cache-vol")},
)
def generate_analysis_llm_modal_remote(prompt: str, model_path_config: str, max_new_tokens_config: int) -> str:
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    hf_token = os.environ.get("HUGGING_FACE_TOKEN")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"LLM Modal Func: Using device: {device}")
    print(f"LLM Modal Func: Loading model: {model_path_config} with token: {'Yes' if hf_token else 'No'}")

    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_path_config, cache_dir="/cache/huggingface/hub", token=hf_token
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_path_config,
            torch_dtype="auto",
            device_map="auto",
            cache_dir="/cache/huggingface/hub",
            token=hf_token,
        )

        # Truncate the prompt so that prompt + generated tokens fit within the 4096-token context.
        inputs = tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=4096 - max_new_tokens_config
        ).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens_config,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.6,
                top_p=0.9,
            )

        # Decode only the newly generated tokens, skipping the echoed prompt.
        input_length = inputs.input_ids.shape[1]
        generated_ids = outputs[0][input_length:]
        analysis = tokenizer.decode(generated_ids, skip_special_tokens=True)

        print(f"LLM Modal Func: Generated analysis length: {len(analysis)} characters.")
        return analysis.strip()
    except Exception as e:
        error_traceback = traceback.format_exc()
        print(f"Error in generate_analysis_llm_modal_remote: {e}\n{error_traceback}")
        return f"Error generating analysis with the LLM model: {str(e)}"


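# Web endpoint: serves the Gradio UI as an ASGI app. It runs on the same image as the
# LLM function so that the UI, interface, and config modules under /app are importable.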
@stub.function(image=app_image)
@modal.asgi_app()
def serve_gradio_app_asgi():
    import gradio as gr
    sys.path.insert(0, REMOTE_APP_DIR)

    from UI import create_interface
    import interface as app_interface_module
    from config import MODEL_PATH as cfg_MODEL_PATH, MAX_LENGTH as cfg_MAX_LENGTH

    # Forward LLM analysis requests from the Gradio backend to the remote GPU function.
    def analysis_func_wrapper_for_interface(prompt: str) -> str:
        print("Gradio Backend: Calling generate_analysis_llm_modal_remote.remote...")
        return generate_analysis_llm_modal_remote.remote(prompt, cfg_MODEL_PATH, cfg_MAX_LENGTH)

    # Point the interface module at the Modal-backed analysis function.
    app_interface_module.generate_analysis_from_modal = analysis_func_wrapper_for_interface
    app_interface_module.USE_MODAL_FOR_LLM_ANALYSIS = True

    gradio_ui, all_ui_inputs, ui_outputs, ui_submit_button = create_interface()

    ui_submit_button.click(
        fn=app_interface_module.process_and_plot,
        inputs=all_ui_inputs,
        outputs=ui_outputs,
    )

    return gr.routes.App.create_app(gradio_ui)


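# Local entrypoint: `modal run <this file>` executes test_llm as a quick smoke test
# of the remote LLM function.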
@stub.local_entrypoint()
def test_llm():
    print("Testing LLM generation with Modal (locally)...")
    from config import MODEL_PATH, MAX_LENGTH
    sample_prompt = "Briefly explain the concept of R-squared (R²) in model fitting."
    analysis = generate_analysis_llm_modal_remote.remote(sample_prompt, MODEL_PATH, MAX_LENGTH)
    print("\nLLM response:")
    print(analysis)
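
# Typical usage with the Modal CLI (the filename modal_app.py is an assumption; use
# whatever name this script is saved under):
#   modal serve modal_app.py    # hot-reloading dev server for the Gradio ASGI app
#   modal deploy modal_app.py   # deploy the app and its web endpoint
#   modal run modal_app.py      # run the test_llm local entrypoint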