import re

import gradio as gr
import numpy as np
import PIL.Image

from inference import PaliGemmaModel, VAEModel

COLORS = ['#4285f4', '#db4437', '#f4b400', '#0f9d58', '#e48ef1']

# Instantiate the models
pali_gemma_model = PaliGemmaModel()
vae_model = VAEModel('vae-oid.npz')


##### Parse segmentation output tokens into masks.
##### Also returns bounding boxes with their labels.
def parse_segmentation(input_image, input_text, max_new_tokens=100):
    out = pali_gemma_model.infer(
        image=input_image, text=input_text, max_new_tokens=max_new_tokens)
    objs = extract_objs(
        out.lstrip("\n"), input_image.size[0], input_image.size[1],
        unique_labels=True)
    # Each annotation is (mask-or-box, label); masks take precedence when present.
    annotated_img = (
        input_image,
        [
            (
                obj['mask'] if obj.get('mask') is not None else obj['xyxy'],
                obj['name'] or '',
            )
            for obj in objs
            if 'mask' in obj or 'xyxy' in obj
        ],
    )
    return annotated_img


INTRO_TEXT = "🔬🧠 CellVision AI -- Intelligent Cell Imaging Analysis 🤖🧫"
IMAGE_PROMPT = """
Describe the morphological characteristics and visible interactions between different cell types.
Assess the biological context to identify signs of cancer and the presence of antigens.
"""

with gr.Blocks(css="style.css") as demo:
    gr.Markdown(INTRO_TEXT)

    with gr.Tab("Segment/Detect"):
        with gr.Row():
            with gr.Column():
                image = gr.Image(type="pil")
                seg_input = gr.Text(label="Entities to Segment/Detect")
            with gr.Column():
                annotated_image = gr.AnnotatedImage(label="Output")
        seg_btn = gr.Button("Submit")
        examples = [
            ["./examples/cart1.jpg", "segment cells"],
            ["./examples/cart1.jpg", "detect cells"],
            ["./examples/cart2.jpg", "segment cells"],
            ["./examples/cart2.jpg", "detect cells"],
            ["./examples/cart3.jpg", "segment cells"],
            ["./examples/cart3.jpg", "detect cells"],
        ]
        gr.Examples(
            examples=examples,
            inputs=[image, seg_input],
        )
        seg_inputs = [image, seg_input]
        seg_outputs = [annotated_image]
        seg_btn.click(
            fn=parse_segmentation,
            inputs=seg_inputs,
            outputs=seg_outputs,
        )

    with gr.Tab("Text Generation"):
        with gr.Column():
            image = gr.Image(type="pil")
            text_input = gr.Text(label="Input Text")
            text_output = gr.Text(label="Text Output")
            chat_btn = gr.Button()
            tokens = gr.Slider(
                label="Max New Tokens",
                info="Set to larger for longer generation.",
                minimum=10,
                maximum=100,
                value=50,
                step=10,
            )
        chat_inputs = [image, text_input, tokens]
        chat_outputs = [text_output]
        chat_btn.click(
            fn=pali_gemma_model.infer,
            inputs=chat_inputs,
            outputs=chat_outputs,
        )
        examples = [
            ["./examples/cart1.jpg", IMAGE_PROMPT],
            ["./examples/cart2.jpg", IMAGE_PROMPT],
            ["./examples/cart3.jpg", IMAGE_PROMPT],
        ]
        # Example rows only provide (image, prompt), so list just those inputs.
        gr.Examples(
            examples=examples,
            inputs=[image, text_input],
        )


### Postprocessing Utils for Segmentation Tokens
### Segmentation tokens are decoded into 64x64 masks by a separate VAE
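# For context on the format parsed below: PaliGemma encodes each detection as
# four '<locXXXX>' tokens (y1, x1, y2, x2 on a 0-1023 grid, scaled to pixel
# coordinates here), optionally followed by exactly sixteen '<segXXX>' codebook
# tokens for the mask, then a free-text label; multiple objects are separated
# by '; '. The token values below are illustrative, not real model output:
#
#   '<loc0010><loc0020><loc0500><loc0600> <seg010>...<seg101> cell ; ...'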
def extract_objs(text, width, height, unique_labels=False):
    """Returns objs for a string with "<loc>" and "<seg>" tokens."""
    objs = []
    seen = set()
    while text:
        m = _SEGMENT_DETECT_RE.match(text)
        if not m:
            break
        gs = list(m.groups())
        before = gs.pop(0)
        name = gs.pop()
        # Box tokens are on a 0-1023 grid; rescale to pixel coordinates.
        y1, x1, y2, x2 = [int(x) / 1024 for x in gs[:4]]
        y1, x1, y2, x2 = map(round, (y1 * height, x1 * width,
                                     y2 * height, x2 * width))
        seg_indices = gs[4:20]
        if seg_indices[0] is None:
            mask = None
        else:
            # Decode the 16 codebook indices into a 64x64 mask, then paste the
            # resized mask into the bounding box on a full-size canvas.
            seg_indices = np.array([int(x) for x in seg_indices], dtype=np.int32)
            m64, = vae_model.reconstruct_masks(seg_indices[None])[..., 0]
            m64 = np.clip(np.array(m64) * 0.5 + 0.5, 0, 1)
            m64 = PIL.Image.fromarray((m64 * 255).astype('uint8'))
            mask = np.zeros([height, width])
            if y2 > y1 and x2 > x1:
                mask[y1:y2, x1:x2] = np.array(m64.resize([x2 - x1, y2 - y1])) / 255.0

        content = m.group()
        if before:
            objs.append(dict(content=before))
            content = content[len(before):]
        # Disambiguate duplicate labels by appending apostrophes.
        while unique_labels and name in seen:
            name = (name or '') + "'"
        seen.add(name)
        objs.append(dict(
            content=content, xyxy=(x1, y1, x2, y2), mask=mask, name=name))
        text = text[len(before) + len(content):]

    if text:
        objs.append(dict(content=text))
    return objs


_SEGMENT_DETECT_RE = re.compile(
    r'(.*?)' +
    r'<loc(\d{4})>' * 4 + r'\s*' +
    '(?:%s)?' % (r'<seg(\d{3})>' * 16) +
    r'\s*([^;<>]+)? ?(?:; )?',
)


if __name__ == "__main__":
    demo.queue(max_size=10).launch(debug=True)
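# Minimal offline sanity check of the parsing path (hypothetical token values;
# no model or VAE call is needed since the string has no '<seg>' tokens):
#
#   objs = extract_objs('<loc0010><loc0020><loc0500><loc0600> cell', 224, 224)
#   objs[0]['name']  == 'cell'
#   objs[0]['xyxy']  == (4, 2, 131, 109)   # x1, y1, x2, y2 in pixels
#   objs[0]['mask']  is None               # no <seg> tokens in the string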