# File size: 4,693 Bytes

from typing import  Dict, List, Any
from PIL import Image
import torch
from torch import autocast
from tqdm.auto import tqdm
from point_e.diffusion.configs import DIFFUSION_CONFIGS, diffusion_from_config
from point_e.diffusion.sampler import PointCloudSampler
from point_e.models.download import load_checkpoint
from point_e.models.configs import MODEL_CONFIGS, model_from_config
from point_e.util.plotting import plot_point_cloud
import json
import base64
import numpy as np
from io import BytesIO


# Resolve the torch device once at import time. The module refuses to load
# without CUDA: everything below is built directly on this device.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

if not device.type == 'cuda':
    raise ValueError("need to run on GPU")

class EndpointHandler():
    """Inference handler that turns a text prompt or an image into a point cloud.

    Three Point-E models are created on the module-level ``device`` at
    construction time: a text-conditioned base model (``base40M-textvec``),
    an image-conditioned base model (``base40M``) and an upsampler. Each call
    builds a ``PointCloudSampler`` from the pair of models matching the
    request and returns the sampled point cloud as JSON strings.
    """

    def __init__(self, path=""):
        """Create the models and load their checkpoints.

        Args:
            path: Accepted for interface compatibility; it is not used here
                because checkpoints are fetched through ``load_checkpoint``.
        """
        print('creating base model...')
        self.base_name = 'base40M-textvec'
        self.base_model = model_from_config(MODEL_CONFIGS[self.base_name], device)
        self.base_model.eval()
        self.base_diffusion = diffusion_from_config(DIFFUSION_CONFIGS[self.base_name])

        print('creating image model...')
        self.base_image_name = 'base40M'
        self.base_image_model = model_from_config(MODEL_CONFIGS[self.base_image_name], device)
        self.base_image_model.eval()
        # Stored under its own attribute so it does not overwrite the
        # diffusion that belongs to the text-conditioned model.
        self.base_image_diffusion = diffusion_from_config(DIFFUSION_CONFIGS[self.base_image_name])

        print('creating upsample model...')
        self.upsampler_model = model_from_config(MODEL_CONFIGS['upsample'], device)
        self.upsampler_model.eval()
        self.upsampler_diffusion = diffusion_from_config(DIFFUSION_CONFIGS['upsample'])

        print('downloading base checkpoint...')
        self.base_model.load_state_dict(load_checkpoint(self.base_name, device))
        self.base_image_model.load_state_dict(load_checkpoint(self.base_image_name, device))

        print('downloading upsampler checkpoint...')
        self.upsampler_model.load_state_dict(load_checkpoint('upsample', device))

    def __call__(self, data: Any) -> Dict[str, str]:
        """Sample one point cloud for the request.

        Args:
            data: Request payload. When it contains an ``"image"`` key, that
                image is used as the conditioning signal; otherwise the value
                under ``"inputs"`` (or the payload itself when that key is
                absent) is passed to the sampler as the text prompt. Both
                keys are popped from ``data``.

        Returns:
            A dict with two JSON-encoded strings: ``"data"`` holds the point
            coordinates and ``"channels"`` holds the per-channel values.

        Raises:
            TypeError: If ``"image"`` is present but of an unsupported type.
        """
        # Decide the mode up front so ``use_image`` is always bound, including
        # for text-only requests.
        use_image = "image" in data
        if use_image:
            image_data = data.pop("image")
            print('image data found')
        else:
            image_data = None
            print('no image data found')

        inputs = data.pop("inputs", data)

        if use_image:
            # Decode before building the sampler so bad input fails early.
            model_kwargs = dict(images=[self._load_image(image_data)])
        else:
            model_kwargs = dict(texts=[inputs])

        sampler = self._build_sampler(use_image)

        # run inference pipeline
        with autocast(device.type):
            samples = None
            # Only the last yielded value of the progressive sampler is kept.
            for x in tqdm(sampler.sample_batch_progressive(batch_size=1, model_kwargs=model_kwargs)):
                samples = x

        pc = sampler.output_to_point_clouds(samples)[0]
        return self._serialize_point_cloud(pc)

    def _build_sampler(self, use_image):
        """Return a ``PointCloudSampler`` wired to the image or the text models."""
        num_points = [1024, 4096 - 1024]
        aux_channels = ['R', 'G', 'B']

        if use_image:
            return PointCloudSampler(
                device=device,
                models=[self.base_image_model, self.upsampler_model],
                diffusions=[self.base_image_diffusion, self.upsampler_diffusion],
                num_points=num_points,
                aux_channels=aux_channels,
                guidance_scale=[3.0, 3.0],
            )

        return PointCloudSampler(
            device=device,
            models=[self.base_model, self.upsampler_model],
            diffusions=[self.base_diffusion, self.upsampler_diffusion],
            num_points=num_points,
            aux_channels=aux_channels,
            guidance_scale=[3.0, 0.0],
            model_kwargs_key_filter=('texts', ''),  # Do not condition the upsampler at all
        )

    @staticmethod
    def _load_image(image_data):
        """Turn the request's ``"image"`` value into a PIL image.

        NOTE(review): the wire format is not defined anywhere in this file.
        This assumes a ``str`` is base64 text (optionally a ``data:`` URL)
        and that ``bytes`` are the raw encoded image file - confirm against
        the client that calls this endpoint.

        Raises:
            TypeError: If ``image_data`` is none of ``str``, ``bytes``,
                ``bytearray`` or a PIL image.
        """
        if isinstance(image_data, str):
            # Base64 text never contains a comma, so this strips a
            # "data:image/...;base64," prefix and is a no-op otherwise.
            payload = image_data.rpartition(',')[2]
            raw = base64.b64decode(payload)
        elif isinstance(image_data, (bytes, bytearray)):
            raw = bytes(image_data)
        elif isinstance(image_data, Image.Image):
            return image_data
        else:
            raise TypeError(
                f"unsupported image payload type: {type(image_data).__name__}"
            )
        return Image.open(BytesIO(raw))

    @staticmethod
    def _serialize_point_cloud(pc):
        """Encode a point cloud's ``coords`` and ``channels`` as JSON strings.

        ``pc.coords`` and every value of ``pc.channels`` must provide
        ``.tolist()`` (e.g. NumPy arrays), which yields plain Python lists
        that ``json.dumps`` can serialize.
        """
        channels = {key: value.tolist() for key, value in pc.channels.items()}
        return {
            'data': json.dumps(pc.coords.tolist()),
            'channels': json.dumps(channels),
        }