File size: 4,693 Bytes
f22523d
f18894a
f22523d
 
 
 
 
 
 
 
db2ce80
f22523d
db2ce80
f22523d
 
 
 
 
 
 
 
 
 
 
 
 
42799d6
f18894a
3893e21
f22523d
 
 
 
f18894a
 
 
 
 
 
f22523d
 
 
 
 
 
 
f18894a
f22523d
 
 
 
 
 
 
 
 
 
 
 
42799d6
 
 
 
 
 
fa46e5f
42799d6
f18894a
42799d6
f22523d
 
f18894a
 
 
 
 
 
 
 
 
f22523d
f18894a
 
 
 
 
 
 
 
 
 
 
 
f22523d
 
 
 
f18894a
 
 
 
 
 
 
f22523d
 
 
db2ce80
5260df5
 
db2ce80
 
 
5260df5
f22523d
5260df5
 
f22523d
5260df5
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from typing import  Dict, List, Any
from PIL import Image
import torch
from torch import autocast
from tqdm.auto import tqdm
from point_e.diffusion.configs import DIFFUSION_CONFIGS, diffusion_from_config
from point_e.diffusion.sampler import PointCloudSampler
from point_e.models.download import load_checkpoint
from point_e.models.configs import MODEL_CONFIGS, model_from_config
from point_e.util.plotting import plot_point_cloud
import json
import base64
import numpy as np
from io import BytesIO


# Pick the compute device once at import time; every model below is placed on it.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Fail fast: this handler refuses to load on anything other than a CUDA device.
if not device.type == 'cuda':
    raise ValueError("need to run on GPU")

class EndpointHandler():
    """Inference handler that turns a text prompt or an image into a point cloud.

    Three Point-E models are loaded once at construction time: a text-conditioned
    base model ('base40M-textvec'), an image-conditioned base model ('base40M')
    and a shared upsampler ('upsample'). Each call builds a ``PointCloudSampler``
    from the base model matching the request and returns the sampled point cloud
    serialized as JSON strings.
    """

    def __init__(self, path=""):
        """Create the models and diffusion processes and load their checkpoints.

        Args:
            path: Accepted for interface compatibility; not used by this handler.
        """
        print('creating base model...')
        self.base_name = 'base40M-textvec'
        self.base_model = model_from_config(MODEL_CONFIGS[self.base_name], device)
        self.base_model.eval()
        self.base_diffusion = diffusion_from_config(DIFFUSION_CONFIGS[self.base_name])

        print('creating image model...')
        self.base_image_name = 'base40M'
        self.base_image_model = model_from_config(MODEL_CONFIGS[self.base_image_name], device)
        self.base_image_model.eval()
        # Kept separate from self.base_diffusion so the text pipeline's diffusion
        # is not overwritten by the image model's configuration.
        self.base_image_diffusion = diffusion_from_config(DIFFUSION_CONFIGS[self.base_image_name])

        print('creating upsample model...')
        self.upsampler_model = model_from_config(MODEL_CONFIGS['upsample'], device)
        self.upsampler_model.eval()
        self.upsampler_diffusion = diffusion_from_config(DIFFUSION_CONFIGS['upsample'])

        print('downloading base checkpoint...')
        self.base_model.load_state_dict(load_checkpoint(self.base_name, device))
        self.base_image_model.load_state_dict(load_checkpoint(self.base_image_name, device))

        print('downloading upsampler checkpoint...')
        self.upsampler_model.load_state_dict(load_checkpoint('upsample', device))

    @staticmethod
    def _decode_image(image_data):
        """Turn the request's ``image`` payload into a PIL image.

        Accepts an already-decoded PIL image, raw encoded image bytes, or a
        base64 string (optionally prefixed with a ``data:...;base64,`` header).
        NOTE(review): the wire format is assumed to be base64 because the file
        imports ``base64``/``BytesIO`` -- confirm against the client.
        """
        if isinstance(image_data, Image.Image):
            return image_data
        if isinstance(image_data, str):
            if image_data.startswith('data:') and ',' in image_data:
                # Drop the data-URL header and keep only the base64 payload.
                image_data = image_data.split(',', 1)[1]
            image_data = base64.b64decode(image_data)
        return Image.open(BytesIO(image_data))

    @staticmethod
    def _serialize_point_cloud(pc) -> Dict[str, str]:
        """Serialize a point cloud's ``coords`` and ``channels`` to JSON strings."""
        # Arrays are converted to plain lists first because json cannot encode them.
        channels = {key: value.tolist() for key, value in pc.channels.items()}
        return {
            'data': json.dumps(pc.coords.tolist()),
            'channels': json.dumps(channels),
        }

    def __call__(self, data: Any) -> Dict[str, str]:
        """Run the sampling pipeline for one request.

        Args:
            data: Request payload. An ``"image"`` entry, when present, selects the
                image-conditioned pipeline; otherwise the ``"inputs"`` entry is
                used as the text prompt (falling back to ``data`` itself when
                ``"inputs"`` is missing).

        Returns:
            A dict with two JSON-encoded strings: ``'data'`` holds the point
            coordinates and ``'channels'`` maps each channel name to its values.
        """
        # An image key switches the request from text to image conditioning.
        use_image = "image" in data
        if use_image:
            image_data = data.pop("image")
            print('image data found')
        else:
            print('no image data found')

        inputs = data.pop("inputs", data)

        if use_image:
            sampler = PointCloudSampler(
                device=device,
                models=[self.base_image_model, self.upsampler_model],
                diffusions=[self.base_image_diffusion, self.upsampler_diffusion],
                num_points=[1024, 4096 - 1024],
                aux_channels=['R', 'G', 'B'],
                guidance_scale=[3.0, 3.0],
            )
            # Condition on the image supplied with the request.
            model_kwargs = dict(images=[self._decode_image(image_data)])
        else:
            sampler = PointCloudSampler(
                device=device,
                models=[self.base_model, self.upsampler_model],
                diffusions=[self.base_diffusion, self.upsampler_diffusion],
                num_points=[1024, 4096 - 1024],
                aux_channels=['R', 'G', 'B'],
                guidance_scale=[3.0, 0.0],
                model_kwargs_key_filter=('texts', ''),  # Do not condition the upsampler at all
            )
            model_kwargs = dict(texts=[inputs])

        # run inference pipeline; only the final progressive sample is kept
        samples = None
        with autocast(device.type):
            for x in tqdm(sampler.sample_batch_progressive(batch_size=1, model_kwargs=model_kwargs)):
                samples = x

        pc = sampler.output_to_point_clouds(samples)[0]
        print('type of pc: ', type(pc))

        return self._serialize_point_cloud(pc)