Spaces:

hpwang
/

VistaDream

Build error

File size: 3,402 Bytes

fd5e0f7
 
 
 
 
 
 
 
57bbbad
fd5e0f7

'''
render using frames in GS
inpaint with fooocus
'''
import torch
import numpy as np
from ops.llava import Llava
from ops.gs.basic import Frame
from ops.fooocus import Fooocus

class Inpaint_Tool():
    '''
    Caption a frame with LLaVA (only when it has no prompt yet) and then
    inpaint or outpaint it with Fooocus.

    The LLaVA model is created on the CPU and is moved to the GPU only for
    the duration of a captioning call.
    '''
    # Tail of the LLaVA query. The caption is whatever follows the last
    # occurrence of this marker in the text returned by LLaVA.
    _CAPTION_MARKER = 'ASSISTANT: This image is taken from a scene of '

    def __init__(self,cfg) -> None:
        '''
        cfg: configuration object; cfg.model.vlm.llava.ckpt is handed to
             Llava as its checkpoint.
        '''
        self.cfg = cfg
        self._load_model()
        
    def _load_model(self):
        '''
        Create the Fooocus and LLaVA wrappers (LLaVA is created with device='cpu').
        '''
        self.fooocus = Fooocus()
        self.llava = Llava(device='cpu',llava_ckpt=self.cfg.model.vlm.llava.ckpt)

    def _llava_prompt(self,frame):
        '''
        Return the fixed captioning query sent to LLaVA.
        `frame` is accepted for interface symmetry but is not used.
        NOTE: the backslash continuations embed the source indentation into
        the query; the literal is left untouched because it is model input.
        '''
        prompt = '<image>\n \
                USER: Detaily imagine and describe the scene this image taken from? \
                \n ASSISTANT: This image is taken from a scene of ' 
        return prompt   

    @staticmethod
    def _extract_caption(text):
        '''
        Return the part of `text` after the last caption marker.
        If the marker is missing the whole text is returned, instead of
        slicing at a bogus offset derived from rfind() == -1.
        '''
        # rpartition yields ('', '', text) when the marker is absent, so the
        # last element is the right answer in both cases.
        return text.rpartition(Inpaint_Tool._CAPTION_MARKER)[-1]

    def __call__(self, frame:Frame, outpaint_selections=None, outpaint_extend_times=0.0):
        '''
        Inpaint (default) or outpaint `frame` in place and return it.

        frame: must be Frame type. Reads frame.prompt, frame.rgb,
               frame.inpaint and frame.intrinsic; writes frame.prompt,
               frame.rgb and, when outpainting, frame.H, frame.W,
               frame.inpaint and the principal-point entries of
               frame.intrinsic.
        outpaint_selections: directions forwarded to Fooocus. None or empty
               means plain inpainting with frame.inpaint as the mask;
               otherwise exactly four entries are required.
        outpaint_extend_times: forwarded unchanged to Fooocus.

        Raises AssertionError when outpainting is requested with a number
        of selections other than four.
        '''
        # A fresh list per call: the value is handed to external code, so a
        # shared mutable default must not be used.
        if outpaint_selections is None:
            outpaint_selections = []
        outpainting = len(outpaint_selections) > 0
        if outpainting:
            # Checked before any model runs: the re-centering below assumes
            # the original image ends up in the middle of the enlarged one.
            assert len(outpaint_selections) == 4, \
                'outpainting expects exactly 4 outpaint_selections'

        # conduct reconstruction
        # ----------------------- LLaVA -----------------------
        if frame.prompt is None:
            print('Inpaint-Caption[1/3] Move llava.model to GPU...')
            self.llava.model.to('cuda')
            try:
                print('Inpaint-Caption[2/3] Llava inpainting instruction:')
                query  = self._llava_prompt(frame)
                prompt = self._extract_caption(self.llava(frame.rgb,query))
                print(prompt)
            finally:
                # Always give the GPU memory back, even if captioning failed.
                print('Inpaint-Caption[3/3] Move llava.model to CPU...')
                self.llava.model.to('cpu')
                torch.cuda.empty_cache()
            frame.prompt = prompt
        else:
            prompt = frame.prompt
            print(f'Using pre-generated prompt: {prompt}')

        # --------------------- Fooocus ----------------------
        print('Inpaint-Fooocus[1/2] Fooocus inpainting...')
        image = frame.rgb
        # Outpainting passes an all-False mask; inpainting uses the frame's own mask.
        mask = np.zeros_like(image,bool) if outpainting else frame.inpaint
        fooocus_result = self.fooocus(image_number=1,
                            prompt=prompt + ' 8K, no large circles, no cameras, no fisheye.',
                            negative_prompt='Any fisheye, any large circles, any blur, unrealism.',
                            outpaint_selections=outpaint_selections,
                            outpaint_extend_times=outpaint_extend_times,
                            origin_image=image,
                            mask_image=mask,)[0]
        torch.cuda.empty_cache()
        
        # reset the frame for outpainting
        if outpainting:
            small_H, small_W = frame.rgb.shape[0:2]
            large_H, large_W = fooocus_result.shape[0:2]
            if frame.intrinsic is not None:
                # NO CHANGE TO FOCAL: only the principal point moves to the
                # centre of the enlarged image.
                frame.intrinsic[0,-1] = large_W//2 
                frame.intrinsic[1,-1] = large_H//2 
            frame.H = large_H
            frame.W = large_W
            # Top-left corner of the original image inside the enlarged one.
            begin_H = (large_H-small_H)//2
            begin_W = (large_W-small_W)//2
            # True marks newly generated border pixels, False the original
            # content. Built as bool directly so it does not depend on the
            # dtype of the Fooocus output.
            inpaint = np.ones((large_H, large_W), dtype=bool)
            inpaint[begin_H:(begin_H+small_H),begin_W:(begin_W+small_W)] = False
            frame.inpaint = inpaint
        frame.rgb = fooocus_result
        
        print('Inpaint-Fooocus[2/2] Assign Frame...')
        return frame