MohamedRashad committed
Commit 689e677 · 1 Parent(s): 87af913

Update SDK version, remove FluxPipeline, and clean up requirements

Files changed (4)
  1. README.md +1 -1
  2. app.py +1 -9
  3. live_preview_helpers.py +0 -165
  4. requirements.txt +0 -5
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 👀
  colorFrom: gray
  colorTo: blue
  sdk: gradio
- sdk_version: 4.44.1
+ sdk_version: 5.8.0
  app_file: app.py
  pinned: false
  short_description: Tool to generate 3D assets for games
app.py CHANGED
@@ -15,16 +15,10 @@ from trellis.pipelines import TrellisImageTo3DPipeline
  from trellis.representations import Gaussian, MeshExtractResult
  from trellis.utils import render_utils, postprocessing_utils
  from gradio_client import Client
- from diffusers import FluxPipeline
- from huggingface_hub import InferenceClient
 
  llm_client = Client("Qwen/Qwen2.5-72B-Instruct")
  t2i_client = Client("black-forest-labs/FLUX.1-dev")
 
- # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- # pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cpu")
- # print(f"Flux pipeline loaded on {pipe.device}")
-
  def generate_t2i_prompt(item_name):
      llm_prompt_template = """You are tasked with creating a concise yet highly detailed description of an item to be used for generating an image in a game development pipeline. The image should show the **entire item** with no parts cropped or hidden. The background should always be plain and monocolor, with no focus on it.
 
@@ -71,8 +65,6 @@ def preprocess_pil_image(image: Image.Image) -> Tuple[str, Image.Image]:
      return trial_id, processed_image
 
  def generate_item_image(object_t2i_prompt):
-     # image = pipe(prompt=object_t2i_prompt, guidance_scale=3.5, num_inference_steps=28, width=1024, height=1024, generator=torch.Generator("cpu").manual_seed(0), output_type="pil").images[0]
-     # image = client.text_to_image(object_t2i_prompt, guidance_scale=3.5, num_inference_steps=28, width=1024, height=1024)
      img_path = t2i_client.predict(
          prompt=object_t2i_prompt,
          seed=0,
@@ -218,7 +210,7 @@ with gr.Blocks(title="Game Items Generator") as demo:
 
      with gr.Row():
          with gr.Column():
-             with gr.Row():
+             with gr.Row(equal_height=True):
                  item_text_field = gr.Textbox(label="Item Name", placeholder="Enter the name of the item", lines=2, scale=4)
                  enhance_prompt_btn = gr.Button("Enhance Prompt", variant="primary", scale=1)
              generate_image_btn = gr.Button("Generate Image", variant="primary")
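Side note: with the local FluxPipeline removed, image generation now goes through gradio_client against the hosted FLUX.1-dev Space. A minimal sketch of that remote call, for context (the api_name "/infer", the extra parameters, and the tuple return shape are assumptions about the Space's API, not taken from this diff):

from gradio_client import Client
from PIL import Image

# Remote text-to-image via the hosted Space instead of a local diffusers pipeline.
t2i_client = Client("black-forest-labs/FLUX.1-dev")

result = t2i_client.predict(
    prompt="a rusty iron longsword, plain grey background",
    seed=0,
    randomize_seed=False,          # assumed parameter name on the Space
    width=1024,
    height=1024,
    guidance_scale=3.5,
    num_inference_steps=28,
    api_name="/infer",             # assumed endpoint name
)

# The Space is assumed to return (image_path, seed); keep only the image path.
img_path = result[0] if isinstance(result, (tuple, list)) else result
image = Image.open(img_path)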
live_preview_helpers.py DELETED
@@ -1,165 +0,0 @@
- import torch
- import numpy as np
- from typing import Any, Dict, List, Optional, Union
-
- # Helper functions
- def calculate_shift(
-     image_seq_len,
-     base_seq_len: int = 256,
-     max_seq_len: int = 4096,
-     base_shift: float = 0.5,
-     max_shift: float = 1.16,
- ):
-     m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
-     b = base_shift - m * base_seq_len
-     mu = image_seq_len * m + b
-     return mu
-
- def retrieve_timesteps(
-     scheduler,
-     num_inference_steps: Optional[int] = None,
-     device: Optional[Union[str, torch.device]] = None,
-     timesteps: Optional[List[int]] = None,
-     sigmas: Optional[List[float]] = None,
-     **kwargs,
- ):
-     if timesteps is not None and sigmas is not None:
-         raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
-     if timesteps is not None:
-         scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
-         timesteps = scheduler.timesteps
-         num_inference_steps = len(timesteps)
-     elif sigmas is not None:
-         scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
-         timesteps = scheduler.timesteps
-         num_inference_steps = len(timesteps)
-     else:
-         scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
-         timesteps = scheduler.timesteps
-     return timesteps, num_inference_steps
-
- # FLUX pipeline function
- @torch.inference_mode()
- def flux_pipe_call_that_returns_an_iterable_of_images(
-     self,
-     prompt: Union[str, List[str]] = None,
-     prompt_2: Optional[Union[str, List[str]]] = None,
-     height: Optional[int] = None,
-     width: Optional[int] = None,
-     num_inference_steps: int = 28,
-     timesteps: List[int] = None,
-     guidance_scale: float = 3.5,
-     num_images_per_prompt: Optional[int] = 1,
-     generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-     latents: Optional[torch.FloatTensor] = None,
-     prompt_embeds: Optional[torch.FloatTensor] = None,
-     pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
-     output_type: Optional[str] = "pil",
-     return_dict: bool = True,
-     joint_attention_kwargs: Optional[Dict[str, Any]] = None,
-     max_sequence_length: int = 512,
-     good_vae: Optional[Any] = None,
- ):
-     height = height or self.default_sample_size * self.vae_scale_factor
-     width = width or self.default_sample_size * self.vae_scale_factor
-
-     # 1. Check inputs
-     self.check_inputs(
-         prompt,
-         prompt_2,
-         height,
-         width,
-         prompt_embeds=prompt_embeds,
-         pooled_prompt_embeds=pooled_prompt_embeds,
-         max_sequence_length=max_sequence_length,
-     )
-
-     self._guidance_scale = guidance_scale
-     self._joint_attention_kwargs = joint_attention_kwargs
-     self._interrupt = False
-
-     # 2. Define call parameters
-     batch_size = 1 if isinstance(prompt, str) else len(prompt)
-     device = self._execution_device
-
-     # 3. Encode prompt
-     lora_scale = joint_attention_kwargs.get("scale", None) if joint_attention_kwargs is not None else None
-     prompt_embeds, pooled_prompt_embeds, text_ids = self.encode_prompt(
-         prompt=prompt,
-         prompt_2=prompt_2,
-         prompt_embeds=prompt_embeds,
-         pooled_prompt_embeds=pooled_prompt_embeds,
-         device=device,
-         num_images_per_prompt=num_images_per_prompt,
-         max_sequence_length=max_sequence_length,
-         lora_scale=lora_scale,
-     )
-     # 4. Prepare latent variables
-     num_channels_latents = self.transformer.config.in_channels // 4
-     latents, latent_image_ids = self.prepare_latents(
-         batch_size * num_images_per_prompt,
-         num_channels_latents,
-         height,
-         width,
-         prompt_embeds.dtype,
-         device,
-         generator,
-         latents,
-     )
-     # 5. Prepare timesteps
-     sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps)
-     image_seq_len = latents.shape[1]
-     mu = calculate_shift(
-         image_seq_len,
-         self.scheduler.config.base_image_seq_len,
-         self.scheduler.config.max_image_seq_len,
-         self.scheduler.config.base_shift,
-         self.scheduler.config.max_shift,
-     )
-     timesteps, num_inference_steps = retrieve_timesteps(
-         self.scheduler,
-         num_inference_steps,
-         device,
-         timesteps,
-         sigmas,
-         mu=mu,
-     )
-     self._num_timesteps = len(timesteps)
-
-     # Handle guidance
-     guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32).expand(latents.shape[0]) if self.transformer.config.guidance_embeds else None
-
-     # 6. Denoising loop
-     for i, t in enumerate(timesteps):
-         if self.interrupt:
-             continue
-
-         timestep = t.expand(latents.shape[0]).to(latents.dtype)
-
-         noise_pred = self.transformer(
-             hidden_states=latents,
-             timestep=timestep / 1000,
-             guidance=guidance,
-             pooled_projections=pooled_prompt_embeds,
-             encoder_hidden_states=prompt_embeds,
-             txt_ids=text_ids,
-             img_ids=latent_image_ids,
-             joint_attention_kwargs=self.joint_attention_kwargs,
-             return_dict=False,
-         )[0]
-         # Yield intermediate result
-         latents_for_image = self._unpack_latents(latents, height, width, self.vae_scale_factor)
-         latents_for_image = (latents_for_image / self.vae.config.scaling_factor) + self.vae.config.shift_factor
-         image = self.vae.decode(latents_for_image, return_dict=False)[0]
-         yield self.image_processor.postprocess(image, output_type=output_type)[0]
-
-         latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
-         torch.cuda.empty_cache()
-
-     # Final image using good_vae
-     latents = self._unpack_latents(latents, height, width, self.vae_scale_factor)
-     latents = (latents / good_vae.config.scaling_factor) + good_vae.config.shift_factor
-     image = good_vae.decode(latents, return_dict=False)[0]
-     self.maybe_free_model_hooks()
-     torch.cuda.empty_cache()
-     yield self.image_processor.postprocess(image, output_type=output_type)[0]
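For reference, the deleted helper was a generator-style FluxPipeline call that yields intermediate previews during denoising. A minimal sketch of how such a helper is typically bound and consumed (the model id, device, and the value passed as good_vae are assumptions, not taken from this repo):

import torch
from diffusers import FluxPipeline
from live_preview_helpers import flux_pipe_call_that_returns_an_iterable_of_images

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
).to("cuda")

# Bind the function as a method so `self` inside it resolves to the pipeline.
pipe.flux_pipe_call_that_returns_an_iterable_of_images = (
    flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)
)

# Each yield is an intermediate PIL image; the final yield is decoded with good_vae.
for preview in pipe.flux_pipe_call_that_returns_an_iterable_of_images(
    prompt="a wooden treasure chest, plain grey background",
    num_inference_steps=28,
    guidance_scale=3.5,
    good_vae=pipe.vae,  # assumption: normally a separate full-precision VAE
):
    preview.save("latest_preview.png")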
requirements.txt CHANGED
@@ -1,11 +1,6 @@
  --extra-index-url https://download.pytorch.org/whl/cu121
  --find-links https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-2.4.0_cu121.html
 
- accelerate
- sentencepiece
- diffusers
- gradio_client==1.4.0
- huggingface-hub==0.26.5
  torch==2.4.0
  torchvision==0.19.0
  pillow==10.4.0
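A quick sanity check that an environment still satisfies the remaining pins after the cleanup (a generic sketch, not part of the repo):

import torch, torchvision, PIL

print(torch.__version__)        # pinned to 2.4.0 in requirements.txt
print(torchvision.__version__)  # pinned to 0.19.0
print(PIL.__version__)          # pillow pinned to 10.4.0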