torinriley committed on
Commit
74045df
·
1 Parent(s): 2da4017
Files changed (2) hide show
  1. app.py +11 -35
  2. src/config.py +6 -6
app.py CHANGED
@@ -21,8 +21,8 @@ data_dir = Path("data")
21
  data_dir.mkdir(exist_ok=True)
22
 
23
  # Model configuration
24
- MODEL_REPO = "stabilityai/stable-diffusion-2-1"
25
- MODEL_FILENAME = "v2-1_768-ema-pruned.ckpt"
26
  model_file = data_dir / MODEL_FILENAME
27
 
28
  # Download model if it doesn't exist
@@ -40,19 +40,17 @@ if not model_file.exists():
40
  device = "cuda" if torch.cuda.is_available() else "cpu"
41
  print(f"Using device: {device}")
42
 
43
- # Initialize configuration - use the correct tokenizer for SD 2.1
44
  config = Config(
45
  device=DeviceConfig(device=device),
46
- tokenizer=CLIPTokenizer.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s32B-b79K")
47
  )
48
 
49
  # Load models
50
  config.models = model_loader.load_models(str(model_file), device)
51
 
52
  MAX_SEED = np.iinfo(np.int32).max
53
- MAX_IMAGE_SIZE = 1280
54
- DEFAULT_WIDTH = 768
55
- DEFAULT_HEIGHT = 768
56
 
57
  def txt2img(
58
  prompt,
@@ -186,9 +184,9 @@ def inpaint(
186
  return None, seed
187
 
188
  examples = [
189
- "A stunning 4K hyperrealistic photograph of a futuristic cityscape at night, with towering skyscrapers, flying vehicles, and holographic advertisements reflecting in the water below",
190
- "An intricate fantasy landscape with crystal mountains, bioluminescent plants, and a magical waterfall under twin moons, highly detailed 8K rendering",
191
- "Professional portrait photograph of a cyberpunk character with detailed neon implants and holographic tattoos, studio lighting, shallow depth of field, shot on Sony A7R IV",
192
  ]
193
 
194
  css = """
@@ -207,33 +205,11 @@ css = """
207
  color: #666;
208
  margin-top: 20px;
209
  }
210
-
211
- .model-info {
212
- font-size: 0.9em;
213
- padding: 10px;
214
- background-color: #f0f9ff;
215
- border-radius: 5px;
216
- margin: 10px 0;
217
- border-left: 3px solid #0ea5e9;
218
- }
219
  """
220
 
221
  with gr.Blocks(css=css) as demo:
222
  with gr.Column(elem_id="col-container"):
223
- gr.Markdown(" # LiteDiffusion 2.1: High-Res Text-to-Image Magic ✨")
224
-
225
- gr.Markdown(
226
- """<div class="model-info">
227
- 📈 <b>Upgraded to Stable Diffusion 2.1!</b> Now featuring:
228
- <ul>
229
- <li>Higher resolution outputs (768×768)</li>
230
- <li>Improved image quality and detail</li>
231
- <li>Better prompt understanding</li>
232
- <li>Enhanced inpainting capabilities</li>
233
- </ul>
234
- </div>""",
235
- elem_classes="model-info"
236
- )
237
 
238
  with gr.Tabs(elem_classes="tabs") as tabs:
239
  with gr.TabItem("Text-to-Image"):
@@ -311,7 +287,7 @@ with gr.Blocks(css=css) as demo:
311
  minimum=256,
312
  maximum=MAX_IMAGE_SIZE,
313
  step=32,
314
- value=DEFAULT_WIDTH,
315
  )
316
 
317
  height = gr.Slider(
@@ -319,7 +295,7 @@ with gr.Blocks(css=css) as demo:
319
  minimum=256,
320
  maximum=MAX_IMAGE_SIZE,
321
  step=32,
322
- value=DEFAULT_HEIGHT,
323
  )
324
 
325
  with gr.Row():
 
21
  data_dir.mkdir(exist_ok=True)
22
 
23
  # Model configuration
24
+ MODEL_REPO = "stable-diffusion-v1-5/stable-diffusion-v1-5"
25
+ MODEL_FILENAME = "v1-5-pruned-emaonly.ckpt"
26
  model_file = data_dir / MODEL_FILENAME
27
 
28
  # Download model if it doesn't exist
 
40
  device = "cuda" if torch.cuda.is_available() else "cpu"
41
  print(f"Using device: {device}")
42
 
43
+ # Initialize configuration
44
  config = Config(
45
  device=DeviceConfig(device=device),
46
+ tokenizer=CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
47
  )
48
 
49
  # Load models
50
  config.models = model_loader.load_models(str(model_file), device)
51
 
52
  MAX_SEED = np.iinfo(np.int32).max
53
+ MAX_IMAGE_SIZE = 1024
 
 
54
 
55
  def txt2img(
56
  prompt,
 
184
  return None, seed
185
 
186
  examples = [
187
+ "A ultra sharp photorealtici painting of a futuristic cityscape at night with neon lights and flying cars",
188
+ "A serene mountain landscape at sunset with snow-capped peaks and a clear lake reflection",
189
+ "A detailed portrait of a cyberpunk character with glowing neon implants and holographic tattoos",
190
  ]
191
 
192
  css = """
 
205
  color: #666;
206
  margin-top: 20px;
207
  }
 
 
 
 
 
 
 
 
 
208
  """
209
 
210
  with gr.Blocks(css=css) as demo:
211
  with gr.Column(elem_id="col-container"):
212
+ gr.Markdown(" # LiteDiffusion")
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  with gr.Tabs(elem_classes="tabs") as tabs:
215
  with gr.TabItem("Text-to-Image"):
 
287
  minimum=256,
288
  maximum=MAX_IMAGE_SIZE,
289
  step=32,
290
+ value=512,
291
  )
292
 
293
  height = gr.Slider(
 
295
  minimum=256,
296
  maximum=MAX_IMAGE_SIZE,
297
  step=32,
298
+ value=512,
299
  )
300
 
301
  with gr.Row():
src/config.py CHANGED
@@ -4,16 +4,16 @@ import torch
4
 
5
  @dataclass
6
  class ModelConfig:
7
- # Image dimensions (updated for SD 2.1)
8
- width: int = 768
9
- height: int = 768
10
- latents_width: int = 96 # width // 8
11
- latents_height: int = 96 # height // 8
12
 
13
  # Model architecture parameters
14
  n_embd: int = 1280
15
  n_head: int = 8
16
- d_context: int = 1024 # Updated for SD 2.1 OpenCLIP
17
 
18
  # UNet parameters
19
  n_time: int = 1280
 
4
 
5
  @dataclass
6
  class ModelConfig:
7
+ # Image dimensions
8
+ width: int = 512
9
+ height: int = 512
10
+ latents_width: int = 64 # width // 8
11
+ latents_height: int = 64 # height // 8
12
 
13
  # Model architecture parameters
14
  n_embd: int = 1280
15
  n_head: int = 8
16
+ d_context: int = 768
17
 
18
  # UNet parameters
19
  n_time: int = 1280