torinriley committed on
Commit
4dab9fb
·
1 Parent(s): b7b4c25
Files changed (2) hide show
  1. app.py +34 -10
  2. src/config.py +6 -6
app.py CHANGED
@@ -21,8 +21,8 @@ data_dir = Path("data")
21
  data_dir.mkdir(exist_ok=True)
22
 
23
  # Model configuration
24
- MODEL_REPO = "stable-diffusion-v1-5/stable-diffusion-v1-5"
25
- MODEL_FILENAME = "v1-5-pruned-emaonly.ckpt"
26
  model_file = data_dir / MODEL_FILENAME
27
 
28
  # Download model if it doesn't exist
@@ -43,14 +43,16 @@ print(f"Using device: {device}")
43
  # Initialize configuration
44
  config = Config(
45
  device=DeviceConfig(device=device),
46
- tokenizer=CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
47
  )
48
 
49
  # Load models
50
  config.models = model_loader.load_models(str(model_file), device)
51
 
52
  MAX_SEED = np.iinfo(np.int32).max
53
- MAX_IMAGE_SIZE = 1024
 
 
54
 
55
  def txt2img(
56
  prompt,
@@ -184,9 +186,9 @@ def inpaint(
184
  return None, seed
185
 
186
  examples = [
187
- "A ultra sharp photorealtici painting of a futuristic cityscape at night with neon lights and flying cars",
188
- "A serene mountain landscape at sunset with snow-capped peaks and a clear lake reflection",
189
- "A detailed portrait of a cyberpunk character with glowing neon implants and holographic tattoos",
190
  ]
191
 
192
  css = """
@@ -205,11 +207,33 @@ css = """
205
  color: #666;
206
  margin-top: 20px;
207
  }
 
 
 
 
 
 
 
 
 
208
  """
209
 
210
  with gr.Blocks(css=css) as demo:
211
  with gr.Column(elem_id="col-container"):
212
- gr.Markdown(" # LiteDiffusion")
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  with gr.Tabs(elem_classes="tabs") as tabs:
215
  with gr.TabItem("Text-to-Image"):
@@ -287,7 +311,7 @@ with gr.Blocks(css=css) as demo:
287
  minimum=256,
288
  maximum=MAX_IMAGE_SIZE,
289
  step=32,
290
- value=512,
291
  )
292
 
293
  height = gr.Slider(
@@ -295,7 +319,7 @@ with gr.Blocks(css=css) as demo:
295
  minimum=256,
296
  maximum=MAX_IMAGE_SIZE,
297
  step=32,
298
- value=512,
299
  )
300
 
301
  with gr.Row():
 
21
  data_dir.mkdir(exist_ok=True)
22
 
23
  # Model configuration
24
+ MODEL_REPO = "stabilityai/stable-diffusion-2-1"
25
+ MODEL_FILENAME = "v2-1_768-ema-pruned.ckpt"
26
  model_file = data_dir / MODEL_FILENAME
27
 
28
  # Download model if it doesn't exist
 
43
  # Initialize configuration
44
  config = Config(
45
  device=DeviceConfig(device=device),
46
+ tokenizer=CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2-1")
47
  )
48
 
49
  # Load models
50
  config.models = model_loader.load_models(str(model_file), device)
51
 
52
  MAX_SEED = np.iinfo(np.int32).max
53
+ MAX_IMAGE_SIZE = 1280
54
+ DEFAULT_WIDTH = 768
55
+ DEFAULT_HEIGHT = 768
56
 
57
  def txt2img(
58
  prompt,
 
186
  return None, seed
187
 
188
  examples = [
189
+ "A stunning 4K hyperrealistic photograph of a futuristic cityscape at night, with towering skyscrapers, flying vehicles, and holographic advertisements reflecting in the water below",
190
+ "An intricate fantasy landscape with crystal mountains, bioluminescent plants, and a magical waterfall under twin moons, highly detailed 8K rendering",
191
+ "Professional portrait photograph of a cyberpunk character with detailed neon implants and holographic tattoos, studio lighting, shallow depth of field, shot on Sony A7R IV",
192
  ]
193
 
194
  css = """
 
207
  color: #666;
208
  margin-top: 20px;
209
  }
210
+
211
+ .model-info {
212
+ font-size: 0.9em;
213
+ padding: 10px;
214
+ background-color: #f0f9ff;
215
+ border-radius: 5px;
216
+ margin: 10px 0;
217
+ border-left: 3px solid #0ea5e9;
218
+ }
219
  """
220
 
221
  with gr.Blocks(css=css) as demo:
222
  with gr.Column(elem_id="col-container"):
223
+ gr.Markdown(" # LiteDiffusion 2.1: High-Res Text-to-Image Magic ✨")
224
+
225
+ gr.Markdown(
226
+ """<div class="model-info">
227
+ 📈 <b>Upgraded to Stable Diffusion 2.1!</b> Now featuring:
228
+ <ul>
229
+ <li>Higher resolution outputs (768×768)</li>
230
+ <li>Improved image quality and detail</li>
231
+ <li>Better prompt understanding</li>
232
+ <li>Enhanced inpainting capabilities</li>
233
+ </ul>
234
+ </div>""",
235
+ elem_classes="model-info"
236
+ )
237
 
238
  with gr.Tabs(elem_classes="tabs") as tabs:
239
  with gr.TabItem("Text-to-Image"):
 
311
  minimum=256,
312
  maximum=MAX_IMAGE_SIZE,
313
  step=32,
314
+ value=DEFAULT_WIDTH,
315
  )
316
 
317
  height = gr.Slider(
 
319
  minimum=256,
320
  maximum=MAX_IMAGE_SIZE,
321
  step=32,
322
+ value=DEFAULT_HEIGHT,
323
  )
324
 
325
  with gr.Row():
src/config.py CHANGED
@@ -4,16 +4,16 @@ import torch
4
 
5
  @dataclass
6
  class ModelConfig:
7
- # Image dimensions
8
- width: int = 512
9
- height: int = 512
10
- latents_width: int = 64 # width // 8
11
- latents_height: int = 64 # height // 8
12
 
13
  # Model architecture parameters
14
  n_embd: int = 1280
15
  n_head: int = 8
16
- d_context: int = 768
17
 
18
  # UNet parameters
19
  n_time: int = 1280
 
4
 
5
  @dataclass
6
  class ModelConfig:
7
+ # Image dimensions (updated for SD 2.1)
8
+ width: int = 768
9
+ height: int = 768
10
+ latents_width: int = 96 # width // 8
11
+ latents_height: int = 96 # height // 8
12
 
13
  # Model architecture parameters
14
  n_embd: int = 1280
15
  n_head: int = 8
16
+ d_context: int = 1024 # Updated for SD 2.1 OpenCLIP
17
 
18
  # UNet parameters
19
  n_time: int = 1280