torinriley committed on
Commit
4dab9fb
·
1 Parent(s): b7b4c25
Files changed (2) hide show
  1. app.py +34 -10
  2. src/config.py +6 -6
app.py CHANGED
@@ -21,8 +21,8 @@ data_dir = Path("data")
21
  data_dir.mkdir(exist_ok=True)
22
 
23
  # Model configuration
24
- MODEL_REPO = "stable-diffusion-v1-5/stable-diffusion-v1-5"
25
- MODEL_FILENAME = "v1-5-pruned-emaonly.ckpt"
26
  model_file = data_dir / MODEL_FILENAME
27
 
28
  # Download model if it doesn't exist
@@ -43,14 +43,16 @@ print(f"Using device: {device}")
43
  # Initialize configuration
44
  config = Config(
45
  device=DeviceConfig(device=device),
46
- tokenizer=CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
47
  )
48
 
49
  # Load models
50
  config.models = model_loader.load_models(str(model_file), device)
51
 
52
  MAX_SEED = np.iinfo(np.int32).max
53
- MAX_IMAGE_SIZE = 1024
 
 
54
 
55
  def txt2img(
56
  prompt,
@@ -184,9 +186,9 @@ def inpaint(
184
  return None, seed
185
 
186
  examples = [
187
- "A ultra sharp photorealtici painting of a futuristic cityscape at night with neon lights and flying cars",
188
- "A serene mountain landscape at sunset with snow-capped peaks and a clear lake reflection",
189
- "A detailed portrait of a cyberpunk character with glowing neon implants and holographic tattoos",
190
  ]
191
 
192
  css = """
@@ -205,11 +207,33 @@ css = """
205
  color: #666;
206
  margin-top: 20px;
207
  }
 
 
 
 
 
 
 
 
 
208
  """
209
 
210
  with gr.Blocks(css=css) as demo:
211
  with gr.Column(elem_id="col-container"):
212
- gr.Markdown(" # LiteDiffusion")
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  with gr.Tabs(elem_classes="tabs") as tabs:
215
  with gr.TabItem("Text-to-Image"):
@@ -287,7 +311,7 @@ with gr.Blocks(css=css) as demo:
287
  minimum=256,
288
  maximum=MAX_IMAGE_SIZE,
289
  step=32,
290
- value=512,
291
  )
292
 
293
  height = gr.Slider(
@@ -295,7 +319,7 @@ with gr.Blocks(css=css) as demo:
295
  minimum=256,
296
  maximum=MAX_IMAGE_SIZE,
297
  step=32,
298
- value=512,
299
  )
300
 
301
  with gr.Row():
 
21
  data_dir.mkdir(exist_ok=True)
22
 
23
  # Model configuration
24
+ MODEL_REPO = "stabilityai/stable-diffusion-2-1"
25
+ MODEL_FILENAME = "v2-1_768-ema-pruned.ckpt"
26
  model_file = data_dir / MODEL_FILENAME
27
 
28
  # Download model if it doesn't exist
 
43
  # Initialize configuration
44
  config = Config(
45
  device=DeviceConfig(device=device),
46
+ tokenizer=CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2-1")
47
  )
48
 
49
  # Load models
50
  config.models = model_loader.load_models(str(model_file), device)
51
 
52
  MAX_SEED = np.iinfo(np.int32).max
53
+ MAX_IMAGE_SIZE = 1280
54
+ DEFAULT_WIDTH = 768
55
+ DEFAULT_HEIGHT = 768
56
 
57
  def txt2img(
58
  prompt,
 
186
  return None, seed
187
 
188
  examples = [
189
+ "A stunning 4K hyperrealistic photograph of a futuristic cityscape at night, with towering skyscrapers, flying vehicles, and holographic advertisements reflecting in the water below",
190
+ "An intricate fantasy landscape with crystal mountains, bioluminescent plants, and a magical waterfall under twin moons, highly detailed 8K rendering",
191
+ "Professional portrait photograph of a cyberpunk character with detailed neon implants and holographic tattoos, studio lighting, shallow depth of field, shot on Sony A7R IV",
192
  ]
193
 
194
  css = """
 
207
  color: #666;
208
  margin-top: 20px;
209
  }
210
+
211
+ .model-info {
212
+ font-size: 0.9em;
213
+ padding: 10px;
214
+ background-color: #f0f9ff;
215
+ border-radius: 5px;
216
+ margin: 10px 0;
217
+ border-left: 3px solid #0ea5e9;
218
+ }
219
  """
220
 
221
  with gr.Blocks(css=css) as demo:
222
  with gr.Column(elem_id="col-container"):
223
+ gr.Markdown(" # LiteDiffusion 2.1: High-Res Text-to-Image Magic ✨")
224
+
225
+ gr.Markdown(
226
+ """<div class="model-info">
227
+ 📈 <b>Upgraded to Stable Diffusion 2.1!</b> Now featuring:
228
+ <ul>
229
+ <li>Higher resolution outputs (768×768)</li>
230
+ <li>Improved image quality and detail</li>
231
+ <li>Better prompt understanding</li>
232
+ <li>Enhanced inpainting capabilities</li>
233
+ </ul>
234
+ </div>""",
235
+ elem_classes="model-info"
236
+ )
237
 
238
  with gr.Tabs(elem_classes="tabs") as tabs:
239
  with gr.TabItem("Text-to-Image"):
 
311
  minimum=256,
312
  maximum=MAX_IMAGE_SIZE,
313
  step=32,
314
+ value=DEFAULT_WIDTH,
315
  )
316
 
317
  height = gr.Slider(
 
319
  minimum=256,
320
  maximum=MAX_IMAGE_SIZE,
321
  step=32,
322
+ value=DEFAULT_HEIGHT,
323
  )
324
 
325
  with gr.Row():
src/config.py CHANGED
@@ -4,16 +4,16 @@ import torch
4
 
5
  @dataclass
6
  class ModelConfig:
7
- # Image dimensions
8
- width: int = 512
9
- height: int = 512
10
- latents_width: int = 64 # width // 8
11
- latents_height: int = 64 # height // 8
12
 
13
  # Model architecture parameters
14
  n_embd: int = 1280
15
  n_head: int = 8
16
- d_context: int = 768
17
 
18
  # UNet parameters
19
  n_time: int = 1280
 
4
 
5
  @dataclass
6
  class ModelConfig:
7
+ # Image dimensions (updated for SD 2.1)
8
+ width: int = 768
9
+ height: int = 768
10
+ latents_width: int = 96 # width // 8
11
+ latents_height: int = 96 # height // 8
12
 
13
  # Model architecture parameters
14
  n_embd: int = 1280
15
  n_head: int = 8
16
+ d_context: int = 1024 # Updated for SD 2.1 OpenCLIP
17
 
18
  # UNet parameters
19
  n_time: int = 1280