Spaces:

torinriley
/

Diffusion

Running

App Files Files Community

torinriley committed on Mar 20

Commit

74045df

1 Parent(s): 2da4017

update

Browse files

Files changed (2) hide show

app.py +11 -35
src/config.py +6 -6

app.py CHANGED Viewed

@@ -21,8 +21,8 @@ data_dir = Path("data")
 data_dir.mkdir(exist_ok=True)
 # Model configuration
-MODEL_REPO = "stabilityai/stable-diffusion-2-1"
-MODEL_FILENAME = "v2-1_768-ema-pruned.ckpt"
 model_file = data_dir / MODEL_FILENAME
 # Download model if it doesn't exist
@@ -40,19 +40,17 @@ if not model_file.exists():
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
-# Initialize configuration - use the correct tokenizer for SD 2.1
 config = Config(
     device=DeviceConfig(device=device),
-    tokenizer=CLIPTokenizer.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s32B-b79K")
 )
 # Load models
 config.models = model_loader.load_models(str(model_file), device)
 MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1280
-DEFAULT_WIDTH = 768
-DEFAULT_HEIGHT = 768
 def txt2img(
     prompt,
@@ -186,9 +184,9 @@ def inpaint(
         return None, seed
 examples = [
-    "A stunning 4K hyperrealistic photograph of a futuristic cityscape at night, with towering skyscrapers, flying vehicles, and holographic advertisements reflecting in the water below",
-    "An intricate fantasy landscape with crystal mountains, bioluminescent plants, and a magical waterfall under twin moons, highly detailed 8K rendering",
-    "Professional portrait photograph of a cyberpunk character with detailed neon implants and holographic tattoos, studio lighting, shallow depth of field, shot on Sony A7R IV",
 ]
 css = """
@@ -207,33 +205,11 @@ css = """
     color: #666;
     margin-top: 20px;
 }
-.model-info {
-    font-size: 0.9em;
-    padding: 10px;
-    background-color: #f0f9ff;
-    border-radius: 5px;
-    margin: 10px 0;
-    border-left: 3px solid #0ea5e9;
-}
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # ✨ LiteDiffusion 2.1: High-Res Text-to-Image Magic ✨")
-        gr.Markdown(
-            """<div class="model-info">
-            📈 <b>Upgraded to Stable Diffusion 2.1!</b> Now featuring:
-            <ul>
-                <li>Higher resolution outputs (768×768)</li>
-                <li>Improved image quality and detail</li>
-                <li>Better prompt understanding</li>
-                <li>Enhanced inpainting capabilities</li>
-            </ul>
-            </div>""",
-            elem_classes="model-info"
-        )
         with gr.Tabs(elem_classes="tabs") as tabs:
             with gr.TabItem("Text-to-Image"):
@@ -311,7 +287,7 @@ with gr.Blocks(css=css) as demo:
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=DEFAULT_WIDTH,
                 )
                 height = gr.Slider(
@@ -319,7 +295,7 @@ with gr.Blocks(css=css) as demo:
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=DEFAULT_HEIGHT,
                 )
             with gr.Row():

 data_dir.mkdir(exist_ok=True)
 # Model configuration
+MODEL_REPO = "stable-diffusion-v1-5/stable-diffusion-v1-5"
+MODEL_FILENAME = "v1-5-pruned-emaonly.ckpt"
 model_file = data_dir / MODEL_FILENAME
 # Download model if it doesn't exist
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
+# Initialize configuration
 config = Config(
     device=DeviceConfig(device=device),
+    tokenizer=CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
 )
 # Load models
 config.models = model_loader.load_models(str(model_file), device)
 MAX_SEED = np.iinfo(np.int32).max
+MAX_IMAGE_SIZE = 1024
 def txt2img(
     prompt,
         return None, seed
 examples = [
+    "A ultra sharp photorealtici painting of a futuristic cityscape at night with neon lights and flying cars",
+    "A serene mountain landscape at sunset with snow-capped peaks and a clear lake reflection",
+    "A detailed portrait of a cyberpunk character with glowing neon implants and holographic tattoos",
 ]
 css = """
     color: #666;
     margin-top: 20px;
 }
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown(" # LiteDiffusion")
         with gr.Tabs(elem_classes="tabs") as tabs:
             with gr.TabItem("Text-to-Image"):
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
+                    value=512,
                 )
                 height = gr.Slider(
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
+                    value=512,
                 )
             with gr.Row():

src/config.py CHANGED Viewed

@@ -4,16 +4,16 @@ import torch
 @dataclass
 class ModelConfig:
-    # Image dimensions (updated for SD 2.1)
-    width: int = 768
-    height: int = 768
-    latents_width: int = 96  # width // 8
-    latents_height: int = 96  # height // 8
     # Model architecture parameters
     n_embd: int = 1280
     n_head: int = 8
-    d_context: int = 1024  # Updated for SD 2.1 OpenCLIP
     # UNet parameters
     n_time: int = 1280

 @dataclass
 class ModelConfig:
+    # Image dimensions
+    width: int = 512
+    height: int = 512
+    latents_width: int = 64  # width // 8
+    latents_height: int = 64  # height // 8
     # Model architecture parameters
     n_embd: int = 1280
     n_head: int = 8
+    d_context: int = 768
     # UNet parameters
     n_time: int = 1280