autoguidance-playground

Sleeping

Azazelle committed on Aug 21, 2024

Commit

ec52c4d

verified ·

1 Parent(s): 215da49

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,13 +6,13 @@ import torch
 print(torch.__version__)
-import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
-MODEL_BIG = "HuggingFaceTB/SmolLM-1.7B-Instruct"
-MODEL_SMALL = "HuggingFaceTB/SmolLM-360M-Instruct"
 TITLE = "<h1><center>Auto-Guidance Playground</center></h1>"
 SUB_TITLE = """<center>Auto-guidance was a technique made by NVIDIA for text-conditioned image models. This is a test of the concept with SmolLM.</center>"""
@@ -34,20 +34,21 @@ END_MESSAGE = """
 **The conversation has reached to its end, please press "Clear" to restart a new conversation**
 """
-device = "cpu" # for GPU usage or "cpu" for CPU usage
 tokenizer = AutoTokenizer.from_pretrained(MODEL_SMALL)
 model_big = AutoModelForCausalLM.from_pretrained(
     MODEL_BIG,
-    torch_dtype=torch.bfloat16,
-).to(device)
 model_small = AutoModelForCausalLM.from_pretrained(
     MODEL_SMALL,
-    torch_dtype=torch.bfloat16,
-).to(device)
-if device == "cuda":
-    model = torch.compile(model)
 @spaces.GPU
 def stream_chat(
@@ -84,7 +85,6 @@ def stream_chat(
             logits_big = model_big(current_input).logits[:, -1, :]
         probs_small = torch.softmax(logits_small / temperature, dim=-1)
-        probs_big = torch.softmax(logits_big / temperature, dim=-1)
         interpolated_logits = logits_big + (guidance_scale - 1) * (logits_big - logits_small) * probs_small

 print(torch.__version__)
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
+import bitsandbytes as bnb
+MODEL_BIG = "HuggingFaceTB/SmolLM-360M-Instruct"
+MODEL_SMALL = "HuggingFaceTB/SmolLM-135M-Instruct"
 TITLE = "<h1><center>Auto-Guidance Playground</center></h1>"
 SUB_TITLE = """<center>Auto-guidance was a technique made by NVIDIA for text-conditioned image models. This is a test of the concept with SmolLM.</center>"""
 **The conversation has reached to its end, please press "Clear" to restart a new conversation**
 """
 tokenizer = AutoTokenizer.from_pretrained(MODEL_SMALL)
 model_big = AutoModelForCausalLM.from_pretrained(
     MODEL_BIG,
+    load_in_8bit=True,
+    device_map="auto")
 model_small = AutoModelForCausalLM.from_pretrained(
     MODEL_SMALL,
+    load_in_8bit=True,
+    device_map="auto")
+if model_big.device == "cuda":
+    model_big = torch.compile(model_big)
+if model_small.device == "cuda":
+    model_small = torch.compile(model_small)
 @spaces.GPU
 def stream_chat(
             logits_big = model_big(current_input).logits[:, -1, :]
         probs_small = torch.softmax(logits_small / temperature, dim=-1)
         interpolated_logits = logits_big + (guidance_scale - 1) * (logits_big - logits_small) * probs_small