Azazelle committed on
Commit
ec52c4d
·
verified ·
1 Parent(s): 215da49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -6,13 +6,13 @@ import torch
6
 
7
  print(torch.__version__)
8
 
9
- import torch
10
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
11
  import gradio as gr
12
  from threading import Thread
 
13
 
14
- MODEL_BIG = "HuggingFaceTB/SmolLM-1.7B-Instruct"
15
- MODEL_SMALL = "HuggingFaceTB/SmolLM-360M-Instruct"
16
 
17
  TITLE = "<h1><center>Auto-Guidance Playground</center></h1>"
18
  SUB_TITLE = """<center>Auto-guidance was a technique made by NVIDIA for text-conditioned image models. This is a test of the concept with SmolLM.</center>"""
@@ -34,20 +34,21 @@ END_MESSAGE = """
34
  **The conversation has reached to its end, please press "Clear" to restart a new conversation**
35
  """
36
 
37
- device = "cpu" # for GPU usage or "cpu" for CPU usage
38
-
39
  tokenizer = AutoTokenizer.from_pretrained(MODEL_SMALL)
40
  model_big = AutoModelForCausalLM.from_pretrained(
41
  MODEL_BIG,
42
- torch_dtype=torch.bfloat16,
43
- ).to(device)
44
  model_small = AutoModelForCausalLM.from_pretrained(
45
  MODEL_SMALL,
46
- torch_dtype=torch.bfloat16,
47
- ).to(device)
 
 
 
48
 
49
- if device == "cuda":
50
- model = torch.compile(model)
51
 
52
  @spaces.GPU
53
  def stream_chat(
@@ -84,7 +85,6 @@ def stream_chat(
84
  logits_big = model_big(current_input).logits[:, -1, :]
85
 
86
  probs_small = torch.softmax(logits_small / temperature, dim=-1)
87
- probs_big = torch.softmax(logits_big / temperature, dim=-1)
88
 
89
  interpolated_logits = logits_big + (guidance_scale - 1) * (logits_big - logits_small) * probs_small
90
 
 
6
 
7
  print(torch.__version__)
8
 
 
9
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
10
  import gradio as gr
11
  from threading import Thread
12
+ import bitsandbytes as bnb
13
 
14
+ MODEL_BIG = "HuggingFaceTB/SmolLM-360M-Instruct"
15
+ MODEL_SMALL = "HuggingFaceTB/SmolLM-135M-Instruct"
16
 
17
  TITLE = "<h1><center>Auto-Guidance Playground</center></h1>"
18
  SUB_TITLE = """<center>Auto-guidance was a technique made by NVIDIA for text-conditioned image models. This is a test of the concept with SmolLM.</center>"""
 
34
  **The conversation has reached to its end, please press "Clear" to restart a new conversation**
35
  """
36
 
 
 
37
  tokenizer = AutoTokenizer.from_pretrained(MODEL_SMALL)
38
  model_big = AutoModelForCausalLM.from_pretrained(
39
  MODEL_BIG,
40
+ load_in_8bit=True,
41
+ device_map="auto")
42
  model_small = AutoModelForCausalLM.from_pretrained(
43
  MODEL_SMALL,
44
+ load_in_8bit=True,
45
+ device_map="auto")
46
+
47
+ if model_big.device == "cuda":
48
+ model_big = torch.compile(model_big)
49
 
50
+ if model_small.device == "cuda":
51
+ model_small = torch.compile(model_small)
52
 
53
  @spaces.GPU
54
  def stream_chat(
 
85
  logits_big = model_big(current_input).logits[:, -1, :]
86
 
87
  probs_small = torch.softmax(logits_small / temperature, dim=-1)
 
88
 
89
  interpolated_logits = logits_big + (guidance_scale - 1) * (logits_big - logits_small) * probs_small
90