MohamedRashad committed on
Commit
5e94e7f
·
verified ·
1 Parent(s): 675a4cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
6
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
7
  import torch
8
  from threading import Thread
9
-
10
 
11
  # Load model directly
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -14,6 +14,7 @@ tokenizer = AutoTokenizer.from_pretrained("Navid-AI/Mulhem-1-Mini", token=os.get
14
  model = AutoModelForCausalLM.from_pretrained("Navid-AI/Mulhem-1-Mini", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", token=os.getenv("HF_TOKEN")).to(device)
15
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
16
 
 
17
  def respond(
18
  message,
19
  history: list[tuple[str, str]],
 
6
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
7
  import torch
8
  from threading import Thread
9
+ import spaces
10
 
11
  # Load model directly
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
14
  model = AutoModelForCausalLM.from_pretrained("Navid-AI/Mulhem-1-Mini", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", token=os.getenv("HF_TOKEN")).to(device)
15
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
16
 
17
+ @spaces.GPU
18
  def respond(
19
  message,
20
  history: list[tuple[str, str]],