MohamedRashad committed on
Commit
e769375
·
verified ·
1 Parent(s): 9e39b36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -10,7 +10,7 @@ from threading import Thread
10
  # Load model directly
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
  tokenizer = AutoTokenizer.from_pretrained("Navid-AI/Mulhem-1-Mini", token=os.getenv("HF_TOKEN"))
13
- model = AutoModelForCausalLM.from_pretrained("Navid-AI/Mulhem-1-Mini", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", token=os.getenv("HF_TOKEN")).to(device)
14
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
15
 
16
  def respond(
 
10
  # Load model directly
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
  tokenizer = AutoTokenizer.from_pretrained("Navid-AI/Mulhem-1-Mini", token=os.getenv("HF_TOKEN"))
13
+ model = AutoModelForCausalLM.from_pretrained("Navid-AI/Mulhem-1-Mini", torch_dtype=torch.bfloat16, token=os.getenv("HF_TOKEN")).to(device)
14
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
15
 
16
  def respond(