Update app.py
app.py CHANGED
@@ -10,8 +10,8 @@ import spaces
 
 # Load model directly
 device = "cuda" if torch.cuda.is_available() else "cpu"
-tokenizer = AutoTokenizer.from_pretrained("Navid-AI/Yehia-7B", token=os.getenv("HF_TOKEN"))
-model = AutoModelForCausalLM.from_pretrained("Navid-AI/Yehia-7B", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", token=os.getenv("HF_TOKEN")).to(device)
+tokenizer = AutoTokenizer.from_pretrained("Navid-AI/Yehia-7B-preview", token=os.getenv("HF_TOKEN"))
+model = AutoModelForCausalLM.from_pretrained("Navid-AI/Yehia-7B-preview", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", token=os.getenv("HF_TOKEN")).to(device)
 streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
 @spaces.GPU
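For context, the changed lines follow the usual TextIteratorStreamer pattern for a ZeroGPU Space: the tokenizer and model are loaded at import time, and a @spaces.GPU-decorated function runs model.generate in a background thread while the streamer yields decoded text. The sketch below shows that pattern under assumptions, since the diff only covers the loading code; the generate function's name, its prompt handling, and the max_new_tokens value are illustrative, not taken from this Space.

import os
import threading

import torch
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

device = "cuda" if torch.cuda.is_available() else "cpu"

# Loading code as shown in the diff above.
tokenizer = AutoTokenizer.from_pretrained("Navid-AI/Yehia-7B-preview", token=os.getenv("HF_TOKEN"))
model = AutoModelForCausalLM.from_pretrained(
    "Navid-AI/Yehia-7B-preview",
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    token=os.getenv("HF_TOKEN"),
).to(device)
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

@spaces.GPU
def generate(prompt: str):
    # Hypothetical streaming helper, not part of the diff.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # model.generate blocks, so it runs in a background thread while the
    # streamer yields text chunks as they are produced.
    thread = threading.Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=256),
    )
    thread.start()
    for new_text in streamer:
        yield new_text
    thread.join()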