ari9dam committed · commit 5eb35d4 · parent bc393d0

gpu 8bit inference

Files changed:
- app.py (+8, -5)
- requirements.txt (+4, -2)
app.py

@@ -1,7 +1,7 @@
 import os
 from threading import Thread
 from typing import Iterator
-
+import spaces
 import gradio as gr
 import torch
 import transformers
@@ -11,8 +11,9 @@ MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
+
 model_id = "microsoft/Orca-2-13b"
-model = transformers.AutoModelForCausalLM.from_pretrained(model_id)
+model = transformers.AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True)
 
 tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, use_fast=False)
 
@@ -21,15 +22,17 @@ user_message = "How can you determine if a restaurant is popular among locals or
 
 DESCRIPTION = """
 # Orca-2 13B
-This Space demonstrates model [Orca-2-13B](https://huggingface.co/microsoft/Orca-2-13B) by Microsoft, a Llama 2
+This Space demonstrates model [Orca-2-13B](https://huggingface.co/microsoft/Orca-2-13B) by Microsoft, a Llama 2 derivative with 13B parameters fine-tuned for single-turn instructions. This Space is <b>running 8-bit inference with greedy decoding</b>.
 
 The system message is set to be the cautious system message:
 You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.
-Feel free to modify it in the additional input section.
+Feel free to modify it in the additional input section.
 
 🔎 For more details about the Orca family of models take a look [at our blog post](https://msft.it/6042iGtzK).
 🚨 Looking for lighter versions of Orca-2? 👉 Check out the [7B Chat model](https://huggingface.co/spaces/huggingface-projects/Orca-2-7b). Note: Orca 2 is licensed under the [Microsoft Research License](LICENSE). Llama 2 is licensed under the [LLAMA 2 Community License](https://ai.meta.com/llama/license/).
 """
+if not torch.cuda.is_available():
+    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
 # Function to combine system message and user
 def to_prompt(conversations):
@@ -43,7 +46,7 @@ def to_prompt(conversations):
 inputs = tokenizer(prompt, return_tensors='pt').input_ids
 return inputs
 
-
+@spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
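Taken together, the app.py changes load the 13B checkpoint with 8-bit weights and mark generation for a GPU worker via `@spaces.GPU`. Below is a minimal sketch of how those pieces typically combine with the streaming pattern the file's imports (Thread, Iterator) suggest; it assumes `model`, `tokenizer`, and `to_prompt` behave as in the diff above, the conversation format passed to `to_prompt` is hypothetical, and the generation kwargs are illustrative (greedy decoding, matching the description string), not the Space's actual generate() body.

from threading import Thread
from typing import Iterator

import spaces
import transformers

@spaces.GPU  # ask the Spaces runtime to run this call on a GPU worker
def generate(message: str, chat_history: list[tuple[str, str]]) -> Iterator[str]:
    # to_prompt() returns tokenized input_ids, per the snippet in the diff;
    # the (message, "") conversation format here is an assumption
    input_ids = to_prompt(chat_history + [(message, "")])
    streamer = transformers.TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # run generation on a background thread and stream partial text back
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            input_ids=input_ids.to(model.device),
            streamer=streamer,
            max_new_tokens=1024,  # DEFAULT_MAX_NEW_TOKENS in app.py
            do_sample=False,      # greedy decoding, as the description states
        ),
    )
    thread.start()
    partial = []
    for text in streamer:
        partial.append(text)
        yield "".join(partial)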
requirements.txt

@@ -4,6 +4,7 @@ altair==5.1.2
 annotated-types==0.6.0
 anyio==3.7.1
 attrs==23.1.0
+bitsandbytes==0.41.1
 certifi==2023.11.17
 charset-normalizer==3.3.2
 click==8.1.7
@@ -56,6 +57,7 @@ requests==2.31.0
 rich==13.7.0
 rpds-py==0.13.1
 safetensors==0.4.0
+scipy==1.11.4
 semantic-version==2.10.0
 sentencepiece==0.1.99
 shellingham==1.5.4
@@ -67,7 +69,7 @@ sympy==1.12
 tokenizers==0.13.3
 tomlkit==0.12.0
 toolz==0.12.0
-torch
+torch --index-url https://download.pytorch.org/whl/cu118
 tqdm==4.66.1
 transformers==4.33.1
 triton==2.1.0
@@ -77,4 +79,4 @@ tzdata==2023.3
 urllib3==2.1.0
 uvicorn==0.24.0.post1
 websockets==11.0.3
-zipp==3.17.0
+zipp==3.17.0
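The dependency changes back the 8-bit path: bitsandbytes supplies the int8 kernels behind `load_in_8bit=True`, scipy is added alongside it (the 8-bit integration here apparently needs it, since this commit introduces both together), and pointing torch at the cu118 wheel index ensures a CUDA-enabled build. A hypothetical sanity check for the resulting environment, not part of the commit:

# Sketch: verify the stack these requirement changes set up.
import torch

print(torch.__version__, torch.version.cuda)  # a cu118 wheel reports CUDA 11.8
assert torch.cuda.is_available(), "8-bit bitsandbytes inference needs a CUDA GPU"

import bitsandbytes  # int8 kernels used by load_in_8bit=True
import scipy         # added with bitsandbytes in this commit

print("8-bit inference prerequisites look OK")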