Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -18,7 +18,7 @@ DEFAULT_MAX_NEW_TOKENS = 1024
|
|
18 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
19 |
|
20 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
21 |
-
model_id = "meta-llama/Llama-3.
|
22 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
23 |
model = AutoModelForCausalLM.from_pretrained(
|
24 |
model_id,
|
@@ -26,6 +26,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
26 |
torch_dtype=torch.float16,
|
27 |
load_in_8bit=True,
|
28 |
)
|
|
|
29 |
model.eval()
|
30 |
@spaces.GPU(duration=90)
|
31 |
def generate(
|
@@ -36,7 +37,7 @@ def generate(
|
|
36 |
top_k: int = 50,
|
37 |
repetition_penalty: float = 1.2,
|
38 |
) -> Iterator[str]:
|
39 |
-
prompt = f"
|
40 |
input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
41 |
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
|
42 |
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
|
|
|
18 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
19 |
|
20 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
21 |
+
model_id = "meta-llama/Llama-3.2-1B"
|
22 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
23 |
model = AutoModelForCausalLM.from_pretrained(
|
24 |
model_id,
|
|
|
26 |
torch_dtype=torch.float16,
|
27 |
load_in_8bit=True,
|
28 |
)
|
29 |
+
|
30 |
model.eval()
|
31 |
@spaces.GPU(duration=90)
|
32 |
def generate(
|
|
|
37 |
top_k: int = 50,
|
38 |
repetition_penalty: float = 1.2,
|
39 |
) -> Iterator[str]:
|
40 |
+
prompt = f"{message}"
|
41 |
input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
42 |
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
|
43 |
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
|