Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import os
|
2 |
-
import torch
|
3 |
from threading import Thread
|
4 |
from typing import Iterator
|
5 |
|
@@ -10,18 +9,14 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
10 |
# Konfigurationsparameter
|
11 |
MAX_MAX_NEW_TOKENS = 100
|
12 |
DEFAULT_MAX_NEW_TOKENS = 20
|
13 |
-
MAX_INPUT_TOKEN_LENGTH =
|
14 |
|
15 |
# Modell und Tokenizer laden
|
16 |
model_id = "Loewolf/GPT_1"
|
17 |
-
|
18 |
-
|
19 |
-
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
20 |
-
else:
|
21 |
-
raise EnvironmentError("CUDA ist nicht verfügbar. Dieses Skript benötigt eine GPU.")
|
22 |
|
23 |
# Gradio Chat Interface Funktion
|
24 |
-
@spaces.GPU
|
25 |
def generate(
|
26 |
message: str,
|
27 |
chat_history: list[tuple[str, str]],
|
@@ -31,7 +26,7 @@ def generate(
|
|
31 |
top_p: float = 0.9,
|
32 |
top_k: int = 50,
|
33 |
repetition_penalty: float = 1.2,
|
34 |
-
) -> Iterator[str]:
|
35 |
conversation = []
|
36 |
if system_prompt:
|
37 |
conversation.append({"role": "system", "content": system_prompt})
|
@@ -40,11 +35,9 @@ def generate(
|
|
40 |
conversation.append({"role": "user", "content": message})
|
41 |
|
42 |
input_ids = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=MAX_INPUT_TOKEN_LENGTH)
|
43 |
-
input_ids = input_ids.to(model.device)
|
44 |
-
|
45 |
generate_kwargs = dict(
|
46 |
-
input_ids=input_ids,
|
47 |
-
|
48 |
temperature=temperature,
|
49 |
top_p=top_p,
|
50 |
top_k=top_k,
|
|
|
1 |
import os
|
|
|
2 |
from threading import Thread
|
3 |
from typing import Iterator
|
4 |
|
|
|
9 |
# Konfigurationsparameter
|
10 |
MAX_MAX_NEW_TOKENS = 100
|
11 |
DEFAULT_MAX_NEW_TOKENS = 20
|
12 |
+
MAX_INPUT_TOKEN_LENGTH = 400 # Begrenzung auf 400 Tokens
|
13 |
|
14 |
# Modell und Tokenizer laden
|
15 |
model_id = "Loewolf/GPT_1"
|
16 |
+
model = AutoModelForCausalLM.from_pretrained(model_id)
|
17 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
|
|
|
|
|
18 |
|
19 |
# Gradio Chat Interface Funktion
|
|
|
20 |
def generate(
|
21 |
message: str,
|
22 |
chat_history: list[tuple[str, str]],
|
|
|
26 |
top_p: float = 0.9,
|
27 |
top_k: int = 50,
|
28 |
repetition_penalty: float = 1.2,
|
29 |
+
) -> str:
|
30 |
conversation = []
|
31 |
if system_prompt:
|
32 |
conversation.append({"role": "system", "content": system_prompt})
|
|
|
35 |
conversation.append({"role": "user", "content": message})
|
36 |
|
37 |
input_ids = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=MAX_INPUT_TOKEN_LENGTH)
|
|
|
|
|
38 |
generate_kwargs = dict(
|
39 |
+
input_ids=input_ids["input_ids"],
|
40 |
+
max_length=input_ids["input_ids"].shape[1] + max_new_tokens,
|
41 |
temperature=temperature,
|
42 |
top_p=top_p,
|
43 |
top_k=top_k,
|