Loewolf committed on
Commit 06ea162 · Parent: 57579dd

Update app.py

Files changed (1): app.py (+6 -13)
app.py CHANGED
@@ -1,5 +1,4 @@
 import os
-import torch
 from threading import Thread
 from typing import Iterator
 
@@ -10,18 +9,14 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 # Configuration parameters
 MAX_MAX_NEW_TOKENS = 100
 DEFAULT_MAX_NEW_TOKENS = 20
-MAX_INPUT_TOKEN_LENGTH = 200  # Adjustment to 400 tokens
+MAX_INPUT_TOKEN_LENGTH = 400  # Limit to 400 tokens
 
 # Load model and tokenizer
 model_id = "Loewolf/GPT_1"
-if torch.cuda.is_available():
-    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-else:
-    raise EnvironmentError("CUDA is not available. This script requires a GPU.")
+model = AutoModelForCausalLM.from_pretrained(model_id)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 # Gradio chat interface function
-@spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
@@ -31,7 +26,7 @@ def generate(
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
-) -> Iterator[str]:
+) -> str:
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
@@ -40,11 +35,9 @@ def generate(
     conversation.append({"role": "user", "content": message})
 
     input_ids = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=MAX_INPUT_TOKEN_LENGTH)
-    input_ids = input_ids.to(model.device)
-
     generate_kwargs = dict(
-        input_ids=input_ids,
-        max_new_tokens=min(max_new_tokens, MAX_MAX_NEW_TOKENS),
+        input_ids=input_ids["input_ids"],
+        max_length=input_ids["input_ids"].shape[1] + max_new_tokens,
         temperature=temperature,
         top_p=top_p,
         top_k=top_k,
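
Reassembled from the hunks above, the updated CPU-only generation path would run roughly as in the sketch below. Hedges: the chat_history loop is inferred from lines elided by the diff; the temperature default is not visible in the diff; and a Hugging Face tokenizer cannot consume a list of role/content dicts directly, so the sketch flattens the conversation into one prompt string before encoding (that flattening line is illustrative, not part of the commit).

    from transformers import AutoModelForCausalLM, AutoTokenizer

    MAX_MAX_NEW_TOKENS = 100
    DEFAULT_MAX_NEW_TOKENS = 20
    MAX_INPUT_TOKEN_LENGTH = 400

    model_id = "Loewolf/GPT_1"
    model = AutoModelForCausalLM.from_pretrained(model_id)  # plain CPU load: no torch_dtype, no device_map
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    def generate(
        message: str,
        chat_history: list[tuple[str, str]],
        system_prompt: str = "",
        max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
        temperature: float = 1.0,  # default not shown in the diff; 1.0 assumed
        top_p: float = 0.9,
        top_k: int = 50,
        repetition_penalty: float = 1.2,
    ) -> str:
        conversation = []
        if system_prompt:
            conversation.append({"role": "system", "content": system_prompt})
        for user_msg, bot_msg in chat_history:  # loop inferred from elided context
            conversation.append({"role": "user", "content": user_msg})
            conversation.append({"role": "assistant", "content": bot_msg})
        conversation.append({"role": "user", "content": message})

        # Illustrative step: tokenizer() expects a string, not role/content
        # dicts, so join the turns into a single prompt before encoding.
        prompt = "\n".join(f"{t['role']}: {t['content']}" for t in conversation)
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True,
                           max_length=MAX_INPUT_TOKEN_LENGTH)
        input_len = inputs["input_ids"].shape[1]

        output_ids = model.generate(
            input_ids=inputs["input_ids"],
            max_length=input_len + max_new_tokens,  # replaces the old max_new_tokens kwarg
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repetition_penalty=repetition_penalty,
            do_sample=True,  # assumed: sampling params are no-ops under greedy decoding
        )
        # Return only the newly generated continuation.
        return tokenizer.decode(output_ids[0, input_len:], skip_special_tokens=True)

One behavioral difference worth noting: the old kwargs clamped output length with min(max_new_tokens, MAX_MAX_NEW_TOKENS), whereas the new max_length arithmetic applies no such cap, leaving MAX_MAX_NEW_TOKENS unused.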