Spaces:
Running
on
Zero
tori29umai
committed on
Commit
•
fae7c5b
1
Parent(s):
ce7967b
Update app.py
Browse files
app.py
CHANGED
@@ -287,7 +287,15 @@ class LlamaAdapter:
|
|
287 |
top_k=top_k,
|
288 |
repeat_penalty=repeat_penalty
|
289 |
)
|
290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
class CharacterMaker:
|
292 |
def __init__(self):
|
293 |
self.llama = None
|
@@ -299,7 +307,6 @@ class CharacterMaker:
|
|
299 |
self.model_lock = threading.Lock()
|
300 |
self.use_chat_format = False
|
301 |
|
302 |
-
@spaces.GPU(duration=120)
|
303 |
def load_model(self, model_type):
|
304 |
with self.model_lock:
|
305 |
if self.current_model == model_type:
|
@@ -313,10 +320,9 @@ class CharacterMaker:
|
|
313 |
try:
|
314 |
model_path = os.path.join(MODEL_DIR, self.settings[f'DEFAULT_{model_type.upper()}_MODEL'])
|
315 |
n_gpu_layers = self.settings[f'{model_type.lower()}_n_gpu_layers']
|
316 |
-
self.llama =
|
317 |
self.current_model = model_type
|
318 |
self.model_loaded.set()
|
319 |
-
print(f"{model_type} モデル {model_path} のロードが完了しました。(n_gpu_layers: {n_gpu_layers})")
|
320 |
except Exception as e:
|
321 |
print(f"{model_type} モデルのロード中にエラーが発生しました: {str(e)}")
|
322 |
self.model_loaded.set()
|
@@ -397,20 +403,16 @@ class CharacterMaker:
|
|
397 |
|
398 |
def make_prompt(self, input_str: str):
|
399 |
prompt_template = """{{chat_author_description}}
|
400 |
-
|
401 |
{{chat_instructions}}
|
402 |
-
|
403 |
・キャラクターの回答例
|
404 |
{% for qa in example_qa %}
|
405 |
{{qa}}
|
406 |
{% endfor %}
|
407 |
-
|
408 |
・会話履歴
|
409 |
{% for history in histories %}
|
410 |
user: {{history.user}}
|
411 |
assistant: {{history.assistant}}
|
412 |
{% endfor %}
|
413 |
-
|
414 |
user: {{input_str}}
|
415 |
assistant:"""
|
416 |
|
|
|
287 |
top_k=top_k,
|
288 |
repeat_penalty=repeat_penalty
|
289 |
)
|
290 |
+
|
291 |
+
|
292 |
+
@spaces.GPU(duration=120)
|
293 |
+
def load_model_gpu(model_type, model_path, n_gpu_layers, params):
|
294 |
+
llama = LlamaAdapter(model_path, params, n_gpu_layers)
|
295 |
+
print(f"{model_type} モデル {model_path} のロードが完了しました。(n_gpu_layers: {n_gpu_layers})")
|
296 |
+
return llama
|
297 |
+
|
298 |
+
|
299 |
class CharacterMaker:
|
300 |
def __init__(self):
|
301 |
self.llama = None
|
|
|
307 |
self.model_lock = threading.Lock()
|
308 |
self.use_chat_format = False
|
309 |
|
|
|
310 |
def load_model(self, model_type):
|
311 |
with self.model_lock:
|
312 |
if self.current_model == model_type:
|
|
|
320 |
try:
|
321 |
model_path = os.path.join(MODEL_DIR, self.settings[f'DEFAULT_{model_type.upper()}_MODEL'])
|
322 |
n_gpu_layers = self.settings[f'{model_type.lower()}_n_gpu_layers']
|
323 |
+
self.llama = load_model_gpu(model_type, model_path, n_gpu_layers, params)
|
324 |
self.current_model = model_type
|
325 |
self.model_loaded.set()
|
|
|
326 |
except Exception as e:
|
327 |
print(f"{model_type} モデルのロード中にエラーが発生しました: {str(e)}")
|
328 |
self.model_loaded.set()
|
|
|
403 |
|
404 |
def make_prompt(self, input_str: str):
|
405 |
prompt_template = """{{chat_author_description}}
|
|
|
406 |
{{chat_instructions}}
|
|
|
407 |
・キャラクターの回答例
|
408 |
{% for qa in example_qa %}
|
409 |
{{qa}}
|
410 |
{% endfor %}
|
|
|
411 |
・会話履歴
|
412 |
{% for history in histories %}
|
413 |
user: {{history.user}}
|
414 |
assistant: {{history.assistant}}
|
415 |
{% endfor %}
|
|
|
416 |
user: {{input_str}}
|
417 |
assistant:"""
|
418 |
|