AINovelChat

Running on Zero

App Files Files Community

tori29umai committed on Aug 19

Commit

fae7c5b

•

1 Parent(s): ce7967b

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -8

app.py CHANGED Viewed

@@ -287,7 +287,15 @@ class LlamaAdapter:
             top_k=top_k,
             repeat_penalty=repeat_penalty
         )
 class CharacterMaker:
     def __init__(self):
         self.llama = None
@@ -299,7 +307,6 @@ class CharacterMaker:
         self.model_lock = threading.Lock()
         self.use_chat_format = False
-    @spaces.GPU(duration=120)
     def load_model(self, model_type):
         with self.model_lock:
             if self.current_model == model_type:
@@ -313,10 +320,9 @@ class CharacterMaker:
             try:
                 model_path = os.path.join(MODEL_DIR, self.settings[f'DEFAULT_{model_type.upper()}_MODEL'])
                 n_gpu_layers = self.settings[f'{model_type.lower()}_n_gpu_layers']
-                self.llama = LlamaAdapter(model_path, params, n_gpu_layers)
                 self.current_model = model_type
                 self.model_loaded.set()
-                print(f"{model_type} モデル {model_path} のロードが完了しました。(n_gpu_layers: {n_gpu_layers})")
             except Exception as e:
                 print(f"{model_type} モデルのロード中にエラーが発生しました: {str(e)}")
                 self.model_loaded.set()
@@ -397,20 +403,16 @@ class CharacterMaker:
     def make_prompt(self, input_str: str):
         prompt_template = """{{chat_author_description}}
 {{chat_instructions}}
 ・キャラクターの回答例
 {% for qa in example_qa %}
 {{qa}}
 {% endfor %}
 ・会話履歴
 {% for history in histories %}
 user: {{history.user}}
 assistant: {{history.assistant}}
 {% endfor %}
 user: {{input_str}}
 assistant:"""

             top_k=top_k,
             repeat_penalty=repeat_penalty
         )
+@spaces.GPU(duration=120)
+def load_model_gpu(model_type, model_path, n_gpu_layers, params):
+    llama = LlamaAdapter(model_path, params, n_gpu_layers)
+    print(f"{model_type} モデル {model_path} のロードが完了しました。(n_gpu_layers: {n_gpu_layers})")
+    return llama
 class CharacterMaker:
     def __init__(self):
         self.llama = None
         self.model_lock = threading.Lock()
         self.use_chat_format = False
     def load_model(self, model_type):
         with self.model_lock:
             if self.current_model == model_type:
             try:
                 model_path = os.path.join(MODEL_DIR, self.settings[f'DEFAULT_{model_type.upper()}_MODEL'])
                 n_gpu_layers = self.settings[f'{model_type.lower()}_n_gpu_layers']
+                self.llama = load_model_gpu(model_type, model_path, n_gpu_layers, params)
                 self.current_model = model_type
                 self.model_loaded.set()
             except Exception as e:
                 print(f"{model_type} モデルのロード中にエラーが発生しました: {str(e)}")
                 self.model_loaded.set()
     def make_prompt(self, input_str: str):
         prompt_template = """{{chat_author_description}}
 {{chat_instructions}}
 ・キャラクターの回答例
 {% for qa in example_qa %}
 {{qa}}
 {% endfor %}
 ・会話履歴
 {% for history in histories %}
 user: {{history.user}}
 assistant: {{history.assistant}}
 {% endfor %}
 user: {{input_str}}
 assistant:"""