granite-3.0-1b-a400m-instruct-CPU

Running

aixsatoshi committed on Jul 23

Commit

da8a347

•

1 Parent(s): d65d6d9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,8 +9,9 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
   model_id,
   torch_dtype=torch.float16,
-  low_cpu_mem_usage=True,
-  device_map="auto",
 )
 TITLE = "<h1><center>Meta-Llama-3.1-70B-Instruct-AWQ-INT4 Chat webui</center></h1>"
@@ -40,7 +41,7 @@ h3 {
 }
 """
-@spaces.GPU(duration=120)
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
     print(f'Message: {message}')
     print(f'History: {history}')
@@ -142,4 +143,3 @@ with gr.Blocks(css=CSS) as demo:
 if __name__ == "__main__":
     demo.launch()

 model = AutoModelForCausalLM.from_pretrained(
   model_id,
   torch_dtype=torch.float16,
+  device_map="sequential",
+  offload_folder="offload",  # specify the offload folder
+  offload_state_dict=True  # offload the state_dict as needed
 )
 TITLE = "<h1><center>Meta-Llama-3.1-70B-Instruct-AWQ-INT4 Chat webui</center></h1>"
 }
 """
+@gr.GPU
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
     print(f'Message: {message}')
     print(f'History: {history}')
 if __name__ == "__main__":
     demo.launch()