aixsatoshi
committed on
Commit
•
da8a347
1
Parent(s):
d65d6d9
Update app.py
Browse files
app.py
CHANGED
@@ -9,8 +9,9 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
9 |
model = AutoModelForCausalLM.from_pretrained(
|
10 |
model_id,
|
11 |
torch_dtype=torch.float16,
|
12 |
-
|
13 |
-
|
|
|
14 |
)
|
15 |
|
16 |
TITLE = "<h1><center>Meta-Llama-3.1-70B-Instruct-AWQ-INT4 Chat webui</center></h1>"
|
@@ -40,7 +41,7 @@ h3 {
|
|
40 |
}
|
41 |
"""
|
42 |
|
43 |
-
@
|
44 |
def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
|
45 |
print(f'Message: {message}')
|
46 |
print(f'History: {history}')
|
@@ -142,4 +143,3 @@ with gr.Blocks(css=CSS) as demo:
|
|
142 |
|
143 |
if __name__ == "__main__":
|
144 |
demo.launch()
|
145 |
-
|
|
|
9 |
model = AutoModelForCausalLM.from_pretrained(
|
10 |
model_id,
|
11 |
torch_dtype=torch.float16,
|
12 |
+
device_map="sequential",
|
13 |
+
offload_folder="offload", # オフロードフォルダの指定
|
14 |
+
offload_state_dict=True # 必要に応じてstate_dictをオフロード
|
15 |
)
|
16 |
|
17 |
TITLE = "<h1><center>Meta-Llama-3.1-70B-Instruct-AWQ-INT4 Chat webui</center></h1>"
|
|
|
41 |
}
|
42 |
"""
|
43 |
|
44 |
+
@gr.GPU
|
45 |
def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
|
46 |
print(f'Message: {message}')
|
47 |
print(f'History: {history}')
|
|
|
143 |
|
144 |
if __name__ == "__main__":
|
145 |
demo.launch()
|
|