NV9523 committed on
Commit eeafbf0 · verified · 1 Parent(s): 8603a8f

Update app.py

Files changed (1):
  1. app.py (+5, -10)
app.py CHANGED
@@ -2,7 +2,7 @@ import subprocess
 import sys
 
 # Install the required libraries if they are not already present
-subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers", "streamlit", "torch", "peft"])
+subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers", "streamlit", "torch", "bitsandbytes", "peft"])
 
 import streamlit as st
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
@@ -13,20 +13,15 @@ import torch
 BASE_MODEL_NAME = "unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit"
 ADAPTER_MODEL_PATH = "lora_model"
 
-# Load the base model on CPU
-base_model = AutoModelForCausalLM.from_pretrained(
-    BASE_MODEL_NAME,
-    torch_dtype=torch.float32,  # use float32 on CPU
-    device_map="cpu"  # force execution on CPU
-)
+# Load the base model
+base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
 # Apply the LoRA adapter
 model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_PATH)
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
 
 def generate_response(prompt):
     """Generate a response from the model."""
-    # Move the inputs to CPU
-    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
     streamer = TextStreamer(tokenizer)
     with torch.no_grad():
         model.generate(**inputs, streamer=streamer, max_length=512)
@@ -59,4 +54,4 @@ if user_input:
     st.markdown(response)
 
     # Append assistant response
-    st.session_state.messages.append({"role": "assistant", "content": response})
+    st.session_state.messages.append({"role": "assistant", "content": response})
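
Note on the device change: with device_map="auto", accelerate can still fall back to CPU when no GPU is present, but the hard-coded .to("cuda") in generate_response would then fail. A minimal device-agnostic sketch of that function, not part of this commit; it assumes the module-level model and tokenizer objects that app.py builds above, and the DEVICE constant is illustrative:

import torch
from transformers import TextStreamer

# Resolve the target device at runtime instead of hard-coding "cuda",
# so the app also runs on CPU-only hardware. DEVICE is a hypothetical
# helper, not something this commit introduces.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def generate_response(prompt):
    """Generate a response, sending inputs to whichever device is available."""
    # `tokenizer` and `model` are the objects created at module level in app.py
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    streamer = TextStreamer(tokenizer)
    with torch.no_grad():
        model.generate(**inputs, streamer=streamer, max_length=512)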