drmasad committed · verified
Commit 8666754 · 1 Parent(s): b431bd6

Update app.py

Files changed (1)
  1. app.py +23 -59
app.py CHANGED
@@ -7,7 +7,7 @@ import torch
 from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
 from huggingface_hub import login
 
-# Initialize the OpenAI client (if needed for Hugging Face Inference API)
+# Initialize the OpenAI client
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1",
     api_key=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
@@ -19,108 +19,72 @@ if api_token:
 else:
     print("API token is not set in the environment variables.")
 
-# Define model links and configurations
+# Define model links
 model_links = {
     "HAH-2024-v0.1": "drmasad/HAH-2024-v0.11"
 }
 
-# Define sidebar options
-selected_model = "HAH-2024-v0.1"  # Directly using your model
+# Set selected model
+selected_model = "HAH-2024-v0.1"
 
-# Sidebar temperature control
+# Sidebar setup
 temp_values = st.sidebar.slider("Select a temperature value", 0.0, 1.0, (0.5))
-
-# Reset conversation functionality
 def reset_conversation():
     st.session_state.conversation = []
     st.session_state.messages = []
 
 st.sidebar.button("Reset Chat", on_click=reset_conversation)
-
-# Display model information on the sidebar
-model_info = {
-    "HAH-2024-v0.1": {
-        "description": "HAH-2024-v0.1 is a fine-tuned model based on Mistral 7B. It's designed for conversations on diabetes.",
-        "logo": "https://www.hmgaihub.com/untitled.png",
-    }
-}
-
 st.sidebar.write(f"You're now chatting with **{selected_model}**")
-st.sidebar.markdown(model_info[selected_model]["description"])
-st.sidebar.image(model_info[selected_model]["logo"])
+st.sidebar.image("https://www.hmgaihub.com/untitled.png")
 
-def load_model():
-    model_name = model_links["HAH-2024-v0.1"]
+# Function to load model
+def load_model(selected_model_name):
+    model_name = model_links[selected_model_name]
     base_model = "mistralai/Mistral-7B-Instruct-v0.2"
-
-    # Load model with quantization and device map configurations
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
         bnb_4bit_compute_dtype=torch.bfloat16,
         bnb_4bit_use_double_quant=False,
-        llm_int8_enable_fp32_cpu_offload=True  # Enable CPU offloading for certain parts
+        llm_int8_enable_fp32_cpu_offload=True
     )
-
-    # Custom device map to manage resource utilization
-    device_map = {
-        'encoder': 'cuda',  # Keep encoder on GPU
-        'decoder': 'cpu',  # Offload decoder to CPU if GPU RAM is insufficient
-    }
-
+    device_map = {'encoder': 'cuda', 'decoder': 'cpu'}
     model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        quantization_config=bnb_config,
-        torch_dtype=torch.bfloat16,
-        device_map=device_map,  # Apply custom device map
-        trust_remote_code=True,
+        model_name, quantization_config=bnb_config, torch_dtype=torch.bfloat16,
+        device_map=device_map, trust_remote_code=True
     )
-
     model.config.use_cache = False
     model = prepare_model_for_kbit_training(model)
-
-    peft_config = LoraConfig(
-        lora_alpha=16,
-        lora_dropout=0.1,
-        r=64,
-        bias="none",
-        task_type="CAUSAL_LM",
-        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
-    )
-
+    peft_config = LoraConfig(lora_alpha=16, lora_dropout=0.1, r=64, bias="none", task_type="CAUSAL_LM",
+                             target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"])
     model = get_peft_model(model, peft_config)
-
     tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
-
     return model, tokenizer
 
+# Load model and tokenizer
+model, tokenizer = load_model(selected_model)
 
-# Initialize chat history
+# Chat application logic
 if "messages" not in st.session_state:
     st.session_state.messages = []
 
-# Display previous chat messages
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
 
-# User input for conversation
 if prompt := st.chat_input("Ask me anything about diabetes"):
     with st.chat_message("user"):
         st.markdown(prompt)
-
     st.session_state.messages.append({"role": "user", "content": prompt})
-
+
     with st.chat_message("assistant"):
         result = pipeline(
-            task="text-generation",
-            model=model_name,
-            tokenizer=tokenizer,
-            max_length=1024,
+            task="text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_length=1024,
             temperature=temp_values
         )(prompt)
-
         response = result[0]['generated_text']
         st.markdown(response)
-
         st.session_state.messages.append({"role": "assistant", "content": response})
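
Usage note, not part of the commit itself: the updated app.py calls load_model(selected_model) at module level, but Streamlit re-executes the whole script on every chat interaction, so the 4-bit Mistral-7B model would be reloaded for each message, and the text-generation pipeline is rebuilt inside the assistant block for every prompt. Below is a minimal sketch of how the load is commonly cached in a Streamlit app, reusing the load_model, selected_model, temp_values, and prompt names from app.py; the get_generator helper is hypothetical and not in this commit.

import streamlit as st
from transformers import pipeline

# Hypothetical helper (not in this commit): cache the expensive load so Streamlit
# reruns reuse the same model instead of reloading it on every interaction.
@st.cache_resource
def get_generator(model_key: str):
    model, tokenizer = load_model(model_key)  # load_model as defined in app.py
    return pipeline("text-generation", model=model, tokenizer=tokenizer)

generator = get_generator(selected_model)

# Per-prompt call; sampling settings are passed at call time, and do_sample=True
# is needed for the temperature slider to have an effect.
result = generator(prompt, max_length=1024, temperature=temp_values, do_sample=True)
response = result[0]["generated_text"]

A related caveat: the hand-written device_map = {'encoder': 'cuda', 'decoder': 'cpu'} refers to module names that a decoder-only Mistral checkpoint does not expose; device_map="auto" is the usual setting when loading with bitsandbytes quantization.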