Spaces:

drmasad
/

HAH-2024-v0.1

Runtime error

App Files Files

drmasad committed on Apr 27, 2024

Commit

8666754

verified ·

1 Parent(s): b431bd6

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -59

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import torch
 from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
 from huggingface_hub import login
-# Initialize the OpenAI client (if needed for Hugging Face Inference API)
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1",
     api_key=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
@@ -19,108 +19,72 @@ if api_token:
 else:
     print("API token is not set in the environment variables.")
-# Define model links and configurations
 # Maps a model's display name to the Hub repo id that load_model passes to from_pretrained.
 # NOTE(review): the key says "v0.1" while the repo id ends in "v0.11" - confirm this is intended.
 model_links = {
     "HAH-2024-v0.1": "drmasad/HAH-2024-v0.11"
 }
-# Define sidebar options
-selected_model = "HAH-2024-v0.1"  # Directly using your model
-# Sidebar temperature control
 # Sidebar slider (arguments: label, 0.0, 1.0, 0.5); its value is later passed to the
 # generation pipeline as `temperature`.
 temp_values = st.sidebar.slider("Select a temperature value", 0.0, 1.0, (0.5))
-# Reset conversation functionality
 def reset_conversation():
     # Callback for the sidebar "Reset Chat" button: replaces both the `conversation` and
     # `messages` entries in st.session_state with empty lists.
     # NOTE(review): only `messages` is read in the visible code; `conversation` may be
     # unused - confirm before relying on it.
     st.session_state.conversation = []
     st.session_state.messages = []
 # Sidebar widgets: a reset button wired to reset_conversation, then a line naming the
 # active model. The '-' lines below (model_info and the two calls that read it) are the
 # description/logo display that this commit removes.
 st.sidebar.button("Reset Chat", on_click=reset_conversation)
-# Display model information on the sidebar
-model_info = {
-    "HAH-2024-v0.1": {
-        "description": "HAH-2024-v0.1 is a fine-tuned model based on Mistral 7B. It's designed for conversations on diabetes.",
-        "logo": "https://www.hmgaihub.com/untitled.png",
-    }
-}
 st.sidebar.write(f"You're now chatting with **{selected_model}**")
-st.sidebar.markdown(model_info[selected_model]["description"])
-st.sidebar.image(model_info[selected_model]["logo"])
-def load_model():
     # Pre-commit version of load_model: takes no arguments and always loads the
     # "HAH-2024-v0.1" entry of model_links. Returns (model, tokenizer).
-    model_name = model_links["HAH-2024-v0.1"]
     base_model = "mistralai/Mistral-7B-Instruct-v0.2"
-    # Load model with quantization and device map configurations
     # 4-bit NF4 quantisation settings with bfloat16 compute and double quantisation disabled.
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
         bnb_4bit_compute_dtype=torch.bfloat16,
         bnb_4bit_use_double_quant=False,
-        llm_int8_enable_fp32_cpu_offload=True  # Enable CPU offloading for certain parts
     )
-    # Custom device map to manage resource utilization
     # NOTE(review): 'encoder'/'decoder' presumably do not match the module names of a
     # Mistral causal LM, so this map may be rejected at load time - confirm against the
     # Accelerate device_map documentation.
-    device_map = {
-        'encoder': 'cuda',      # Keep encoder on GPU
-        'decoder': 'cpu',       # Offload decoder to CPU if GPU RAM is insufficient
-    }
     model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        quantization_config=bnb_config,
-        torch_dtype=torch.bfloat16,
-        device_map=device_map,  # Apply custom device map
-        trust_remote_code=True,
     )
     model.config.use_cache = False
     # NOTE(review): prepare_model_for_kbit_training and get_peft_model are presumably
     # fine-tuning setup helpers; wrapping with a LoraConfig here looks like it creates a new
     # adapter rather than loading trained weights - confirm this is wanted for serving.
     model = prepare_model_for_kbit_training(model)
-    peft_config = LoraConfig(
-        lora_alpha=16,
-        lora_dropout=0.1,
-        r=64,
-        bias="none",
-        task_type="CAUSAL_LM",
-        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
-    )
     model = get_peft_model(model, peft_config)
     # The tokenizer is loaded from base_model, not from model_name.
     tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     return model, tokenizer
-# Initialize chat history
 # Chat UI (pre-commit side of the diff): create the message history on first run, replay
 # it, then handle one new prompt if the user submitted one.
 if "messages" not in st.session_state:
     st.session_state.messages = []
-# Display previous chat messages
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
-# User input for conversation
 if prompt := st.chat_input("Ask me anything about diabetes"):
     with st.chat_message("user"):
         st.markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("assistant"):
         # A pipeline is built for every submitted prompt and called with `prompt` only;
         # the stored history is not passed to it.
         # NOTE(review): no module-level assignment of `model_name` or `tokenizer` is visible
         # in this pane (load_model is never called here) - confirm they are defined.
         result = pipeline(
-            task="text-generation",
-            model=model_name,
-            tokenizer=tokenizer,
-            max_length=1024,
             temperature=temp_values
         )(prompt)
         response = result[0]['generated_text']
         st.markdown(response)
     st.session_state.messages.append({"role": "assistant", "content": response})

 from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
 from huggingface_hub import login
+# Initialize the OpenAI client
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1",
     api_key=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
 else:
     print("API token is not set in the environment variables.")
+# Define model links
 # Maps a model's display name to the Hub repo id that load_model passes to from_pretrained.
 # NOTE(review): the key says "v0.1" while the repo id ends in "v0.11" - confirm this is intended.
 model_links = {
     "HAH-2024-v0.1": "drmasad/HAH-2024-v0.11"
 }
+# Set selected model
+selected_model = "HAH-2024-v0.1"
+# Sidebar setup
 # Sidebar slider (arguments: label, 0.0, 1.0, 0.5); its value is later passed to the
 # generation pipeline as `temperature`.
 temp_values = st.sidebar.slider("Select a temperature value", 0.0, 1.0, (0.5))
 def reset_conversation():
     # Callback for the sidebar "Reset Chat" button: replaces both the `conversation` and
     # `messages` entries in st.session_state with empty lists.
     # NOTE(review): only `messages` is read in the visible code; `conversation` may be
     # unused - confirm before relying on it.
     st.session_state.conversation = []
     st.session_state.messages = []
 # Sidebar widgets: a reset button wired to reset_conversation, a line naming the active
 # model, and (added by this commit) an image loaded from a hard-coded URL.
 st.sidebar.button("Reset Chat", on_click=reset_conversation)
 st.sidebar.write(f"You're now chatting with **{selected_model}**")
+st.sidebar.image("https://www.hmgaihub.com/untitled.png")
+# Function to load model
+def load_model(selected_model_name):
     # Load the 4-bit quantised causal LM registered under `selected_model_name` in
     # model_links and return it with the base model's tokenizer.
     #
     # Args:    selected_model_name - a key of model_links.
     # Returns: (model, tokenizer).
     # Raises:  KeyError if selected_model_name is not a key of model_links.
+    model_name = model_links[selected_model_name]
     base_model = "mistralai/Mistral-7B-Instruct-v0.2"
     # 4-bit NF4 quantisation settings with bfloat16 compute and double quantisation disabled.
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
         bnb_4bit_compute_dtype=torch.bfloat16,
         bnb_4bit_use_double_quant=False,
+        llm_int8_enable_fp32_cpu_offload=True
     )
     # NOTE(review): 'encoder'/'decoder' presumably do not match the module names of a
     # Mistral causal LM, so this map may be rejected at load time (the page header shows
     # "Runtime error") - confirm against the Accelerate device_map documentation.
+    device_map = {'encoder': 'cuda', 'decoder': 'cpu'}
     model = AutoModelForCausalLM.from_pretrained(
+        model_name, quantization_config=bnb_config, torch_dtype=torch.bfloat16,
+        device_map=device_map, trust_remote_code=True
     )
     model.config.use_cache = False
     # NOTE(review): prepare_model_for_kbit_training and get_peft_model are presumably
     # fine-tuning setup helpers; wrapping with a LoraConfig here looks like it creates a new
     # adapter rather than loading trained weights - confirm this is wanted for serving.
     model = prepare_model_for_kbit_training(model)
+    peft_config = LoraConfig(lora_alpha=16, lora_dropout=0.1, r=64, bias="none", task_type="CAUSAL_LM",
+                             target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"])
     model = get_peft_model(model, peft_config)
     # The tokenizer is loaded from base_model, not from model_name.
     tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     return model, tokenizer
+# Load model and tokenizer
     # NOTE(review): this call sits at module top level; Streamlit presumably re-executes the
     # script on every interaction, which would reload the model each time - confirm and
     # consider caching the loaded objects.
+model, tokenizer = load_model(selected_model)
+# Chat application logic
 # Create the message history on first run, replay it, then handle one new prompt if the
 # user submitted one.
 if "messages" not in st.session_state:
     st.session_state.messages = []
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
 if prompt := st.chat_input("Ask me anything about diabetes"):
     with st.chat_message("user"):
         st.markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("assistant"):
         # A pipeline is built for every submitted prompt and called with `prompt` only;
         # the stored history is not passed to it.
         # NOTE(review): temp_values comes from a slider whose minimum is 0.0; presumably
         # temperature only takes effect with sampling enabled and must be > 0 - confirm
         # against the transformers generation documentation.
         result = pipeline(
+            task="text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_length=1024,
             temperature=temp_values
         )(prompt)
         # NOTE(review): 'generated_text' presumably includes the prompt unless
         # return_full_text=False is passed - confirm.
         response = result[0]['generated_text']
         st.markdown(response)
     st.session_state.messages.append({"role": "assistant", "content": response})