syubraj committed
Commit e275f6c · verified · 1 Parent(s): b6e3a4f

Update README.md

Files changed (1)
  1. README.md +49 -15
README.md CHANGED
@@ -24,28 +24,62 @@ This is a **LoRA adapter** for the `microsoft/Phi-3.5-mini-instruct` model, fine
  To use this adapter, you need the base model **`microsoft/Phi-3.5-mini-instruct`**. Load it with `peft`:

  ```python
- from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
  from peft import PeftModel

- base_model_name = "microsoft/Phi-3.5-mini-instruct" # Base model
- lora_model_path = "syubraj/Phi-3.5-mini-instruct-MedicalChat-QLoRA" # QLoRA adapter
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(device)

- # Load base model
- base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
+ # Define base model and your fine-tuned LoRA checkpoint
+ base_model_name = "microsoft/Phi-3.5-mini-instruct"
+ lora_model_path = "syubraj/Phi-3.5-mini-instruct-MedicalChat-QLoRA"
+
+ # Load tokenizer
  tokenizer = AutoTokenizer.from_pretrained(base_model_name)

- # Load LoRA adapter
- lora_model = PeftModel.from_pretrained(base_model, lora_model_path)
- lora_model.to("cuda") # Move to GPU if available
+ # Load model with proper 4-bit quantization settings
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.float16,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4"
+ )
+
+ base_model = AutoModelForCausalLM.from_pretrained(
+     "microsoft/Phi-3.5-mini-instruct",
+     quantization_config=bnb_config,
+     device_map="auto"
+ )
+
+ model = PeftModel.from_pretrained(base_model, lora_model_path)
+
+ model = model.merge_and_unload()
+ model.to(device)
+
+ print("Model successfully loaded!")
+
+ # Inference function
+ def generate_response(user_query, system_message=None, max_length=1024):
+     if system_message is None:
+         system_message = ("You are a trusted AI-powered medical assistant. "
+                           "Analyze patient queries carefully and provide accurate, professional, and empathetic responses. "
+                           "Prioritize patient safety, adhere to medical best practices, and recommend consulting a healthcare provider when necessary.")
+
+     # Prepare input prompt
+     prompt = f"<|system|> {system_message} <|end|>\n<|user|> {user_query} <|end|>\n<|assistant|>"
+
+     inputs = tokenizer(prompt, return_tensors="pt").to(device)
+     outputs = model.generate(**inputs, max_length=max_length)
+
+     # Decode response
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return response.split("<|assistant|>")[-1].strip().split("<|end|>")[0].strip()

- # Generate a response
- def generate_response(prompt, max_length=256):
-     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
-     outputs = lora_model.generate(**inputs, max_length=max_length)
-     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+ if __name__ == "__main__":
+     res = generate_response("Hi, How can someone let go of fever?")
+     print(res)

- # Example
- print(generate_response("Hi Doctor, what are the symptoms of flu?"))
  ```

  ---
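
For readers adapting the committed example, here is a minimal sketch (not part of the commit) that builds the same style of prompt with the tokenizer's built-in chat template instead of hand-formatting the `<|system|>`/`<|user|>`/`<|assistant|>` markers. It assumes a recent `transformers` release whose Phi-3.5 tokenizer ships a chat template, and it reuses the `tokenizer`, `model`, and `device` objects defined in the example above:

```python
# Sketch only: assumes `tokenizer`, `model`, and `device` already exist as in the
# README example, and that the tokenizer provides a chat template (recent transformers).
messages = [
    {"role": "system", "content": "You are a trusted AI-powered medical assistant."},
    {"role": "user", "content": "Hi, How can someone let go of fever?"},
]

# Build a prompt with the same role structure the README formats by hand.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

inputs = tokenizer(prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=512)

# Decode only the newly generated tokens, skipping the echoed prompt.
new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))
```

Both routes should produce equivalent role formatting; the chat-template version simply avoids keeping the special-token strings in sync with the model's expected format by hand.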