Bhaskar2611 committed
Commit 2f8b5bf · verified · 1 Parent(s): 464e4d7

Update app.py

Files changed (1)
  1. app.py +28 -15
app.py CHANGED
@@ -161,19 +161,30 @@
 # # gr.load("models/Bhaskar2611/Capstone").launch()
 
 import os
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import gradio as gr
 
-# Load your Hugging Face token from environment variables
+# Load your Hugging Face token (if needed for private models or API limit increases)
 hf_token = os.environ.get("HF_TOKEN")
 
-model_id = "HuggingFaceH4/zephyr-7b-beta"
+# Model ID for Mistral 7B Instruct
+model_id = "mistralai/Mistral-7B-Instruct-v0.1"
 
-# Load tokenizer and model with token parameter (no deprecated args)
+# Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
 
-# Define a skin assistant prompt to set the context for the model
+# BitsAndBytesConfig for 4-bit quantization to reduce memory usage
+bnb_config = BitsAndBytesConfig(load_in_4bit=True)
+
+# Load model with quantization and device mapping
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    quantization_config=bnb_config,
+    device_map="auto",
+    token=hf_token
+)
+
+# Skin assistant system prompt
 SKIN_ASSISTANT_PROMPT = (
     "You are a helpful assistant specialized in skin diseases and dermatology. "
     "Provide accurate, concise, and helpful advice about skin conditions, symptoms, "
@@ -181,17 +192,18 @@ SKIN_ASSISTANT_PROMPT = (
 )
 
 def generate_response(user_input):
-    # Combine the assistant prompt + user input
     prompt = SKIN_ASSISTANT_PROMPT + user_input
-
-    inputs = tokenizer(prompt, return_tensors="pt")
-    outputs = model.generate(**inputs, max_length=2048, do_sample=True, temperature=0.7)
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=1024,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.95,
+        repetition_penalty=1.1
+    )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Remove the assistant prompt part from the response (if it appears)
-    if response.startswith(SKIN_ASSISTANT_PROMPT):
-        response = response[len(SKIN_ASSISTANT_PROMPT):].strip()
-    return response
+    return response.replace(SKIN_ASSISTANT_PROMPT, "").strip()
 
 # Gradio interface
 iface = gr.Interface(
@@ -205,3 +217,4 @@ iface = gr.Interface(
 if __name__ == "__main__":
     iface.launch()
 
+
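
A note on the quantization change: the commit passes a bare BitsAndBytesConfig(load_in_4bit=True), which keeps the library defaults (fp4 quantization, float32 compute). A minimal sketch of a more explicit config, assuming NF4 weights and bfloat16 compute are acceptable for this Space; none of these settings appear in the commit itself:

import torch
from transformers import BitsAndBytesConfig

# Sketch only, not part of this commit: make the 4-bit settings explicit.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",              # NormalFloat4 weight quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for the actual matmuls
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
)

With 4-bit weights, a 7B model needs roughly 4-5 GB for parameters instead of ~14 GB in float16, which is what makes device_map="auto" viable on a single small GPU.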
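If the Space still hits out-of-memory errors, a quick check (again an assumption, not in the commit) confirms the quantization took effect and shows where accelerate placed the layers:

# Optional sanity check after loading: footprint should be ~4-5 GB, not ~14 GB.
print(f"weights: {model.get_memory_footprint() / 1e9:.2f} GB")
print(model.hf_device_map)  # layer -> device placement chosen by device_map="auto"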
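On the generation side, replacing max_length=2048 with max_new_tokens=1024 is a real fix: max_length counts the prompt tokens too, so long prompts silently shrank the reply, while max_new_tokens bounds only the generated text. The remaining rough edge is response.replace(SKIN_ASSISTANT_PROMPT, ""), which only works while the model echoes the prompt verbatim. A hypothetical refactor, not what this commit does: format the input with Mistral's [INST] ... [/INST] chat template and decode only the newly generated tokens, so no string surgery is needed (Mistral-7B-Instruct-v0.1 has no system role, so folding the system prompt into the user turn, as the commit already does, is the usual workaround):

def generate_response(user_input):
    messages = [{"role": "user", "content": SKIN_ASSISTANT_PROMPT + user_input}]
    # apply_chat_template wraps the text in [INST] ... [/INST] for Mistral.
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    outputs = model.generate(
        input_ids,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.1,
    )
    # Slice off the prompt by token count instead of editing the decoded string.
    return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()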