benardo0 committed
Commit 10ffd90 · verified · 1 Parent(s): 5edea36

Update app.py

Files changed (1)
  1. app.py +49 -50
app.py CHANGED
@@ -153,7 +153,7 @@ from typing import List, Dict
 import logging
 import traceback
 
-# Set up logging to help us track what's happening
+# Set up logging to help us understand what's happening in our application
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
@@ -163,42 +163,40 @@ logger = logging.getLogger(__name__)
 class MedicalAssistant:
     def __init__(self):
         """
-        Initialize the medical assistant with the Llama3-Med42 model.
-        This model is specifically trained on medical data and quantized to 4-bit precision
-        for better memory efficiency while maintaining good performance.
+        Initialize a basic medical assistant for CPU-only environments.
+        This version uses standard model loading without quantization for maximum compatibility.
         """
         try:
-            logger.info("Starting model initialization...")
+            logger.info("Starting basic model initialization...")
 
-            # Updated model to use Llama3-Med42
+            # Define our model configuration
             self.model_name = "emircanerol/Llama3-Med42-8B-4bit"
             self.max_length = 2048
 
-            # Initialize the pipeline for simplified text generation
-            # The pipeline handles tokenizer and model loading automatically
-            logger.info("Initializing pipeline...")
-            self.pipe = pipeline(
-                "text-generation",
-                model=self.model_name,
-                token=os.getenv('HUGGING_FACE_TOKEN'),
-                device_map="auto",
-                torch_dtype=torch.float16,  # Use half precision for 4-bit model
-                load_in_4bit=True  # Enable 4-bit quantization
-            )
-
-            # Load tokenizer separately for more control over text processing
+            # First load the tokenizer since it's lighter on memory
             logger.info("Loading tokenizer...")
             self.tokenizer = AutoTokenizer.from_pretrained(
                 self.model_name,
-                token=os.getenv('HUGGING_FACE_TOKEN'),
-                trust_remote_code=True
+                token=os.getenv('HUGGING_FACE_TOKEN')
             )
 
-            # Ensure proper padding token configuration
+            # Handle padding token setup
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            # Initialize pipeline with basic CPU settings
+            logger.info("Initializing CPU-based pipeline...")
+            self.pipe = pipeline(
+                "text-generation",
+                model=self.model_name,
+                token=os.getenv('HUGGING_FACE_TOKEN'),
+                device_map="cpu",  # Explicitly use CPU
+                torch_dtype=torch.float32,  # Use standard precision
+                use_safetensors=True,  # Enable safetensors for better memory handling
+                # Removed all quantization settings
+            )
 
-            logger.info("Medical Assistant initialized successfully!")
+            logger.info("Medical Assistant initialized successfully in basic CPU mode!")
 
         except Exception as e:
             logger.error(f"Initialization failed: {str(e)}")
@@ -207,44 +205,47 @@ class MedicalAssistant:
 
     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
         """
-        Generate a response using the Llama3-Med42 pipeline.
-        This method formats the conversation history and generates appropriate medical responses.
+        Generate responses using basic CPU-friendly settings.
+        This method focuses on stability over speed, using conservative parameters.
         """
         try:
             logger.info("Preparing message for generation")
 
-            # Create a medical context-aware prompt
-            system_prompt = """You are a medical AI assistant based on Llama3,
-            specifically trained on medical knowledge. Provide accurate, professional
-            medical guidance while acknowledging limitations. Always recommend
-            consulting healthcare providers for specific medical advice."""
+            # Create our medical context prompt
+            system_prompt = """You are a medical AI assistant trained on medical knowledge.
+            Provide accurate, professional medical guidance while acknowledging limitations.
+            Always recommend consulting healthcare providers for specific medical advice."""
 
-            # Format the conversation for the model
+            # Format our conversation for the model
             messages = [
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": message}
             ]
 
-            # Add chat history if available
+            # Add recent chat history if available
             if chat_history:
-                for chat in chat_history:
+                # Only keep recent history to manage memory
+                recent_history = chat_history[-2:]  # Keep last 2 exchanges
+                for chat in recent_history:
                     messages.append({
                         "role": "user" if chat["role"] == "user" else "assistant",
                         "content": chat["content"]
                     })
 
-            logger.info("Generating response")
-            # Generate response using the pipeline
+            logger.info("Generating response with basic settings")
+
+            # Generate with conservative parameters
             response = self.pipe(
                 messages,
-                max_new_tokens=256,
+                max_new_tokens=100,  # Conservative token limit
                 do_sample=True,
                 temperature=0.7,
                 top_p=0.95,
-                repetition_penalty=1.1
+                num_beams=1,  # Single beam for simplicity
+                pad_token_id=self.tokenizer.pad_token_id
             )[0]["generated_text"]
 
-            # Clean up the response by extracting the last assistant message
+            # Clean up our response
             response = response.split("assistant:")[-1].strip()
 
             logger.info("Response generated successfully")
@@ -255,14 +256,14 @@ class MedicalAssistant:
             logger.error(traceback.format_exc())
             return f"I apologize, but I encountered an error: {str(e)}"
 
-# Initialize the assistant
+# Initialize our assistant
 assistant = None
 
 def initialize_assistant():
-    """Initialize the assistant with proper error handling"""
+    """Initialize the assistant with careful error handling"""
     global assistant
     try:
-        logger.info("Attempting to initialize assistant")
+        logger.info("Attempting to initialize basic CPU assistant")
         assistant = MedicalAssistant()
         logger.info("Assistant initialized successfully")
         return True
@@ -272,7 +273,7 @@ def initialize_assistant():
         return False
 
 def chat_response(message: str, history: List[Dict]):
-    """Handle chat interactions with error recovery"""
+    """Handle chat interactions with proper error recovery"""
     global assistant
 
     if assistant is None:
@@ -287,14 +288,12 @@ def chat_response(message: str, history: List[Dict]):
         logger.error(traceback.format_exc())
         return f"I encountered an error: {str(e)}"
 
-# Create the Gradio interface
+# Create our Gradio interface
 demo = gr.ChatInterface(
     fn=chat_response,
-    title="Medical Assistant (NURSEOGE)",
-    description="""This medical assistant is powered by NURSEOGE,
-    a model specifically trained on medical knowledge. It provides
-    guidance and information about health-related queries while
-    maintaining professional medical standards.""",
+    title="Medical Assistant (Basic CPU Version)",
+    description="""This medical assistant provides medical guidance using a basic CPU configuration.
+    Responses may take longer but will be stable and reliable.""",
     examples=[
         "What are the symptoms of malaria?",
         "How can I prevent type 2 diabetes?",
@@ -302,7 +301,7 @@ demo = gr.ChatInterface(
     ]
 )
 
-# Launch the interface
+# Launch our interface
 if __name__ == "__main__":
-    logger.info("Starting the application")
+    logger.info("Starting the basic CPU application")
     demo.launch()
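One wiring detail worth verifying against the installed Gradio version: gr.ChatInterface calls fn with (message, history), and older Gradio releases pass history as [user, assistant] pairs by default rather than the role/content dictionaries that generate_response expects. A hypothetical second-turn call under the pair convention:

# Hypothetical pair-style history (the default in older Gradio releases).
# chat["role"] in generate_response would fail on entries shaped like this;
# role/content dicts require a messages-style ChatInterface configuration.
message = "How can I prevent type 2 diabetes?"
history = [
    ["What are the symptoms of malaria?",
     "Fever, chills, and headache are common; please consult a clinician."],
]

If the deployed Gradio version already delivers role/content dicts, the code works as written.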
 