benardo0 committed
Commit 7625d6b · verified · 1 Parent(s): 596865f

Update app.py

Files changed (1):
  1. app.py +212 -45
app.py CHANGED
@@ -1,12 +1,163 @@
+# import os
+# import gradio as gr
+# from transformers import AutoModelForCausalLM, AutoTokenizer
+# import torch
+# from typing import List, Dict
+# import logging
+
+# # Set up logging to help us debug model loading and inference
+# logging.basicConfig(level=logging.INFO)
+# logger = logging.getLogger(__name__)
+
+# class MedicalAssistant:
+#     def __init__(self):
+#         """Initialize the medical assistant with model and tokenizer"""
+#         try:
+#             logger.info("Starting model initialization...")
+
+#             # Model configuration - adjust these based on your available compute
+#             self.model_name = "mradermacher/Llama3-Med42-8B-GGUF"
+#             self.max_length = 1048
+#             self.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+#             logger.info(f"Using device: {self.device}")
+
+#             # Load tokenizer first - this is typically faster and can catch issues early
+#             logger.info("Loading tokenizer...")
+#             self.tokenizer = AutoTokenizer.from_pretrained(
+#                 self.model_name,
+#                 padding_side="left",
+#                 trust_remote_code=True
+#             )
+
+#             # Set padding token if not set
+#             if self.tokenizer.pad_token is None:
+#                 self.tokenizer.pad_token = self.tokenizer.eos_token
+
+#             # Load model with memory optimizations
+#             logger.info("Loading model...")
+#             self.model = AutoModelForCausalLM.from_pretrained(
+#                 self.model_name,
+#                 torch_dtype=torch.float16,
+#                 device_map="auto",
+#                 load_in_8bit=True,
+#                 trust_remote_code=True
+#             )
+
+#             logger.info("Model initialization completed successfully!")
+
+#         except Exception as e:
+#             logger.error(f"Error during initialization: {str(e)}")
+#             raise
+
+#     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
+#         """Generate a response to the user's message"""
+#         try:
+#             # Prepare the prompt
+#             system_prompt = """You are a medical AI assistant. Respond to medical queries
+#             professionally and accurately. If you're unsure, always recommend consulting
+#             with a healthcare provider."""
+
+#             # Combine system prompt, chat history, and current message
+#             full_prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
+
+#             # Tokenize input
+#             inputs = self.tokenizer(
+#                 full_prompt,
+#                 return_tensors="pt",
+#                 padding=True,
+#                 truncation=True,
+#                 max_length=self.max_length
+#             ).to(self.device)
+
+#             # Generate response
+#             with torch.no_grad():
+#                 outputs = self.model.generate(
+#                     **inputs,
+#                     max_new_tokens=512,
+#                     do_sample=True,
+#                     temperature=0.7,
+#                     top_p=0.95,
+#                     pad_token_id=self.tokenizer.pad_token_id,
+#                     repetition_penalty=1.1
+#                 )
+
+#             # Decode and clean up response
+#             response = self.tokenizer.decode(
+#                 outputs[0],
+#                 skip_special_tokens=True
+#             )
+
+#             # Extract just the assistant's response
+#             response = response.split("Assistant:")[-1].strip()
+
+#             return response
+
+#         except Exception as e:
+#             logger.error(f"Error during response generation: {str(e)}")
+#             return f"I apologize, but I encountered an error. Please try again."
+
+# # Initialize the assistant
+# assistant = None
+
+# def initialize_assistant():
+#     """Initialize the assistant and handle any errors"""
+#     global assistant
+#     try:
+#         assistant = MedicalAssistant()
+#         return True
+#     except Exception as e:
+#         logger.error(f"Failed to initialize assistant: {str(e)}")
+#         return False
+
+# def chat_response(message: str, history: List[Dict]):
+#     """Handle chat messages and return responses"""
+#     global assistant
+
+#     # Check if assistant is initialized
+#     if assistant is None:
+#         if not initialize_assistant():
+#             return "I apologize, but I'm currently unavailable. Please try again later."
+
+#     try:
+#         return assistant.generate_response(message, history)
+#     except Exception as e:
+#         logger.error(f"Error in chat response: {str(e)}")
+#         return "I encountered an error. Please try again."
+
+# # Create Gradio interface
+# demo = gr.ChatInterface(
+#     fn=chat_response,
+#     title="Medical Assistant (Test Version)",
+#     description="""This is a test version of the medical assistant.
+#     Please use it to verify basic functionality.""",
+#     examples=[
+#         "What are the symptoms of malaria?",
+#         "How can I prevent type 2 diabetes?",
+#         "What should I do for a mild headache?"
+#     ],
+#     # retry_btn=None,
+#     # undo_btn=None,
+#     # clear_btn="Clear"
+# )
+
+# # Launch the interface
+# if __name__ == "__main__":
+#     demo.launch()
+
 import os
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 from typing import List, Dict
 import logging
+import traceback
 
-# Set up logging to help us debug model loading and inference
-logging.basicConfig(level=logging.INFO)
+# Configure detailed logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
 logger = logging.getLogger(__name__)
 
 class MedicalAssistant:
@@ -15,62 +166,81 @@ class MedicalAssistant:
         try:
             logger.info("Starting model initialization...")
 
-            # Model configuration - adjust these based on your available compute
+            # Model configuration
             self.model_name = "mradermacher/Llama3-Med42-8B-GGUF"
-            self.max_length = 1048
+            self.max_length = 2048
             self.device = "cuda" if torch.cuda.is_available() else "cpu"
 
             logger.info(f"Using device: {self.device}")
+            logger.info(f"Available CUDA devices: {torch.cuda.device_count() if torch.cuda.is_available() else 'None'}")
+            if torch.cuda.is_available():
+                logger.info(f"CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
 
-            # Load tokenizer first - this is typically faster and can catch issues early
-            logger.info("Loading tokenizer...")
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                self.model_name,
-                padding_side="left",
-                trust_remote_code=True
-            )
+            # First, verify the model exists
+            logger.info(f"Attempting to load tokenizer from {self.model_name}")
+            try:
+                self.tokenizer = AutoTokenizer.from_pretrained(
+                    self.model_name,
+                    trust_remote_code=True
+                )
+                logger.info("Tokenizer loaded successfully")
+            except Exception as e:
+                logger.error(f"Failed to load tokenizer: {str(e)}")
+                logger.error(traceback.format_exc())
+                raise
 
             # Set padding token if not set
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
+                logger.info("Set padding token to EOS token")
 
-            # Load model with memory optimizations
-            logger.info("Loading model...")
-            self.model = AutoModelForCausalLM.from_pretrained(
-                self.model_name,
-                torch_dtype=torch.float16,
-                device_map="auto",
-                load_in_8bit=True,
-                trust_remote_code=True
-            )
-
-            logger.info("Model initialization completed successfully!")
+            # Load model with more conservative settings
+            logger.info("Loading model - this may take a few minutes...")
+            try:
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    self.model_name,
+                    torch_dtype=torch.float16,
+                    device_map="auto",
+                    load_in_4bit=True,  # More conservative than 8-bit
+                    trust_remote_code=True,
+                    low_cpu_mem_usage=True
+                )
+                logger.info("Model loaded successfully!")
+            except Exception as e:
+                logger.error(f"Failed to load model: {str(e)}")
+                logger.error(traceback.format_exc())
+                raise
 
         except Exception as e:
-            logger.error(f"Error during initialization: {str(e)}")
+            logger.error(f"Initialization failed: {str(e)}")
+            logger.error(traceback.format_exc())
             raise
 
     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
         """Generate a response to the user's message"""
         try:
+            logger.info("Generating response for message")
+
             # Prepare the prompt
             system_prompt = """You are a medical AI assistant. Respond to medical queries
             professionally and accurately. If you're unsure, always recommend consulting
             with a healthcare provider."""
 
-            # Combine system prompt, chat history, and current message
             full_prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
+            logger.info("Tokenizing input")
 
-            # Tokenize input
             inputs = self.tokenizer(
                 full_prompt,
                 return_tensors="pt",
                 padding=True,
                 truncation=True,
                 max_length=self.max_length
-            ).to(self.device)
+            )
+
+            # Move inputs to the correct device
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
 
-            # Generate response
+            logger.info("Generating response")
             with torch.no_grad():
                 outputs = self.model.generate(
                     **inputs,
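Editor's note on the model-loading change in this hunk: passing bare `load_in_4bit=True` (or the `load_in_8bit=True` it replaces) to `from_pretrained` is deprecated in recent transformers releases, and both require the bitsandbytes package on a CUDA machine. A minimal sketch of the supported path, assuming a standard safetensors checkpoint such as the full-precision base repo m42-health/Llama3-Med42-8B (an assumption — this commit points at a GGUF repo, see the note after the diff):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Supported quantization path: an explicit BitsAndBytesConfig passed via
# quantization_config, instead of the deprecated bare load_in_* kwargs.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    "m42-health/Llama3-Med42-8B",  # assumption: non-GGUF base checkpoint
    quantization_config=quant_config,
    device_map="auto",
)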
@@ -82,65 +252,62 @@
                     repetition_penalty=1.1
                 )
 
-            # Decode and clean up response
-            response = self.tokenizer.decode(
-                outputs[0],
-                skip_special_tokens=True
-            )
-
-            # Extract just the assistant's response
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             response = response.split("Assistant:")[-1].strip()
 
+            logger.info("Response generated successfully")
             return response
 
         except Exception as e:
             logger.error(f"Error during response generation: {str(e)}")
-            return f"I apologize, but I encountered an error. Please try again."
+            logger.error(traceback.format_exc())
+            return f"I apologize, but I encountered an error: {str(e)}"
 
-# Initialize the assistant
+# Global variable for the assistant
 assistant = None
 
 def initialize_assistant():
     """Initialize the assistant and handle any errors"""
     global assistant
     try:
+        logger.info("Attempting to initialize assistant")
        assistant = MedicalAssistant()
+        logger.info("Assistant initialized successfully")
         return True
     except Exception as e:
         logger.error(f"Failed to initialize assistant: {str(e)}")
+        logger.error(traceback.format_exc())
         return False
 
 def chat_response(message: str, history: List[Dict]):
     """Handle chat messages and return responses"""
     global assistant
 
-    # Check if assistant is initialized
     if assistant is None:
+        logger.info("Assistant not initialized, attempting initialization")
         if not initialize_assistant():
-            return "I apologize, but I'm currently unavailable. Please try again later."
+            return "I apologize, but I'm currently unavailable. The error has been logged for investigation."
 
     try:
         return assistant.generate_response(message, history)
     except Exception as e:
         logger.error(f"Error in chat response: {str(e)}")
-        return "I encountered an error. Please try again."
+        logger.error(traceback.format_exc())
+        return f"I encountered an error: {str(e)}"
 
 # Create Gradio interface
 demo = gr.ChatInterface(
     fn=chat_response,
     title="Medical Assistant (Test Version)",
-    description="""This is a test version of the medical assistant.
-    Please use it to verify basic functionality.""",
+    description="This is a test version of the medical assistant. Please use it to verify basic functionality.",
     examples=[
         "What are the symptoms of malaria?",
         "How can I prevent type 2 diabetes?",
         "What should I do for a mild headache?"
-    ],
-    # retry_btn=None,
-    # undo_btn=None,
-    # clear_btn="Clear"
+    ]
 )
 
 # Launch the interface
 if __name__ == "__main__":
+    logger.info("Starting the application")
     demo.launch()
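Editor's note on the prompt handling kept by this commit: Llama-3-based instruct checkpoints generally respond better through the tokenizer's built-in chat template than through a hand-rolled "User:/Assistant:" prompt, and decoding only the newly generated tokens removes the need to split on "Assistant:". A sketch of how generate_response could build the prompt, assuming the checkpoint ships a chat template (standard for Llama-3 instruct models):

# Sketch: use the tokenizer's chat template instead of string concatenation.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": message},
]
input_ids = self.tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(self.device)

with torch.no_grad():
    outputs = self.model.generate(input_ids, max_new_tokens=512)

# Decode only the tokens after the prompt, so no "Assistant:" split is needed.
response = self.tokenizer.decode(
    outputs[0][input_ids.shape[-1]:], skip_special_tokens=True
)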
 
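Editor's note on the unchanged model_name: mradermacher/Llama3-Med42-8B-GGUF is a GGUF quantization repo, and AutoModelForCausalLM.from_pretrained cannot load it without naming a specific .gguf file. Recent transformers releases (4.41 and later) can dequantize Llama-architecture GGUF files via the gguf_file argument; the filename below is a hypothetical placeholder and should be checked against the repo's file list:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "mradermacher/Llama3-Med42-8B-GGUF"
gguf_file = "Llama3-Med42-8B.Q4_K_M.gguf"  # hypothetical filename - check the repo

# Note: transformers dequantizes the GGUF weights back to torch tensors;
# for genuinely quantized inference, llama-cpp-python is the usual route.
tokenizer = AutoTokenizer.from_pretrained(repo_id, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(repo_id, gguf_file=gguf_file)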