benardo0 committed
Commit d40e9bd · verified · 1 Parent(s): 10ffd90

Update app.py

Files changed (1)
  1. app.py +32 -213
app.py CHANGED
@@ -1,159 +1,11 @@
-# import os
-# import gradio as gr
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# import torch
-# from typing import List, Dict
-# import logging
-
-# # Set up logging to help us debug model loading and inference
-# logging.basicConfig(level=logging.INFO)
-# logger = logging.getLogger(__name__)
-
-# class MedicalAssistant:
-#     def __init__(self):
-#         """Initialize the medical assistant with model and tokenizer"""
-#         try:
-#             logger.info("Starting model initialization...")
-
-#             # Model configuration - adjust these based on your available compute
-#             self.model_name = "mradermacher/Llama3-Med42-8B-GGUF"
-#             self.max_length = 1048
-#             self.device = "cuda" if torch.cuda.is_available() else "cpu"
-
-#             logger.info(f"Using device: {self.device}")
-
-#             # Load tokenizer first - this is typically faster and can catch issues early
-#             logger.info("Loading tokenizer...")
-#             self.tokenizer = AutoTokenizer.from_pretrained(
-#                 self.model_name,
-#                 padding_side="left",
-#                 trust_remote_code=True
-#             )
-
-#             # Set padding token if not set
-#             if self.tokenizer.pad_token is None:
-#                 self.tokenizer.pad_token = self.tokenizer.eos_token
-
-#             # Load model with memory optimizations
-#             logger.info("Loading model...")
-#             self.model = AutoModelForCausalLM.from_pretrained(
-#                 self.model_name,
-#                 torch_dtype=torch.float16,
-#                 device_map="auto",
-#                 load_in_8bit=True,
-#                 trust_remote_code=True
-#             )
-
-#             logger.info("Model initialization completed successfully!")
-
-#         except Exception as e:
-#             logger.error(f"Error during initialization: {str(e)}")
-#             raise
-
-#     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
-#         """Generate a response to the user's message"""
-#         try:
-#             # Prepare the prompt
-#             system_prompt = """You are a medical AI assistant. Respond to medical queries
-#             professionally and accurately. If you're unsure, always recommend consulting
-#             with a healthcare provider."""
-
-#             # Combine system prompt, chat history, and current message
-#             full_prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
-
-#             # Tokenize input
-#             inputs = self.tokenizer(
-#                 full_prompt,
-#                 return_tensors="pt",
-#                 padding=True,
-#                 truncation=True,
-#                 max_length=self.max_length
-#             ).to(self.device)
-
-#             # Generate response
-#             with torch.no_grad():
-#                 outputs = self.model.generate(
-#                     **inputs,
-#                     max_new_tokens=512,
-#                     do_sample=True,
-#                     temperature=0.7,
-#                     top_p=0.95,
-#                     pad_token_id=self.tokenizer.pad_token_id,
-#                     repetition_penalty=1.1
-#                 )
-
-#             # Decode and clean up response
-#             response = self.tokenizer.decode(
-#                 outputs[0],
-#                 skip_special_tokens=True
-#             )
-
-#             # Extract just the assistant's response
-#             response = response.split("Assistant:")[-1].strip()
-
-#             return response
-
-#         except Exception as e:
-#             logger.error(f"Error during response generation: {str(e)}")
-#             return f"I apologize, but I encountered an error. Please try again."
-
-# # Initialize the assistant
-# assistant = None
-
-# def initialize_assistant():
-#     """Initialize the assistant and handle any errors"""
-#     global assistant
-#     try:
-#         assistant = MedicalAssistant()
-#         return True
-#     except Exception as e:
-#         logger.error(f"Failed to initialize assistant: {str(e)}")
-#         return False
-
-# def chat_response(message: str, history: List[Dict]):
-#     """Handle chat messages and return responses"""
-#     global assistant
-
-#     # Check if assistant is initialized
-#     if assistant is None:
-#         if not initialize_assistant():
-#             return "I apologize, but I'm currently unavailable. Please try again later."
-
-#     try:
-#         return assistant.generate_response(message, history)
-#     except Exception as e:
-#         logger.error(f"Error in chat response: {str(e)}")
-#         return "I encountered an error. Please try again."
-
-# # Create Gradio interface
-# demo = gr.ChatInterface(
-#     fn=chat_response,
-#     title="Medical Assistant (Test Version)",
-#     description="""This is a test version of the medical assistant.
-#     Please use it to verify basic functionality.""",
-#     examples=[
-#         "What are the symptoms of malaria?",
-#         "How can I prevent type 2 diabetes?",
-#         "What should I do for a mild headache?"
-#     ],
-#     # retry_btn=None,
-#     # undo_btn=None,
-#     # clear_btn="Clear"
-# )
-
-# # Launch the interface
-# if __name__ == "__main__":
-#     demo.launch()
-
 import os
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-import torch
 from typing import List, Dict
 import logging
 import traceback
 
-# Set up logging to help us understand what's happening in our application
+# Set up basic logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
@@ -163,40 +15,39 @@ logger = logging.getLogger(__name__)
 class MedicalAssistant:
     def __init__(self):
         """
-        Initialize a basic medical assistant for CPU-only environments.
-        This version uses standard model loading without quantization for maximum compatibility.
+        Initialize the medical assistant with the pre-quantized model.
+        Designed for CPU-only environment on Hugging Face's free tier.
         """
         try:
-            logger.info("Starting basic model initialization...")
+            logger.info("Starting model initialization...")
 
-            # Define our model configuration
+            # Using the pre-quantized model - no need for additional quantization
            self.model_name = "emircanerol/Llama3-Med42-8B-4bit"
             self.max_length = 2048
 
-            # First load the tokenizer since it's lighter on memory
             logger.info("Loading tokenizer...")
             self.tokenizer = AutoTokenizer.from_pretrained(
                 self.model_name,
-                token=os.getenv('HUGGING_FACE_TOKEN')
+                trust_remote_code=True
             )
 
-            # Handle padding token setup
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            logger.info("Loading model...")
+            self.model = AutoModelForCausalLM.from_pretrained(
+                self.model_name,
+                trust_remote_code=True
+            )
 
-            # Initialize pipeline with basic CPU settings
-            logger.info("Initializing CPU-based pipeline...")
+            logger.info("Creating pipeline...")
             self.pipe = pipeline(
                 "text-generation",
-                model=self.model_name,
-                token=os.getenv('HUGGING_FACE_TOKEN'),
-                device_map="cpu",  # Explicitly use CPU
-                torch_dtype=torch.float32,  # Use standard precision
-                use_safetensors=True,  # Enable safetensors for better memory handling
-                # Removed all quantization settings
+                model=self.model,
+                tokenizer=self.tokenizer
             )
-
-            logger.info("Medical Assistant initialized successfully in basic CPU mode!")
+
+            logger.info("Initialization completed successfully!")
 
         except Exception as e:
             logger.error(f"Initialization failed: {str(e)}")
@@ -204,68 +55,38 @@ class MedicalAssistant:
             raise
 
     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
-        """
-        Generate responses using basic CPU-friendly settings.
-        This method focuses on stability over speed, using conservative parameters.
-        """
         try:
-            logger.info("Preparing message for generation")
-
-            # Create our medical context prompt
-            system_prompt = """You are a medical AI assistant trained on medical knowledge.
-            Provide accurate, professional medical guidance while acknowledging limitations.
-            Always recommend consulting healthcare providers for specific medical advice."""
+            system_prompt = """You are a medical AI assistant. Provide accurate,
+            professional medical guidance. Always recommend consulting healthcare
+            providers for specific medical advice."""
 
-            # Format our conversation for the model
-            messages = [
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": message}
-            ]
-
-            # Add recent chat history if available
-            if chat_history:
-                # Only keep recent history to manage memory
-                recent_history = chat_history[-2:]  # Keep last 2 exchanges
-                for chat in recent_history:
-                    messages.append({
-                        "role": "user" if chat["role"] == "user" else "assistant",
-                        "content": chat["content"]
-                    })
-
-            logger.info("Generating response with basic settings")
 
-            # Generate with conservative parameters
+            prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
+
             response = self.pipe(
-                messages,
-                max_new_tokens=100,  # Conservative token limit
+                prompt,
+                max_new_tokens=256,
                 do_sample=True,
                 temperature=0.7,
                 top_p=0.95,
-                num_beams=1,  # Single beam for simplicity
+                num_return_sequences=1,
                 pad_token_id=self.tokenizer.pad_token_id
             )[0]["generated_text"]
 
-            # Clean up our response
-            response = response.split("assistant:")[-1].strip()
-
-            logger.info("Response generated successfully")
-            return response
+            return response.split("Assistant:")[-1].strip()
 
         except Exception as e:
             logger.error(f"Error during response generation: {str(e)}")
             logger.error(traceback.format_exc())
             return f"I apologize, but I encountered an error: {str(e)}"
 
-# Initialize our assistant
+# Global assistant instance
 assistant = None
 
 def initialize_assistant():
-    """Initialize the assistant with careful error handling"""
     global assistant
     try:
-        logger.info("Attempting to initialize basic CPU assistant")
+        logger.info("Attempting to initialize assistant")
         assistant = MedicalAssistant()
-        logger.info("Assistant initialized successfully")
         return True
     except Exception as e:
         logger.error(f"Failed to initialize assistant: {str(e)}")
@@ -273,7 +94,6 @@ def initialize_assistant():
         return False
 
 def chat_response(message: str, history: List[Dict]):
-    """Handle chat interactions with proper error recovery"""
     global assistant
 
     if assistant is None:
@@ -288,12 +108,11 @@ def chat_response(message: str, history: List[Dict]):
         logger.error(traceback.format_exc())
         return f"I encountered an error: {str(e)}"
 
-# Create our Gradio interface
+# Create the Gradio interface
 demo = gr.ChatInterface(
     fn=chat_response,
-    title="Medical Assistant (Basic CPU Version)",
-    description="""This medical assistant provides medical guidance using a basic CPU configuration.
-    Responses may take longer but will be stable and reliable.""",
+    title="NURSEOGE",
+    description="This medical assistant provides guidance and information about health-related queries.",
     examples=[
         "What are the symptoms of malaria?",
         "How can I prevent type 2 diabetes?",
@@ -301,7 +120,7 @@ demo = gr.ChatInterface(
     ]
 )
 
-# Launch our interface
+# Launch the interface
 if __name__ == "__main__":
-    logger.info("Starting the basic CPU application")
+    logger.info("Starting the application")
     demo.launch()
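
Review note: the commit replaces the string-based pipeline setup (which pinned the model to CPU with float32 and re-resolved the checkpoint by name) with an explicit AutoModelForCausalLM/AutoTokenizer load whose objects are handed to the pipeline. A minimal sketch of how the refactored class could be exercised outside Gradio, assuming transformers and its dependencies are installed and the emircanerol/Llama3-Med42-8B-4bit checkpoint is reachable; this mirrors the committed code rather than adding behavior:

# Hedged usage sketch; not part of the commit. Importing app runs the
# module-level setup but does not start the UI, since demo.launch() is
# guarded by __name__ == "__main__".
from app import MedicalAssistant

assistant = MedicalAssistant()  # loads tokenizer, model, and pipeline
print(assistant.generate_response("What are the symptoms of malaria?"))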
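Design note: generate_response now builds a plain f-string prompt ("User: ... Assistant:") instead of the earlier messages list, and chat_history is accepted but no longer folded into the prompt. For Llama-3-family checkpoints that ship a chat template, template-based formatting is the other common option; the sketch below shows that alternative for comparison only, assuming this tokenizer actually provides a chat template — it is not what the commit does:

# Hedged alternative sketch, assuming the tokenizer ships a chat template.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("emircanerol/Llama3-Med42-8B-4bit")
messages = [
    {"role": "system", "content": "You are a medical AI assistant."},
    {"role": "user", "content": "What are the symptoms of malaria?"},
]
# Renders the conversation with the model's own special tokens and appends
# the assistant header so generation starts in the right place.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)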
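Compatibility note: chat_response is annotated as receiving history: List[Dict], but gr.ChatInterface only delivers history in that role/content dict shape when constructed with type="messages" in recent Gradio releases; older defaults pass (message, response) tuples. A minimal sketch of that assumption — the type="messages" argument is an inference about the intended configuration, not something this commit sets:

# Hedged sketch; type="messages" is an assumption, not part of the commit.
import gradio as gr

def chat_response(message, history):
    # history: list of {"role": ..., "content": ...} dicts under type="messages"
    return f"Echo: {message}"

demo = gr.ChatInterface(fn=chat_response, type="messages", title="NURSEOGE")

if __name__ == "__main__":
    demo.launch()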