benardo0 committed on
Commit
b1de9b2
·
verified ·
1 Parent(s): c8d430c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -80
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  from typing import List, Optional, Dict
4
  import gradio as gr
@@ -7,9 +7,15 @@ from enum import Enum
7
  import re
8
  import os
9
  import time
 
10
  from huggingface_hub import hf_hub_download
11
 
12
- # We'll import llama_cpp in a way that provides better error messages
 
 
 
 
 
13
  try:
14
  from llama_cpp import Llama
15
  LLAMA_IMPORT_ERROR = None
@@ -33,7 +39,7 @@ class ChatResponse(BaseModel):
33
  response: str
34
  finished: bool
35
 
36
- # Standard health assessment questions
37
  HEALTH_ASSESSMENT_QUESTIONS = [
38
  "What are your current symptoms and how long have you been experiencing them?",
39
  "Do you have any pre-existing medical conditions or chronic illnesses?",
@@ -42,6 +48,7 @@ HEALTH_ASSESSMENT_QUESTIONS = [
42
  "Have you had any similar symptoms in the past? If yes, what treatments worked?"
43
  ]
44
 
 
45
  NURSE_OGE_IDENTITY = """
46
  You are Nurse Oge, a medical AI assistant focused on serving patients in Nigeria. Always be empathetic,
47
  professional, and thorough in your assessments. When asked about your identity, explain that you are
@@ -54,19 +61,14 @@ class NurseOgeAssistant:
54
  if LLAMA_IMPORT_ERROR:
55
  raise ImportError(f"Cannot initialize NurseOgeAssistant due to llama_cpp import error: {LLAMA_IMPORT_ERROR}")
56
 
57
- # Download the model file
58
  try:
59
- model_path = hf_hub_download(
60
- repo_id="mradermacher/Llama3-Med42-8B-GGUF",
61
- filename="Llama3-Med42-8B.IQ3_M.gguf",
62
- resume_download=True
63
- )
64
-
65
- # Initialize the model with the downloaded file
66
- self.llm = Llama(
67
- model_path=model_path,
68
- n_ctx=2048, # Context window
69
- n_threads=4 # Number of CPU threads to use
70
  )
71
 
72
  except Exception as e:
@@ -75,8 +77,6 @@ class NurseOgeAssistant:
75
  self.consultation_states = {}
76
  self.gathered_info = {}
77
 
78
- # ... (rest of the NurseOgeAssistant class methods remain the same)
79
-
80
  def _is_identity_question(self, message: str) -> bool:
81
  identity_patterns = [
82
  r"who are you",
@@ -107,72 +107,94 @@ class NurseOgeAssistant:
107
  return None
108
 
109
  async def process_message(self, conversation_id: str, message: str, history: List[Dict]) -> ChatResponse:
110
- # Initialize state if new conversation
111
- if conversation_id not in self.consultation_states:
112
- self.consultation_states[conversation_id] = ConsultationState.INITIAL
113
-
114
- # Handle identity questions
115
- if self._is_identity_question(message):
116
- return ChatResponse(
117
- response="I am Nurse Oge, a medical AI assistant dedicated to helping patients in Nigeria. "
118
- "I'm here to provide medical guidance while ensuring I gather all necessary health information "
119
- "for accurate assessments.",
120
- finished=True
121
- )
122
 
123
- # Handle location questions
124
- if self._is_location_question(message):
125
- return ChatResponse(
126
- response="I am based in Nigeria and specifically trained to serve Nigerian communities, "
127
- "taking into account local healthcare contexts and needs.",
128
- finished=True
129
- )
 
130
 
131
- # Start health assessment if it's a medical query
132
- if self.consultation_states[conversation_id] == ConsultationState.INITIAL:
133
- self.consultation_states[conversation_id] = ConsultationState.GATHERING_INFO
134
- next_question = self._get_next_assessment_question(conversation_id)
135
- return ChatResponse(
136
- response=f"Before I can provide any medical advice, I need to gather some important health information. "
137
- f"{next_question}",
138
- finished=False
139
- )
140
 
141
- # Continue gathering information
142
- if self.consultation_states[conversation_id] == ConsultationState.GATHERING_INFO:
143
- self.gathered_info[conversation_id].append(message)
144
- next_question = self._get_next_assessment_question(conversation_id)
145
-
146
- if next_question:
147
  return ChatResponse(
148
- response=f"Thank you for that information. {next_question}",
 
149
  finished=False
150
  )
151
- else:
152
- self.consultation_states[conversation_id] = ConsultationState.DIAGNOSIS
153
- context = "\n".join([
154
- f"Q: {q}\nA: {a}" for q, a in
155
- zip(HEALTH_ASSESSMENT_QUESTIONS, self.gathered_info[conversation_id])
156
- ])
157
-
158
- messages = [
159
- {"role": "system", "content": NURSE_OGE_IDENTITY},
160
- {"role": "user", "content": f"Based on the following patient information, provide a thorough assessment and recommendations:\n\n{context}\n\nOriginal query: {message}"}
161
- ]
162
-
163
- response = self.llm.create_chat_completion(
164
- messages=messages,
165
- max_tokens=1024,
166
- temperature=0.7
167
- )
168
-
169
- self.consultation_states[conversation_id] = ConsultationState.INITIAL
170
- self.gathered_info[conversation_id] = []
171
 
172
- return ChatResponse(
173
- response=response['choices'][0]['message']['content'],
174
- finished=True
175
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  # Initialize FastAPI
178
  app = FastAPI()
@@ -180,6 +202,14 @@ app = FastAPI()
180
  # Create a global variable for our assistant
181
  nurse_oge = None
182
 
 
 
 
 
 
 
 
 
183
  @app.on_event("startup")
184
  async def startup_event():
185
  global nurse_oge
@@ -187,7 +217,10 @@ async def startup_event():
187
  nurse_oge = NurseOgeAssistant()
188
  except Exception as e:
189
  print(f"Failed to initialize NurseOgeAssistant: {e}")
190
- # We'll continue running but the /chat endpoint will return errors
 
 
 
191
 
192
  @app.post("/chat")
193
  async def chat_endpoint(request: ChatRequest):
@@ -197,15 +230,13 @@ async def chat_endpoint(request: ChatRequest):
197
  detail="The medical assistant is not available at the moment. Please try again later."
198
  )
199
 
200
- conversation_id = "default"
201
-
202
  if not request.messages:
203
  raise HTTPException(status_code=400, detail="No messages provided")
204
 
205
  latest_message = request.messages[-1].content
206
 
207
  response = await nurse_oge.process_message(
208
- conversation_id=conversation_id,
209
  message=latest_message,
210
  history=request.messages[:-1]
211
  )
@@ -220,6 +251,7 @@ def gradio_chat(message, history):
220
  response = nurse_oge.process_message("gradio_user", message, history)
221
  return response.response
222
 
 
223
  demo = gr.ChatInterface(
224
  fn=gradio_chat,
225
  title="Nurse Oge",
 
1
+ from fastapi import FastAPI, HTTPException, Request
2
  from pydantic import BaseModel
3
  from typing import List, Optional, Dict
4
  import gradio as gr
 
7
  import re
8
  import os
9
  import time
10
+ import gc
11
  from huggingface_hub import hf_hub_download
12
 
13
+ # Environment variables for configuration
14
+ MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "mradermacher/Llama3-Med42-8B-GGUF")
15
+ MODEL_FILENAME = os.getenv("MODEL_FILENAME", "Llama3-Med42-8B.Q4_K_M.gguf")
16
+ N_THREADS = int(os.getenv("N_THREADS", "4"))
17
+
18
+ # Import llama_cpp with error handling for better debugging
19
  try:
20
  from llama_cpp import Llama
21
  LLAMA_IMPORT_ERROR = None
 
39
  response: str
40
  finished: bool
41
 
42
+ # Standard health assessment questions for thorough patient evaluation
43
  HEALTH_ASSESSMENT_QUESTIONS = [
44
  "What are your current symptoms and how long have you been experiencing them?",
45
  "Do you have any pre-existing medical conditions or chronic illnesses?",
 
48
  "Have you had any similar symptoms in the past? If yes, what treatments worked?"
49
  ]
50
 
51
+ # Define the AI assistant's identity and role
52
  NURSE_OGE_IDENTITY = """
53
  You are Nurse Oge, a medical AI assistant focused on serving patients in Nigeria. Always be empathetic,
54
  professional, and thorough in your assessments. When asked about your identity, explain that you are
 
61
  if LLAMA_IMPORT_ERROR:
62
  raise ImportError(f"Cannot initialize NurseOgeAssistant due to llama_cpp import error: {LLAMA_IMPORT_ERROR}")
63
 
 
64
  try:
65
+ # Initialize the model using from_pretrained for better compatibility with free tier
66
+ self.llm = Llama.from_pretrained(
67
+ repo_id=MODEL_REPO_ID,
68
+ filename=MODEL_FILENAME,
69
+ n_ctx=2048, # Context window size
70
+ n_threads=N_THREADS, # Adjust based on available CPU resources
71
+ n_gpu_layers=0 # CPU-only inference for free tier
 
 
 
 
72
  )
73
 
74
  except Exception as e:
 
77
  self.consultation_states = {}
78
  self.gathered_info = {}
79
 
 
 
80
  def _is_identity_question(self, message: str) -> bool:
81
  identity_patterns = [
82
  r"who are you",
 
107
  return None
108
 
109
  async def process_message(self, conversation_id: str, message: str, history: List[Dict]) -> ChatResponse:
110
+ try:
111
+ # Initialize state for new conversations
112
+ if conversation_id not in self.consultation_states:
113
+ self.consultation_states[conversation_id] = ConsultationState.INITIAL
 
 
 
 
 
 
 
 
114
 
115
+ # Handle identity questions
116
+ if self._is_identity_question(message):
117
+ return ChatResponse(
118
+ response="I am Nurse Oge, a medical AI assistant dedicated to helping patients in Nigeria. "
119
+ "I'm here to provide medical guidance while ensuring I gather all necessary health information "
120
+ "for accurate assessments.",
121
+ finished=True
122
+ )
123
 
124
+ # Handle location questions
125
+ if self._is_location_question(message):
126
+ return ChatResponse(
127
+ response="I am based in Nigeria and specifically trained to serve Nigerian communities, "
128
+ "taking into account local healthcare contexts and needs.",
129
+ finished=True
130
+ )
 
 
131
 
132
+ # Start health assessment for medical queries
133
+ if self.consultation_states[conversation_id] == ConsultationState.INITIAL:
134
+ self.consultation_states[conversation_id] = ConsultationState.GATHERING_INFO
135
+ next_question = self._get_next_assessment_question(conversation_id)
 
 
136
  return ChatResponse(
137
+ response=f"Before I can provide any medical advice, I need to gather some important health information. "
138
+ f"{next_question}",
139
  finished=False
140
  )
141
+
142
+ # Continue gathering information
143
+ if self.consultation_states[conversation_id] == ConsultationState.GATHERING_INFO:
144
+ self.gathered_info[conversation_id].append(message)
145
+ next_question = self._get_next_assessment_question(conversation_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
+ if next_question:
148
+ return ChatResponse(
149
+ response=f"Thank you for that information. {next_question}",
150
+ finished=False
151
+ )
152
+ else:
153
+ self.consultation_states[conversation_id] = ConsultationState.DIAGNOSIS
154
+ context = "\n".join([
155
+ f"Q: {q}\nA: {a}" for q, a in
156
+ zip(HEALTH_ASSESSMENT_QUESTIONS, self.gathered_info[conversation_id])
157
+ ])
158
+
159
+ messages = [
160
+ {"role": "system", "content": NURSE_OGE_IDENTITY},
161
+ {"role": "user", "content": f"Based on the following patient information, provide a thorough assessment and recommendations:\n\n{context}\n\nOriginal query: {message}"}
162
+ ]
163
+
164
+ # Implement retry logic for API calls
165
+ max_retries = 3
166
+ retry_delay = 2
167
+
168
+ for attempt in range(max_retries):
169
+ try:
170
+ response = self.llm.create_chat_completion(
171
+ messages=messages,
172
+ max_tokens=512, # Reduced for free tier
173
+ temperature=0.7
174
+ )
175
+ break
176
+ except Exception as e:
177
+ if attempt < max_retries - 1:
178
+ time.sleep(retry_delay)
179
+ continue
180
+ return ChatResponse(
181
+ response="I'm sorry, I'm experiencing some technical difficulties. Please try again in a moment.",
182
+ finished=True
183
+ )
184
+
185
+ self.consultation_states[conversation_id] = ConsultationState.INITIAL
186
+ self.gathered_info[conversation_id] = []
187
+
188
+ return ChatResponse(
189
+ response=response['choices'][0]['message']['content'],
190
+ finished=True
191
+ )
192
+
193
+ except Exception as e:
194
+ return ChatResponse(
195
+ response=f"An error occurred while processing your request. Please try again.",
196
+ finished=True
197
+ )
198
 
199
  # Initialize FastAPI
200
  app = FastAPI()
 
202
  # Create a global variable for our assistant
203
  nurse_oge = None
204
 
205
+ # Add memory management middleware
206
@app.middleware("http")
async def add_memory_management(request: Request, call_next):
    """Run a garbage-collection pass before and after every HTTP request.

    Args:
        request: The incoming request, passed through unchanged.
        call_next: Callable that forwards the request to the next handler
            and returns its response.

    Returns:
        The response produced by ``call_next``.
    """
    gc.collect()  # Force garbage collection before processing request
    try:
        return await call_next(request)
    finally:
        # Clean up after the request even when the handler raised, so a
        # failing request does not skip the collection pass.
        gc.collect()
212
+
213
  @app.on_event("startup")
214
  async def startup_event():
215
  global nurse_oge
 
217
  nurse_oge = NurseOgeAssistant()
218
  except Exception as e:
219
  print(f"Failed to initialize NurseOgeAssistant: {e}")
220
+
221
@app.get("/health")
async def health_check():
    """Report that the service is up and whether the assistant is loaded.

    Returns:
        A dict with a constant ``"status"`` of ``"healthy"`` and a boolean
        ``"model_loaded"`` that is True once the module-level ``nurse_oge``
        has been set.
    """
    model_ready = nurse_oge is not None
    payload = {"status": "healthy"}
    payload["model_loaded"] = model_ready
    return payload
224
 
225
  @app.post("/chat")
226
  async def chat_endpoint(request: ChatRequest):
 
230
  detail="The medical assistant is not available at the moment. Please try again later."
231
  )
232
 
 
 
233
  if not request.messages:
234
  raise HTTPException(status_code=400, detail="No messages provided")
235
 
236
  latest_message = request.messages[-1].content
237
 
238
  response = await nurse_oge.process_message(
239
+ conversation_id="default",
240
  message=latest_message,
241
  history=request.messages[:-1]
242
  )
 
251
  response = nurse_oge.process_message("gradio_user", message, history)
252
  return response.response
253
 
254
+ # Create and configure Gradio interface
255
  demo = gr.ChatInterface(
256
  fn=gradio_chat,
257
  title="Nurse Oge",