Spaces:
Sleeping
Sleeping
YanBoChen
committed on
Commit
·
5b7c9f8
1
Parent(s):
e84171b
Add dual task processing method for medical query analysis in llm_Med42_70BClient and update user_prompt to utilize it
Browse files- src/llm_clients.py +110 -0
- src/user_prompt.py +4 -4
src/llm_clients.py
CHANGED
@@ -273,6 +273,116 @@ class llm_Med42_70BClient:
|
|
273 |
'latency': latency # Include latency even for error cases
|
274 |
}
|
275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
def extract_medical_keywords_for_customization(
|
277 |
self,
|
278 |
query: str,
|
|
|
273 |
'latency': latency # Include latency even for error cases
|
274 |
}
|
275 |
|
276 |
+
def analyze_medical_query_dual_task(
|
277 |
+
self,
|
278 |
+
user_query: str,
|
279 |
+
max_tokens: int = 100,
|
280 |
+
timeout: Optional[float] = None
|
281 |
+
) -> Dict[str, Union[str, float]]:
|
282 |
+
"""
|
283 |
+
Analyze medical query with dual task processing (Level 2+4 Combined).
|
284 |
+
|
285 |
+
Performs both condition extraction and medical query validation in single LLM call.
|
286 |
+
Specifically designed for user_prompt.py Level 2+4 combined processing.
|
287 |
+
|
288 |
+
Args:
|
289 |
+
user_query: Original user medical query (not wrapped prompt)
|
290 |
+
max_tokens: Maximum tokens to generate
|
291 |
+
timeout: Specific API call timeout
|
292 |
+
|
293 |
+
Returns:
|
294 |
+
Dict containing dual task results with structured format
|
295 |
+
"""
|
296 |
+
import time
|
297 |
+
|
298 |
+
# Start timing
|
299 |
+
start_time = time.time()
|
300 |
+
|
301 |
+
try:
|
302 |
+
self.logger.info(f"Calling Medical LLM (Dual Task) with query: {user_query}")
|
303 |
+
|
304 |
+
# Prepare chat completion request with dual task system prompt
|
305 |
+
response = self.client.chat.completions.create(
|
306 |
+
model="m42-health/Llama3-Med42-70B",
|
307 |
+
messages=[
|
308 |
+
{
|
309 |
+
"role": "system",
|
310 |
+
"content": """Medical Query Analysis - Dual Task Processing:
|
311 |
+
|
312 |
+
1. Extract primary medical condition (if specific condition identifiable)
|
313 |
+
2. Determine if this is a medical-related query
|
314 |
+
|
315 |
+
RESPONSE FORMAT:
|
316 |
+
MEDICAL: YES/NO
|
317 |
+
CONDITION: [specific condition name or "NONE"]
|
318 |
+
CONFIDENCE: [0.1-1.0]
|
319 |
+
|
320 |
+
EXAMPLES:
|
321 |
+
- "chest pain and shortness of breath" → MEDICAL: YES, CONDITION: Acute Coronary Syndrome, CONFIDENCE: 0.9
|
322 |
+
- "how to cook pasta safely" → MEDICAL: NO, CONDITION: NONE, CONFIDENCE: 0.95
|
323 |
+
- "persistent headache treatment options" → MEDICAL: YES, CONDITION: Headache Disorder, CONFIDENCE: 0.8
|
324 |
+
- "feeling unwell lately" → MEDICAL: YES, CONDITION: NONE, CONFIDENCE: 0.6
|
325 |
+
|
326 |
+
Return ONLY the specified format."""
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"role": "user",
|
330 |
+
"content": user_query
|
331 |
+
}
|
332 |
+
],
|
333 |
+
max_tokens=max_tokens,
|
334 |
+
temperature=0 # Ensure deterministic responses
|
335 |
+
)
|
336 |
+
|
337 |
+
# Calculate latency
|
338 |
+
end_time = time.time()
|
339 |
+
latency = end_time - start_time
|
340 |
+
|
341 |
+
# Extract the response text
|
342 |
+
response_text = response.choices[0].message.content or ""
|
343 |
+
|
344 |
+
# Log raw response and latency
|
345 |
+
self.logger.info(f"Raw LLM Dual Task Response: {response_text}")
|
346 |
+
self.logger.info(f"Dual Task Query Latency: {latency:.4f} seconds")
|
347 |
+
|
348 |
+
# Detect abnormal response
|
349 |
+
if self._is_abnormal_response(response_text):
|
350 |
+
self.logger.error(f"❌ Abnormal LLM dual task response detected: {response_text[:50]}...")
|
351 |
+
return {
|
352 |
+
'extracted_condition': '',
|
353 |
+
'confidence': '0',
|
354 |
+
'error': 'Abnormal LLM dual task response detected',
|
355 |
+
'raw_response': response_text,
|
356 |
+
'latency': latency
|
357 |
+
}
|
358 |
+
|
359 |
+
# Return structured response for Level 2+4 processing
|
360 |
+
return {
|
361 |
+
'extracted_condition': response_text, # For compatibility with existing logging
|
362 |
+
'confidence': '0.8', # Default confidence for successful dual task
|
363 |
+
'raw_response': response_text, # Contains MEDICAL/CONDITION/CONFIDENCE format
|
364 |
+
'latency': latency,
|
365 |
+
'dual_task_mode': True # Flag to indicate dual task processing
|
366 |
+
}
|
367 |
+
|
368 |
+
except Exception as e:
|
369 |
+
# Calculate latency even for failed requests
|
370 |
+
end_time = time.time()
|
371 |
+
latency = end_time - start_time
|
372 |
+
|
373 |
+
self.logger.error(f"Medical LLM dual task query error: {str(e)}")
|
374 |
+
self.logger.error(f"Error Type: {type(e).__name__}")
|
375 |
+
self.logger.error(f"Dual task query that caused error: {user_query}")
|
376 |
+
|
377 |
+
return {
|
378 |
+
'extracted_condition': '',
|
379 |
+
'confidence': '0',
|
380 |
+
'error': str(e),
|
381 |
+
'raw_response': '',
|
382 |
+
'latency': latency,
|
383 |
+
'dual_task_mode': True
|
384 |
+
}
|
385 |
+
|
386 |
def extract_medical_keywords_for_customization(
|
387 |
self,
|
388 |
query: str,
|
src/user_prompt.py
CHANGED
@@ -249,10 +249,10 @@ Return ONLY the specified format."""
|
|
249 |
|
250 |
logger.info("🤖 COMBINED L2+4: Single LLM call for extraction + validation")
|
251 |
|
252 |
-
llama_response = self.llm_client.
|
253 |
-
|
254 |
-
max_tokens=100,
|
255 |
-
timeout=12.0
|
256 |
)
|
257 |
|
258 |
# Get both raw response and extracted condition
|
|
|
249 |
|
250 |
logger.info("🤖 COMBINED L2+4: Single LLM call for extraction + validation")
|
251 |
|
252 |
+
llama_response = self.llm_client.analyze_medical_query_dual_task(
|
253 |
+
user_query=user_query, # Direct original query, not wrapped prompt
|
254 |
+
max_tokens=100,
|
255 |
+
timeout=12.0
|
256 |
)
|
257 |
|
258 |
# Get both raw response and extracted condition
|