YanBoChen committed on
Commit
5b7c9f8
·
1 Parent(s): e84171b

Add dual task processing method for medical query analysis in llm_Med42_70BClient and update user_prompt to utilize it

Browse files
Files changed (2) hide show
  1. src/llm_clients.py +110 -0
  2. src/user_prompt.py +4 -4
src/llm_clients.py CHANGED
@@ -273,6 +273,116 @@ class llm_Med42_70BClient:
273
  'latency': latency # Include latency even for error cases
274
  }
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  def extract_medical_keywords_for_customization(
277
  self,
278
  query: str,
 
273
  'latency': latency # Include latency even for error cases
274
  }
275
 
276
+ def analyze_medical_query_dual_task(
277
+ self,
278
+ user_query: str,
279
+ max_tokens: int = 100,
280
+ timeout: Optional[float] = None
281
+ ) -> Dict[str, Union[str, float]]:
282
+ """
283
+ Analyze medical query with dual task processing (Level 2+4 Combined).
284
+
285
+ Performs both condition extraction and medical query validation in single LLM call.
286
+ Specifically designed for user_prompt.py Level 2+4 combined processing.
287
+
288
+ Args:
289
+ user_query: Original user medical query (not wrapped prompt)
290
+ max_tokens: Maximum tokens to generate
291
+ timeout: Specific API call timeout
292
+
293
+ Returns:
294
+ Dict containing dual task results with structured format
295
+ """
296
+ import time
297
+
298
+ # Start timing
299
+ start_time = time.time()
300
+
301
+ try:
302
+ self.logger.info(f"Calling Medical LLM (Dual Task) with query: {user_query}")
303
+
304
+ # Prepare chat completion request with dual task system prompt
305
+ response = self.client.chat.completions.create(
306
+ model="m42-health/Llama3-Med42-70B",
307
+ messages=[
308
+ {
309
+ "role": "system",
310
+ "content": """Medical Query Analysis - Dual Task Processing:
311
+
312
+ 1. Extract primary medical condition (if specific condition identifiable)
313
+ 2. Determine if this is a medical-related query
314
+
315
+ RESPONSE FORMAT:
316
+ MEDICAL: YES/NO
317
+ CONDITION: [specific condition name or "NONE"]
318
+ CONFIDENCE: [0.1-1.0]
319
+
320
+ EXAMPLES:
321
+ - "chest pain and shortness of breath" β†’ MEDICAL: YES, CONDITION: Acute Coronary Syndrome, CONFIDENCE: 0.9
322
+ - "how to cook pasta safely" β†’ MEDICAL: NO, CONDITION: NONE, CONFIDENCE: 0.95
323
+ - "persistent headache treatment options" β†’ MEDICAL: YES, CONDITION: Headache Disorder, CONFIDENCE: 0.8
324
+ - "feeling unwell lately" β†’ MEDICAL: YES, CONDITION: NONE, CONFIDENCE: 0.6
325
+
326
+ Return ONLY the specified format."""
327
+ },
328
+ {
329
+ "role": "user",
330
+ "content": user_query
331
+ }
332
+ ],
333
+ max_tokens=max_tokens,
334
+ temperature=0 # Ensure deterministic responses
335
+ )
336
+
337
+ # Calculate latency
338
+ end_time = time.time()
339
+ latency = end_time - start_time
340
+
341
+ # Extract the response text
342
+ response_text = response.choices[0].message.content or ""
343
+
344
+ # Log raw response and latency
345
+ self.logger.info(f"Raw LLM Dual Task Response: {response_text}")
346
+ self.logger.info(f"Dual Task Query Latency: {latency:.4f} seconds")
347
+
348
+ # Detect abnormal response
349
+ if self._is_abnormal_response(response_text):
350
+ self.logger.error(f"❌ Abnormal LLM dual task response detected: {response_text[:50]}...")
351
+ return {
352
+ 'extracted_condition': '',
353
+ 'confidence': '0',
354
+ 'error': 'Abnormal LLM dual task response detected',
355
+ 'raw_response': response_text,
356
+ 'latency': latency
357
+ }
358
+
359
+ # Return structured response for Level 2+4 processing
360
+ return {
361
+ 'extracted_condition': response_text, # For compatibility with existing logging
362
+ 'confidence': '0.8', # Default confidence for successful dual task
363
+ 'raw_response': response_text, # Contains MEDICAL/CONDITION/CONFIDENCE format
364
+ 'latency': latency,
365
+ 'dual_task_mode': True # Flag to indicate dual task processing
366
+ }
367
+
368
+ except Exception as e:
369
+ # Calculate latency even for failed requests
370
+ end_time = time.time()
371
+ latency = end_time - start_time
372
+
373
+ self.logger.error(f"Medical LLM dual task query error: {str(e)}")
374
+ self.logger.error(f"Error Type: {type(e).__name__}")
375
+ self.logger.error(f"Dual task query that caused error: {user_query}")
376
+
377
+ return {
378
+ 'extracted_condition': '',
379
+ 'confidence': '0',
380
+ 'error': str(e),
381
+ 'raw_response': '',
382
+ 'latency': latency,
383
+ 'dual_task_mode': True
384
+ }
385
+
386
  def extract_medical_keywords_for_customization(
387
  self,
388
  query: str,
src/user_prompt.py CHANGED
@@ -249,10 +249,10 @@ Return ONLY the specified format."""
249
 
250
  logger.info("πŸ€– COMBINED L2+4: Single LLM call for extraction + validation")
251
 
252
- llama_response = self.llm_client.analyze_medical_query(
253
- query=combined_prompt,
254
- max_tokens=100, # Keep concise for condition name
255
- timeout=12.0 # Single call timeout
256
  )
257
 
258
  # Get both raw response and extracted condition
 
249
 
250
  logger.info("πŸ€– COMBINED L2+4: Single LLM call for extraction + validation")
251
 
252
+ llama_response = self.llm_client.analyze_medical_query_dual_task(
253
+ user_query=user_query, # Direct original query, not wrapped prompt
254
+ max_tokens=100,
255
+ timeout=12.0
256
  )
257
 
258
  # Get both raw response and extracted condition