YanBoChen commited on
Commit
c414f60
·
1 Parent(s): 37c6713

🚀 Implement Advanced Condition Extraction for Medical Query Processing

Browse files

## 主要變更
- 新增 `src/medical_conditions.py`:集中管理醫學條件和關鍵詞配置
- 新增 `src/user_prompt.py`:實現多層 Fallback 的 Condition Extraction 機制

## 新增文件
- `src/medical_conditions.py`
- 集中醫學條件映射
- 提供條件關鍵詞查詢函數
- 支持條件驗證和詳細信息檢索

- `src/user_prompt.py`
- 實現四層 Condition Extraction 策略
- 支持預定義映射、Meditron 提取
- 添加語義搜索和通用醫學搜索 Fallback

## 參考文檔
- `docs/next/20250729Condition_Conversion_simplified.md`
- `docs/next/20250729Condition_Conversion_more_details.md`
- `docs/next/20250729Test_Retrieval.md`

## 實現特點
- 多層 Fallback 機制
- 靈活的條件提取
- 可擴展的醫學條件配置
- 用戶確認機制

## 性能目標
- 預定義映射:< 10ms
- Meditron 提取:< 2000ms
- 語義搜索:< 1s
- 總響應時間:< 7s

## 下一步
- 完善 Meditron 整合
- 添加更多醫學條件
- 優化語義搜索算法

Signed-off-by: OnCall.ai Team <[email protected]>

Files changed (2) hide show
  1. src/medical_conditions.py +99 -0
  2. src/user_prompt.py +321 -0
src/medical_conditions.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OnCall.ai Medical Conditions Configuration
3
+
4
+ This module provides centralized configuration for:
5
+ 1. Predefined medical conditions
6
+ 2. Condition-to-keyword mappings
7
+ 3. Fallback condition keywords
8
+
9
+ Author: OnCall.ai Team
10
+ Date: 2025-07-29
11
+ """
12
+
13
+ from typing import Dict, Optional
14
+
15
# Condition-to-keyword mapping.
# Each condition maps to two pipe-delimited keyword strings: "emergency"
# and "treatment".
CONDITION_KEYWORD_MAPPING: Dict[str, Dict[str, str]] = {
    "acute myocardial infarction": {
        "emergency": "MI|chest pain|cardiac arrest",
        "treatment": "aspirin|nitroglycerin|thrombolytic|PCI",
    },
    "acute stroke": {
        "emergency": "stroke|neurological deficit|sudden weakness",
        "treatment": "tPA|thrombolysis|stroke unit care",
    },
    "pulmonary embolism": {
        "emergency": "chest pain|shortness of breath|sudden dyspnea",
        "treatment": "anticoagulation|heparin|embolectomy",
    },
    # Conditions extended from @20250729Test_Retrieval.md
    # (note: these keys use underscores, the ones above use spaces)
    "acute_ischemic_stroke": {
        "emergency": "ischemic stroke|neurological deficit",
        "treatment": "tPA|stroke unit management",
    },
    "hemorrhagic_stroke": {
        "emergency": "hemorrhagic stroke|intracranial bleeding",
        "treatment": "blood pressure control|neurosurgery",
    },
    "transient_ischemic_attack": {
        "emergency": "TIA|temporary stroke symptoms",
        "treatment": "antiplatelet|lifestyle modification",
    },
    "acute_coronary_syndrome": {
        "emergency": "ACS|chest pain|ECG changes",
        "treatment": "antiplatelet|statins|cardiac monitoring",
    },
}

# Fallback search phrases, keyed by underscore-style condition identifiers.
FALLBACK_CONDITION_KEYWORDS: Dict[str, str] = {
    "acute_ischemic_stroke": "acute ischemic stroke treatment",
    "hemorrhagic_stroke": "hemorrhagic stroke management",
    "transient_ischemic_attack": "TIA treatment protocol",
    "acute_coronary_syndrome": "ACS treatment guidelines",
    "stable_angina": "stable angina management",
    "non_cardiac_chest_pain": "non-cardiac chest pain evaluation",
    "witnessed_cardiac_arrest": "witnessed cardiac arrest protocol",
    "unwitnessed_cardiac_arrest": "unwitnessed cardiac arrest management",
    "post_resuscitation_care": "post-resuscitation care guidelines",
}
60
+
61
def get_condition_keywords(specific_condition: str) -> str:
    """
    Retrieve the fallback search phrase for a specific condition

    The name is looked up in FALLBACK_CONDITION_KEYWORDS exactly as given
    first. If that misses, a normalized form is tried: surrounding
    whitespace removed, lower-cased, and spaces replaced by underscores.

    Args:
        specific_condition: Medical condition name

    Returns:
        The matching fallback phrase, or the original condition string
        unchanged when no entry matches. This function never returns None,
        and the result is always a plain string (not a dict).
    """
    exact_match = FALLBACK_CONDITION_KEYWORDS.get(specific_condition)
    if exact_match is not None:
        return exact_match

    # Second chance: tolerate case and space/underscore differences so that
    # e.g. "Stable Angina" resolves to the "stable_angina" entry.
    normalized = specific_condition.strip().lower().replace(" ", "_")
    return FALLBACK_CONDITION_KEYWORDS.get(normalized, specific_condition)
72
+
73
def validate_condition(condition: str) -> bool:
    """
    Report whether a condition is a key of CONDITION_KEYWORD_MAPPING

    The comparison ignores letter case; no other normalization is applied.

    Args:
        condition: Medical condition to validate

    Returns:
        True when some mapping key equals the condition case-insensitively,
        False otherwise
    """
    wanted = condition.lower()
    return any(known.lower() == wanted for known in CONDITION_KEYWORD_MAPPING)
84
+
85
def get_condition_details(condition: str) -> Optional[Dict[str, str]]:
    """
    Look up the keyword entry for a condition, ignoring letter case

    Args:
        condition: Medical condition name

    Returns:
        The entry from CONDITION_KEYWORD_MAPPING (a dict with "emergency"
        and "treatment" keyword strings) for the first key that matches
        case-insensitively, or None when there is no such key
    """
    wanted = condition.lower()
    return next(
        (
            details
            for name, details in CONDITION_KEYWORD_MAPPING.items()
            if name.lower() == wanted
        ),
        None,
    )
src/user_prompt.py ADDED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OnCall.ai User Prompt Processing Module
3
+
4
+ This module handles:
5
+ 1. Condition extraction from user queries
6
+ 2. Keyword mapping
7
+ 3. User confirmation workflow
8
+ 4. Fallback mechanisms
9
+
10
+ Author: OnCall.ai Team
11
+ Date: 2025-07-29
12
+ """
13
+
14
import logging
from typing import Dict, Optional, Any, List

import numpy as np  # Added missing import for numpy
from sentence_transformers import SentenceTransformer

# Import our centralized medical conditions configuration
from medical_conditions import (
    CONDITION_KEYWORD_MAPPING,
    get_condition_details,
    get_condition_keywords,
    validate_condition,
)
25
+
26
# Configure logging: INFO level, with timestamp, logger name and level
# in front of every message.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

# Module-level logger used by everything in this file.
logger = logging.getLogger(__name__)
32
+
33
class UserPromptProcessor:
    """
    Turn a free-text medical query into a condition plus search keywords

    Extraction tries four strategies in order and returns the first one that
    yields a result: predefined mapping, Meditron extraction, semantic
    search, and generic medical search.
    """

    # Minimum cosine similarity for _infer_condition_from_text to accept
    # a condition.
    SIMILARITY_THRESHOLD = 0.7

    def __init__(self, meditron_client=None, retrieval_system=None):
        """
        Initialize UserPromptProcessor with optional Meditron and retrieval system

        Also loads the "NeuML/pubmedbert-base-embeddings" sentence-transformer
        model, which is used for condition inference.

        Args:
            meditron_client: Optional Meditron client for advanced condition extraction
            retrieval_system: Optional retrieval system for semantic search
        """
        self.meditron_client = meditron_client
        self.retrieval_system = retrieval_system
        self.embedding_model = SentenceTransformer("NeuML/pubmedbert-base-embeddings")
        logger.info("UserPromptProcessor initialized")

    def extract_condition_keywords(self, user_query: str) -> Dict[str, Any]:
        """
        Extract condition keywords with multi-level fallback

        Args:
            user_query: User's medical query

        Returns:
            Dict with 'condition', 'emergency_keywords' and
            'treatment_keywords' (some strategies add a confidence entry).
            All three values are empty strings when nothing matched.
        """
        # Ordered from cheapest to most generic. Each strategy returns None
        # when its dependency (Meditron client / retrieval system) is
        # missing, so no availability check is needed here.
        strategies = (
            self._predefined_mapping,        # Level 1: Predefined Mapping (Fast Path)
            self._extract_with_meditron,     # Level 2: Meditron Extraction
            self._semantic_search_fallback,  # Level 3: Semantic Search Fallback
            self._generic_medical_search,    # Level 4: Generic Medical Search
        )
        for strategy in strategies:
            result = strategy(user_query)
            if result:
                return result

        # No match found
        return {
            'condition': '',
            'emergency_keywords': '',
            'treatment_keywords': ''
        }

    @staticmethod
    def _condition_result(condition: str) -> Dict[str, Any]:
        """
        Build the standard result dict for a condition

        Args:
            condition: Condition name to look up

        Returns:
            Dict with the condition and its emergency/treatment keywords;
            the keyword values are empty strings when the condition has no
            entry in the predefined mapping
        """
        # get_condition_details returns a dict or None. The previous code
        # used get_condition_keywords, which returns a plain string and
        # therefore has no .get().
        details = get_condition_details(condition) or {}
        return {
            'condition': condition,
            'emergency_keywords': details.get('emergency', ''),
            'treatment_keywords': details.get('treatment', '')
        }

    def _predefined_mapping(self, user_query: str) -> Optional[Dict[str, str]]:
        """
        Fast predefined condition mapping

        A condition matches when its name occurs as a substring of the
        query, ignoring case and treating underscores as spaces.

        Args:
            user_query: User's medical query

        Returns:
            Mapped condition keywords for the first matching condition, or None
        """
        # Underscores become spaces on both sides so that identifier-style
        # keys such as "acute_ischemic_stroke" can match ordinary wording.
        query_lower = user_query.lower().replace('_', ' ')

        for condition, mappings in CONDITION_KEYWORD_MAPPING.items():
            if condition.lower().replace('_', ' ') in query_lower:
                logger.info("Matched predefined condition: %s", condition)
                return {
                    'condition': condition,
                    'emergency_keywords': mappings['emergency'],
                    'treatment_keywords': mappings['treatment']
                }

        return None

    def _extract_with_meditron(self, user_query: str) -> Optional[Dict[str, str]]:
        """
        Use Meditron for advanced condition extraction

        Args:
            user_query: User's medical query

        Returns:
            Dict with condition and keywords, or None when no client is
            configured, the extracted condition is not a predefined one,
            or the client call fails
        """
        if not self.meditron_client:
            return None

        try:
            # NOTE(review): assumes the response is dict-like (it is read
            # with .get) - confirm against the Meditron client.
            meditron_response = self.meditron_client.analyze_medical_query(
                query=user_query,
                max_tokens=100,
                timeout=2.0
            )
            extracted_condition = meditron_response.get('extracted_condition', '')

            if extracted_condition and validate_condition(extracted_condition):
                return self._condition_result(extracted_condition)

            return None

        except Exception as e:
            # Deliberate best-effort: any client failure falls through to
            # the next extraction level instead of aborting the request.
            logger.error("Meditron condition extraction error: %s", e)
            return None

    def _semantic_search_fallback(self, user_query: str) -> Optional[Dict[str, Any]]:
        """
        Perform semantic search for condition extraction

        Args:
            user_query: User's medical query

        Returns:
            Dict with condition, keywords and 'semantic_confidence', or None
            when no retrieval system is configured, nothing usable is found,
            or the search fails
        """
        if not self.retrieval_system:
            return None

        try:
            # Perform semantic search on sliding window chunks
            semantic_results = self.retrieval_system.search_sliding_window_chunks(user_query)
            if not semantic_results:
                return None

            # Extract condition from top semantic result
            top_result = semantic_results[0]
            condition = self._infer_condition_from_text(top_result['text'])
            if not (condition and validate_condition(condition)):
                return None

            result = self._condition_result(condition)
            # NOTE(review): this stores the result's raw 'distance' value;
            # whether a larger value means a better match is not visible
            # here - confirm against the retrieval system.
            result['semantic_confidence'] = top_result.get('distance', 0)
            return result

        except Exception as e:
            # Deliberate best-effort: fall through to the next level.
            logger.error("Semantic search fallback error: %s", e)
            return None

    def _generic_medical_search(self, user_query: str) -> Optional[Dict[str, Any]]:
        """
        Perform generic medical search as final fallback

        Args:
            user_query: User's medical query

        Returns:
            Dict with generic medical keywords when the search returns
            anything, otherwise None (also when no retrieval system is
            configured or the search fails)
        """
        # Without a retrieval system there is nothing to search; return
        # quietly instead of raising and logging an AttributeError.
        if not self.retrieval_system:
            return None

        generic_query = f"{user_query} medical treatment"

        try:
            # Perform generic medical search
            generic_results = self.retrieval_system.search_generic_medical_content(generic_query)

            if generic_results:
                return {
                    'condition': 'generic medical query',
                    'emergency_keywords': 'medical|emergency',
                    'treatment_keywords': 'treatment|management',
                    'generic_confidence': 0.5
                }

            return None

        except Exception as e:
            # Deliberate best-effort: the caller reports "no match".
            logger.error("Generic medical search error: %s", e)
            return None

    def _infer_condition_from_text(self, text: str) -> Optional[str]:
        """
        Infer medical condition from text using embedding similarity

        The text and every predefined condition name are embedded, and the
        condition with the highest cosine similarity is chosen.

        Args:
            text: Input medical text

        Returns:
            The most similar condition name if its similarity exceeds
            SIMILARITY_THRESHOLD, otherwise None
        """
        # This is a placeholder and would need more sophisticated implementation
        conditions = list(CONDITION_KEYWORD_MAPPING)
        if not conditions:
            return None

        text_embedding = np.asarray(self.embedding_model.encode(text))
        # Encode all condition names in one batch rather than one call each.
        condition_embeddings = np.asarray(self.embedding_model.encode(conditions))

        dot_products = condition_embeddings @ text_embedding
        norm_products = (
            np.linalg.norm(condition_embeddings, axis=1) * np.linalg.norm(text_embedding)
        )
        # A zero-length vector gets similarity 0 instead of a division by zero.
        similarities = np.divide(
            dot_products,
            norm_products,
            out=np.zeros(dot_products.shape, dtype=float),
            where=norm_products > 0,
        )

        best_index = int(np.argmax(similarities))
        if similarities[best_index] > self.SIMILARITY_THRESHOLD:
            return conditions[best_index]
        return None

    def validate_keywords(self, keywords: Dict[str, str]) -> bool:
        """
        Check that at least one extracted keyword is non-empty

        This is only a basic check; it does not look the keywords up in any
        index.

        Args:
            keywords: Dict with pipe-delimited 'emergency_keywords' and
                'treatment_keywords' strings (missing keys count as empty)

        Returns:
            True if any keyword is non-blank after stripping whitespace
        """
        emergency_kws = keywords.get('emergency_keywords', '').split('|')
        treatment_kws = keywords.get('treatment_keywords', '').split('|')

        # Basic validation: check if any keyword is non-empty
        return any(kw.strip() for kw in emergency_kws + treatment_kws)

    def handle_user_confirmation(self, extracted_info: Dict[str, str]) -> Dict[str, Any]:
        """
        Handle user confirmation for extracted condition and keywords

        Args:
            extracted_info: Dict with condition and keyword information

        Returns:
            Dict whose 'type' is 'rephrase_needed' (with 'message' and
            'suggestions') when no condition was extracted, otherwise
            'confirmation_needed' (with 'message' and the original
            'extracted_info')
        """
        # If no condition found, request user to rephrase
        if not extracted_info.get('condition'):
            return {
                'type': 'rephrase_needed',
                'message': "Could not identify a specific medical condition. Please rephrase your query.",
                'suggestions': [
                    "Try: 'how to treat chest pain'",
                    "Try: 'acute stroke management'",
                    "Try: 'pulmonary embolism treatment'"
                ]
            }

        # Prepare confirmation message. The leading and trailing empty
        # entries make the text start and end with a newline.
        condition = extracted_info.get('condition', 'Unknown Condition')
        emergency = extracted_info.get('emergency_keywords', 'None')
        treatment = extracted_info.get('treatment_keywords', 'None')
        confirmation_message = "\n".join([
            "",
            f"I understand you're asking about: \"{condition}\"",
            "",
            "Extracted Keywords:",
            f"- Emergency: {emergency}",
            f"- Treatment: {treatment}",
            "",
            "Please confirm:",
            "1) Yes, proceed with search",
            "2) No, please rephrase my query",
            "3) Modify keywords",
            "",
        ])

        return {
            'type': 'confirmation_needed',
            'message': confirmation_message,
            'extracted_info': extracted_info
        }
298
+
299
def main():
    """
    Run a few sample queries through UserPromptProcessor and print the results

    For each query this prints the query, the extracted keyword dict, and
    the message of the resulting confirmation prompt.
    """
    sample_queries = (
        "how to treat acute MI?",
        "patient with stroke symptoms",
        "chest pain and breathing difficulty",
    )
    prompt_processor = UserPromptProcessor()

    for sample in sample_queries:
        print(f"\nQuery: {sample}")
        extracted = prompt_processor.extract_condition_keywords(sample)
        print("Extracted Keywords:", extracted)

        prompt = prompt_processor.handle_user_confirmation(extracted)
        print("Confirmation:", prompt['message'])


if __name__ == "__main__":
    main()