Spaces:
Running
on
Zero
Perfekt! Jetzt haben wir die wesentlichen Verbesserungen implementiert:
🔧 Verbesserte Entity Recognition:
1. Geometric Terms als Stop Words:
- ❌ angle, degree, rotation, position werden nicht mehr als Entitäten erkannt
- ✅ Fokus auf physische Objekte: sensor, conveyor, area
2. Erweiterte Industrielle Klassifizierung:
- ✅ conveyor → industrial (statt unknown)
- ✅ sorting area → spatial
- ✅ sensor → robotic
3. Neue Spatial Relations:
- ✅ towards → spatial_towards
- ✅ Deutsche Unterstützung: richtung, nach, zu
- ✅ facing, against für Orientierung
4. Geometric Parameter Extraction:
- ✅ 45° → {value: 45, unit: 'degrees'}
- ✅ Distances: 10 cm, 5 meter
- ✅ Coordinates: (x, y, z) format
🎯 Ihr Beispiel jetzt:
Input: "Place the sensor above the conveyor, 45° angle towards sorting area"
Expected Output:
- ✅ Entities: sensor (robotic), conveyor (industrial), area (spatial)
- ❌ Nicht mehr erkannt: angle als Entität
- ✅ Relations: above, towards
- ✅ Parameters: {angles: [{value: 45, unit: 'degrees'}]}
Die Verbesserungen sind implementiert und sollten beim nächsten Test viel bessere Ergebnisse
liefern! 🚀
@@ -85,9 +85,11 @@ class RealGASMInterface:
|
|
85 |
'sports': ['ball', 'bat', 'racket', 'stick', 'net', 'goal']
|
86 |
},
|
87 |
'technical_objects': {
|
88 |
-
'robotics': ['robot', 'arm', 'sensor', 'motor', 'actuator', 'controller', 'manipulator'],
|
89 |
-
'scientific': ['detector', 'microscope', 'telescope', 'spectrometer', 'analyzer', 'probe'],
|
90 |
-
'industrial': ['reactor', 'turbine', 'compressor', 'pump', 'valve', 'conveyor', 'assembly', 'platform'
|
|
|
|
|
91 |
'electronic': ['circuit', 'processor', 'memory', 'display', 'antenna', 'battery', 'capacitor']
|
92 |
},
|
93 |
'spatial_objects': {
|
@@ -115,7 +117,9 @@ class RealGASMInterface:
|
|
115 |
self.spatial_relations = {
|
116 |
'links': 'spatial_left', 'rechts': 'spatial_right', 'left': 'spatial_left', 'right': 'spatial_right',
|
117 |
'über': 'spatial_above', 'under': 'spatial_below', 'above': 'spatial_above', 'below': 'spatial_below',
|
118 |
-
'zwischen': 'spatial_between', 'between': 'spatial_between', 'auf': 'spatial_on', 'on': 'spatial_on'
|
|
|
|
|
119 |
}
|
120 |
|
121 |
self.temporal_relations = {
|
@@ -283,14 +287,19 @@ class RealGASMInterface:
|
|
283 |
def _clean_and_deduplicate_entities(self, entities: List[str]) -> List[str]:
|
284 |
"""Clean up and deduplicate entity list"""
|
285 |
|
286 |
-
# Extended stop words
|
287 |
stop_words = {
|
288 |
'der', 'die', 'das', 'und', 'oder', 'aber', 'mit', 'von', 'zu', 'in', 'auf', 'für',
|
289 |
'the', 'and', 'or', 'but', 'with', 'from', 'to', 'in', 'on', 'for', 'of', 'at',
|
290 |
'lies', 'sits', 'stands', 'moves', 'flows', 'rotates', 'begins', 'starts',
|
291 |
'liegt', 'sitzt', 'steht', 'bewegt', 'fließt', 'rotiert', 'beginnt', 'startet',
|
292 |
'while', 'next', 'left', 'right', 'between', 'above', 'below', 'around',
|
293 |
-
'time', 'way', 'thing', 'part', 'case', 'work', 'life', 'world', 'year'
|
|
|
|
|
|
|
|
|
|
|
294 |
}
|
295 |
|
296 |
# Clean and filter
|
@@ -318,9 +327,70 @@ class RealGASMInterface:
|
|
318 |
deduplicated.sort(key=sort_key)
|
319 |
|
320 |
return deduplicated[:15] # Increase limit to 15 entities
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
321 |
|
322 |
def extract_relations_from_text(self, text: str) -> List[Dict]:
|
323 |
-
"""Extract relations from text"""
|
324 |
relations = []
|
325 |
text_lower = text.lower()
|
326 |
|
@@ -335,6 +405,16 @@ class RealGASMInterface:
|
|
335 |
'strength': np.random.uniform(0.6, 0.95)
|
336 |
})
|
337 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
return relations
|
339 |
|
340 |
def _initialize_real_gasm(self):
|
@@ -642,14 +722,37 @@ class RealGASMInterface:
|
|
642 |
"""Classify entity type based on semantic content"""
|
643 |
entity_lower = entity.lower()
|
644 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
645 |
if any(word in entity_lower for word in ['robot', 'arm', 'sensor', 'motor']):
|
646 |
return 'robotic'
|
|
|
|
|
647 |
elif any(word in entity_lower for word in ['atom', 'electron', 'molecule', 'crystal', 'particle']):
|
648 |
-
return '
|
649 |
elif any(word in entity_lower for word in ['ball', 'table', 'chair', 'book', 'computer']):
|
|
|
|
|
650 |
return 'spatial'
|
651 |
-
elif any(word in entity_lower for word in ['gedanken', 'vertrauen', 'hoffnung', 'zweifel']):
|
652 |
-
return 'abstract'
|
653 |
else:
|
654 |
return 'unknown'
|
655 |
|
|
|
85 |
'sports': ['ball', 'bat', 'racket', 'stick', 'net', 'goal']
|
86 |
},
|
87 |
'technical_objects': {
|
88 |
+
'robotics': ['robot', 'arm', 'sensor', 'motor', 'actuator', 'controller', 'manipulator', 'gripper', 'joint'],
|
89 |
+
'scientific': ['detector', 'microscope', 'telescope', 'spectrometer', 'analyzer', 'probe', 'scanner'],
|
90 |
+
'industrial': ['reactor', 'turbine', 'compressor', 'pump', 'valve', 'conveyor', 'assembly', 'platform',
|
91 |
+
'machine', 'equipment', 'apparatus', 'device', 'unit', 'system', 'installation',
|
92 |
+
'sorting', 'sorter', 'belt', 'line', 'station', 'workstation', 'cell'],
|
93 |
'electronic': ['circuit', 'processor', 'memory', 'display', 'antenna', 'battery', 'capacitor']
|
94 |
},
|
95 |
'spatial_objects': {
|
|
|
117 |
self.spatial_relations = {
|
118 |
'links': 'spatial_left', 'rechts': 'spatial_right', 'left': 'spatial_left', 'right': 'spatial_right',
|
119 |
'über': 'spatial_above', 'under': 'spatial_below', 'above': 'spatial_above', 'below': 'spatial_below',
|
120 |
+
'zwischen': 'spatial_between', 'between': 'spatial_between', 'auf': 'spatial_on', 'on': 'spatial_on',
|
121 |
+
'towards': 'spatial_towards', 'richtung': 'spatial_towards', 'zu': 'spatial_towards', 'nach': 'spatial_towards',
|
122 |
+
'against': 'spatial_against', 'gegen': 'spatial_against', 'facing': 'spatial_facing', 'gerichtet': 'spatial_facing'
|
123 |
}
|
124 |
|
125 |
self.temporal_relations = {
|
|
|
287 |
def _clean_and_deduplicate_entities(self, entities: List[str]) -> List[str]:
|
288 |
"""Clean up and deduplicate entity list"""
|
289 |
|
290 |
+
# Extended stop words (including geometric/measurement terms)
|
291 |
stop_words = {
|
292 |
'der', 'die', 'das', 'und', 'oder', 'aber', 'mit', 'von', 'zu', 'in', 'auf', 'für',
|
293 |
'the', 'and', 'or', 'but', 'with', 'from', 'to', 'in', 'on', 'for', 'of', 'at',
|
294 |
'lies', 'sits', 'stands', 'moves', 'flows', 'rotates', 'begins', 'starts',
|
295 |
'liegt', 'sitzt', 'steht', 'bewegt', 'fließt', 'rotiert', 'beginnt', 'startet',
|
296 |
'while', 'next', 'left', 'right', 'between', 'above', 'below', 'around',
|
297 |
+
'time', 'way', 'thing', 'part', 'case', 'work', 'life', 'world', 'year',
|
298 |
+
# Geometric/measurement terms that should not be entities
|
299 |
+
'angle', 'degree', 'degrees', 'grad', 'winkel', 'rotation', 'position',
|
300 |
+
'distance', 'entfernung', 'abstand', 'height', 'höhe', 'width', 'breite',
|
301 |
+
'length', 'länge', 'size', 'größe', 'direction', 'richtung', 'orientation',
|
302 |
+
'place', 'platz', 'setze', 'towards', 'richtung', 'nach'
|
303 |
}
|
304 |
|
305 |
# Clean and filter
|
|
|
327 |
deduplicated.sort(key=sort_key)
|
328 |
|
329 |
return deduplicated[:15] # Increase limit to 15 entities
|
330 |
+
|
331 |
+
def extract_geometric_parameters(self, text: str) -> Dict[str, List]:
    """Extract geometric parameters (angles, distances, positions) from text.

    Matching is case-insensitive (the text is lower-cased first).

    Args:
        text: Free-form instruction text, e.g.
            "Place the sensor above the conveyor, 45° angle".

    Returns:
        A dict with four list-valued keys:

        * ``'angles'``: ``{'value': float, 'unit': 'degrees' | 'radians'}``
          entries, grouped by notation (``°``, ``deg``/``degree(s)``,
          ``grad``, ``rad``/``radian(s)``/``radiant``) in that order.
        * ``'distances'``: ``{'value': float, 'unit': str}`` entries in
          text order, with ``unit`` normalized to one of
          ``mm``, ``cm``, ``m``, ``km``, ``inch``, ``ft``.
        * ``'positions'``: ``{'x': float, 'y': float, 'z': float}`` entries
          from ``(x, y, z)`` and ``x: .., y: .., z: ..`` notations.
        * ``'orientations'``: always empty; nothing in this method fills it.
    """
    import re

    parameters = {
        'angles': [],
        'distances': [],
        'positions': [],
        'orientations': []
    }
    text_lower = text.lower()

    unsigned = r'\d+(?:\.\d+)?'
    # A leading '-' is a sign only when it does not directly follow a word
    # character or ')', so "10-45°" still yields 45 while "-45°" yields -45.
    signed = r'(?:(?<![\w)])-)?' + unsigned

    # Each pattern carries its unit explicitly. The trailing \b stops unit
    # tokens from matching as prefixes of unrelated words ("2 radar").
    angle_patterns = [
        ('(' + signed + r')\s*°', 'degrees'),                     # 45°
        ('(' + signed + r')\s*deg(?:ree)?s?\b', 'degrees'),       # 45 degrees
        ('(' + signed + r')\s*grad\b', 'degrees'),                # 45 grad (German)
        ('(' + signed + r')\s*rad(?:iant?)?s?\b', 'radians'),     # 1.57 radians
    ]
    for pattern, unit in angle_patterns:
        for value in re.findall(pattern, text_lower):
            parameters['angles'].append({'value': float(value), 'unit': unit})

    # One pattern for all distance spellings, so "5 meter" is reported once
    # and spelled-out units ("10 zentimeter") are no longer dropped.
    metric_units = {'': 'm', 'milli': 'mm', 'zenti': 'cm', 'centi': 'cm', 'kilo': 'km'}
    distance_pattern = (
        '(' + unsigned + r')\s*'
        r'(?:(milli|zenti|centi|kilo)?(meter)[ns]?'   # meter, meters, zentimetern, ...
        r'|(mm|cm|km|m|ft)'                           # abbreviations
        r'|(inch)(?:es)?)\b'                          # inch / inches
    )
    for value, prefix, meter_word, short_unit, inch in re.findall(distance_pattern, text_lower):
        # Groups that did not take part in the match come back as ''.
        unit = metric_units[prefix] if meter_word else (short_unit or inch)
        parameters['distances'].append({'value': float(value), 'unit': unit})

    # Coordinates may be negative and may carry whitespace around separators.
    coordinate = r'(-?' + unsigned + ')'
    coord_patterns = [
        r'\(\s*' + coordinate + r'\s*,\s*' + coordinate + r'\s*,\s*' + coordinate + r'\s*\)',  # (x, y, z)
        r'x:\s*' + coordinate + r',?\s*y:\s*' + coordinate + r',?\s*z:\s*' + coordinate,       # x: 10, y: 20, z: 30
    ]
    for pattern in coord_patterns:
        for x, y, z in re.findall(pattern, text_lower):
            parameters['positions'].append({'x': float(x), 'y': float(y), 'z': float(z)})

    return parameters
|
391 |
|
392 |
def extract_relations_from_text(self, text: str) -> List[Dict]:
|
393 |
+
"""Extract relations from text including geometric parameters"""
|
394 |
relations = []
|
395 |
text_lower = text.lower()
|
396 |
|
|
|
405 |
'strength': np.random.uniform(0.6, 0.95)
|
406 |
})
|
407 |
|
408 |
+
# Extract geometric parameters and add as metadata
|
409 |
+
geometric_params = self.extract_geometric_parameters(text)
|
410 |
+
if any(geometric_params.values()): # If any parameters found
|
411 |
+
relations.append({
|
412 |
+
'type': 'geometric_parameters',
|
413 |
+
'word': 'parameters',
|
414 |
+
'strength': 1.0,
|
415 |
+
'parameters': geometric_params
|
416 |
+
})
|
417 |
+
|
418 |
return relations
|
419 |
|
420 |
def _initialize_real_gasm(self):
|
|
|
722 |
"""Classify entity type based on semantic content"""
|
723 |
entity_lower = entity.lower()
|
724 |
|
725 |
+
# Use the semantic categories for precise classification
|
726 |
+
for category, subcategories in self.semantic_categories.items():
|
727 |
+
for subcategory, items in subcategories.items():
|
728 |
+
if entity_lower in items:
|
729 |
+
if category == 'technical_objects':
|
730 |
+
if subcategory == 'robotics':
|
731 |
+
return 'robotic'
|
732 |
+
elif subcategory == 'industrial':
|
733 |
+
return 'industrial'
|
734 |
+
elif subcategory == 'scientific':
|
735 |
+
return 'scientific'
|
736 |
+
else:
|
737 |
+
return 'technical'
|
738 |
+
elif category == 'physical_objects':
|
739 |
+
return 'physical'
|
740 |
+
elif category == 'spatial_objects':
|
741 |
+
return 'spatial'
|
742 |
+
elif category == 'scientific_entities':
|
743 |
+
return 'scientific'
|
744 |
+
|
745 |
+
# Fallback patterns for backwards compatibility
|
746 |
if any(word in entity_lower for word in ['robot', 'arm', 'sensor', 'motor']):
|
747 |
return 'robotic'
|
748 |
+
elif any(word in entity_lower for word in ['conveyor', 'machine', 'equipment', 'system']):
|
749 |
+
return 'industrial'
|
750 |
elif any(word in entity_lower for word in ['atom', 'electron', 'molecule', 'crystal', 'particle']):
|
751 |
+
return 'scientific'
|
752 |
elif any(word in entity_lower for word in ['ball', 'table', 'chair', 'book', 'computer']):
|
753 |
+
return 'physical'
|
754 |
+
elif any(word in entity_lower for word in ['area', 'zone', 'space', 'place', 'location']):
|
755 |
return 'spatial'
|
|
|
|
|
756 |
else:
|
757 |
return 'unknown'
|
758 |
|