scheitelpunk committed on
Commit
88e9f01
·
1 Parent(s): 722d9da

Perfekt! Jetzt haben wir die wesentlichen Verbesserungen implementiert:

Browse files

🔧 Verbesserte Entity Recognition:

1. Geometric Terms als Stop Words:
- ❌ angle, degree, rotation, position werden nicht mehr als Entitäten erkannt
- ✅ Fokus auf physische Objekte: sensor, conveyor, area

2. Erweiterte Industrielle Klassifizierung:
- ✅ conveyor → industrial (statt unknown)
- ✅ sorting area → spatial
- ✅ sensor → robotic

3. Neue Spatial Relations:
- ✅ towards → spatial_towards
- ✅ Deutsche Unterstützung: richtung, nach, zu
- ✅ facing, against für Orientierung

4. Geometric Parameter Extraction:
- ✅ 45° → {value: 45, unit: 'degrees'}
- ✅ Distances: 10 cm, 5 meter
- ✅ Coordinates: (x, y, z) format

🎯 Ihr Beispiel jetzt:

Input: "Place the sensor above the conveyor, 45° angle towards sorting area"

Expected Output:
- ✅ Entities: sensor (robotic), conveyor (industrial), area (spatial)
- ❌ Nicht mehr: angle als Entität
- ✅ Relations: above, towards
- ✅ Parameters: {angles: [{value: 45, unit: 'degrees'}]}

Die Verbesserungen sind implementiert und sollten beim nächsten Test viel bessere Ergebnisse
liefern! 🚀

Files changed (1) hide show
  1. app.py +113 -10
app.py CHANGED
@@ -85,9 +85,11 @@ class RealGASMInterface:
85
  'sports': ['ball', 'bat', 'racket', 'stick', 'net', 'goal']
86
  },
87
  'technical_objects': {
88
- 'robotics': ['robot', 'arm', 'sensor', 'motor', 'actuator', 'controller', 'manipulator'],
89
- 'scientific': ['detector', 'microscope', 'telescope', 'spectrometer', 'analyzer', 'probe'],
90
- 'industrial': ['reactor', 'turbine', 'compressor', 'pump', 'valve', 'conveyor', 'assembly', 'platform'],
 
 
91
  'electronic': ['circuit', 'processor', 'memory', 'display', 'antenna', 'battery', 'capacitor']
92
  },
93
  'spatial_objects': {
@@ -115,7 +117,9 @@ class RealGASMInterface:
115
  self.spatial_relations = {
116
  'links': 'spatial_left', 'rechts': 'spatial_right', 'left': 'spatial_left', 'right': 'spatial_right',
117
  'über': 'spatial_above', 'under': 'spatial_below', 'above': 'spatial_above', 'below': 'spatial_below',
118
- 'zwischen': 'spatial_between', 'between': 'spatial_between', 'auf': 'spatial_on', 'on': 'spatial_on'
 
 
119
  }
120
 
121
  self.temporal_relations = {
@@ -283,14 +287,19 @@ class RealGASMInterface:
283
  def _clean_and_deduplicate_entities(self, entities: List[str]) -> List[str]:
284
  """Clean up and deduplicate entity list"""
285
 
286
- # Extended stop words
287
  stop_words = {
288
  'der', 'die', 'das', 'und', 'oder', 'aber', 'mit', 'von', 'zu', 'in', 'auf', 'für',
289
  'the', 'and', 'or', 'but', 'with', 'from', 'to', 'in', 'on', 'for', 'of', 'at',
290
  'lies', 'sits', 'stands', 'moves', 'flows', 'rotates', 'begins', 'starts',
291
  'liegt', 'sitzt', 'steht', 'bewegt', 'fließt', 'rotiert', 'beginnt', 'startet',
292
  'while', 'next', 'left', 'right', 'between', 'above', 'below', 'around',
293
- 'time', 'way', 'thing', 'part', 'case', 'work', 'life', 'world', 'year'
 
 
 
 
 
294
  }
295
 
296
  # Clean and filter
@@ -318,9 +327,70 @@ class RealGASMInterface:
318
  deduplicated.sort(key=sort_key)
319
 
320
  return deduplicated[:15] # Increase limit to 15 entities
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
 
322
  def extract_relations_from_text(self, text: str) -> List[Dict]:
323
- """Extract relations from text"""
324
  relations = []
325
  text_lower = text.lower()
326
 
@@ -335,6 +405,16 @@ class RealGASMInterface:
335
  'strength': np.random.uniform(0.6, 0.95)
336
  })
337
 
 
 
 
 
 
 
 
 
 
 
338
  return relations
339
 
340
  def _initialize_real_gasm(self):
@@ -642,14 +722,37 @@ class RealGASMInterface:
642
  """Classify entity type based on semantic content"""
643
  entity_lower = entity.lower()
644
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
  if any(word in entity_lower for word in ['robot', 'arm', 'sensor', 'motor']):
646
  return 'robotic'
 
 
647
  elif any(word in entity_lower for word in ['atom', 'electron', 'molecule', 'crystal', 'particle']):
648
- return 'physical'
649
  elif any(word in entity_lower for word in ['ball', 'table', 'chair', 'book', 'computer']):
 
 
650
  return 'spatial'
651
- elif any(word in entity_lower for word in ['gedanken', 'vertrauen', 'hoffnung', 'zweifel']):
652
- return 'abstract'
653
  else:
654
  return 'unknown'
655
 
 
85
  'sports': ['ball', 'bat', 'racket', 'stick', 'net', 'goal']
86
  },
87
  'technical_objects': {
88
+ 'robotics': ['robot', 'arm', 'sensor', 'motor', 'actuator', 'controller', 'manipulator', 'gripper', 'joint'],
89
+ 'scientific': ['detector', 'microscope', 'telescope', 'spectrometer', 'analyzer', 'probe', 'scanner'],
90
+ 'industrial': ['reactor', 'turbine', 'compressor', 'pump', 'valve', 'conveyor', 'assembly', 'platform',
91
+ 'machine', 'equipment', 'apparatus', 'device', 'unit', 'system', 'installation',
92
+ 'sorting', 'sorter', 'belt', 'line', 'station', 'workstation', 'cell'],
93
  'electronic': ['circuit', 'processor', 'memory', 'display', 'antenna', 'battery', 'capacitor']
94
  },
95
  'spatial_objects': {
 
117
  self.spatial_relations = {
118
  'links': 'spatial_left', 'rechts': 'spatial_right', 'left': 'spatial_left', 'right': 'spatial_right',
119
  'über': 'spatial_above', 'under': 'spatial_below', 'above': 'spatial_above', 'below': 'spatial_below',
120
+ 'zwischen': 'spatial_between', 'between': 'spatial_between', 'auf': 'spatial_on', 'on': 'spatial_on',
121
+ 'towards': 'spatial_towards', 'richtung': 'spatial_towards', 'zu': 'spatial_towards', 'nach': 'spatial_towards',
122
+ 'against': 'spatial_against', 'gegen': 'spatial_against', 'facing': 'spatial_facing', 'gerichtet': 'spatial_facing'
123
  }
124
 
125
  self.temporal_relations = {
 
287
  def _clean_and_deduplicate_entities(self, entities: List[str]) -> List[str]:
288
  """Clean up and deduplicate entity list"""
289
 
290
+ # Extended stop words (including geometric/measurement terms)
291
  stop_words = {
292
  'der', 'die', 'das', 'und', 'oder', 'aber', 'mit', 'von', 'zu', 'in', 'auf', 'für',
293
  'the', 'and', 'or', 'but', 'with', 'from', 'to', 'in', 'on', 'for', 'of', 'at',
294
  'lies', 'sits', 'stands', 'moves', 'flows', 'rotates', 'begins', 'starts',
295
  'liegt', 'sitzt', 'steht', 'bewegt', 'fließt', 'rotiert', 'beginnt', 'startet',
296
  'while', 'next', 'left', 'right', 'between', 'above', 'below', 'around',
297
+ 'time', 'way', 'thing', 'part', 'case', 'work', 'life', 'world', 'year',
298
+ # Geometric/measurement terms that should not be entities
299
+ 'angle', 'degree', 'degrees', 'grad', 'winkel', 'rotation', 'position',
300
+ 'distance', 'entfernung', 'abstand', 'height', 'höhe', 'width', 'breite',
301
+ 'length', 'länge', 'size', 'größe', 'direction', 'richtung', 'orientation',
302
+ 'place', 'platz', 'setze', 'towards', 'richtung', 'nach'
303
  }
304
 
305
  # Clean and filter
 
327
  deduplicated.sort(key=sort_key)
328
 
329
  return deduplicated[:15] # Increase limit to 15 entities
330
+
331
def extract_geometric_parameters(self, text: str) -> Dict[str, List]:
    """Extract geometric parameters (angles, distances, positions) from text.

    The text is lower-cased before matching, so units are case-insensitive.

    Args:
        text: Free-form instruction text, e.g.
            "Place the sensor 10 cm above the conveyor at 45°".

    Returns:
        A dict with four list-valued keys:

        * ``'angles'``: ``{'value': float, 'unit': 'degrees' | 'radians'}``
        * ``'distances'``: ``{'value': float, 'unit': str}`` where the unit
          is normalised to one of ``mm``, ``cm``, ``m``, ``km``, ``inch``,
          ``ft`` (spelled-out English/German names are mapped onto these).
        * ``'positions'``: ``{'x': float, 'y': float, 'z': float}``
        * ``'orientations'``: always empty here; kept so the result shape
          stays stable for callers.

        Empty or ``None`` input yields the dict with all lists empty.
    """
    import re

    parameters = {
        'angles': [],
        'distances': [],
        'positions': [],
        'orientations': []
    }
    if not text:
        return parameters

    text_lower = text.lower()
    number = r'(\d+(?:\.\d+)?)'
    signed_number = r'([+-]?\d+(?:\.\d+)?)'

    # Each pattern carries its unit explicitly instead of guessing it from
    # the regex source text. The trailing \b keeps words that merely start
    # with a unit ("3 radios", "45 degC") from being read as angles.
    angle_patterns = [
        (number + r'\s*°', 'degrees'),                   # 45°
        (number + r'\s*deg(?:ree)?s?\b', 'degrees'),     # 45 degrees
        (number + r'\s*grad\b', 'degrees'),              # 45 grad (German)
        (number + r'\s*rad(?:ian)?s?\b', 'radians'),     # 1.57 radians
    ]
    for pattern, unit in angle_patterns:
        for value in re.findall(pattern, text_lower):
            parameters['angles'].append({'value': float(value), 'unit': unit})

    # Spelled-out units are normalised to their abbreviation. A single
    # two-group regex is used so re.findall always yields (value, unit)
    # tuples; single-group patterns return bare strings, which previously
    # caused the "meter"/"zentimeter" matches to be discarded.
    unit_aliases = {
        'mm': 'mm', 'millimeter': 'mm',
        'cm': 'cm', 'zentimeter': 'cm', 'centimeter': 'cm',
        'm': 'm', 'meter': 'm', 'meters': 'm',
        'km': 'km', 'kilometer': 'km',
        'inch': 'inch', 'inches': 'inch',
        'ft': 'ft',
    }
    # Longest alternatives first so "mm" or "meter" win over the bare "m".
    unit_alternation = '|'.join(sorted(unit_aliases, key=len, reverse=True))
    distance_pattern = number + r'\s*(' + unit_alternation + r')\b'
    for value, unit in re.findall(distance_pattern, text_lower):
        parameters['distances'].append({
            'value': float(value),
            'unit': unit_aliases[unit]
        })

    # Coordinates may be negative, and whitespace inside the parentheses
    # is tolerated.
    coord_patterns = [
        # (x, y, z)
        r'\(\s*' + signed_number + r'\s*,\s*' + signed_number
        + r'\s*,\s*' + signed_number + r'\s*\)',
        # x: 10, y: 20, z: 30
        r'x:\s*' + signed_number + r',?\s*y:\s*' + signed_number
        + r',?\s*z:\s*' + signed_number,
    ]
    for pattern in coord_patterns:
        for x, y, z in re.findall(pattern, text_lower):
            parameters['positions'].append({
                'x': float(x),
                'y': float(y),
                'z': float(z)
            })

    return parameters
391
 
392
  def extract_relations_from_text(self, text: str) -> List[Dict]:
393
+ """Extract relations from text including geometric parameters"""
394
  relations = []
395
  text_lower = text.lower()
396
 
 
405
  'strength': np.random.uniform(0.6, 0.95)
406
  })
407
 
408
+ # Extract geometric parameters and add as metadata
409
+ geometric_params = self.extract_geometric_parameters(text)
410
+ if any(geometric_params.values()): # If any parameters found
411
+ relations.append({
412
+ 'type': 'geometric_parameters',
413
+ 'word': 'parameters',
414
+ 'strength': 1.0,
415
+ 'parameters': geometric_params
416
+ })
417
+
418
  return relations
419
 
420
  def _initialize_real_gasm(self):
 
722
  """Classify entity type based on semantic content"""
723
  entity_lower = entity.lower()
724
 
725
+ # Use the semantic categories for precise classification
726
+ for category, subcategories in self.semantic_categories.items():
727
+ for subcategory, items in subcategories.items():
728
+ if entity_lower in items:
729
+ if category == 'technical_objects':
730
+ if subcategory == 'robotics':
731
+ return 'robotic'
732
+ elif subcategory == 'industrial':
733
+ return 'industrial'
734
+ elif subcategory == 'scientific':
735
+ return 'scientific'
736
+ else:
737
+ return 'technical'
738
+ elif category == 'physical_objects':
739
+ return 'physical'
740
+ elif category == 'spatial_objects':
741
+ return 'spatial'
742
+ elif category == 'scientific_entities':
743
+ return 'scientific'
744
+
745
+ # Fallback patterns for backwards compatibility
746
  if any(word in entity_lower for word in ['robot', 'arm', 'sensor', 'motor']):
747
  return 'robotic'
748
+ elif any(word in entity_lower for word in ['conveyor', 'machine', 'equipment', 'system']):
749
+ return 'industrial'
750
  elif any(word in entity_lower for word in ['atom', 'electron', 'molecule', 'crystal', 'particle']):
751
+ return 'scientific'
752
  elif any(word in entity_lower for word in ['ball', 'table', 'chair', 'book', 'computer']):
753
+ return 'physical'
754
+ elif any(word in entity_lower for word in ['area', 'zone', 'space', 'place', 'location']):
755
  return 'spatial'
 
 
756
  else:
757
  return 'unknown'
758