arjunanand13 committed on
Commit
45102e7
·
verified ·
1 Parent(s): 857328d

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +62 -17
main.py CHANGED
@@ -447,11 +447,25 @@ class QualityAssessor:
447
  field_scores = result.confidence_scores.copy()
448
  consistency_score = self._check_consistency(result.data)
449
 
450
- overall_confidence = (
451
- sum(field_scores.values()) / len(field_scores) if field_scores else 0
452
- ) * schema_compliance * consistency_score
 
 
 
 
 
 
 
 
 
 
 
453
 
454
- review_flags = self._generate_review_flags(field_scores, schema_compliance, overall_confidence)
 
 
 
455
  review_time = self._estimate_review_time(review_flags, field_scores)
456
 
457
  return QualityReport(
@@ -467,19 +481,21 @@ class QualityAssessor:
467
  required_fields = schema.get('required', [])
468
  properties = schema.get('properties', {})
469
 
470
- score = 1.0
471
-
472
- for field in required_fields:
473
- if field not in data or data[field] is None:
474
- score -= 0.2
475
 
 
 
476
  for field, value in data.items():
477
  if field in properties:
 
478
  expected_type = properties[field].get('type')
479
  if expected_type and not self._check_type(value, expected_type):
480
- score -= 0.1
481
 
482
- return max(0.0, score)
 
 
483
 
484
  def _check_type(self, value: Any, expected_type: str) -> bool:
485
  if value is None:
@@ -497,20 +513,49 @@ class QualityAssessor:
497
  return isinstance(value, expected_python_type)
498
 
499
  def _check_consistency(self, data: Dict[str, Any]) -> float:
500
- return 0.85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
 
502
- def _generate_review_flags(self, field_scores: Dict[str, float], schema_compliance: float, overall_confidence: float) -> List[str]:
503
  flags = []
504
 
505
- if overall_confidence < 0.7:
506
- flags.append("low_overall_confidence")
 
 
507
 
508
  if schema_compliance < 0.8:
509
  flags.append("schema_compliance_issues")
510
 
511
- low_confidence_fields = [field for field, score in field_scores.items() if score < 0.6]
512
  if low_confidence_fields:
513
- flags.append(f"low_confidence_fields: {', '.join(low_confidence_fields)}")
 
 
 
 
514
 
515
  return flags
516
 
 
447
  field_scores = result.confidence_scores.copy()
448
  consistency_score = self._check_consistency(result.data)
449
 
450
+ required_fields = schema.get('required', [])
451
+
452
+ if field_scores:
453
+ total_weight = 0
454
+ weighted_confidence = 0
455
+
456
+ for field, confidence in field_scores.items():
457
+ weight = 2.0 if field in required_fields else 1.0
458
+ weighted_confidence += confidence * weight
459
+ total_weight += weight
460
+
461
+ avg_field_confidence = weighted_confidence / total_weight
462
+ else:
463
+ avg_field_confidence = 0
464
 
465
+ overall_confidence = avg_field_confidence * (0.8 + 0.2 * schema_compliance) * (0.9 + 0.1 * consistency_score)
466
+ overall_confidence = min(overall_confidence, 1.0)
467
+
468
+ review_flags = self._generate_review_flags(field_scores, schema_compliance, overall_confidence, required_fields, result.data)
469
  review_time = self._estimate_review_time(review_flags, field_scores)
470
 
471
  return QualityReport(
 
481
  required_fields = schema.get('required', [])
482
  properties = schema.get('properties', {})
483
 
484
+ required_present = sum(1 for field in required_fields if field in data and data[field] is not None)
485
+ required_compliance = required_present / len(required_fields) if required_fields else 1.0
 
 
 
486
 
487
+ type_errors = 0
488
+ total_fields = 0
489
  for field, value in data.items():
490
  if field in properties:
491
+ total_fields += 1
492
  expected_type = properties[field].get('type')
493
  if expected_type and not self._check_type(value, expected_type):
494
+ type_errors += 1
495
 
496
+ type_compliance = 1.0 - (type_errors / total_fields) if total_fields > 0 else 1.0
497
+
498
+ return (required_compliance * 0.7 + type_compliance * 0.3)
499
 
500
  def _check_type(self, value: Any, expected_type: str) -> bool:
501
  if value is None:
 
513
  return isinstance(value, expected_python_type)
514
 
515
  def _check_consistency(self, data: Dict[str, Any]) -> float:
516
+ consistency_score = 1.0
517
+
518
+ if 'email' in data and data['email']:
519
+ if '@' not in str(data['email']):
520
+ consistency_score -= 0.1
521
+
522
+ if 'startDate' in data and 'endDate' in data:
523
+ try:
524
+ if data['startDate'] and data['endDate']:
525
+ if str(data['startDate']) > str(data['endDate']):
526
+ consistency_score -= 0.15
527
+ except:
528
+ pass
529
+
530
+ if isinstance(data, dict):
531
+ for key, value in data.items():
532
+ if isinstance(value, list):
533
+ for item in value:
534
+ if isinstance(item, dict):
535
+ consistency_score *= self._check_consistency(item)
536
+ elif isinstance(value, dict):
537
+ consistency_score *= self._check_consistency(value)
538
+
539
+ return max(0.7, consistency_score)
540
 
541
+ def _generate_review_flags(self, field_scores: Dict[str, float], schema_compliance: float, overall_confidence: float, required_fields: List[str], extracted_data: Dict[str, Any]) -> List[str]:
542
  flags = []
543
 
544
+ if overall_confidence < 0.6:
545
+ flags.append("high_priority_review")
546
+ elif overall_confidence < 0.8:
547
+ flags.append("standard_review")
548
 
549
  if schema_compliance < 0.8:
550
  flags.append("schema_compliance_issues")
551
 
552
+ low_confidence_fields = [field for field, score in field_scores.items() if score < 0.7]
553
  if low_confidence_fields:
554
+ flags.append(f"uncertain_fields: {', '.join(low_confidence_fields[:3])}")
555
+
556
+ missing_required = [field for field in required_fields if field not in extracted_data or extracted_data[field] is None]
557
+ if missing_required:
558
+ flags.append(f"missing_required: {', '.join(missing_required[:3])}")
559
 
560
  return flags
561