edouardlgp committed on
Commit
e518da9
·
verified ·
1 Parent(s): 1735116

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -25
app.py CHANGED
@@ -13,6 +13,30 @@ import traceback
13
  import time
14
  import openai
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Load environment variables
17
  load_dotenv()
18
 
@@ -32,31 +56,31 @@ except Exception as e:
32
  try:
33
  occupational_groups_df = pd.read_csv("occupational_groups.csv", on_bad_lines='skip')
34
  except Exception as e:
35
- print(f"Error reading occupational_groups.csv: {e}")
36
  occupational_groups_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
37
 
38
  try:
39
  esco_df = pd.read_csv("ISCOGroups_en.csv", on_bad_lines='skip', dtype={'code': str} ) # Force 'code' to be read as string
40
  except Exception as e:
41
- print(f"Error reading ISCOGroups_en.csv: {e}")
42
  esco_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
43
 
44
  try:
45
  esco_level5_df = pd.read_csv("occupations_en.csv", on_bad_lines='skip', dtype={'code': str, 'iscoGroup': str, } ) # Force 'code' to be read as string
46
  except Exception as e:
47
- print(f"Error reading occupations_en.csv: {e}")
48
  esco_level5_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
49
 
50
  try:
51
  esco_skill_df = pd.read_csv("skills_en.csv", on_bad_lines='skip')
52
  except Exception as e:
53
- print(f"Error reading skills_en.csv: {e}")
54
  esco_skill_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
55
 
56
  try:
57
  esco_skill_map_df = pd.read_csv("occupationSkillRelations_en.csv", on_bad_lines='skip')
58
  except Exception as e:
59
- print(f"Error reading occupationSkillRelations_en.csv: {e}")
60
  esco_skill_map_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
61
 
62
 
@@ -138,7 +162,7 @@ def classify_job_family(responsibilities: List[str]) -> str:
138
  def get_level_CCOG_info(df, code, level_name):
139
  matches = df[df['code'] == code]
140
  if len(matches) == 0:
141
- print(f"Warning: No {level_name} found for CCOG code {code}")
142
  return {
143
  f'{level_name}_CCOG_code': code,
144
  f'{level_name}_CCOG_name': 'UNKNOWN',
@@ -184,7 +208,7 @@ def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
184
  level_code = code_sanitize(level_code, list_output)
185
  result.update(get_level_CCOG_info(level_df, level_code, f'Level_{level}'))
186
  except Exception as e:
187
- print(f"Error during classification: {str(e)}")
188
  result['error'] = str(e)
189
  return result
190
 
@@ -367,7 +391,7 @@ def get_level_ESCO_info(df, code, level_name):
367
  """Helper function to get level info with error handling"""
368
  matches = df[df['code'] == code]
369
  if len(matches) == 0:
370
- print(f"Warning: No {level_name} found for ESCO code {code}")
371
  return {
372
  f'{level_name}_ESCO_code': code,
373
  f'{level_name}_ESCO_name': 'UNKNOWN',
@@ -435,8 +459,8 @@ def review_skills(Level_5_code: str, top_n: int = 10) -> List[Dict[str, str]]:
435
  result = json.loads(json_text)
436
  skills = result.get("skills", [])
437
  except json.JSONDecodeError as e:
438
- print(f"❌ JSON parsing error: {e}")
439
- print(f"πŸ” Problematic JSON: {json_text}")
440
  return []
441
  validated_skills = []
442
  for skill in skills:
@@ -448,7 +472,7 @@ def review_skills(Level_5_code: str, top_n: int = 10) -> List[Dict[str, str]]:
448
  }
449
  validated_skills.append(validated)
450
  except (KeyError, TypeError) as e:
451
- print(f"⚠️ Skipping invalid skill: {skill}. Error: {e}")
452
  continue
453
  return validated_skills[:top_n]
454
 
@@ -488,8 +512,8 @@ def extract_skills(responsibilities: List[str], top_n: int = 10) -> List[Dict[st
488
  result = json.loads(json_text)
489
  skills = result.get("skills", [])
490
  except json.JSONDecodeError as e:
491
- print(f"❌ JSON parsing error: {e}")
492
- print(f"πŸ” Problematic JSON: {json_text}")
493
  return []
494
  validated_skills = []
495
  for skill in skills:
@@ -501,7 +525,7 @@ def extract_skills(responsibilities: List[str], top_n: int = 10) -> List[Dict[st
501
  }
502
  validated_skills.append(validated)
503
  except (KeyError, TypeError) as e:
504
- print(f"⚠️ Skipping invalid skill: {skill}. Error: {e}")
505
  continue
506
  return validated_skills[:top_n]
507
 
@@ -526,8 +550,8 @@ def map_proficiency_and_assessment(skills: List[str], responsibilities: List[str
526
  try:
527
  results = json.loads(json_text)
528
  except json.JSONDecodeError as e:
529
- print(f"❌ JSON parsing error: {e}")
530
- print(f"πŸ” Problematic JSON: {json_text}")
531
  return []
532
  validated = []
533
  for item in results:
@@ -542,7 +566,7 @@ def map_proficiency_and_assessment(skills: List[str], responsibilities: List[str
542
  "assessment_method": item["assessment_method"].strip()
543
  })
544
  except (KeyError, TypeError) as e:
545
- print(f"⚠️ Skipping invalid item: {item}. Error: {e}")
546
  continue
547
  return validated
548
 
@@ -550,7 +574,7 @@ def _extract_json_array(raw: str) -> str:
550
  json_start = raw.find('[')
551
  json_end = raw.rfind(']') + 1
552
  if json_start == -1 or json_end == 0:
553
- print(f"❌ No JSON array found in response: {raw}")
554
  return ""
555
  json_text = raw[json_start:json_end]
556
  json_text = re.sub(r',\s*([}\]])', r'\1', json_text)
@@ -582,7 +606,7 @@ def _extract_json(raw: str) -> str:
582
  json_start = raw.find('{')
583
  json_end = raw.rfind('}') + 1
584
  if json_start == -1 or json_end == 0:
585
- print(f"❌ No JSON found in response: {raw}")
586
  return ""
587
  json_text = raw[json_start:json_end]
588
  json_text = re.sub(r',\s*([}\]])', r'\1', json_text)
@@ -599,9 +623,10 @@ def process_pdf(file):
599
  extracted_text = extract_text_from_pdf(file.name)
600
  responsibilities = extract_section_from_pdf(extracted_text, section_title="Responsibilities and Accountabilities")
601
  if not responsibilities:
602
- print(f"Skipping {os.path.basename(file.name)} - no responsibilities section found")
603
  return None
604
  job_family = classify_job_family(responsibilities)
 
605
  occ_group = classify_occupational_group_by_level(responsibilities)
606
  esco_occ = classify_esco_by_hierarchical_level(responsibilities)
607
  qualification = extract_qualification(responsibilities)
@@ -615,7 +640,7 @@ def process_pdf(file):
615
  skill_esco_extract = review_skills(Level_5_code)
616
  skill_esco_map = map_proficiency_and_assessment(skill_esco_extract, responsibilities)
617
  else:
618
- print(f"No Level 5 ESCO code found for {os.path.basename(file.name)}, skipping ESCO skills mapping")
619
  time.sleep(6)
620
  assessment_lookup = {item['skill_name']: item for item in skill_map}
621
  joined_skills = [
@@ -688,15 +713,27 @@ with gr.Blocks() as demo:
688
  gr.Markdown("Identify Job Family, Occupation, Qualification, match Skills and suggest interview questions.")
689
  with gr.Row():
690
  with gr.Column():
691
- file_input = gr.File(label="Upload a Job Description PDF file", file_types=[".pdf"])
692
- submit_btn = gr.Button("Extract Text")
 
 
693
  with gr.Column():
694
  text_output = gr.Textbox(label="Extracted Text", lines=30, max_lines=50, interactive=False)
 
 
 
 
 
 
 
 
695
  submit_btn.click(
696
  fn=process_pdf,
697
  inputs=file_input,
698
- outputs=text_output
 
699
  )
700
 
701
  if __name__ == "__main__":
702
- demo.launch()
 
 
13
  import time
14
  import openai
15
 
16
# Debugging setup
DEBUG = True
# Upper bound on how many log lines are retained for the on-screen console.
MAX_DEBUG_MESSAGES = 20
# Rolling history of formatted log lines, oldest first. Kept as a plain list
# so any other code that joins, indexes or slices it keeps working.
debug_messages = []


def log_debug(message):
    """Record a timestamped debug message and return the recent history.

    When ``DEBUG`` is true the message is prefixed with the local time,
    appended to ``debug_messages``, echoed to stdout, and the history is
    trimmed to the newest ``MAX_DEBUG_MESSAGES`` entries.

    Args:
        message: Text (or any object usable in an f-string) to log.

    Returns:
        The retained history joined with newlines, or an empty string when
        ``DEBUG`` is false (nothing is recorded or printed in that case).
    """
    if not DEBUG:
        return ""

    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    full_message = f"[{timestamp}] {message}"
    debug_messages.append(full_message)
    print(full_message)  # Print to console

    # Drop *all* surplus entries, not just one: a single pop(0) only keeps the
    # bound if the list never grows by more than one line between calls.
    excess = len(debug_messages) - MAX_DEBUG_MESSAGES
    if excess > 0:
        del debug_messages[:excess]
    return "\n".join(debug_messages)


# Initialize debug logging
log_debug("Application starting...")
35
+
36
+
37
+
38
+
39
+
40
  # Load environment variables
41
  load_dotenv()
42
 
 
56
def _load_csv(path, **read_kwargs):
    """Read a reference CSV, falling back to an empty DataFrame on failure.

    Malformed rows are skipped (``on_bad_lines='skip'``). Any exception raised
    while reading is logged through ``log_debug`` and swallowed so the
    application can still start without the file.

    Args:
        path: CSV file path, passed to ``pd.read_csv``.
        **read_kwargs: Extra keyword arguments forwarded to ``pd.read_csv``
            (used here for ``dtype`` overrides).

    Returns:
        The parsed DataFrame, or an empty ``pd.DataFrame()`` if reading failed.
    """
    try:
        return pd.read_csv(path, on_bad_lines='skip', **read_kwargs)
    except Exception as e:
        # Deliberate best-effort: a missing or broken file must not stop start-up.
        log_debug(f"Error reading {path}: {e}")
        return pd.DataFrame()


occupational_groups_df = _load_csv("occupational_groups.csv")

# Force 'code' to be read as string
esco_df = _load_csv("ISCOGroups_en.csv", dtype={'code': str})

# Force 'code' and 'iscoGroup' to be read as strings
esco_level5_df = _load_csv("occupations_en.csv", dtype={'code': str, 'iscoGroup': str})

esco_skill_df = _load_csv("skills_en.csv")

esco_skill_map_df = _load_csv("occupationSkillRelations_en.csv")
85
 
86
 
 
162
  def get_level_CCOG_info(df, code, level_name):
163
  matches = df[df['code'] == code]
164
  if len(matches) == 0:
165
+ log_debug(f"Warning: No {level_name} found for CCOG code {code}")
166
  return {
167
  f'{level_name}_CCOG_code': code,
168
  f'{level_name}_CCOG_name': 'UNKNOWN',
 
208
  level_code = code_sanitize(level_code, list_output)
209
  result.update(get_level_CCOG_info(level_df, level_code, f'Level_{level}'))
210
  except Exception as e:
211
+ log_debug(f"Error during classification: {str(e)}")
212
  result['error'] = str(e)
213
  return result
214
 
 
391
  """Helper function to get level info with error handling"""
392
  matches = df[df['code'] == code]
393
  if len(matches) == 0:
394
+ log_debug(f"Warning: No {level_name} found for ESCO code {code}")
395
  return {
396
  f'{level_name}_ESCO_code': code,
397
  f'{level_name}_ESCO_name': 'UNKNOWN',
 
459
  result = json.loads(json_text)
460
  skills = result.get("skills", [])
461
  except json.JSONDecodeError as e:
462
+ log_debug(f"❌ JSON Skills parsing error: {e}")
463
+ log_debug(f"πŸ” Problematic JSON Skills: {json_text}")
464
  return []
465
  validated_skills = []
466
  for skill in skills:
 
472
  }
473
  validated_skills.append(validated)
474
  except (KeyError, TypeError) as e:
475
+ log_debug(f"⚠️ Skipping invalid skill: {skill}. Error: {e}")
476
  continue
477
  return validated_skills[:top_n]
478
 
 
512
  result = json.loads(json_text)
513
  skills = result.get("skills", [])
514
  except json.JSONDecodeError as e:
515
+ log_debug(f"❌ JSON Skills extrac parsing error: {e}")
516
+ log_debug(f"πŸ” Problematic JSON Skills extract: {json_text}")
517
  return []
518
  validated_skills = []
519
  for skill in skills:
 
525
  }
526
  validated_skills.append(validated)
527
  except (KeyError, TypeError) as e:
528
+ log_debug(f"⚠️ Skipping invalid skill extract: {skill}. Error: {e}")
529
  continue
530
  return validated_skills[:top_n]
531
 
 
550
  try:
551
  results = json.loads(json_text)
552
  except json.JSONDecodeError as e:
553
+ log_debug(f"❌ JSON proficiency parsing error: {e}")
554
+ log_debug(f"πŸ” Problematic JSON proficiency: {json_text}")
555
  return []
556
  validated = []
557
  for item in results:
 
566
  "assessment_method": item["assessment_method"].strip()
567
  })
568
  except (KeyError, TypeError) as e:
569
+ log_debug(f"⚠️ Skipping invalid profiency item: {item}. Error: {e}")
570
  continue
571
  return validated
572
 
 
574
  json_start = raw.find('[')
575
  json_end = raw.rfind(']') + 1
576
  if json_start == -1 or json_end == 0:
577
+ log_debug(f"❌ No JSON array found in response: {raw}")
578
  return ""
579
  json_text = raw[json_start:json_end]
580
  json_text = re.sub(r',\s*([}\]])', r'\1', json_text)
 
606
  json_start = raw.find('{')
607
  json_end = raw.rfind('}') + 1
608
  if json_start == -1 or json_end == 0:
609
+ log_debug(f"❌ No JSON found in response: {raw}")
610
  return ""
611
  json_text = raw[json_start:json_end]
612
  json_text = re.sub(r',\s*([}\]])', r'\1', json_text)
 
623
  extracted_text = extract_text_from_pdf(file.name)
624
  responsibilities = extract_section_from_pdf(extracted_text, section_title="Responsibilities and Accountabilities")
625
  if not responsibilities:
626
+ log_debug(f"Skipping {os.path.basename(file.name)} - no responsibilities section found")
627
  return None
628
  job_family = classify_job_family(responsibilities)
629
+ log_debug(f"Identified {job_family} ")
630
  occ_group = classify_occupational_group_by_level(responsibilities)
631
  esco_occ = classify_esco_by_hierarchical_level(responsibilities)
632
  qualification = extract_qualification(responsibilities)
 
640
  skill_esco_extract = review_skills(Level_5_code)
641
  skill_esco_map = map_proficiency_and_assessment(skill_esco_extract, responsibilities)
642
  else:
643
+ log_debug(f"No Level 5 ESCO code found for {os.path.basename(file.name)}, skipping ESCO skills mapping")
644
  time.sleep(6)
645
  assessment_lookup = {item['skill_name']: item for item in skill_map}
646
  joined_skills = [
 
713
  gr.Markdown("Identify Job Family, Occupation, Qualification, match Skills and suggest interview questions.")
714
  with gr.Row():
715
  with gr.Column():
716
+ file_input = gr.File(label="Upload a Post Description PDF file", file_types=[".pdf"])
717
+ submit_btn = gr.Button("Analyse Post Description")
718
+ # with gr.Column():
719
+ # text_output = gr.Textbox(label="Extracted Text", lines=30, max_lines=50, interactive=False)
720
  with gr.Column():
721
  text_output = gr.Textbox(label="Extracted Text", lines=30, max_lines=50, interactive=False)
722
+ if DEBUG:
723
+ gr.Markdown("### Debug Console", elem_classes=["debug-title"])
724
+ debug_console = gr.Textbox(
725
+ label="",
726
+ interactive=False,
727
+ lines=15,
728
+ elem_classes=["debug-console"]
729
+ )
730
  submit_btn.click(
731
  fn=process_pdf,
732
  inputs=file_input,
733
+ #outputs=text_output
734
+ outputs=[text_output, debug_console] if DEBUG else [text_output],
735
  )
736
 
737
  if __name__ == "__main__":
738
+ demo.launch(show_error=True,
739
+ debug=DEBUG)