CR7CAD committed on
Commit
6088e9d
·
verified ·
1 Parent(s): 9b62bb7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -41
app.py CHANGED
@@ -13,7 +13,7 @@ from pdf2image import convert_from_bytes
13
  # Load the OCR Pipeline (Uses Torch)
14
  #####################################
15
  try:
16
- # Make sure that you're using an updated version of the transformers library (>=4.x)
17
  ocr_pipeline = pipeline("image-to-text", model="YouLiXiya/tinyllava-v1.0-1.1b-hf")
18
  st.write("Model loaded successfully!")
19
  except Exception as e:
@@ -68,11 +68,12 @@ def extract_resume_info(text):
68
  "Expected Industry/Direction": None,
69
  }
70
 
71
- # Extract name, e.g., "Name: John Doe"
72
  name_match = re.search(r"[Nn]ame[:\-]\s*([A-Za-z\s]+)", text)
73
  if name_match:
74
  info["Name"] = name_match.group(1).strip()
75
  else:
 
76
  potential_names = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b', text)
77
  if potential_names:
78
  info["Name"] = potential_names[0]
@@ -91,7 +92,7 @@ def extract_resume_info(text):
91
  if exp_line:
92
  info["Job Experience"] = exp_line.group(2).strip()
93
 
94
- # Extract skills
95
  skills_match = re.search(r"[Ss]kills[:\-]\s*(.+)", text)
96
  if skills_match:
97
  skills_text = skills_match.group(1)
@@ -105,64 +106,34 @@ def extract_resume_info(text):
105
 
106
  return info
107
 
108
- #####################################
109
- # Candidate Comparison Function
110
- #####################################
111
- def compare_candidate_with_company(resume_info, company_requirements):
112
- candidate_industry = resume_info.get("Expected Industry/Direction", "")
113
- candidate_keywords = set(candidate_industry.lower().split())
114
- company_keywords = set(company_requirements.lower().split())
115
- common = candidate_keywords.intersection(company_keywords)
116
- suitable = len(common) > 0
117
-
118
- # Check skills matching if available
119
- if resume_info.get("Skills"):
120
- candidate_skills = {skill.lower() for skill in resume_info["Skills"]}
121
- company_skills = set(company_requirements.lower().split())
122
- common_skills = candidate_skills.intersection(company_skills)
123
- if len(common_skills) >= 1:
124
- suitable = True
125
-
126
- return {
127
- "Common Keywords": list(common) if common else [],
128
- "Suitable": "Yes" if suitable else "No"
129
- }
130
-
131
  #####################################
132
  # Main Processing Logic
133
  #####################################
134
- def process_resume(file_obj, company_requirements):
135
  if file_obj is None:
136
- return None, None, None
137
 
138
  resume_text = extract_text_from_file(file_obj)
139
  resume_info = extract_resume_info(resume_text)
140
- comparison = compare_candidate_with_company(resume_info, company_requirements)
141
- return resume_text, resume_info, comparison
142
 
143
  #####################################
144
  # Streamlit UI
145
  #####################################
146
- st.title("Resume Extraction and Candidate Matching")
147
  st.markdown("""
148
- This app uses an image-to-text pipeline (powered by `YouLiXiya/tinyllava-v1.0-1.1b-hf` and PyTorch) to
149
- extract details from uploaded resume files (PDF or image formats). It then parses critical candidate
150
- information and compares it against company requirements.
151
  """)
152
 
153
  uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
154
- company_requirements = st.text_input("Enter Company Requirements/Criteria (e.g., industry, skills)",
155
- placeholder="Example: Technology, Python, Software Development")
156
 
157
- if st.button("Process Resume"):
158
  if uploaded_file is None:
159
  st.error("Please upload a file first.")
160
  else:
161
  with st.spinner("Processing..."):
162
- resume_text, resume_info, comparison = process_resume(uploaded_file, company_requirements)
163
  st.subheader("Extracted Resume Text")
164
  st.text_area("", resume_text, height=200)
165
  st.subheader("Parsed Resume Information")
166
- st.json(resume_info)
167
- st.subheader("Comparison with Company Requirements")
168
- st.json(comparison)
 
13
  # Load the OCR Pipeline (Uses Torch)
14
  #####################################
15
  try:
16
+ # Ensure your transformers library is updated (>=4.x)
17
  ocr_pipeline = pipeline("image-to-text", model="YouLiXiya/tinyllava-v1.0-1.1b-hf")
18
  st.write("Model loaded successfully!")
19
  except Exception as e:
 
68
  "Expected Industry/Direction": None,
69
  }
70
 
71
+ # Extract name (e.g., "Name: John Doe")
72
  name_match = re.search(r"[Nn]ame[:\-]\s*([A-Za-z\s]+)", text)
73
  if name_match:
74
  info["Name"] = name_match.group(1).strip()
75
  else:
76
+ # Heuristic: pick the first sequence of capitalized words
77
  potential_names = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b', text)
78
  if potential_names:
79
  info["Name"] = potential_names[0]
 
92
  if exp_line:
93
  info["Job Experience"] = exp_line.group(2).strip()
94
 
95
+ # Extract skills (e.g., "Skills: Python, Java, SQL")
96
  skills_match = re.search(r"[Ss]kills[:\-]\s*(.+)", text)
97
  if skills_match:
98
  skills_text = skills_match.group(1)
 
106
 
107
  return info
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  #####################################
110
  # Main Processing Logic
111
  #####################################
112
+ def process_resume(file_obj):
113
  if file_obj is None:
114
+ return None, None
115
 
116
  resume_text = extract_text_from_file(file_obj)
117
  resume_info = extract_resume_info(resume_text)
118
+ return resume_text, resume_info
 
119
 
120
  #####################################
121
  # Streamlit UI
122
  #####################################
123
+ st.title("Resume Extraction and Information Parsing")
124
  st.markdown("""
125
+ Upload a resume file (in PDF or image format) and the app will extract its text and parse critical candidate information.
 
 
126
  """)
127
 
128
  uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
 
 
129
 
130
+ if st.button("Extract Info"):
131
  if uploaded_file is None:
132
  st.error("Please upload a file first.")
133
  else:
134
  with st.spinner("Processing..."):
135
+ resume_text, resume_info = process_resume(uploaded_file)
136
  st.subheader("Extracted Resume Text")
137
  st.text_area("", resume_text, height=200)
138
  st.subheader("Parsed Resume Information")
139
+ st.json(resume_info)