Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,7 @@ from pdf2image import convert_from_bytes
|
|
13 |
# Load the OCR Pipeline (Uses Torch)
|
14 |
#####################################
|
15 |
try:
|
16 |
-
#
|
17 |
ocr_pipeline = pipeline("image-to-text", model="YouLiXiya/tinyllava-v1.0-1.1b-hf")
|
18 |
st.write("Model loaded successfully!")
|
19 |
except Exception as e:
|
@@ -68,11 +68,12 @@ def extract_resume_info(text):
|
|
68 |
"Expected Industry/Direction": None,
|
69 |
}
|
70 |
|
71 |
-
# Extract name
|
72 |
name_match = re.search(r"[Nn]ame[:\-]\s*([A-Za-z\s]+)", text)
|
73 |
if name_match:
|
74 |
info["Name"] = name_match.group(1).strip()
|
75 |
else:
|
|
|
76 |
potential_names = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b', text)
|
77 |
if potential_names:
|
78 |
info["Name"] = potential_names[0]
|
@@ -91,7 +92,7 @@ def extract_resume_info(text):
|
|
91 |
if exp_line:
|
92 |
info["Job Experience"] = exp_line.group(2).strip()
|
93 |
|
94 |
-
# Extract skills
|
95 |
skills_match = re.search(r"[Ss]kills[:\-]\s*(.+)", text)
|
96 |
if skills_match:
|
97 |
skills_text = skills_match.group(1)
|
@@ -105,64 +106,34 @@ def extract_resume_info(text):
|
|
105 |
|
106 |
return info
|
107 |
|
108 |
-
#####################################
|
109 |
-
# Candidate Comparison Function
|
110 |
-
#####################################
|
111 |
-
def compare_candidate_with_company(resume_info, company_requirements):
|
112 |
-
candidate_industry = resume_info.get("Expected Industry/Direction", "")
|
113 |
-
candidate_keywords = set(candidate_industry.lower().split())
|
114 |
-
company_keywords = set(company_requirements.lower().split())
|
115 |
-
common = candidate_keywords.intersection(company_keywords)
|
116 |
-
suitable = len(common) > 0
|
117 |
-
|
118 |
-
# Check skills matching if available
|
119 |
-
if resume_info.get("Skills"):
|
120 |
-
candidate_skills = {skill.lower() for skill in resume_info["Skills"]}
|
121 |
-
company_skills = set(company_requirements.lower().split())
|
122 |
-
common_skills = candidate_skills.intersection(company_skills)
|
123 |
-
if len(common_skills) >= 1:
|
124 |
-
suitable = True
|
125 |
-
|
126 |
-
return {
|
127 |
-
"Common Keywords": list(common) if common else [],
|
128 |
-
"Suitable": "Yes" if suitable else "No"
|
129 |
-
}
|
130 |
-
|
131 |
#####################################
|
132 |
# Main Processing Logic
|
133 |
#####################################
|
134 |
-
def process_resume(file_obj
|
135 |
if file_obj is None:
|
136 |
-
return None, None
|
137 |
|
138 |
resume_text = extract_text_from_file(file_obj)
|
139 |
resume_info = extract_resume_info(resume_text)
|
140 |
-
|
141 |
-
return resume_text, resume_info, comparison
|
142 |
|
143 |
#####################################
|
144 |
# Streamlit UI
|
145 |
#####################################
|
146 |
-
st.title("Resume Extraction and
|
147 |
st.markdown("""
|
148 |
-
|
149 |
-
extract details from uploaded resume files (PDF or image formats). It then parses critical candidate
|
150 |
-
information and compares it against company requirements.
|
151 |
""")
|
152 |
|
153 |
uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
|
154 |
-
company_requirements = st.text_input("Enter Company Requirements/Criteria (e.g., industry, skills)",
|
155 |
-
placeholder="Example: Technology, Python, Software Development")
|
156 |
|
157 |
-
if st.button("
|
158 |
if uploaded_file is None:
|
159 |
st.error("Please upload a file first.")
|
160 |
else:
|
161 |
with st.spinner("Processing..."):
|
162 |
-
resume_text, resume_info
|
163 |
st.subheader("Extracted Resume Text")
|
164 |
st.text_area("", resume_text, height=200)
|
165 |
st.subheader("Parsed Resume Information")
|
166 |
-
st.json(resume_info)
|
167 |
-
st.subheader("Comparison with Company Requirements")
|
168 |
-
st.json(comparison)
|
|
|
13 |
# Load the OCR Pipeline (Uses Torch)
|
14 |
#####################################
|
15 |
try:
|
16 |
+
# Ensure your transformers library is updated (>=4.x)
|
17 |
ocr_pipeline = pipeline("image-to-text", model="YouLiXiya/tinyllava-v1.0-1.1b-hf")
|
18 |
st.write("Model loaded successfully!")
|
19 |
except Exception as e:
|
|
|
68 |
"Expected Industry/Direction": None,
|
69 |
}
|
70 |
|
71 |
+
# Extract name (e.g., "Name: John Doe")
|
72 |
name_match = re.search(r"[Nn]ame[:\-]\s*([A-Za-z\s]+)", text)
|
73 |
if name_match:
|
74 |
info["Name"] = name_match.group(1).strip()
|
75 |
else:
|
76 |
+
# Heuristic: pick the first sequence of capitalized words
|
77 |
potential_names = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b', text)
|
78 |
if potential_names:
|
79 |
info["Name"] = potential_names[0]
|
|
|
92 |
if exp_line:
|
93 |
info["Job Experience"] = exp_line.group(2).strip()
|
94 |
|
95 |
+
# Extract skills (e.g., "Skills: Python, Java, SQL")
|
96 |
skills_match = re.search(r"[Ss]kills[:\-]\s*(.+)", text)
|
97 |
if skills_match:
|
98 |
skills_text = skills_match.group(1)
|
|
|
106 |
|
107 |
return info
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
#####################################
|
110 |
# Main Processing Logic
|
111 |
#####################################
|
112 |
+
def process_resume(file_obj):
|
113 |
if file_obj is None:
|
114 |
+
return None, None
|
115 |
|
116 |
resume_text = extract_text_from_file(file_obj)
|
117 |
resume_info = extract_resume_info(resume_text)
|
118 |
+
return resume_text, resume_info
|
|
|
119 |
|
120 |
#####################################
|
121 |
# Streamlit UI
|
122 |
#####################################
|
123 |
+
st.title("Resume Extraction and Information Parsing")
|
124 |
st.markdown("""
|
125 |
+
Upload a resume file (in PDF or image format) and the app will extract its text and parse critical candidate information.
|
|
|
|
|
126 |
""")
|
127 |
|
128 |
uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
|
|
|
|
|
129 |
|
130 |
+
if st.button("Extract Info"):
|
131 |
if uploaded_file is None:
|
132 |
st.error("Please upload a file first.")
|
133 |
else:
|
134 |
with st.spinner("Processing..."):
|
135 |
+
resume_text, resume_info = process_resume(uploaded_file)
|
136 |
st.subheader("Extracted Resume Text")
|
137 |
st.text_area("", resume_text, height=200)
|
138 |
st.subheader("Parsed Resume Information")
|
139 |
+
st.json(resume_info)
|
|
|
|