Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Community

CR7CAD committed on Mar 16

Commit

6088e9d

verified ·

1 Parent(s): 9b62bb7

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -41

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ from pdf2image import convert_from_bytes
 # Load the OCR Pipeline (Uses Torch)
 #####################################
 try:
-    # Make sure that you're using an updated version of the transformers library (>=4.x)
     ocr_pipeline = pipeline("image-to-text", model="YouLiXiya/tinyllava-v1.0-1.1b-hf")
     st.write("Model loaded successfully!")
 except Exception as e:
@@ -68,11 +68,12 @@ def extract_resume_info(text):
         "Expected Industry/Direction": None,
     }
-    # Extract name, e.g., "Name: John Doe"
     name_match = re.search(r"[Nn]ame[:\-]\s*([A-Za-z\s]+)", text)
     if name_match:
         info["Name"] = name_match.group(1).strip()
     else:
         potential_names = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b', text)
         if potential_names:
             info["Name"] = potential_names[0]
@@ -91,7 +92,7 @@ def extract_resume_info(text):
         if exp_line:
             info["Job Experience"] = exp_line.group(2).strip()
-    # Extract skills
     skills_match = re.search(r"[Ss]kills[:\-]\s*(.+)", text)
     if skills_match:
         skills_text = skills_match.group(1)
@@ -105,64 +106,34 @@ def extract_resume_info(text):
     return info
-#####################################
-# Candidate Comparison Function
-#####################################
-def compare_candidate_with_company(resume_info, company_requirements):
-    candidate_industry = resume_info.get("Expected Industry/Direction", "")
-    candidate_keywords = set(candidate_industry.lower().split())
-    company_keywords = set(company_requirements.lower().split())
-    common = candidate_keywords.intersection(company_keywords)
-    suitable = len(common) > 0
-    # Check skills matching if available
-    if resume_info.get("Skills"):
-        candidate_skills = {skill.lower() for skill in resume_info["Skills"]}
-        company_skills = set(company_requirements.lower().split())
-        common_skills = candidate_skills.intersection(company_skills)
-        if len(common_skills) >= 1:
-            suitable = True
-    return {
-        "Common Keywords": list(common) if common else [],
-        "Suitable": "Yes" if suitable else "No"
-    }
 #####################################
 # Main Processing Logic
 #####################################
-def process_resume(file_obj, company_requirements):
     if file_obj is None:
-        return None, None, None
     resume_text = extract_text_from_file(file_obj)
     resume_info = extract_resume_info(resume_text)
-    comparison = compare_candidate_with_company(resume_info, company_requirements)
-    return resume_text, resume_info, comparison
 #####################################
 # Streamlit UI
 #####################################
-st.title("Resume Extraction and Candidate Matching")
 st.markdown("""
-This app uses an image-to-text pipeline (powered by `YouLiXiya/tinyllava-v1.0-1.1b-hf` and PyTorch) to
-extract details from uploaded resume files (PDF or image formats). It then parses critical candidate
-information and compares it against company requirements.
 """)
 uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
-company_requirements = st.text_input("Enter Company Requirements/Criteria (e.g., industry, skills)",
-                                     placeholder="Example: Technology, Python, Software Development")
-if st.button("Process Resume"):
     if uploaded_file is None:
         st.error("Please upload a file first.")
     else:
         with st.spinner("Processing..."):
-            resume_text, resume_info, comparison = process_resume(uploaded_file, company_requirements)
         st.subheader("Extracted Resume Text")
         st.text_area("", resume_text, height=200)
         st.subheader("Parsed Resume Information")
-        st.json(resume_info)
-        st.subheader("Comparison with Company Requirements")
-        st.json(comparison)

 # Load the OCR Pipeline (Uses Torch)
 #####################################
 try:
+    # Ensure your transformers library is updated (>=4.x)
     ocr_pipeline = pipeline("image-to-text", model="YouLiXiya/tinyllava-v1.0-1.1b-hf")
     st.write("Model loaded successfully!")
 except Exception as e:
         "Expected Industry/Direction": None,
     }
+    # Extract name (e.g., "Name: John Doe")
     name_match = re.search(r"[Nn]ame[:\-]\s*([A-Za-z\s]+)", text)
     if name_match:
         info["Name"] = name_match.group(1).strip()
     else:
+        # Heuristic: pick the first sequence of capitalized words
         potential_names = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b', text)
         if potential_names:
             info["Name"] = potential_names[0]
         if exp_line:
             info["Job Experience"] = exp_line.group(2).strip()
+    # Extract skills (e.g., "Skills: Python, Java, SQL")
     skills_match = re.search(r"[Ss]kills[:\-]\s*(.+)", text)
     if skills_match:
         skills_text = skills_match.group(1)
     return info
 #####################################
 # Main Processing Logic
 #####################################
+def process_resume(file_obj):
     if file_obj is None:
+        return None, None
     resume_text = extract_text_from_file(file_obj)
     resume_info = extract_resume_info(resume_text)
+    return resume_text, resume_info
 #####################################
 # Streamlit UI
 #####################################
+st.title("Resume Extraction and Information Parsing")
 st.markdown("""
+Upload a resume file (in PDF or image format) and the app will extract its text and parse critical candidate information.
 """)
 uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
+if st.button("Extract Info"):
     if uploaded_file is None:
         st.error("Please upload a file first.")
     else:
         with st.spinner("Processing..."):
+            resume_text, resume_info = process_resume(uploaded_file)
         st.subheader("Extracted Resume Text")
         st.text_area("", resume_text, height=200)
         st.subheader("Parsed Resume Information")
+        st.json(resume_info)