File size: 5,321 Bytes
cf8a522
92f45fe
7716c5c
8e1d297
92f45fe
 
8e1d297
 
4c77f62
8e1d297
 
92f45fe
7716c5c
 
92f45fe
 
 
 
7716c5c
 
9753cc9
92f45fe
 
9753cc9
92f45fe
 
 
7716c5c
92f45fe
 
 
 
 
 
 
 
 
 
 
 
 
 
7716c5c
92f45fe
8e1d297
 
7716c5c
 
 
 
 
 
 
d836318
7716c5c
d836318
7716c5c
 
 
 
 
 
 
 
d836318
7716c5c
 
 
 
d836318
7716c5c
 
 
 
d836318
7716c5c
 
 
 
d836318
7716c5c
 
 
 
d836318
7716c5c
 
 
 
d836318
7716c5c
 
 
 
 
 
d836318
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7716c5c
 
8e1d297
6088e9d
8e1d297
6088e9d
d836318
7716c5c
8e1d297
d836318
7716c5c
d836318
 
 
8e1d297
 
586dcd2
8e1d297
d836318
7716c5c
d836318
 
7716c5c
8e1d297
7716c5c
8e1d297
d836318
8e1d297
9753cc9
8e1d297
d836318
 
7716c5c
d836318
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os
import tempfile
import re
import streamlit as st
import docx
import textract

#####################################
# Function: Extract Text from File
#####################################
def extract_text_from_file(file_obj):
    """
    Extract plain text from an uploaded .doc or .docx file.

    Args:
        file_obj: File-like object exposing a ``name`` attribute (its
            extension selects the parser). For .doc input it must also
            provide ``read()``; for .docx it is handed to ``docx.Document``.

    Returns:
        The extracted text. This function does not raise on parser
        failures: it returns an "Error processing ... file: ..." message
        instead, and "Unsupported file type." for any other extension.
    """
    filename = file_obj.name
    ext = os.path.splitext(filename)[1].lower()

    if ext not in (".docx", ".doc"):
        return "Unsupported file type."

    # Best-effort rewind: if the same upload object was already read once,
    # its position may be at EOF and the parsers would see empty content.
    rewind = getattr(file_obj, "seek", None)
    if callable(rewind):
        try:
            rewind(0)
        except (OSError, ValueError):
            # Non-seekable stream; read from the current position.
            pass

    if ext == ".docx":
        # Broad catch is deliberate: callers expect an error string, not
        # an exception, whatever the parser raises.
        try:
            document = docx.Document(file_obj)
            return "\n".join(para.text for para in document.paragraphs)
        except Exception as e:
            return f"Error processing DOCX file: {e}"

    # ext == ".doc": textract requires a filename, so spool the upload to a
    # temporary file first.
    tmp_filename = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".doc") as tmp:
            # Record the path before writing so the file is still cleaned
            # up if read()/write() fails part-way through.
            tmp_filename = tmp.name
            tmp.write(file_obj.read())
        return textract.process(tmp_filename).decode("utf-8")
    except Exception as e:
        return f"Error processing DOC file: {e}"
    finally:
        if tmp_filename is not None:
            try:
                os.remove(tmp_filename)
            except OSError:
                # Cleanup is best-effort; the extraction result (or error
                # message) is still returned to the caller.
                pass

#####################################
# Function: Extract Basic Resume Information
#####################################
def extract_basic_resume_info(text):
    """
    Parse resume text with regex heuristics and collect basic facts.

    Args:
        text: The resume content as a single string (may contain newlines).

    Returns:
        A dictionary with the keys "Name", "Age", "Work Experience" and
        "Expected Industry/Direction". Each value is the extracted string,
        or None when nothing usable was found for that field.
    """
    info = {
        "Name": None,
        "Age": None,
        "Work Experience": None,
        "Expected Industry/Direction": None,
    }

    # Extract Name (e.g., "Name: John Doe").
    # \b keeps labels such as "Filename:" from matching, and the capture
    # is limited to letters/spaces/tabs so it cannot run on into the next
    # line (e.g. swallowing a following "Age" label).
    name_match = re.search(r"\b[Nn]ame[:\-]\s*([A-Za-z][A-Za-z \t]*)", text)
    if name_match:
        info["Name"] = name_match.group(1).strip()
    else:
        # Heuristic: first run of two or three capitalized words that sit
        # on the same line.
        potential_name = re.search(
            r"\b[A-Z][a-z]+(?:[ \t]+[A-Z][a-z]+){1,2}\b", text
        )
        if potential_name:
            info["Name"] = potential_name.group(0)

    # Extract Age (e.g., "Age: 28").
    # \b avoids "Page: 12"; the lookahead rejects longer digit runs such as
    # a year instead of silently truncating them.
    age_match = re.search(r"\b[Aa]ge[:\-]\s*(\d{1,3})(?!\d)", text)
    if age_match:
        info["Age"] = age_match.group(1)

    # Extract Work Experience (e.g., "5 years of experience",
    # "1 year of experience", "5+ yrs experience").
    exp_match = re.search(
        r"(\d+)\+?\s+(years?|yrs?)\s+(?:of\s+)?experience", text, re.IGNORECASE
    )
    if exp_match:
        info["Work Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
    else:
        # Fallback: take the rest of a labelled "Experience:" / "Background:" line.
        exp_line = re.search(r"(Experience|Background)[:\-]\s*(.*)", text, re.IGNORECASE)
        if exp_line:
            # An empty capture is reported as "not found".
            info["Work Experience"] = exp_line.group(2).strip() or None

    # Extract Expected Industry/Direction from a labelled line.
    industry_match = re.search(r"(Industry|Interest|Direction)[:\-]\s*(.+)", text, re.IGNORECASE)
    if industry_match:
        info["Expected Industry/Direction"] = industry_match.group(2).strip() or None

    return info

#####################################
# Function: Summarize Basic Info into a Paragraph
#####################################
def summarize_basic_info(info):
    """
    Turn the extracted resume fields into a single summary sentence.

    Fields that are missing or falsy are left out. When no field is
    available at all, a fixed "could not be extracted" message is returned.
    """
    # (dictionary key, phrase template) in the order the phrases appear.
    phrase_templates = (
        ("Name", "{}"),
        ("Age", "aged {}"),
        ("Work Experience", "with {} of work experience"),
        ("Expected Industry/Direction", "seeking opportunities in {}"),
    )
    phrases = [
        template.format(info[key])
        for key, template in phrase_templates
        if info.get(key)
    ]

    if not phrases:
        return "Basic information could not be extracted from the resume."
    return "The candidate is " + ", ".join(phrases) + "."

#####################################
# Main Resume Processing Logic
#####################################
def process_resume(file_obj):
    """
    Run the full pipeline for one uploaded resume.

    Returns a ``(resume_text, summary_paragraph)`` tuple, or ``(None, None)``
    when no file object was supplied.
    """
    if file_obj is not None:
        # Pull the raw text out of the upload, then parse and summarize it.
        raw_text = extract_text_from_file(file_obj)
        summary = summarize_basic_info(extract_basic_resume_info(raw_text))
        return raw_text, summary

    return None, None

#####################################
# Streamlit Interface
#####################################
# Page header followed by the usage instructions.
st.title("Resume Basic Info Summary")
intro_markdown = """
Upload your resume file in **.doc** or **.docx** format. The app will extract the content and generate a summary paragraph
that highlights the candidate's basic information (name, age, work experience, and expected industry/direction).
"""
st.markdown(intro_markdown)

# The uploader is restricted to the two extensions the extractor handles.
uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])

process_clicked = st.button("Process Resume")
if process_clicked and uploaded_file is None:
    st.error("Please upload a file first.")
elif process_clicked:
    # Only the summary is displayed; the raw text is returned but not shown.
    with st.spinner("Processing resume..."):
        resume_text, summary_paragraph = process_resume(uploaded_file)

    st.subheader("Summary of Basic Information")
    st.markdown(summary_paragraph)