File size: 4,646 Bytes
cf8a522
92f45fe
7716c5c
8e1d297
92f45fe
 
8e1d297
 
4c77f62
8e1d297
 
92f45fe
7716c5c
 
92f45fe
 
 
 
7716c5c
 
9753cc9
92f45fe
 
9753cc9
92f45fe
 
 
7716c5c
92f45fe
 
 
 
 
 
 
 
 
 
 
 
 
 
7716c5c
92f45fe
8e1d297
 
7716c5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e1d297
6088e9d
8e1d297
6088e9d
7716c5c
8e1d297
7716c5c
 
 
8e1d297
 
586dcd2
8e1d297
7716c5c
 
 
 
 
8e1d297
7716c5c
8e1d297
7716c5c
8e1d297
9753cc9
8e1d297
7716c5c
 
 
8e1d297
7716c5c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
import tempfile
import re
import streamlit as st
import docx
import textract

#####################################
# Function: Extract Text from File
#####################################
def extract_text_from_file(file_obj):
    """
    Extract plain text from an uploaded .doc or .docx file.

    Args:
        file_obj: Binary file-like object. Its ``name`` attribute selects the
            parser (by extension, case-insensitively); ``read()`` supplies the
            bytes for .doc files, while .docx files are handed to
            ``docx.Document`` directly.

    Returns:
        The extracted text. Failures are reported in the returned string
        rather than raised: "Unsupported file type." for any other extension,
        or a message starting with "Error processing DOCX file:" /
        "Error processing DOC file:" when parsing fails.
    """
    ext = os.path.splitext(file_obj.name)[1].lower()
    if ext not in (".docx", ".doc"):
        return "Unsupported file type."

    # The stream may already have been consumed by an earlier read; rewind so
    # the parsers see the whole document instead of an empty tail.
    rewind = getattr(file_obj, "seek", None)
    if callable(rewind):
        try:
            rewind(0)
        except (OSError, ValueError):
            # Non-seekable stream: best effort, read from the current position.
            pass

    if ext == ".docx":
        try:
            document = docx.Document(file_obj)
            return "\n".join(para.text for para in document.paragraphs)
        except Exception as e:
            return f"Error processing DOCX file: {e}"

    # .doc: textract requires a filename, so spill the upload to a temp file.
    tmp_filename = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".doc") as tmp:
            # Record the path before writing so the cleanup below still runs
            # if reading the upload or writing the copy fails.
            tmp_filename = tmp.name
            tmp.write(file_obj.read())
        return textract.process(tmp_filename).decode("utf-8")
    except Exception as e:
        return f"Error processing DOC file: {e}"
    finally:
        if tmp_filename is not None:
            try:
                os.remove(tmp_filename)
            except OSError:
                # Already gone or not removable; nothing useful to do here.
                pass

#####################################
# Function: Extract Basic Resume Information
#####################################
def extract_basic_resume_info(text):
    """
    Summarize basic details from resume text using regex heuristics.

    Args:
        text: Plain text of the resume. ``None`` or an empty string yields a
            result with every field unset.

    Returns:
        A dict with the keys "Name", "Age", "Work Experience" and
        "Expected Industry/Direction". Each value is the matched string, or
        ``None`` when nothing in the text matched that field.
    """
    info = {
        "Name": None,
        "Age": None,
        "Work Experience": None,
        "Expected Industry/Direction": None,
    }
    if not text:
        return info

    # Name from a labelled line such as "Name: John Doe". The word boundary
    # keeps "Username:"/"Filename:" from matching, and the capture is limited
    # to spaces/tabs so it stops at the end of the line instead of running
    # into the next field.
    name_match = re.search(r"\b[Nn]ame[:\-]\s*([A-Za-z][A-Za-z \t]*)", text)
    if name_match:
        info["Name"] = name_match.group(1).strip()
    else:
        # Fallback heuristic: the first run of two or three capitalized words
        # on a single line.
        potential_name = re.search(
            r"\b[A-Z][a-z]+(?:[ \t]+[A-Z][a-z]+){1,2}\b", text
        )
        if potential_name:
            info["Name"] = potential_name.group(0)

    # Age from a labelled line such as "Age: 28". The word boundary rejects
    # "Page: 12"; the lookahead rejects longer numbers such as years.
    age_match = re.search(r"\b[Aa]ge[:\-]\s*(\d{1,2})(?!\d)", text)
    if age_match:
        info["Age"] = age_match.group(1)

    # Work experience, preferring an explicit duration ("5 years of experience").
    exp_match = re.search(
        r"(\d+)\s+(years|yrs)\s+(?:of\s+)?experience", text, re.IGNORECASE
    )
    if exp_match:
        info["Work Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
    else:
        # Otherwise take the rest of a line labelled "Experience:" or "Background:".
        exp_line = re.search(
            r"(Experience|Background)[:\-]\s*(.*)", text, re.IGNORECASE
        )
        if exp_line:
            info["Work Experience"] = exp_line.group(2).strip()

    # Expected industry/direction
    # (e.g., "Interest: Software Development" or "Expected Industry: Healthcare")
    industry_match = re.search(
        r"(Industry|Interest|Direction)[:\-]\s*(.+)", text, re.IGNORECASE
    )
    if industry_match:
        info["Expected Industry/Direction"] = industry_match.group(2).strip()

    return info

#####################################
# Main Resume Processing Logic
#####################################
def process_resume(file_obj):
    """
    Run the full pipeline for one uploaded resume.

    Returns a ``(resume_text, basic_info)`` pair, or ``(None, None)`` when no
    file was supplied.
    """
    if file_obj is not None:
        # Pull the raw text first, then derive the summary fields from it.
        extracted = extract_text_from_file(file_obj)
        summary = extract_basic_resume_info(extracted)
        return extracted, summary
    return None, None

#####################################
# Streamlit Interface
#####################################
st.title("Resume Summary App")
st.markdown("""
Upload your resume file (supported formats: **.doc** or **.docx**) to extract and summarize its content.
The basic details, including name, age, work experience, and expected industry, will be displayed along with the full text content.
""")

uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])

# Extraction runs only on an explicit button press, not on every rerun.
if st.button("Extract Information"):
    if uploaded_file is None:
        st.error("Please upload a file first.")
    else:
        with st.spinner("Extracting information..."):
            resume_text, resume_info = process_resume(uploaded_file)

        st.subheader("Extracted Resume Text")
        # Streamlit discourages empty widget labels for accessibility reasons;
        # give the widget a real label and hide it, since the subheader above
        # already serves as the visible heading.
        st.text_area(
            "Extracted resume text",
            resume_text,
            height=300,
            label_visibility="collapsed",
        )

        st.subheader("Basic Resume Information")
        st.json(resume_info)