import pandas as pd
import streamlit as st

from functions import *
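# `functions` is a local module; it is assumed to provide the helpers used
# below: job_desc_pdf, resume_pdf, preprocess_text, drop_duplicates,
# add_token_count_column, TextSummarizer, and batch_summarize.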



backgroundPattern = """
<style>
[data-testid="stAppViewContainer"] {
    background-color: #0E1117;
    opacity: 1;
    background-image: radial-gradient(#282C34 0.75px, #0E1117 0.75px);
    background-size: 15px 15px;
}
</style>
"""

st.markdown(backgroundPattern, unsafe_allow_html=True)
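# The CSS above renders a dark dotted grid: a 0.75px #282C34 dot on a
# #0E1117 background, repeated every 15px via background-size.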

st.write("""
# Resume Screening & Classification
""")

st.header('Input')
jobs_data = job_desc_pdf()
resume_data = resume_pdf()
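# Both loaders are assumed to return pandas DataFrames parsed from the
# uploaded PDFs: jobs_data with a 'description' column and resume_data with
# a 'Resume' column, since those columns are accessed below.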


# Optional NLTK setup (paths below are from the original Kaggle environment):
# setup_nltk_resources()

# # Unzip wordnet
# corpora_path = "/kaggle/working/nltk_data/corpora"
# wordnet_zip = os.path.join(corpora_path, "wordnet.zip")
# unzip_nltk_resource(wordnet_zip, corpora_path)

# Apply preprocessing
jobs_data['processed_description'] = jobs_data['description'].apply(preprocess_text)
jobs_data_cleaned = drop_duplicates(jobs_data, column_name='description')
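# preprocess_text is assumed to do the text cleaning (the commented NLTK
# setup above suggests tokenization/stop-word/WordNet-based normalization).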


resume_data['processed_resume'] = resume_data['Resume'].apply(preprocess_text)
resume_data_cleaned = drop_duplicates(resume_data, column_name='Resume')

jobs_data_cleaned_with_tokens = add_token_count_column(jobs_data_cleaned, column_name='processed_description')
resume_data_cleaned_with_tokens = add_token_count_column(resume_data_cleaned, column_name='processed_resume')
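# add_token_count_column is assumed to append a 'token_count' column; that
# name is what the column selections below rely on.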

# Keep only the columns needed downstream
jobs_data_final = jobs_data_cleaned_with_tokens[['processed_description', 'token_count']]
resume_data_final = resume_data_cleaned_with_tokens[['processed_resume', 'token_count']]


summarizer = TextSummarizer("geekradius/bart-large-cnn-fintetuned-samsum-repo")
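# TextSummarizer presumably wraps a Hugging Face checkpoint: per its repo id,
# a BART-large-CNN model fine-tuned on the SAMSum dialogue dataset (the
# "fintetuned" spelling is part of the model id as published).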

# Select the first 100 job descriptions for summarization
top_jobs_data = jobs_data_final.head(100)

# Summarize the selected job descriptions in batches of 10
jobs_data_summarized = batch_summarize(top_jobs_data, 'processed_description', summarizer, batch_size=10, output_col='summarized_description')

# Summarize all 'processed_resume' in resume_data_final
resume_data_summarized = batch_summarize(resume_data_final, 'processed_resume', summarizer, batch_size=10, output_col='summarized_resume')
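# Hypothetical next step (not in the original file): surface the summaries in
# the UI with Streamlit's built-in dataframe widget, assuming batch_summarize
# returns the DataFrame with the requested output_col added.
# st.header('Output')
# st.dataframe(jobs_data_summarized[['summarized_description']])
# st.dataframe(resume_data_summarized[['summarized_resume']])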