# Resume Screening & Classification — Streamlit app (Hugging Face Space)
"""Streamlit app: Resume Screening & Classification.

Reads job-description and resume PDFs (via helpers in ``functions``),
preprocesses and deduplicates the text, adds token counts, and summarizes
both corpora with a fine-tuned BART summarizer.
"""
import pandas as pd
import streamlit as st

# Project helpers: job_desc_pdf, resume_pdf, preprocess_text, drop_duplicates,
# add_token_count_column, TextSummarizer, batch_summarize.
from functions import *

# Dotted dark background for the whole app view container.
backgroundPattern = """
<style>
[data-testid="stAppViewContainer"] {
background-color: #0E1117;
opacity: 1;
background-image: radial-gradient(#282C34 0.75px, #0E1117 0.75px);
background-size: 15px 15px;
}
</style>
"""
st.markdown(backgroundPattern, unsafe_allow_html=True)

st.write("""
# Resume Screening & Classification
""")
st.header('Input')

# Collect the uploaded PDFs as DataFrames (helpers defined in functions.py).
jobs_data = job_desc_pdf()
resume_data = resume_pdf()

# Clean the raw text, then drop exact-duplicate documents so the summarizer
# does not process the same content twice.
jobs_data['processed_description'] = jobs_data['description'].apply(preprocess_text)
jobs_data_cleaned = drop_duplicates(jobs_data, column_name='description')
resume_data['processed_resume'] = resume_data['Resume'].apply(preprocess_text)
resume_data_cleaned = drop_duplicates(resume_data, column_name='Resume')

# Attach per-document token counts (used downstream for length-aware work).
jobs_data_cleaned_with_tokens = add_token_count_column(jobs_data_cleaned, column_name='processed_description')
resume_data_cleaned_with_tokens = add_token_count_column(resume_data_cleaned, column_name='processed_resume')

# Keep only the columns the summarization step needs.
jobs_data_final = jobs_data_cleaned_with_tokens[['processed_description', 'token_count']]
resume_data_final = resume_data_cleaned_with_tokens[['processed_resume', 'token_count']]

# NOTE: model repo id spelling ("fintetuned") matches the published repo — do not "fix" it.
summarizer = TextSummarizer("geekradius/bart-large-cnn-fintetuned-samsum-repo")

# Summarize only the first 100 job descriptions to bound runtime.
top_jobs_data = jobs_data_final.head(100)
jobs_data_summarized = batch_summarize(top_jobs_data, 'processed_description', summarizer, batch_size=10, output_col='summarized_description')

# Summarize all resumes.
resume_data_summarized = batch_summarize(resume_data_final, 'processed_resume', summarizer, batch_size=10, output_col='summarized_resume')