Update app.py
app.py CHANGED
@@ -25,7 +25,7 @@ st.write("""
 
 st.header('Input')
 jobs_data= job_desc_pdf()
-
+resume_data= resume_pdf()
 
 
 setup_nltk_resources()
@@ -40,3 +40,30 @@ jobs_data['processed_description'] = jobs_data['description'].apply(preprocess_text)
 jobs_data_cleaned = drop_duplicates(jobs_data, column_name='description')
 
 
+resume_data['processed_resume'] = resume_data['Resume'].apply(preprocess_text)
+resume_data_cleaned = drop_duplicates(resume_data, column_name='Resume')
+
+jobs_data_cleaned_with_tokens = add_token_count_column(jobs_data_cleaned, column_name='processed_description')
+resume_data_cleaned_with_tokens = add_token_count_column(resume_data_cleaned, column_name='processed_resume')
+
+# Drop unnecessary columns from the jobs data
+jobs_data_final = jobs_data_cleaned_with_tokens[['processed_description', 'token_count']]
+
+# Drop unnecessary columns from the resume data
+resume_data_final = resume_data_cleaned_with_tokens[['processed_resume', 'token_count']]
+
+
+summarizer = TextSummarizer("geekradius/bart-large-cnn-fintetuned-samsum-repo")
+
+# Summarize the top 100 'processed_description' rows of jobs_data_final
+top_jobs_data = jobs_data_final.head(100)
+
+# Summarize the job descriptions
+jobs_data_summarized = batch_summarize(top_jobs_data, 'processed_description', summarizer, batch_size=10, output_col='summarized_description')
+
+# Summarize all 'processed_resume' rows in resume_data_final
+resume_data_summarized = batch_summarize(resume_data_final, 'processed_resume', summarizer, batch_size=10, output_col='summarized_resume')
+
+
+
+
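
For context, this commit calls several helpers that are defined elsewhere in the repo and not shown in the diff: setup_nltk_resources, preprocess_text, drop_duplicates, and add_token_count_column. Below is a minimal, hypothetical sketch of what they might look like, assuming NLTK-based cleaning and pandas DataFrames throughout; the actual implementations may differ.

# Hypothetical sketch only; the real helpers live elsewhere in this repo.
import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

def setup_nltk_resources():
    # Fetch the tokenizer and stopword data the preprocessing step needs.
    nltk.download('punkt')
    nltk.download('stopwords')

def preprocess_text(text: str) -> str:
    # Lowercase, tokenize, and keep alphabetic non-stopword tokens.
    tokens = word_tokenize(str(text).lower())
    stop = set(stopwords.words('english'))
    return ' '.join(t for t in tokens if t.isalpha() and t not in stop)

def drop_duplicates(df: pd.DataFrame, column_name: str) -> pd.DataFrame:
    # Keep the first occurrence of each unique value in column_name.
    return df.drop_duplicates(subset=column_name).reset_index(drop=True)

def add_token_count_column(df: pd.DataFrame, column_name: str) -> pd.DataFrame:
    # Add a whitespace token count for each row of column_name.
    df = df.copy()
    df['token_count'] = df[column_name].str.split().str.len()
    return df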
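
TextSummarizer and batch_summarize are likewise not part of this diff. Here is a hedged sketch of one plausible implementation, assuming a Hugging Face transformers summarization pipeline (the model id passed above is a Hub repo, so transformers is a natural fit); the class and function names mirror the calls in the diff, but the real signatures may differ.

# Hypothetical sketch only; assumes the transformers library is installed.
import pandas as pd
from transformers import pipeline

class TextSummarizer:
    def __init__(self, model_name: str):
        # Load a seq2seq summarization model from the Hugging Face Hub.
        self.pipe = pipeline('summarization', model=model_name)

    def summarize(self, texts):
        # Truncate over-long inputs and return plain summary strings.
        results = self.pipe(list(texts), truncation=True)
        return [r['summary_text'] for r in results]

def batch_summarize(df, column, summarizer, batch_size=10, output_col='summary'):
    # Summarize a DataFrame column in fixed-size batches so peak memory
    # stays bounded regardless of how many rows the frame holds.
    df = df.copy()
    summaries = []
    for start in range(0, len(df), batch_size):
        batch = df[column].iloc[start:start + batch_size]
        summaries.extend(summarizer.summarize(batch))
    df[output_col] = summaries
    return df

Batching in tens, as the diff does, keeps each pipeline call small; that matters when the Space runs on CPU and the inputs are full job descriptions or resumes.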