manasvinid committed on
Commit
4b01813
·
verified ·
1 Parent(s): 0b3264d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -1
app.py CHANGED
@@ -25,7 +25,7 @@ st.write("""
25
 
26
  st.header('Input')
27
  jobs_data= job_desc_pdf()
28
- resume_df= resume_pdf()
29
 
30
 
31
  setup_nltk_resources()
@@ -40,3 +40,30 @@ jobs_data['processed_description'] = jobs_data['description'].apply(preprocess_t
40
  jobs_data_cleaned = drop_duplicates(jobs_data, column_name='description')
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  st.header('Input')
27
  jobs_data= job_desc_pdf()
28
+ resume_data= resume_pdf()
29
 
30
 
31
  setup_nltk_resources()
 
40
  jobs_data_cleaned = drop_duplicates(jobs_data, column_name='description')
41
 
42
 
43
+ resume_data['processed_resume'] = resume_data['Resume'].apply(preprocess_text)
44
+ resume_data_cleaned = drop_duplicates(resume_data, column_name='Resume')
45
+
46
+ jobs_data_cleaned_with_tokens = add_token_count_column(jobs_data_cleaned, column_name='processed_description')
47
+ resume_data_cleaned_with_tokens = add_token_count_column(resume_data_cleaned, column_name='processed_resume')
48
+
49
+ # Dropping unnecessary columns from jobs data
50
+ jobs_data_final = jobs_data_cleaned_with_tokens[['processed_description', 'token_count']]
51
+
52
+ # Dropping unnecessary columns from resume data
53
+ resume_data_final = resume_data_cleaned_with_tokens[['processed_resume', 'token_count']]
54
+
55
+
56
+ summarizer = TextSummarizer("geekradius/bart-large-cnn-fintetuned-samsum-repo")
57
+
58
+ # Select the first 100 rows of jobs_data_final (no ranking is applied) for summarization
59
+ top_jobs_data = jobs_data_final.head(100)
60
+
61
+ # Summarize job descriptions
62
+ jobs_data_summarized = batch_summarize(top_jobs_data, 'processed_description', summarizer, batch_size=10, output_col='summarized_description')
63
+
64
+ # Summarize all 'processed_resume' in resume_data_final
65
+ resume_data_summarized = batch_summarize(resume_data_final, 'processed_resume', summarizer, batch_size=10, output_col='summarized_resume')
66
+
67
+
68
+
69
+