athulnambiar committed on
Commit
af2aec4
·
verified ·
1 Parent(s): 6938be6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -12
app.py CHANGED
@@ -1,7 +1,9 @@
1
  import streamlit as st
2
  import pandas as pd
 
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
- from sklearn.metrics.pairwise import cosine_similarity
 
5
  import re
6
  from PyPDF2 import PdfReader
7
 
@@ -22,35 +24,44 @@ def clean_text(text):
22
  text = re.sub(r'\W', ' ', text)
23
  return text.lower()
24
 
25
def calculate_cosine_similarity(resumes, keywords):
    """Score each resume against the keyword string using TF-IDF cosine similarity.

    Args:
        resumes: List of resume text strings.
        keywords: Single string holding the keyword text.

    Returns:
        Flattened array with one cosine-similarity score per resume, in the
        same order as ``resumes``.
    """
    # The keyword string is appended last so it shares one vocabulary with
    # the resumes and ends up as the final row of the matrix.
    corpus = resumes + [keywords]
    matrix = TfidfVectorizer().fit_transform(corpus)

    keyword_row = matrix[-1]
    resume_rows = matrix[:-1]
    return cosine_similarity(keyword_row, resume_rows).flatten()
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  st.title("Resume Analyzer")
32
 
33
  st.sidebar.subheader("Enter Keywords and Priority")
34
-
35
  data = pd.DataFrame({
36
  'Keyword': ['']*10,
37
  'Priority': ['']*10
38
  })
39
-
40
  keywords_df = st.sidebar.data_editor(data, num_rows="dynamic", key="keyword_table")
41
 
42
  if not keywords_df['Keyword'].isnull().all():
43
  keywords_combined = " ".join(keywords_df.apply(lambda row: f"{row['Keyword']} " * int(row['Priority']) if row['Priority'].isdigit() else row['Keyword'], axis=1))
44
-
45
  st.subheader("Upload up to 5 resumes (PDF or Text files)")
46
  uploaded_files = st.file_uploader("Choose Resume Files", accept_multiple_files=True, type=["txt", "pdf"])
47
-
48
  if len(uploaded_files) > 0 and keywords_combined:
49
  with st.spinner("Analyzing Resumes..."):
50
  resumes = []
51
  for file in uploaded_files:
52
  try:
53
-
54
  resume_text = extract_text_from_file(file)
55
  clean_resume = clean_text(resume_text)
56
  resumes.append(clean_resume)
@@ -59,13 +70,26 @@ if not keywords_df['Keyword'].isnull().all():
59
 
60
  clean_keywords = clean_text(keywords_combined)
61
 
62
- scores = calculate_cosine_similarity(resumes, clean_keywords)
63
 
64
  st.subheader("Resume Analysis Results")
65
  results_df = pd.DataFrame({
66
  'Resume': [file.name for file in uploaded_files],
67
- 'Similarity Score': scores
 
 
68
  })
 
 
 
 
 
 
 
 
 
 
 
69
  st.dataframe(results_df)
70
  else:
71
- st.info("Please upload resumes and enter keywords with priority.")
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import numpy as np
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
6
+ from sklearn.preprocessing import MinMaxScaler
7
  import re
8
  from PyPDF2 import PdfReader
9
 
 
24
  text = re.sub(r'\W', ' ', text)
25
  return text.lower()
26
 
27
def jaccard_similarity(doc1, doc2):
    """Return the Jaccard index of the whitespace-separated token sets of two strings.

    Returns 0.0 when neither string contains a token, instead of dividing
    by an empty union.
    """
    set1 = set(doc1.split())
    set2 = set(doc2.split())
    union = set1 | set2
    if not union:
        # Both documents are empty: there is no overlap to measure.
        return 0.0
    return len(set1 & set2) / len(union)


def calculate_similarity_metrics(resumes, keywords):
    """Compare each resume with the keyword string using three similarity measures.

    Args:
        resumes: List of resume text strings.
        keywords: Single string holding the keyword text.

    Returns:
        A tuple ``(cosine_sim, jaccard_sim, euclidean_sim)`` with one entry
        per resume, in the same order as ``resumes``:
        - cosine_sim: flattened array of TF-IDF cosine similarities.
        - jaccard_sim: list of Jaccard indices over token sets.
        - euclidean_sim: flattened array of TF-IDF Euclidean distances
          mapped to similarities via ``1 / (1 + distance)``.
    """
    # The keyword string is appended last so it shares one vocabulary with
    # the resumes and ends up as the final row of the matrix.
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(resumes + [keywords])
    keyword_row = tfidf_matrix[-1]
    resume_rows = tfidf_matrix[:-1]

    cosine_sim = cosine_similarity(keyword_row, resume_rows).flatten()

    jaccard_sim = [jaccard_similarity(keywords, resume) for resume in resumes]

    euclidean_dist = euclidean_distances(keyword_row, resume_rows).flatten()
    # Map distance to a similarity: 0 distance gives 1, larger distances
    # give values closer to 0.
    euclidean_sim = 1 / (1 + euclidean_dist)

    return cosine_sim, jaccard_sim, euclidean_sim
44
 
45
  st.title("Resume Analyzer")
46
 
47
  st.sidebar.subheader("Enter Keywords and Priority")
 
48
  data = pd.DataFrame({
49
  'Keyword': ['']*10,
50
  'Priority': ['']*10
51
  })
 
52
  keywords_df = st.sidebar.data_editor(data, num_rows="dynamic", key="keyword_table")
53
 
54
  if not keywords_df['Keyword'].isnull().all():
55
  keywords_combined = " ".join(keywords_df.apply(lambda row: f"{row['Keyword']} " * int(row['Priority']) if row['Priority'].isdigit() else row['Keyword'], axis=1))
56
+
57
  st.subheader("Upload up to 5 resumes (PDF or Text files)")
58
  uploaded_files = st.file_uploader("Choose Resume Files", accept_multiple_files=True, type=["txt", "pdf"])
59
+
60
  if len(uploaded_files) > 0 and keywords_combined:
61
  with st.spinner("Analyzing Resumes..."):
62
  resumes = []
63
  for file in uploaded_files:
64
  try:
 
65
  resume_text = extract_text_from_file(file)
66
  clean_resume = clean_text(resume_text)
67
  resumes.append(clean_resume)
 
70
 
71
  clean_keywords = clean_text(keywords_combined)
72
 
73
+ cosine_scores, jaccard_scores, euclidean_scores = calculate_similarity_metrics(resumes, clean_keywords)
74
 
75
  st.subheader("Resume Analysis Results")
76
  results_df = pd.DataFrame({
77
  'Resume': [file.name for file in uploaded_files],
78
+ 'Cosine Similarity': cosine_scores,
79
+ 'Jaccard Index': jaccard_scores,
80
+ 'Euclidean Similarity': euclidean_scores
81
  })
82
+
83
+ scaler = MinMaxScaler()
84
+ normalized_scores = scaler.fit_transform(results_df[['Cosine Similarity', 'Jaccard Index', 'Euclidean Similarity']])
85
+
86
+ overall_scores = np.mean(normalized_scores, axis=1)
87
+ results_df['Overall Score'] = overall_scores
88
+
89
+ results_df['Rank'] = results_df['Overall Score'].rank(ascending=False, method='min').astype(int)
90
+
91
+ results_df = results_df.sort_values('Rank')
92
+
93
  st.dataframe(results_df)
94
  else:
95
+ st.info("Please upload resumes and enter keywords with priority.")