billyxx committed on
Commit bb27315 · verified · 1 Parent(s): 829e202

Upload 20 files

__pycache__/recommender.cpython-312.pyc ADDED
Binary file (4.04 kB)
 
app.py ADDED
@@ -0,0 +1,69 @@
+ import gradio as gr
+ import os
+ import shutil
+ import pdfplumber
+ from recommender import rank_resumes, summarize_resume_flan, extract_applicant_name
+
+ UPLOAD_FOLDER = "uploads"
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
+ def process_resumes(job_description, uploaded_files):
+     if not job_description.strip():
+         return "Please provide a job description.", None
+     if not uploaded_files:
+         return "Please upload at least one resume.", None
+
+     # Save uploaded files (Gradio hands over temp files on disk, so copy them in)
+     for file in uploaded_files:
+         src = file if isinstance(file, str) else file.name
+         shutil.copy(src, os.path.join(UPLOAD_FOLDER, os.path.basename(src)))
+
+     # Read resumes
+     resume_texts = []
+     for filename in os.listdir(UPLOAD_FOLDER):
+         filepath = os.path.join(UPLOAD_FOLDER, filename)
+         if filename.endswith(".txt"):
+             with open(filepath, "r", encoding="utf-8") as f:
+                 text = f.read()
+         elif filename.endswith(".pdf"):
+             with pdfplumber.open(filepath) as pdf:
+                 # extract_text() can return None for image-only pages
+                 pages = [page.extract_text() or "" for page in pdf.pages]
+                 text = "\n".join(pages)
+         else:
+             continue
+
+         resume_texts.append((filename, text))
+
+     if not resume_texts:
+         return "No valid resumes found. Please upload .txt or .pdf files.", None
+
+     # Rank resumes
+     results = rank_resumes(job_description, resume_texts)
+
+     # Generate summaries
+     for candidate in results:
+         candidate["summary"] = summarize_resume_flan(candidate["text"], job_description)
+
+     # Prepare table
+     table_data = [
+         [candidate.get("applicant_name", extract_applicant_name(candidate["text"], candidate["filename"])),
+          candidate["filename"],
+          f"{candidate['score']:.4f}",
+          candidate["summary"]]
+         for candidate in results
+     ]
+
+     return "", table_data
+
+ with gr.Blocks() as demo:
+     gr.Markdown("## Candidate Recommendation Engine")
+     with gr.Row():
+         job_desc = gr.Textbox(label="Job Description", lines=10, placeholder="Paste job description here...")
+         resumes = gr.File(label="Upload Resumes (.txt or .pdf)", file_types=[".txt", ".pdf"], file_count="multiple")
+     btn = gr.Button("Rank Candidates")
+
+     msg = gr.Markdown()
+     output_table = gr.Dataframe(headers=["Candidate", "File Name", "Similarity Score", "Why a Good Fit"], wrap=True)
+
+     btn.click(process_resumes, inputs=[job_desc, resumes], outputs=[msg, output_table])
+
+ if __name__ == "__main__":
+     demo.launch()
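
For a quick check of the extraction path used in `process_resumes` without launching the Gradio UI, here is a minimal sketch (not part of this commit) that reads one resume file the same way; `read_resume` and `sample_path` are illustrative names only, and the `or ""` guard covers pages where `page.extract_text()` returns None.

```python
import os
import pdfplumber

def read_resume(filepath):
    """Read a .txt or .pdf resume into plain text, mirroring process_resumes."""
    if filepath.endswith(".txt"):
        with open(filepath, "r", encoding="utf-8") as f:
            return f.read()
    if filepath.endswith(".pdf"):
        with pdfplumber.open(filepath) as pdf:
            # extract_text() may return None for image-only pages
            return "\n".join(page.extract_text() or "" for page in pdf.pages)
    raise ValueError("Unsupported file type: " + os.path.basename(filepath))

sample_path = "uploads/res1.pdf"  # illustrative path; any local .txt or .pdf works
print(read_resume(sample_path)[:500])
```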
recommender.py ADDED
@@ -0,0 +1,112 @@
+ from sentence_transformers import SentenceTransformer
+ from sklearn.metrics.pairwise import cosine_similarity
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ import torch
+
+ # Embedding model
+ embedder = SentenceTransformer("all-mpnet-base-v2")
+
+ # Summarization model
+ model_name = "MBZUAI/LaMini-Flan-T5-248M"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+
+ device = torch.device("cpu")
+ model.to(device)
+
+
+ def extract_key_sections(resume_text):
+     sections = {"education": [], "experience": [], "skills": [], "projects": []}
+     lines = resume_text.splitlines()
+     current = None
+
+     for line in lines:
+         line = line.strip()
+         if not line:
+             continue
+
+         l = line.lower()
+         if "education" in l:
+             current = "education"
+         elif "experience" in l or "work history" in l:
+             current = "experience"
+         elif "skills" in l:
+             current = "skills"
+         elif "projects" in l or "certifications" in l:
+             current = "projects"
+         elif current:
+             sections[current].append(line)
+
+     return sections
+
+
+ def extract_applicant_name(resume_text, filename):
+     # Heuristic: take the first short line (2-4 words) from the top 3 lines
+     lines = resume_text.strip().split("\n")[:3]
+     possible_name = None
+
+     for line in lines:
+         clean_line = line.strip()
+         if clean_line and 2 <= len(clean_line.split()) <= 4:
+             possible_name = clean_line
+             break
+
+     if possible_name:
+         return possible_name
+     return filename.rsplit(".", 1)[0]  # fall back to the filename if no name is found
+
+
+ def rank_resumes(job_description, resume_texts):
+     if not resume_texts:
+         return []
+
+     # Embed the job description and all resumes in one batch
+     texts = [job_description] + [text for _, text in resume_texts]
+     embeddings = embedder.encode(texts)
+     job_embedding = embeddings[0].reshape(1, -1)
+     resume_embeddings = embeddings[1:]
+
+     similarities = cosine_similarity(job_embedding, resume_embeddings)[0]
+
+     results = []
+     for (filename, resume_text), sim in zip(resume_texts, similarities):
+         applicant_name = extract_applicant_name(resume_text, filename)
+         results.append({
+             "filename": filename,
+             "applicant_name": applicant_name,
+             "text": resume_text,
+             "score": round(float(sim), 4)
+         })
+
+     results.sort(key=lambda x: x["score"], reverse=True)
+     return results[:4]  # top 4 candidates
+
+
+ # ===== Summarization =====
+ def summarize_resume_flan(resume_text, job_description):
+     prompt = f"""
+     Summarize this resume in 3 bullet points, focusing on skills and experience relevant to the job description.
+
+     Job Description:
+     {job_description}
+
+     Resume:
+     {resume_text}
+     """
+
+     inputs = tokenizer(
+         prompt,
+         return_tensors="pt",
+         truncation=True,
+         max_length=512
+     ).to(device)
+
+     outputs = model.generate(
+         **inputs,
+         max_length=200,
+         num_beams=4,
+         early_stopping=True
+     )
+
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
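
As a rough, standalone illustration of how `rank_resumes` is intended to be called outside the Gradio app, the sketch below ranks two invented resume texts against a toy job description. The filenames and resume strings are made up for the example; note that importing `recommender` also loads the `all-mpnet-base-v2` and LaMini-Flan-T5 models, which are downloaded on first use.

```python
from recommender import rank_resumes

job_description = "Looking for a Python developer with NLP and machine learning experience."

# (filename, text) pairs, the same shape app.py builds; the texts here are invented
resume_texts = [
    ("alice.txt", "Alice Smith\nPython developer with 4 years of NLP work using spaCy and transformers."),
    ("bob.txt", "Bob Jones\nAccountant experienced in bookkeeping and financial reporting."),
]

for candidate in rank_resumes(job_description, resume_texts):
    print(candidate["applicant_name"], candidate["filename"], candidate["score"])
```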
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ gradio
+ flask
+ sentence-transformers
+ scikit-learn
+ transformers
+ accelerate
+ torch
+ pdfplumber
+
uploads/.DS_Store ADDED
Binary file (6.15 kB)

uploads/res1.pdf ADDED
Binary file (29.7 kB)

uploads/res10.pdf ADDED
Binary file (32.8 kB)

uploads/res11.pdf ADDED
Binary file (33.2 kB)

uploads/res12.pdf ADDED
Binary file (33.2 kB)

uploads/res13.pdf ADDED
Binary file (32.1 kB)

uploads/res14.pdf ADDED
Binary file (32.5 kB)

uploads/res15.pdf ADDED
Binary file (31.8 kB)

uploads/res2.pdf ADDED
Binary file (30.2 kB)

uploads/res3.pdf ADDED
Binary file (23.7 kB)

uploads/res4.pdf ADDED
Binary file (17.9 kB)

uploads/res5.pdf ADDED
Binary file (32.3 kB)

uploads/res6.pdf ADDED
Binary file (33.2 kB)

uploads/res7.pdf ADDED
Binary file (34.6 kB)

uploads/res8.pdf ADDED
Binary file (32.5 kB)

uploads/res9.pdf ADDED
Binary file (33 kB)