Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
-
import pdfplumber
|
4 |
from recommender import rank_resumes, summarize_resume_flan, extract_applicant_name
|
5 |
from docx import Document
|
6 |
|
7 |
-
|
8 |
UPLOAD_FOLDER = "uploads"
|
9 |
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
10 |
|
@@ -17,45 +15,37 @@ def process_resumes(job_description, uploaded_files):
|
|
17 |
|
18 |
for uploaded_file in uploaded_files:
|
19 |
filename = getattr(uploaded_file, "name", None)
|
20 |
-
|
21 |
if filename is None:
|
22 |
return "One of the uploaded files is missing a filename. Please upload files, not text.", None
|
23 |
|
24 |
ext = filename.lower().split(".")[-1]
|
25 |
|
26 |
-
#
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
content
|
31 |
-
|
32 |
-
|
33 |
|
34 |
-
#
|
35 |
if ext == "txt":
|
36 |
-
|
|
|
37 |
|
38 |
elif ext == "pdf":
|
39 |
-
temp_path = os.path.join(UPLOAD_FOLDER, filename)
|
40 |
-
with open(temp_path, "wb") as f:
|
41 |
-
if isinstance(content, bytes):
|
42 |
-
f.write(content)
|
43 |
-
else:
|
44 |
-
f.write(content.encode("utf-8"))
|
45 |
import pdfplumber
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
elif ext == "docx":
|
51 |
-
|
52 |
-
with open(temp_path, "wb") as f:
|
53 |
-
if isinstance(content, bytes):
|
54 |
-
f.write(content)
|
55 |
-
else:
|
56 |
-
f.write(content.encode("utf-8"))
|
57 |
-
from docx import Document
|
58 |
-
doc = Document(temp_path)
|
59 |
text = "\n".join([p.text for p in doc.paragraphs])
|
60 |
|
61 |
else:
|
@@ -63,10 +53,9 @@ def process_resumes(job_description, uploaded_files):
|
|
63 |
|
64 |
resume_texts.append((filename, text))
|
65 |
|
66 |
-
# Rank resumes
|
67 |
results = rank_resumes(job_description, resume_texts)
|
68 |
|
69 |
-
# Generate summaries
|
70 |
for candidate in results:
|
71 |
candidate["summary"] = summarize_resume_flan(candidate["text"], job_description)
|
72 |
|
@@ -81,26 +70,15 @@ def process_resumes(job_description, uploaded_files):
|
|
81 |
|
82 |
return "", table_data
|
83 |
|
84 |
-
def extract_text_from_docx(filepath):
|
85 |
-
doc = Document(filepath)
|
86 |
-
full_text = []
|
87 |
-
for para in doc.paragraphs:
|
88 |
-
full_text.append(para.text)
|
89 |
-
return "\n".join(full_text)
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
|
95 |
with gr.Blocks() as demo:
|
96 |
gr.Markdown("## Candidate Recommendation Engine")
|
97 |
with gr.Row():
|
98 |
job_desc = gr.Textbox(label="Job Description", lines=10, placeholder="Paste job description here...")
|
99 |
-
|
100 |
resumes = gr.Files(label="Upload Resumes (.txt, .pdf, .docx)", file_types=[".txt", ".pdf", ".docx"])
|
101 |
|
102 |
btn = gr.Button("Rank Candidates")
|
103 |
-
|
104 |
|
105 |
msg = gr.Markdown()
|
106 |
output_table = gr.Dataframe(headers=["Candidate", "File Name", "Similarity Score", "Why a Good Fit"], wrap=True)
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
|
|
3 |
from recommender import rank_resumes, summarize_resume_flan, extract_applicant_name
|
4 |
from docx import Document
|
5 |
|
|
|
6 |
UPLOAD_FOLDER = "uploads"
|
7 |
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
8 |
|
|
|
15 |
|
16 |
for uploaded_file in uploaded_files:
|
17 |
filename = getattr(uploaded_file, "name", None)
|
|
|
18 |
if filename is None:
|
19 |
return "One of the uploaded files is missing a filename. Please upload files, not text.", None
|
20 |
|
21 |
ext = filename.lower().split(".")[-1]
|
22 |
|
23 |
+
# Save uploaded file to disk
|
24 |
+
file_path = os.path.join(UPLOAD_FOLDER, filename)
|
25 |
+
with open(file_path, "wb") as f:
|
26 |
+
content = uploaded_file.read() if hasattr(uploaded_file, "read") else uploaded_file
|
27 |
+
if isinstance(content, str):
|
28 |
+
content = content.encode("utf-8")
|
29 |
+
f.write(content)
|
30 |
|
31 |
+
# Read text depending on file type
|
32 |
if ext == "txt":
|
33 |
+
with open(file_path, "r", encoding="utf-8") as f:
|
34 |
+
text = f.read()
|
35 |
|
36 |
elif ext == "pdf":
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
import pdfplumber
|
38 |
+
try:
|
39 |
+
with pdfplumber.open(file_path) as pdf:
|
40 |
+
pages = [page.extract_text() for page in pdf.pages if page.extract_text() is not None]
|
41 |
+
if not pages:
|
42 |
+
return f"No extractable text found in PDF: {filename}. Is it scanned or image-only?", None
|
43 |
+
text = "\n".join(pages)
|
44 |
+
except Exception as e:
|
45 |
+
return f"Failed to process PDF {filename}: {str(e)}", None
|
46 |
|
47 |
elif ext == "docx":
|
48 |
+
doc = Document(file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
text = "\n".join([p.text for p in doc.paragraphs])
|
50 |
|
51 |
else:
|
|
|
53 |
|
54 |
resume_texts.append((filename, text))
|
55 |
|
56 |
+
# Rank resumes and generate summaries
|
57 |
results = rank_resumes(job_description, resume_texts)
|
58 |
|
|
|
59 |
for candidate in results:
|
60 |
candidate["summary"] = summarize_resume_flan(candidate["text"], job_description)
|
61 |
|
|
|
70 |
|
71 |
return "", table_data
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
with gr.Blocks() as demo:
|
75 |
gr.Markdown("## Candidate Recommendation Engine")
|
76 |
with gr.Row():
|
77 |
job_desc = gr.Textbox(label="Job Description", lines=10, placeholder="Paste job description here...")
|
78 |
+
|
79 |
resumes = gr.Files(label="Upload Resumes (.txt, .pdf, .docx)", file_types=[".txt", ".pdf", ".docx"])
|
80 |
|
81 |
btn = gr.Button("Rank Candidates")
|
|
|
82 |
|
83 |
msg = gr.Markdown()
|
84 |
output_table = gr.Dataframe(headers=["Candidate", "File Name", "Similarity Score", "Why a Good Fit"], wrap=True)
|