billyxx committed on
Commit
1fa36d8
·
verified ·
1 Parent(s): fc1e181

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -43
app.py CHANGED
@@ -1,10 +1,8 @@
1
  import gradio as gr
2
  import os
3
- import pdfplumber
4
  from recommender import rank_resumes, summarize_resume_flan, extract_applicant_name
5
  from docx import Document
6
 
7
-
8
  UPLOAD_FOLDER = "uploads"
9
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
10
 
@@ -17,45 +15,37 @@ def process_resumes(job_description, uploaded_files):
17
 
18
  for uploaded_file in uploaded_files:
19
  filename = getattr(uploaded_file, "name", None)
20
-
21
  if filename is None:
22
  return "One of the uploaded files is missing a filename. Please upload files, not text.", None
23
 
24
  ext = filename.lower().split(".")[-1]
25
 
26
- # Read file content or bytes
27
- if hasattr(uploaded_file, "read"):
28
- content = uploaded_file.read()
29
- elif isinstance(uploaded_file, str):
30
- content = uploaded_file
31
- else:
32
- return f"Unsupported upload type for file: {filename}", None
33
 
34
- # Process by file type
35
  if ext == "txt":
36
- text = content.decode("utf-8") if isinstance(content, bytes) else content
 
37
 
38
  elif ext == "pdf":
39
- temp_path = os.path.join(UPLOAD_FOLDER, filename)
40
- with open(temp_path, "wb") as f:
41
- if isinstance(content, bytes):
42
- f.write(content)
43
- else:
44
- f.write(content.encode("utf-8"))
45
  import pdfplumber
46
- with pdfplumber.open(temp_path) as pdf:
47
- pages = [page.extract_text() for page in pdf.pages if page.extract_text()]
48
- text = "\n".join(pages)
 
 
 
 
 
49
 
50
  elif ext == "docx":
51
- temp_path = os.path.join(UPLOAD_FOLDER, filename)
52
- with open(temp_path, "wb") as f:
53
- if isinstance(content, bytes):
54
- f.write(content)
55
- else:
56
- f.write(content.encode("utf-8"))
57
- from docx import Document
58
- doc = Document(temp_path)
59
  text = "\n".join([p.text for p in doc.paragraphs])
60
 
61
  else:
@@ -63,10 +53,9 @@ def process_resumes(job_description, uploaded_files):
63
 
64
  resume_texts.append((filename, text))
65
 
66
- # Rank resumes
67
  results = rank_resumes(job_description, resume_texts)
68
 
69
- # Generate summaries
70
  for candidate in results:
71
  candidate["summary"] = summarize_resume_flan(candidate["text"], job_description)
72
 
@@ -81,26 +70,15 @@ def process_resumes(job_description, uploaded_files):
81
 
82
  return "", table_data
83
 
84
- def extract_text_from_docx(filepath):
85
- doc = Document(filepath)
86
- full_text = []
87
- for para in doc.paragraphs:
88
- full_text.append(para.text)
89
- return "\n".join(full_text)
90
-
91
-
92
-
93
-
94
 
95
  with gr.Blocks() as demo:
96
  gr.Markdown("## Candidate Recommendation Engine")
97
  with gr.Row():
98
  job_desc = gr.Textbox(label="Job Description", lines=10, placeholder="Paste job description here...")
99
-
100
  resumes = gr.Files(label="Upload Resumes (.txt, .pdf, .docx)", file_types=[".txt", ".pdf", ".docx"])
101
 
102
  btn = gr.Button("Rank Candidates")
103
-
104
 
105
  msg = gr.Markdown()
106
  output_table = gr.Dataframe(headers=["Candidate", "File Name", "Similarity Score", "Why a Good Fit"], wrap=True)
 
1
  import gradio as gr
2
  import os
 
3
  from recommender import rank_resumes, summarize_resume_flan, extract_applicant_name
4
  from docx import Document
5
 
 
6
  UPLOAD_FOLDER = "uploads"
7
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
8
 
 
15
 
16
  for uploaded_file in uploaded_files:
17
  filename = getattr(uploaded_file, "name", None)
 
18
  if filename is None:
19
  return "One of the uploaded files is missing a filename. Please upload files, not text.", None
20
 
21
  ext = filename.lower().split(".")[-1]
22
 
23
+ # Save uploaded file to disk
24
+ file_path = os.path.join(UPLOAD_FOLDER, filename)
25
+ with open(file_path, "wb") as f:
26
+ content = uploaded_file.read() if hasattr(uploaded_file, "read") else uploaded_file
27
+ if isinstance(content, str):
28
+ content = content.encode("utf-8")
29
+ f.write(content)
30
 
31
+ # Read text depending on file type
32
  if ext == "txt":
33
+ with open(file_path, "r", encoding="utf-8") as f:
34
+ text = f.read()
35
 
36
  elif ext == "pdf":
 
 
 
 
 
 
37
  import pdfplumber
38
+ try:
39
+ with pdfplumber.open(file_path) as pdf:
40
+ pages = [page.extract_text() for page in pdf.pages if page.extract_text() is not None]
41
+ if not pages:
42
+ return f"No extractable text found in PDF: {filename}. Is it scanned or image-only?", None
43
+ text = "\n".join(pages)
44
+ except Exception as e:
45
+ return f"Failed to process PDF {filename}: {str(e)}", None
46
 
47
  elif ext == "docx":
48
+ doc = Document(file_path)
 
 
 
 
 
 
 
49
  text = "\n".join([p.text for p in doc.paragraphs])
50
 
51
  else:
 
53
 
54
  resume_texts.append((filename, text))
55
 
56
+ # Rank resumes and generate summaries
57
  results = rank_resumes(job_description, resume_texts)
58
 
 
59
  for candidate in results:
60
  candidate["summary"] = summarize_resume_flan(candidate["text"], job_description)
61
 
 
70
 
71
  return "", table_data
72
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  with gr.Blocks() as demo:
75
  gr.Markdown("## Candidate Recommendation Engine")
76
  with gr.Row():
77
  job_desc = gr.Textbox(label="Job Description", lines=10, placeholder="Paste job description here...")
78
+
79
  resumes = gr.Files(label="Upload Resumes (.txt, .pdf, .docx)", file_types=[".txt", ".pdf", ".docx"])
80
 
81
  btn = gr.Button("Rank Candidates")
 
82
 
83
  msg = gr.Markdown()
84
  output_table = gr.Dataframe(headers=["Candidate", "File Name", "Similarity Score", "Why a Good Fit"], wrap=True)