billyxx committed on
Commit
31aa939
·
verified ·
1 Parent(s): 272e246

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -40
app.py CHANGED
@@ -12,59 +12,60 @@ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
12
  def process_resumes(job_description, uploaded_files):
13
  if not job_description.strip():
14
  return "Please provide a job description.", None
 
 
15
 
16
  resume_texts = []
17
 
18
  for uploaded_file in uploaded_files:
19
- # If the uploaded_file is a file-like object (has 'read' method)
20
- if hasattr(uploaded_file, "read"):
21
- # Reset file pointer just in case
 
 
 
22
  uploaded_file.seek(0)
23
- content = uploaded_file.read()
24
- # Get filename attribute, fallback if not available
25
- filename = getattr(uploaded_file, "name", "unknown")
26
- # Save the file to disk if you want or just process in-memory
27
- # For example, save to UPLOAD_FOLDER
28
- filepath = os.path.join(UPLOAD_FOLDER, os.path.basename(filename))
29
- with open(filepath, "wb") as f:
30
- f.write(content)
31
- else:
32
- # uploaded_file is probably a NamedString (str-like)
33
- # Gradio provides the filename differently in this case,
34
- # so you might have to assign a default or get from UI
35
- content = uploaded_file
36
- filepath = None
37
- filename = "unknown"
38
-
39
- # Process content depending on extension
40
- if filename.endswith(".txt") or (filepath and filepath.endswith(".txt")):
41
- text = content.decode("utf-8") if isinstance(content, bytes) else content
42
- elif filename.endswith(".pdf") or (filepath and filepath.endswith(".pdf")):
43
- # If saved to file, open from file
44
- if filepath:
45
- import pdfplumber
46
- with pdfplumber.open(filepath) as pdf:
47
- pages = [page.extract_text() for page in pdf.pages if page.extract_text()]
48
- text = "\n".join(pages)
49
- else:
50
- # No file saved, cannot process PDF bytes easily here
51
- return "Please upload PDF files via file upload.", None
52
- elif filename.endswith(".docx") or (filepath and filepath.endswith(".docx")):
53
- if filepath:
54
- from docx import Document
55
- doc = Document(filepath)
56
- text = "\n".join([p.text for p in doc.paragraphs])
57
  else:
58
- return "Please upload DOCX files via file upload.", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  else:
60
  return f"Unsupported file format: {filename}", None
61
 
62
  resume_texts.append((filename, text))
63
 
64
- # Now call rank_resumes etc.
65
  results = rank_resumes(job_description, resume_texts)
66
 
67
- # Generate summaries
68
  for candidate in results:
69
  candidate["summary"] = summarize_resume_flan(candidate["text"], job_description)
70
 
 
12
  def process_resumes(job_description, uploaded_files):
13
  if not job_description.strip():
14
  return "Please provide a job description.", None
15
+ if not uploaded_files:
16
+ return "Please upload at least one resume file.", None
17
 
18
  resume_texts = []
19
 
20
  for uploaded_file in uploaded_files:
21
+ filename = getattr(uploaded_file, "name", None)
22
+ if filename is None:
23
+ return "One of the uploaded files is missing a filename. Please upload files, not text.", None
24
+
25
+ # Reset file pointer
26
+ if hasattr(uploaded_file, "seek"):
27
  uploaded_file.seek(0)
28
+
29
+ # Process based on extension
30
+ ext = filename.lower().split(".")[-1]
31
+ if ext == "txt":
32
+ # Read text directly
33
+ if hasattr(uploaded_file, "read"):
34
+ content = uploaded_file.read()
35
+ # bytes? decode
36
+ text = content.decode("utf-8") if isinstance(content, bytes) else content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  else:
38
+ return f"Unexpected content for {filename}", None
39
+
40
+ elif ext == "pdf":
41
+ # Save temporarily to disk to use pdfplumber (which needs a file path)
42
+ temp_path = os.path.join(UPLOAD_FOLDER, filename)
43
+ with open(temp_path, "wb") as f:
44
+ f.write(uploaded_file.read())
45
+
46
+ import pdfplumber
47
+ with pdfplumber.open(temp_path) as pdf:
48
+ pages = [page.extract_text() for page in pdf.pages if page.extract_text()]
49
+ text = "\n".join(pages)
50
+
51
+ elif ext == "docx":
52
+ # Save temporarily to disk for python-docx
53
+ temp_path = os.path.join(UPLOAD_FOLDER, filename)
54
+ with open(temp_path, "wb") as f:
55
+ f.write(uploaded_file.read())
56
+
57
+ from docx import Document
58
+ doc = Document(temp_path)
59
+ text = "\n".join([p.text for p in doc.paragraphs])
60
+
61
  else:
62
  return f"Unsupported file format: {filename}", None
63
 
64
  resume_texts.append((filename, text))
65
 
66
+ # Rank resumes and generate summaries
67
  results = rank_resumes(job_description, resume_texts)
68
 
 
69
  for candidate in results:
70
  candidate["summary"] = summarize_resume_flan(candidate["text"], job_description)
71