CR7CAD committed on
Commit
fa79427
·
verified ·
1 Parent(s): 99adfcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -16
app.py CHANGED
@@ -8,13 +8,16 @@ from nltk.tokenize import word_tokenize
8
  import torch
9
  from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
10
  import time
 
 
 
 
11
 
12
  # Set page title and configuration
13
  st.set_page_config(
14
  page_title="Resume-Job Fit Analyzer",
15
  page_icon="📊",
16
- layout="wide",
17
- initial_sidebar_state="expanded"
18
  )
19
 
20
  # Download NLTK resources if needed
@@ -49,6 +52,61 @@ def load_models():
49
 
50
  return models
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  # Extract skills from text
53
  def extract_skills(text, skill_keywords):
54
  """Extract skills from text based on a predefined list of skills"""
@@ -353,28 +411,30 @@ def generate_fallback_assessment(resume_data, job_data, match_scores, fit_status
353
  st.title("Resume-Job Fit Analyzer")
354
  st.markdown("### Evaluate how well a resume matches a job description")
355
 
356
- # Setup columns for input
357
- col1, col2 = st.columns(2)
 
358
 
359
- with col1:
360
- # Resume input
361
- st.subheader("Resume")
362
- resume_text = st.text_area("Paste resume text here", height=300,
363
- placeholder="Paste the candidate's resume text here...")
364
 
365
- with col2:
366
- # Job description input
367
- st.subheader("Job Description")
368
- job_description = st.text_area("Paste job description here", height=300,
369
- placeholder="Paste the job description here...")
 
 
370
 
371
  # Analysis button
372
- analyze_button = st.button("Analyze Match", type="primary", use_container_width=True)
373
 
374
  # Main analysis logic
375
  if analyze_button:
376
  if not resume_text or not job_description:
377
- st.error("Please provide both a resume and a job description.")
378
  else:
379
  with st.spinner("Analyzing resume and job match..."):
380
  # Record start time
 
8
  import torch
9
  from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
10
  import time
11
+ import os
12
+ import docx2txt
13
+ import io
14
+ import docx
15
 
16
  # Set page title and configuration
17
  st.set_page_config(
18
  page_title="Resume-Job Fit Analyzer",
19
  page_icon="📊",
20
+ layout="wide"
 
21
  )
22
 
23
  # Download NLTK resources if needed
 
52
 
53
  return models
54
 
55
+ # Read resume file
56
def read_resume_file(uploaded_file):
    """Extract plain text from an uploaded resume file.

    The format is selected from the extension of ``uploaded_file.name``
    (compared case-insensitively). ``.txt``, ``.docx`` and ``.doc`` are
    handled; anything else is reported as unsupported.

    Args:
        uploaded_file: A seekable binary file-like object exposing ``name``,
            ``read()`` and ``getbuffer()`` (e.g. the object returned by
            ``st.file_uploader``).

    Returns:
        The extracted text, or ``None`` when the format is unsupported or
        extraction failed. Failures are reported to the user via ``st.error``
        instead of being raised.
    """
    file_extension = os.path.splitext(uploaded_file.name)[1].lower()

    # Rewind first: if the same stream object was already consumed by an
    # earlier call, read() would otherwise return empty content.
    uploaded_file.seek(0)

    if file_extension == ".txt":
        # "utf-8-sig" strips a leading BOM; errors="replace" keeps a file
        # saved in another encoding from raising UnicodeDecodeError.
        return uploaded_file.read().decode("utf-8-sig", errors="replace")

    if file_extension == ".docx":
        # Modern Word document
        try:
            return docx2txt.process(uploaded_file)
        except Exception as e:
            st.error(f"Error reading DOCX file: {str(e)}")
            return None

    if file_extension == ".doc":
        # Legacy Word document: there is no real converter here, so this is
        # a best-effort extraction.
        try:
            st.warning("Note: .doc files might not convert perfectly. For best results, upload .docx or .txt files.")

            try:
                # Hand the stream to docx2txt directly, as the .docx branch
                # does. No shared on-disk temp file is needed, so concurrent
                # sessions cannot overwrite each other's upload and nothing
                # is left behind on failure.
                text = docx2txt.process(uploaded_file)
            except Exception:
                # Fallback: decode the raw bytes and keep only printable
                # ASCII plus common whitespace.
                raw_bytes = bytes(uploaded_file.getbuffer())
                text = raw_bytes.decode('utf-8', errors='ignore')
                text = re.sub(r'[^\x20-\x7E\n\r\t]', '', text)

            return text
        except Exception as e:
            st.error(f"Error reading DOC file: {str(e)}")
            return None

    st.error(f"Unsupported file format: {file_extension}")
    return None
109
+
110
  # Extract skills from text
111
  def extract_skills(text, skill_keywords):
112
  """Extract skills from text based on a predefined list of skills"""
 
411
  st.title("Resume-Job Fit Analyzer")
412
  st.markdown("### Evaluate how well a resume matches a job description")
413
 
414
# --- Input: resume file ---
st.subheader("Resume")
uploaded_file = st.file_uploader(
    "Upload Resume (.doc, .docx, .txt)",
    type=["doc", "docx", "txt"],
)

# --- Input: job description ---
st.subheader("Job Description")
job_description = st.text_area(
    "Paste job description here",
    height=200,
    placeholder="Paste the job description here...",
)

# Extract the resume text. It stays None when nothing was uploaded, and
# read_resume_file may itself return None.
resume_text = read_resume_file(uploaded_file) if uploaded_file is not None else None

# Show a collapsed preview, capped at the first 1000 characters.
if resume_text:
    preview = resume_text[:1000]
    if len(resume_text) > 1000:
        preview += "..."
    with st.expander("View Resume Text"):
        st.text(preview)

# --- Action: trigger the analysis ---
analyze_button = st.button("Analyze Match", type="primary")
433
 
434
  # Main analysis logic
435
  if analyze_button:
436
  if not resume_text or not job_description:
437
+ st.error("Please upload a resume file and provide a job description.")
438
  else:
439
  with st.spinner("Analyzing resume and job match..."):
440
  # Record start time