CR7CAD committed on
Commit
6713758
·
verified ·
1 Parent(s): 41d8604

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -17
app.py CHANGED
@@ -4,6 +4,8 @@ import streamlit as st
4
  import docx
5
  from transformers import pipeline
6
  import time
 
 
7
 
8
  # Set page title and hide sidebar
9
  st.set_page_config(
@@ -28,10 +30,10 @@ def load_models():
28
  with st.spinner("Loading AI models... This may take a minute on first run."):
29
  models = {}
30
  # Load summarization model
31
- models['summarizer'] = pipeline("summarization", model="google/pegasus-xsum")
32
 
33
  # Load text generation model for suitability assessment
34
- models['text_generator'] = pipeline("text-generation", model="gpt2") # You can use different models
35
 
36
  return models
37
 
@@ -43,7 +45,7 @@ models = load_models()
43
  #####################################
44
  def extract_text_from_file(file_obj):
45
  """
46
- Extract text from .docx files.
47
  Returns the extracted text or an error message if extraction fails.
48
  """
49
  filename = file_obj.name
@@ -56,13 +58,33 @@ def extract_text_from_file(file_obj):
56
  text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
57
  except Exception as e:
58
  text = f"Error processing DOCX file: {e}"
59
- elif ext == ".txt":
60
  try:
61
- text = file_obj.getvalue().decode("utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  except Exception as e:
63
- text = f"Error processing TXT file: {e}"
64
  else:
65
- text = "Unsupported file type. Please upload a .docx or .txt file."
 
66
  return text
67
 
68
  #####################################
@@ -103,24 +125,24 @@ def summarize_resume_text(resume_text, models):
103
  #####################################
104
  def generate_suitability_assessment(candidate_summary, company_prompt, models):
105
  """
106
- Generate a suitability assessment using text generation instead of similarity.
107
  Returns the generated assessment text and execution time.
108
  """
109
  start_time = time.time()
110
 
111
  text_generator = models['text_generator']
112
 
113
- # Create a prompt for the text generation model
114
  prompt = f"""
115
  Resume Summary: {candidate_summary}
116
 
117
  Company Description: {company_prompt}
118
 
119
  Suitability Assessment:
120
- This candidate is a"""
121
 
122
  # Generate text
123
- max_length = 80 + len(prompt.split()) # Limit output length
124
  generated_text = text_generator(
125
  prompt,
126
  max_length=max_length,
@@ -135,8 +157,8 @@ This candidate is a"""
135
 
136
  # Determine a numerical score from the text
137
  # This is a simplified approach - we're looking for positive and negative words
138
- positive_words = ['excellent', 'perfect', 'great', 'good', 'strong', 'ideal', 'qualified']
139
- negative_words = ['poor', 'weak', 'bad', 'insufficient', 'inadequate', 'not a good']
140
 
141
  assessment_lower = assessment.lower()
142
 
@@ -164,15 +186,15 @@ This candidate is a"""
164
  st.title("Resume Analyzer and Company Suitability Checker")
165
  st.markdown(
166
  """
167
- Upload your resume file in **.docx** or **.txt** format. The app performs the following tasks:
168
  1. Extracts text from the resume.
169
  2. Uses a transformer-based model to generate a concise candidate summary.
170
- 3. Uses text generation to assess the candidate's suitability for the company.
171
  """
172
  )
173
 
174
  # File uploader
175
- uploaded_file = st.file_uploader("Upload your resume (.docx or .txt)", type=["docx", "txt"])
176
 
177
  # Company description text area
178
  company_prompt = st.text_area(
@@ -187,7 +209,7 @@ if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
187
  # Extract text from resume
188
  resume_text = extract_text_from_file(uploaded_file)
189
 
190
- if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx or .txt file.":
191
  st.error(resume_text)
192
  else:
193
  # Generate summary
 
4
  import docx
5
  from transformers import pipeline
6
  import time
7
+ import tempfile
8
+ import subprocess
9
 
10
  # Set page title and hide sidebar
11
  st.set_page_config(
 
30
  with st.spinner("Loading AI models... This may take a minute on first run."):
31
  models = {}
32
  # Load summarization model
33
+ models['summarizer'] = pipeline("summarization", model="marianna13/flan-t5-base-summarization")
34
 
35
  # Load text generation model for suitability assessment
36
+ models['text_generator'] = pipeline("text-generation", model="gpt2")
37
 
38
  return models
39
 
 
45
  #####################################
46
  def extract_text_from_file(file_obj):
47
  """
48
+ Extract text from .doc or .docx files.
49
  Returns the extracted text or an error message if extraction fails.
50
  """
51
  filename = file_obj.name
 
58
  text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
59
  except Exception as e:
60
  text = f"Error processing DOCX file: {e}"
61
+ elif ext == ".doc":
62
  try:
63
+ # For .doc files, we need to save to a temp file and use an external tool
64
+ # This example uses antiword which needs to be installed in the environment
65
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
66
+ temp_file.write(file_obj.getvalue())
67
+ temp_path = temp_file.name
68
+
69
+ # Try using python-docx2txt if available
70
+ try:
71
+ import docx2txt
72
+ text = docx2txt.process(temp_path)
73
+ except ImportError:
74
+ # Fallback to antiword if installed
75
+ try:
76
+ text = subprocess.check_output(['antiword', temp_path]).decode('utf-8')
77
+ except:
78
+ # If all else fails, inform the user
79
+ text = "Could not process .doc file. Please convert to .docx format."
80
+
81
+ # Clean up temp file
82
+ os.unlink(temp_path)
83
  except Exception as e:
84
+ text = f"Error processing DOC file: {e}"
85
  else:
86
+ text = "Unsupported file type. Please upload a .doc or .docx file."
87
+
88
  return text
89
 
90
  #####################################
 
125
  #####################################
126
  def generate_suitability_assessment(candidate_summary, company_prompt, models):
127
  """
128
+ Generate a suitability assessment using text generation.
129
  Returns the generated assessment text and execution time.
130
  """
131
  start_time = time.time()
132
 
133
  text_generator = models['text_generator']
134
 
135
+ # Create a prompt for the text generation model that focuses on candidate alignment with company
136
  prompt = f"""
137
  Resume Summary: {candidate_summary}
138
 
139
  Company Description: {company_prompt}
140
 
141
  Suitability Assessment:
142
+ Based on an analysis of the candidate's profile compared to the company requirements, this candidate"""
143
 
144
  # Generate text
145
+ max_length = 100 + len(prompt.split()) # Limit output length
146
  generated_text = text_generator(
147
  prompt,
148
  max_length=max_length,
 
157
 
158
  # Determine a numerical score from the text
159
  # This is a simplified approach - we're looking for positive and negative words
160
+ positive_words = ['excellent', 'perfect', 'great', 'good', 'strong', 'ideal', 'qualified', 'aligns well', 'matches', 'suitable']
161
+ negative_words = ['poor', 'weak', 'bad', 'insufficient', 'inadequate', 'not a good fit', 'misaligned', 'lacks', 'does not align']
162
 
163
  assessment_lower = assessment.lower()
164
 
 
186
  st.title("Resume Analyzer and Company Suitability Checker")
187
  st.markdown(
188
  """
189
+ Upload your resume file in **.doc** or **.docx** format. The app performs the following tasks:
190
  1. Extracts text from the resume.
191
  2. Uses a transformer-based model to generate a concise candidate summary.
192
+ 3. Evaluates how well the candidate aligns with the company requirements.
193
  """
194
  )
195
 
196
  # File uploader
197
+ uploaded_file = st.file_uploader("Upload your resume (.doc or .docx)", type=["doc", "docx"])
198
 
199
  # Company description text area
200
  company_prompt = st.text_area(
 
209
  # Extract text from resume
210
  resume_text = extract_text_from_file(uploaded_file)
211
 
212
+ if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .doc or .docx file.":
213
  st.error(resume_text)
214
  else:
215
  # Generate summary