Spaces:
Sleeping
Sleeping
Removed mocking from tests in favor of using actual sample PDF, cleaned up
Browse files- tests/test_gradio.py +86 -451
- tests/test_linkedin_resume.py +189 -158
tests/test_gradio.py
CHANGED
@@ -3,479 +3,114 @@ Unit tests for the gradio module.
|
|
3 |
"""
|
4 |
|
5 |
import unittest
|
6 |
-
from
|
|
|
7 |
from functions import gradio
|
8 |
|
9 |
|
10 |
class TestProcessInputs(unittest.TestCase):
|
11 |
"""Test cases for the process_inputs function."""
|
12 |
|
13 |
-
def
|
14 |
-
"""Test
|
15 |
-
with patch('functions.gradio.get_github_repositories') as mock_github, \
|
16 |
-
patch('functions.gradio.summarize_job_call') as mock_summarize:
|
17 |
-
mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
|
18 |
-
mock_summarize.return_value = "Mocked job summary"
|
19 |
|
20 |
-
|
|
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
self.assertIn("❌ Job post not provided", result)
|
25 |
-
self.assertIn("ℹ️ No additional instructions provided", result)
|
26 |
-
self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
|
35 |
-
|
|
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
self.assertIn("ℹ️ No additional instructions provided", result)
|
41 |
-
self.assertIn("ℹ️ No additional instructions provided", result)
|
42 |
-
self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
|
43 |
-
|
44 |
-
def test_all_inputs_provided_success(self):
|
45 |
-
"""Test when all inputs are provided and successful."""
|
46 |
-
|
47 |
-
# Mock LinkedIn PDF file
|
48 |
-
mock_pdf = MagicMock()
|
49 |
-
mock_pdf.name = "test_resume.pdf"
|
50 |
-
|
51 |
-
# Mock successful extraction results
|
52 |
-
mock_linkedin_result = {
|
53 |
-
"status": "success",
|
54 |
-
"structured_text": {"sections": {}, "llm_formatted": "test content"},
|
55 |
-
"metadata": {"filename": "test_resume.pdf"}
|
56 |
-
}
|
57 |
-
|
58 |
-
mock_github_result = {
|
59 |
-
"status": "success",
|
60 |
-
"repositories": [{"name": "test-repo"}],
|
61 |
-
"metadata": {"username": "testuser"}
|
62 |
-
}
|
63 |
-
|
64 |
-
with patch('functions.gradio.extract_text_from_linkedin_pdf') as mock_linkedin, \
|
65 |
-
patch('functions.gradio.get_github_repositories') as mock_github, \
|
66 |
-
patch('functions.gradio.write_resume') as mock_write_resume, \
|
67 |
-
patch('functions.gradio.summarize_job_call') as mock_summarize: #, \
|
68 |
-
#patch('functions.gradio.shutil.copy2') as mock_copy:
|
69 |
-
|
70 |
-
mock_linkedin.return_value = mock_linkedin_result
|
71 |
-
mock_github.return_value = mock_github_result
|
72 |
-
mock_write_resume.return_value = "Generated resume content"
|
73 |
-
mock_summarize.return_value = "Job summary content\n"
|
74 |
-
|
75 |
-
result = gradio.process_inputs(
|
76 |
-
mock_pdf,
|
77 |
-
"https://github.com/testuser",
|
78 |
-
"Job posting text here",
|
79 |
-
"Please emphasize technical skills"
|
80 |
-
)
|
81 |
-
|
82 |
-
self.assertIn("✅ LinkedIn Resume PDF provided", result)
|
83 |
-
self.assertIn("✅ Text extraction successful", result)
|
84 |
-
self.assertIn("✅ GitHub Profile URL provided", result)
|
85 |
-
self.assertIn("✅ GitHub list download successful", result)
|
86 |
-
self.assertIn("✅ Job post text provided", result)
|
87 |
-
self.assertIn("✅ Job post summary generated", result)
|
88 |
-
self.assertIn("✅ Additional instructions provided", result)
|
89 |
-
self.assertIn("✅ Resume generated successfully", result)
|
90 |
-
|
91 |
-
# Verify write_resume was called with user instructions and job summary
|
92 |
-
mock_write_resume.assert_called_with(
|
93 |
-
mock_linkedin_result,
|
94 |
-
"Please emphasize technical skills",
|
95 |
-
"Job summary content\n"
|
96 |
-
)
|
97 |
-
|
98 |
-
@patch('functions.gradio.extract_text_from_linkedin_pdf')
|
99 |
-
@patch('functions.gradio.write_resume')
|
100 |
-
def test_linkedin_extraction_failure(self, mock_write_resume, mock_extract):
|
101 |
-
"""Test LinkedIn PDF extraction failure."""
|
102 |
-
|
103 |
-
mock_pdf = MagicMock()
|
104 |
-
mock_pdf.name = "test_resume.pdf"
|
105 |
|
|
|
106 |
mock_extract.return_value = {
|
107 |
-
"
|
108 |
-
"
|
109 |
-
|
110 |
-
mock_write_resume.return_value = "Generated resume content"
|
111 |
-
|
112 |
-
with patch('functions.gradio.get_github_repositories') as mock_github:
|
113 |
-
mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
|
114 |
-
|
115 |
-
result = gradio.process_inputs(mock_pdf, "", "", "")
|
116 |
-
|
117 |
-
self.assertIn("✅ LinkedIn Resume PDF provided", result)
|
118 |
-
self.assertIn("❌ Text extraction failed: Failed to read PDF", result)
|
119 |
-
self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
|
120 |
-
|
121 |
-
# Verify write_resume was NOT called since extraction failed
|
122 |
-
mock_write_resume.assert_not_called()
|
123 |
-
|
124 |
-
@patch('functions.gradio.extract_text_from_linkedin_pdf')
|
125 |
-
@patch('functions.gradio.write_resume')
|
126 |
-
def test_linkedin_extraction_warning(self, mock_write_resume, mock_extract):
|
127 |
-
"""Test LinkedIn PDF extraction warning."""
|
128 |
-
|
129 |
-
mock_pdf = MagicMock()
|
130 |
-
mock_pdf.name = "test_resume.pdf"
|
131 |
-
|
132 |
-
mock_extract.return_value = {
|
133 |
-
"status": "warning",
|
134 |
-
"message": "No text found in PDF"
|
135 |
-
}
|
136 |
-
mock_write_resume.return_value = "Generated resume content"
|
137 |
-
|
138 |
-
with patch('functions.gradio.get_github_repositories') as mock_github:
|
139 |
-
mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
|
140 |
-
|
141 |
-
result = gradio.process_inputs(mock_pdf, "", "", "")
|
142 |
-
|
143 |
-
self.assertIn("✅ LinkedIn Resume PDF provided", result)
|
144 |
-
self.assertIn("⚠️ Text extraction: No text found in PDF", result)
|
145 |
-
self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
|
146 |
-
|
147 |
-
# Verify write_resume was NOT called since extraction had warning status
|
148 |
-
mock_write_resume.assert_not_called()
|
149 |
-
|
150 |
-
@patch('functions.gradio.get_github_repositories')
|
151 |
-
@patch('functions.gradio.write_resume')
|
152 |
-
def test_github_retrieval_failure(self, mock_write_resume, mock_github):
|
153 |
-
"""Test GitHub repository retrieval failure."""
|
154 |
-
|
155 |
-
mock_github.return_value = {
|
156 |
-
"status": "error",
|
157 |
-
"message": "User not found"
|
158 |
-
}
|
159 |
-
mock_write_resume.return_value = "Generated resume content"
|
160 |
-
|
161 |
-
result = gradio.process_inputs(None, "https://github.com/nonexistent", "", "")
|
162 |
-
|
163 |
-
self.assertIn("✅ GitHub Profile URL provided", result)
|
164 |
-
self.assertIn("❌ GitHub extraction failed: User not found", result)
|
165 |
-
self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
|
166 |
-
|
167 |
-
# Verify write_resume was NOT called since no LinkedIn data
|
168 |
-
mock_write_resume.assert_not_called()
|
169 |
-
|
170 |
-
def test_whitespace_only_inputs(self):
|
171 |
-
"""Test inputs with only whitespace."""
|
172 |
-
with patch('functions.gradio.get_github_repositories') as mock_github, \
|
173 |
-
patch('functions.gradio.summarize_job_call') as mock_summarize:
|
174 |
-
mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
|
175 |
-
mock_summarize.return_value = "Mocked job summary"
|
176 |
-
|
177 |
-
result = gradio.process_inputs(None, " ", " ", " ")
|
178 |
-
|
179 |
-
self.assertIn("❌ No LinkedIn resume PDF file uploaded", result)
|
180 |
-
self.assertIn("❌ No GitHub profile URL provided", result)
|
181 |
-
self.assertIn("❌ Job post not provided", result)
|
182 |
-
self.assertIn("ℹ️ No additional instructions provided", result)
|
183 |
-
self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
|
184 |
-
|
185 |
-
def test_whitespace_only_inputs_no_default(self):
|
186 |
-
"""Test inputs with only whitespace (same as test_whitespace_only_inputs now)."""
|
187 |
-
with patch('functions.gradio.get_github_repositories') as mock_github, \
|
188 |
-
patch('functions.gradio.summarize_job_call') as mock_summarize:
|
189 |
-
mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
|
190 |
-
mock_summarize.return_value = None # No summarization since no job post
|
191 |
-
|
192 |
-
result = gradio.process_inputs(None, " ", " ", " ")
|
193 |
-
|
194 |
-
self.assertIn("❌ No LinkedIn resume PDF file uploaded", result)
|
195 |
-
self.assertIn("❌ No GitHub profile URL provided", result)
|
196 |
-
self.assertIn("❌ Job post not provided", result)
|
197 |
-
self.assertIn("ℹ️ No additional instructions provided", result)
|
198 |
-
self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
|
199 |
-
|
200 |
-
@patch('functions.gradio.write_resume')
|
201 |
-
@patch('functions.gradio.summarize_job_call')
|
202 |
-
def test_job_post_with_content(self, mock_summarize, mock_write_resume):
|
203 |
-
|
204 |
-
"""Test job post with actual content."""
|
205 |
-
job_text = "Software Engineer position at Tech Company"
|
206 |
-
mock_write_resume.return_value = "Generated resume content"
|
207 |
-
mock_summarize.return_value = "Job summary content\n"
|
208 |
-
|
209 |
-
with patch('functions.gradio.get_github_repositories') as mock_github:
|
210 |
-
mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
|
211 |
-
|
212 |
-
result = gradio.process_inputs(None, "", job_text, "")
|
213 |
-
|
214 |
-
self.assertIn("✅ Job post text provided", result)
|
215 |
-
self.assertIn("✅ Job post summary generated", result)
|
216 |
-
self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
|
217 |
-
# Verify write_resume was NOT called since no LinkedIn data
|
218 |
-
mock_write_resume.assert_not_called()
|
219 |
-
|
220 |
-
@patch('functions.gradio.logger')
|
221 |
-
@patch('functions.gradio.write_resume')
|
222 |
-
def test_logging_calls(self, mock_write_resume, mock_logger):
|
223 |
-
|
224 |
-
"""Test that appropriate logging calls are made."""
|
225 |
-
mock_pdf = MagicMock()
|
226 |
-
mock_pdf.name = "test.pdf"
|
227 |
-
mock_write_resume.return_value = "Generated resume content"
|
228 |
-
|
229 |
-
with patch('functions.gradio.extract_text_from_linkedin_pdf') as mock_extract, \
|
230 |
-
patch('functions.gradio.get_github_repositories') as mock_github, \
|
231 |
-
patch('functions.gradio.summarize_job_call') as mock_summarize:
|
232 |
-
|
233 |
-
mock_extract.return_value = {"status": "success"}
|
234 |
-
mock_github.return_value = {"status": "success", "metadata": {"username": "test"}}
|
235 |
-
mock_summarize.return_value = "Job summary\n"
|
236 |
-
|
237 |
-
gradio.process_inputs(
|
238 |
-
mock_pdf,
|
239 |
-
"https://github.com/test",
|
240 |
-
"job text",
|
241 |
-
"custom instructions"
|
242 |
-
)
|
243 |
-
|
244 |
-
# Verify logging calls were made
|
245 |
-
mock_logger.info.assert_called()
|
246 |
-
|
247 |
-
@patch('functions.gradio.write_resume')
|
248 |
-
def test_user_instructions_with_content(self, mock_write_resume):
|
249 |
-
|
250 |
-
"""Test user instructions with actual content."""
|
251 |
-
instructions = "Please emphasize leadership skills and highlight remote work experience"
|
252 |
-
mock_write_resume.return_value = "Generated resume content"
|
253 |
-
|
254 |
-
with patch('functions.gradio.get_github_repositories') as mock_github:
|
255 |
-
mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
|
256 |
-
|
257 |
-
result = gradio.process_inputs(None, "", "", instructions)
|
258 |
-
|
259 |
-
self.assertIn("✅ Additional instructions provided", result)
|
260 |
-
self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
|
261 |
-
# Verify write_resume was NOT called since no valid extraction result
|
262 |
-
mock_write_resume.assert_not_called()
|
263 |
-
|
264 |
-
@patch('functions.gradio.write_resume')
|
265 |
-
def test_user_instructions_empty(self, mock_write_resume):
|
266 |
-
|
267 |
-
"""Test user instructions when empty."""
|
268 |
-
mock_write_resume.return_value = "Generated resume content"
|
269 |
-
|
270 |
-
with patch('functions.gradio.get_github_repositories') as mock_github:
|
271 |
-
mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
|
272 |
-
|
273 |
-
result = gradio.process_inputs(None, "", "", "")
|
274 |
-
|
275 |
-
self.assertIn("ℹ️ No additional instructions provided", result)
|
276 |
-
self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
|
277 |
-
# Verify write_resume was NOT called since no valid extraction result
|
278 |
-
mock_write_resume.assert_not_called()
|
279 |
-
|
280 |
-
|
281 |
-
class TestGetProcessedData(unittest.TestCase):
|
282 |
-
"""Test cases for the get_processed_data function."""
|
283 |
-
|
284 |
-
def test_no_inputs(self):
|
285 |
-
"""Test with no inputs provided."""
|
286 |
-
result = gradio.get_processed_data(None, "", "", "")
|
287 |
-
|
288 |
-
self.assertIsNone(result["linkedin"])
|
289 |
-
self.assertIsNone(result["github"])
|
290 |
-
self.assertIsNone(result["job_post"])
|
291 |
-
self.assertIsNone(result["user_instructions"])
|
292 |
-
self.assertEqual(len(result["errors"]), 0)
|
293 |
-
|
294 |
-
def test_no_inputs_no_default_job(self):
|
295 |
-
"""Test with no inputs provided (same as test_no_inputs now)."""
|
296 |
-
result = gradio.get_processed_data(None, "", "", "")
|
297 |
-
|
298 |
-
self.assertIsNone(result["linkedin"])
|
299 |
-
self.assertIsNone(result["github"])
|
300 |
-
self.assertIsNone(result["job_post"])
|
301 |
-
self.assertIsNone(result["user_instructions"])
|
302 |
-
self.assertEqual(len(result["errors"]), 0)
|
303 |
-
|
304 |
-
def test_all_successful_inputs(self):
|
305 |
-
"""Test with all successful inputs."""
|
306 |
-
|
307 |
-
mock_pdf = MagicMock()
|
308 |
-
mock_pdf.name = "test.pdf"
|
309 |
-
|
310 |
-
mock_linkedin_result = {
|
311 |
-
"status": "success",
|
312 |
-
"structured_text": {"sections": {}, "llm_formatted": "content"}
|
313 |
}
|
314 |
|
315 |
-
|
316 |
-
"
|
317 |
-
"
|
318 |
-
"
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
result = gradio.get_processed_data(None, "https://github.com/invalid", "", "")
|
368 |
-
|
369 |
-
self.assertIsNone(result["github"])
|
370 |
-
self.assertEqual(len(result["errors"]), 1)
|
371 |
-
self.assertIn("GitHub: User not found", result["errors"])
|
372 |
-
|
373 |
-
def test_multiple_errors(self):
|
374 |
-
"""Test with multiple processing errors."""
|
375 |
-
|
376 |
-
mock_pdf = MagicMock()
|
377 |
-
mock_pdf.name = "test.pdf"
|
378 |
-
|
379 |
-
with patch('functions.gradio.extract_text_from_linkedin_pdf') as mock_linkedin, \
|
380 |
-
patch('functions.gradio.get_github_repositories') as mock_github:
|
381 |
-
|
382 |
-
mock_linkedin.return_value = {
|
383 |
-
"status": "error",
|
384 |
-
"message": "LinkedIn error"
|
385 |
-
}
|
386 |
-
mock_github.return_value = {
|
387 |
-
"status": "error",
|
388 |
-
"message": "GitHub error"
|
389 |
-
}
|
390 |
-
|
391 |
-
result = gradio.get_processed_data(
|
392 |
-
mock_pdf,
|
393 |
-
"https://github.com/user",
|
394 |
-
"",
|
395 |
-
""
|
396 |
-
)
|
397 |
-
|
398 |
-
self.assertIsNone(result["linkedin"])
|
399 |
-
self.assertIsNone(result["github"])
|
400 |
-
self.assertEqual(len(result["errors"]), 2)
|
401 |
-
self.assertIn("LinkedIn: LinkedIn error", result["errors"])
|
402 |
-
self.assertIn("GitHub: GitHub error", result["errors"])
|
403 |
-
|
404 |
-
def test_job_post_whitespace_handling(self):
|
405 |
-
"""Test job post whitespace handling."""
|
406 |
-
# Test with leading/trailing whitespace
|
407 |
-
result = gradio.get_processed_data(None, "", " Job content ", "")
|
408 |
-
self.assertEqual(result["job_post"], "Job content")
|
409 |
-
|
410 |
-
# Test with only whitespace - should be None
|
411 |
-
result = gradio.get_processed_data(None, "", " ", "")
|
412 |
-
self.assertIsNone(result["job_post"])
|
413 |
-
|
414 |
-
# Test with empty string - should be None
|
415 |
-
result = gradio.get_processed_data(None, "", "", "")
|
416 |
-
self.assertIsNone(result["job_post"])
|
417 |
-
|
418 |
-
def test_github_url_whitespace_handling(self):
|
419 |
-
"""Test GitHub URL whitespace handling."""
|
420 |
-
|
421 |
-
with patch('functions.gradio.get_github_repositories') as mock_github:
|
422 |
-
mock_github.return_value = {"status": "success", "repositories": []}
|
423 |
-
|
424 |
-
# Test with leading/trailing whitespace
|
425 |
-
_ = gradio.get_processed_data(None, " https://github.com/user ", "", "")
|
426 |
-
mock_github.assert_called_with(" https://github.com/user ")
|
427 |
-
|
428 |
-
# Test with only whitespace - should not call function
|
429 |
-
mock_github.reset_mock()
|
430 |
-
_ = gradio.get_processed_data(None, " ", "", "")
|
431 |
-
mock_github.assert_not_called()
|
432 |
-
|
433 |
-
def test_data_structure_consistency(self):
|
434 |
-
"""Test that returned data structure is consistent."""
|
435 |
-
|
436 |
-
result = gradio.get_processed_data(None, "", "", "")
|
437 |
-
|
438 |
-
# Check all required keys exist
|
439 |
-
required_keys = ["linkedin", "github", "job_post", "user_instructions", "errors"]
|
440 |
-
for key in required_keys:
|
441 |
-
self.assertIn(key, result)
|
442 |
-
|
443 |
-
# Check data types
|
444 |
-
self.assertIsInstance(result["errors"], list)
|
445 |
-
|
446 |
-
@patch('functions.gradio.extract_text_from_linkedin_pdf')
|
447 |
-
def test_linkedin_warning_status(self, mock_extract):
|
448 |
-
"""Test handling of LinkedIn warning status."""
|
449 |
-
|
450 |
-
mock_pdf = MagicMock()
|
451 |
-
mock_pdf.name = "test.pdf"
|
452 |
-
|
453 |
mock_extract.return_value = {
|
454 |
-
"
|
455 |
-
"message": "Some warning"
|
456 |
}
|
457 |
|
458 |
-
|
459 |
-
|
460 |
-
# Warning status should not be treated as success
|
461 |
-
self.assertIsNone(result["linkedin"])
|
462 |
-
self.assertEqual(len(result["errors"]), 1)
|
463 |
-
self.assertIn("LinkedIn: Some warning", result["errors"])
|
464 |
|
465 |
-
|
466 |
-
|
|
|
|
|
|
|
|
|
467 |
|
468 |
-
#
|
469 |
-
|
470 |
-
self.assertEqual(result["user_instructions"], "Custom instructions")
|
471 |
|
472 |
-
#
|
473 |
-
|
474 |
-
self.assertIsNone(result["user_instructions"])
|
475 |
|
476 |
-
# Test with empty string
|
477 |
-
result = gradio.get_processed_data(None, "", "", "")
|
478 |
-
self.assertIsNone(result["user_instructions"])
|
479 |
|
480 |
if __name__ == '__main__':
|
481 |
unittest.main()
|
|
|
3 |
"""
|
4 |
|
5 |
import unittest
|
6 |
+
from pathlib import Path
|
7 |
+
from unittest.mock import patch
|
8 |
from functions import gradio
|
9 |
|
10 |
|
11 |
class TestProcessInputs(unittest.TestCase):
|
12 |
"""Test cases for the process_inputs function."""
|
13 |
|
14 |
+
def test_process_inputs_with_real_pdf(self):
|
15 |
+
"""Test process_inputs with the actual test PDF file."""
|
|
|
|
|
|
|
|
|
16 |
|
17 |
+
# Get path to the test PDF file
|
18 |
+
test_pdf_path = Path(__file__).parent / "test_data" / "linkedin_profile.pdf"
|
19 |
|
20 |
+
# Verify the test file exists
|
21 |
+
self.assertTrue(test_pdf_path.exists(), f"Test PDF file not found: {test_pdf_path}")
|
|
|
|
|
|
|
22 |
|
23 |
+
result = gradio.process_inputs(
|
24 |
+
linkedin_pdf_path=str(test_pdf_path),
|
25 |
+
github_url="https://github.com/user",
|
26 |
+
job_post_text="Software engineer position",
|
27 |
+
user_instructions="Custom instructions"
|
28 |
+
)
|
29 |
|
30 |
+
# Since most functionality is commented out, result should be empty string
|
31 |
+
self.assertEqual(result, "")
|
32 |
|
33 |
+
@patch('functions.gradio.extract_text')
|
34 |
+
def test_process_inputs_with_pdf_path_mocked(self, mock_extract):
|
35 |
+
"""Test process_inputs with a PDF file path (mocked for controlled testing)."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
+
# Mock successful LinkedIn text extraction
|
38 |
mock_extract.return_value = {
|
39 |
+
"contact_info": "John Doe, [email protected]",
|
40 |
+
"summary": "Experienced software engineer",
|
41 |
+
"experience": "Software Engineer at Company"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
}
|
43 |
|
44 |
+
result = gradio.process_inputs(
|
45 |
+
linkedin_pdf_path="/path/to/resume.pdf",
|
46 |
+
github_url="https://github.com/user",
|
47 |
+
job_post_text="Software engineer position",
|
48 |
+
user_instructions="Custom instructions"
|
49 |
+
)
|
50 |
+
|
51 |
+
# Verify extract_text was called with the correct path
|
52 |
+
mock_extract.assert_called_once_with("/path/to/resume.pdf")
|
53 |
+
|
54 |
+
# Since most functionality is commented out, result should be empty string
|
55 |
+
self.assertEqual(result, "")
|
56 |
+
|
57 |
+
@patch('functions.gradio.extract_text')
|
58 |
+
def test_process_inputs_extraction_failure(self, mock_extract):
|
59 |
+
"""Test process_inputs when LinkedIn extraction fails."""
|
60 |
+
# Mock failed LinkedIn text extraction
|
61 |
+
mock_extract.return_value = None
|
62 |
+
|
63 |
+
result = gradio.process_inputs(
|
64 |
+
linkedin_pdf_path="/path/to/resume.pdf",
|
65 |
+
github_url="https://github.com/user",
|
66 |
+
job_post_text="Software engineer position",
|
67 |
+
user_instructions="Custom instructions"
|
68 |
+
)
|
69 |
+
|
70 |
+
# Verify extract_text was called
|
71 |
+
mock_extract.assert_called_once_with("/path/to/resume.pdf")
|
72 |
+
|
73 |
+
# Result should be empty string since functionality is commented out
|
74 |
+
self.assertEqual(result, "")
|
75 |
+
|
76 |
+
@patch('functions.gradio.extract_text')
|
77 |
+
def test_process_inputs_no_pdf_path(self, mock_extract):
|
78 |
+
"""Test process_inputs with no PDF path provided."""
|
79 |
+
result = gradio.process_inputs(
|
80 |
+
linkedin_pdf_path=None,
|
81 |
+
github_url="https://github.com/user",
|
82 |
+
job_post_text="Software engineer position",
|
83 |
+
user_instructions="Custom instructions"
|
84 |
+
)
|
85 |
+
|
86 |
+
# extract_text should be called with None
|
87 |
+
mock_extract.assert_called_once_with(None)
|
88 |
+
|
89 |
+
# Result should be empty string
|
90 |
+
self.assertEqual(result, "")
|
91 |
+
|
92 |
+
@patch('functions.gradio.extract_text')
|
93 |
+
def test_process_inputs_with_long_job_post(self, mock_extract):
|
94 |
+
"""Test process_inputs with a long job post text (for logging truncation)."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
mock_extract.return_value = {
|
96 |
+
"summary": "Test summary"
|
|
|
97 |
}
|
98 |
|
99 |
+
long_job_post = "This is a very long job posting " * 50 # Make it longer than 100 chars
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
+
result = gradio.process_inputs(
|
102 |
+
linkedin_pdf_path="/path/to/resume.pdf",
|
103 |
+
github_url="https://github.com/user",
|
104 |
+
job_post_text=long_job_post,
|
105 |
+
user_instructions="Custom instructions"
|
106 |
+
)
|
107 |
|
108 |
+
# Verify extract_text was called
|
109 |
+
mock_extract.assert_called_once_with("/path/to/resume.pdf")
|
|
|
110 |
|
111 |
+
# Result should be empty string
|
112 |
+
self.assertEqual(result, "")
|
|
|
113 |
|
|
|
|
|
|
|
114 |
|
115 |
if __name__ == '__main__':
|
116 |
unittest.main()
|
tests/test_linkedin_resume.py
CHANGED
@@ -1,215 +1,246 @@
|
|
1 |
"""
|
2 |
-
Unit tests for the
|
3 |
"""
|
4 |
|
5 |
import unittest
|
6 |
import tempfile
|
7 |
import os
|
8 |
-
from
|
9 |
-
from functions import linkedin_resume
|
10 |
|
11 |
# pylint: disable=protected-access
|
12 |
|
13 |
|
14 |
-
class
|
15 |
-
"""Test cases for the
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
expected = "Line 1\nLine 2\nLine 3"
|
22 |
-
self.assertEqual(ca._clean_extracted_text(raw), expected)
|
23 |
-
|
24 |
-
def test_remove_artifacts(self):
|
25 |
-
"""Test removal of PDF artifacts."""
|
26 |
-
|
27 |
-
raw = " 123 \n|---|\nSome text\n"
|
28 |
-
expected = "Some text"
|
29 |
-
self.assertEqual(ca._clean_extracted_text(raw), expected)
|
30 |
-
|
31 |
-
def test_normalize_spaces(self):
|
32 |
-
"""Test normalization of multiple spaces."""
|
33 |
-
|
34 |
-
raw = "A B C"
|
35 |
-
expected = "A B C"
|
36 |
-
self.assertEqual(ca._clean_extracted_text(raw), expected)
|
37 |
-
|
38 |
-
def test_empty_string(self):
|
39 |
-
"""Test handling of empty string."""
|
40 |
-
|
41 |
-
self.assertEqual(ca._clean_extracted_text(""), "")
|
42 |
-
|
43 |
-
def test_none_input(self):
|
44 |
-
"""Test handling of None input."""
|
45 |
-
|
46 |
-
self.assertEqual(ca._clean_extracted_text(None), "")
|
47 |
|
|
|
|
|
|
|
48 |
|
49 |
-
|
50 |
-
|
51 |
|
52 |
-
|
53 |
-
|
54 |
|
55 |
-
|
56 |
-
|
57 |
|
58 |
-
|
|
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
self.assertIn("education", result["sections"])
|
64 |
-
self.assertIn("skills", result["sections"])
|
65 |
-
self.assertGreater(result["word_count"], 0)
|
66 |
-
self.assertGreaterEqual(result["section_count"], 5)
|
67 |
|
68 |
-
|
69 |
-
|
|
|
70 |
|
71 |
-
|
72 |
-
|
73 |
-
self.assertEqual(result["full_text"], "")
|
74 |
-
self.assertEqual(result["word_count"], 0)
|
75 |
-
self.assertEqual(result["section_count"], 0)
|
76 |
|
77 |
-
def
|
78 |
-
"""Test
|
79 |
|
80 |
-
|
81 |
-
result = ca._structure_resume_text(text)
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
for field in required_fields:
|
86 |
-
self.assertIn(field, result)
|
87 |
|
88 |
|
89 |
-
class
|
90 |
-
"""Test cases for the
|
91 |
|
92 |
-
def
|
93 |
-
"""Test
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
-
|
96 |
-
"summary": "A summary.",
|
97 |
-
"contact_info": "Contact details.",
|
98 |
-
"experience": "Work exp.",
|
99 |
-
"education": "School info.",
|
100 |
-
"skills": "Python, C++"
|
101 |
-
}
|
102 |
-
formatted = ca._format_for_llm(sections)
|
103 |
|
104 |
-
self.
|
105 |
-
self.assertIn("
|
106 |
-
self.assertIn("
|
107 |
-
self.assertIn("
|
108 |
-
self.assertIn("
|
109 |
-
self.
|
110 |
-
self.assertTrue(formatted.endswith("=== END RESUME ==="))
|
111 |
|
112 |
-
def
|
113 |
-
"""Test
|
114 |
|
115 |
-
|
116 |
-
|
117 |
|
118 |
-
|
119 |
-
|
120 |
|
|
|
121 |
|
122 |
-
|
123 |
-
"""Test cases for the get_llm_context_from_resume function."""
|
124 |
|
125 |
-
|
126 |
-
"""Test successful extraction with LLM formatted text."""
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
"structured_text": {"llm_formatted": "LLM text", "full_text": "Full text"}
|
131 |
-
}
|
132 |
-
result = ca.get_llm_context_from_resume(extraction_result)
|
133 |
-
self.assertEqual(result, "LLM text")
|
134 |
|
135 |
-
def
|
136 |
-
"""Test
|
137 |
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
|
|
144 |
|
145 |
-
|
146 |
-
"""Test handling of error status."""
|
147 |
|
148 |
-
|
149 |
-
result
|
150 |
-
|
|
|
|
|
|
|
151 |
|
152 |
-
def test_missing_structured_text(self):
|
153 |
-
"""Test handling of missing structured_text."""
|
154 |
|
155 |
-
|
156 |
-
|
157 |
-
self.assertEqual(result, "")
|
158 |
|
|
|
|
|
159 |
|
160 |
-
|
161 |
-
|
162 |
|
163 |
-
|
164 |
-
|
|
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
self.assertIn("No PDF file provided", result["message"])
|
169 |
|
170 |
-
|
171 |
-
|
172 |
-
def test_successful_extraction(self, mock_open, mock_pdf_reader):
|
173 |
-
"""Test successful PDF text extraction with mocked PyPDF2."""
|
174 |
|
175 |
-
#
|
176 |
-
|
177 |
-
|
|
|
178 |
|
179 |
-
|
180 |
-
|
181 |
-
mock_file = MagicMock()
|
182 |
-
mock_file.read.return_value = b"fake pdf content"
|
183 |
-
mock_open.return_value.__enter__.return_value = mock_file
|
184 |
|
185 |
-
|
186 |
-
|
187 |
-
mock_page.extract_text.return_value = "Contact Info\nJohn Doe\nSummary" + \
|
188 |
-
"\nDeveloper\nExperience\nCompany X"
|
189 |
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
|
194 |
-
|
195 |
-
|
196 |
|
197 |
-
|
198 |
-
|
199 |
-
self.assertIn("metadata", result)
|
200 |
-
self.assertIn("contact_info", result["structured_text"]["sections"])
|
201 |
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
os.remove(tmp_path)
|
206 |
|
207 |
-
def
|
208 |
-
"""Test handling of
|
209 |
|
210 |
-
|
211 |
-
self.assertEqual(
|
212 |
-
self.assertIn("Failed to extract text from PDF", result["message"])
|
213 |
|
214 |
|
215 |
if __name__ == '__main__':
|
|
|
1 |
"""
|
2 |
+
Unit tests for the linkedin_resume module.
|
3 |
"""
|
4 |
|
5 |
import unittest
|
6 |
import tempfile
|
7 |
import os
|
8 |
+
from pathlib import Path
|
9 |
+
from functions import linkedin_resume
|
10 |
|
11 |
# pylint: disable=protected-access
|
12 |
|
13 |
|
14 |
+
class TestExtractText(unittest.TestCase):
    """Test cases for the extract_text function.

    The fixture-based tests run extract_text against the sample PDF stored in
    ``test_data/linkedin_profile.pdf`` next to this test module.
    """

    # Sample PDF shared by every fixture-based test in this class.
    TEST_PDF_PATH = Path(__file__).parent / "test_data" / "linkedin_profile.pdf"

    def _extract_from_test_pdf(self):
        """Run extract_text on the sample PDF and return the resulting dict.

        Fails the calling test when the fixture file is missing. Skips the
        calling test when extract_text returns None, so that a run which
        verified nothing is reported as skipped rather than as a pass.
        """

        # Verify the test file exists
        self.assertTrue(
            self.TEST_PDF_PATH.exists(),
            f"Test PDF file not found: {self.TEST_PDF_PATH}"
        )

        # Call extract_text with the real PDF
        result = linkedin_resume.extract_text(str(self.TEST_PDF_PATH))

        if result is None:
            # None means the PDF couldn't be processed. That is tolerated,
            # but there is nothing left to check, so say so explicitly.
            self.skipTest("extract_text returned None for the sample PDF")

        self.assertIsInstance(result, dict)

        return result

    def test_extract_text_with_real_pdf(self):
        """Test text extraction using the actual test PDF file."""

        result = self._extract_from_test_pdf()

        # Check that we have at least some content
        self.assertGreater(len(result), 0)

        # Each value should be a string
        for content in result.values():
            self.assertIsInstance(content, str)

    def test_extract_text_success(self):
        """Test successful text extraction from the actual test PDF file."""

        result = self._extract_from_test_pdf()

        # Check that we have at least some content
        self.assertGreater(len(result), 0)

        # Each value should be a non-blank string
        for section_name, content in result.items():
            self.assertIsInstance(content, str)
            self.assertGreater(
                len(content.strip()),
                0,
                f"Section {section_name} should have content"
            )

    def test_extract_text_with_invalid_pdf(self):
        """Test handling of invalid PDF content by creating a temporary invalid file."""

        # Create a temporary file with invalid content. delete=False keeps it
        # on disk after the handle is closed so extract_text can open it by path.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.pdf', delete=False, encoding='utf-8'
        ) as temp_file:
            temp_file.write("This is not a valid PDF file")
            temp_path = temp_file.name

        try:
            # This should return None due to invalid PDF format
            result = linkedin_resume.extract_text(temp_path)
            self.assertIsNone(result)

        finally:
            # Clean up the temporary file
            os.unlink(temp_path)

    def test_extract_text_parsing_behavior(self):
        """Test text extraction and parsing with the real PDF file."""

        result = self._extract_from_test_pdf()

        # If we have content, verify it's been parsed into logical sections
        for content in result.values():
            self.assertIsInstance(content, str)

            # Content should be cleaned (no excessive whitespace at start/end)
            self.assertEqual(content, content.strip())

    def test_extract_text_file_not_found(self):
        """Test handling when file doesn't exist."""

        result = linkedin_resume.extract_text("/nonexistent/file.pdf")

        # Should return None when file not found
        self.assertIsNone(result)
|
|
|
|
|
120 |
|
121 |
|
122 |
+
class TestParseResumeText(unittest.TestCase):
    """Test cases for the _parse_resume_text function."""

    def test_parse_with_sections(self):
        """Test parsing text with recognizable sections."""

        # Built line by line so the input does not depend on how this
        # literal happens to be indented in the source file.
        text = "\n".join([
            "",
            "Contact Information",
            "John Doe",
            "",
            "",
            "Summary",
            "Experienced software engineer with 5 years experience",
            "",
            "Experience",
            "Software Engineer at Tech Company",
            "Built web applications",
            "",
            "Skills",
            "Python, JavaScript, React",
            "",
            "Education",
            "Bachelor's in Computer Science",
            "University of Technology",
            "",
        ])

        result = linkedin_resume._parse_resume_text(text)

        self.assertIsInstance(result, dict)

        for section in ("contact_info", "summary", "experience", "skills", "education"):
            with self.subTest(section=section):
                self.assertIn(section, result)

    def test_parse_empty_text(self):
        """Test parsing empty or None text."""

        self.assertIsNone(linkedin_resume._parse_resume_text(""))
        self.assertIsNone(linkedin_resume._parse_resume_text(None))

    def test_parse_text_no_sections(self):
        """Test parsing text without recognizable sections."""

        text = "Just some random text without any section headers"

        result = linkedin_resume._parse_resume_text(text)

        self.assertIsInstance(result, dict)

        # Should still return a dict with at least the general section
        self.assertIn("general", result)

    def test_parse_calls_clean_section(self):
        """Test that parsing calls _clean_section on each section using real text processing."""

        # The summary line deliberately contains runs of three or more spaces
        # so the assertions below can only pass if cleaning really happened.
        text = "\n".join([
            "",
            "Summary",
            "Some summary text   with    extra spaces",
            "",
            "Experience",
            "Some experience text",
            "",
        ])

        result = linkedin_resume._parse_resume_text(text)

        # Fail loudly instead of silently checking nothing
        self.assertIsInstance(result, dict)
        self.assertTrue(result, "Parsing sectioned text should yield at least one section")

        for content in result.values():
            # Verify that cleaning has occurred (no excessive spaces)
            self.assertNotIn("   ", content)  # No triple spaces should remain
            self.assertEqual(content, content.strip())  # Should be stripped
|
193 |
|
|
|
|
|
194 |
|
195 |
+
class TestCleanSection(unittest.TestCase):
    """Test cases for the _clean_section function."""

    def test_clean_unicode_normalization(self):
        """Test unicode normalization."""

        text = "Café résumé naïve"  # Text with accented characters
        result = linkedin_resume._clean_section(text)

        # Only checks that cleaning accented text yields a non-empty string;
        # the exact normalized form is not pinned here.
        self.assertIsInstance(result, str)
        self.assertNotEqual(result, "")

    def test_clean_remove_page_numbers(self):
        """Test removal of LinkedIn page numbers."""

        text = "Some content\nPage 1 of 3\nMore content"
        result = linkedin_resume._clean_section(text)

        # Should remove page indicators
        self.assertNotIn("Page 1 of 3", result)
        self.assertIn("Some content", result)
        self.assertIn("More content", result)

    def test_clean_calls_whitespace_cleaner(self):
        """Test that _clean_section properly cleans whitespace."""

        # Input carries runs of two, three and four spaces between words
        text = "Some    text  with   spaces"
        result = linkedin_resume._clean_section(text)

        # Should clean multiple spaces to single spaces
        self.assertNotIn("  ", result)  # No double spaces should remain
        self.assertIn("Some text with spaces", result)  # Should have single spaces

    def test_clean_strip_whitespace(self):
        """Test stripping leading/trailing whitespace."""

        text = "   Some content   "
        result = linkedin_resume._clean_section(text)

        # Should strip leading and trailing whitespace
        self.assertFalse(result.startswith(" "))
        self.assertFalse(result.endswith(" "))

    def test_clean_empty_input(self):
        """Test handling of empty and whitespace-only input."""

        self.assertEqual(linkedin_resume._clean_section(""), "")
        self.assertEqual(linkedin_resume._clean_section("   "), "")
|
|
|
244 |
|
245 |
|
246 |
if __name__ == '__main__':
|