Spaces:

gperdrizet
/

resumate

Sleeping

App Files Files Community

gperdrizet commited on Jul 24

Commit

71c8aa1

verified ·

1 Parent(s): f70c1ff

Removed mocking from tests in favor of uof using actual sample PDF, cleaned up

Browse files

Files changed (2) hide show

tests/test_gradio.py +86 -451
tests/test_linkedin_resume.py +189 -158

tests/test_gradio.py CHANGED Viewed

@@ -3,479 +3,114 @@ Unit tests for the gradio module.
 """
 import unittest
-from unittest.mock import patch, MagicMock
 from functions import gradio
 class TestProcessInputs(unittest.TestCase):
     """Test cases for the process_inputs function."""
-    def test_no_inputs_provided(self):
-        """Test when no inputs are provided."""
-        with patch('functions.gradio.get_github_repositories') as mock_github, \
-             patch('functions.gradio.summarize_job_call') as mock_summarize:
-            mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
-            mock_summarize.return_value = "Mocked job summary"
-            result = gradio.process_inputs(None, "", "", "")
-            self.assertIn("❌ No LinkedIn resume PDF file uploaded", result)
-            self.assertIn("❌ No GitHub profile URL provided", result)
-            self.assertIn("❌ Job post not provided", result)
-            self.assertIn("ℹ️ No additional instructions provided", result)
-            self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
-    def test_no_inputs_no_default_job(self):
-        """Test when no inputs are provided (same as test_no_inputs_provided now)."""
-        with patch('functions.gradio.get_github_repositories') as mock_github, \
-             patch('functions.gradio.summarize_job_call') as mock_summarize:
-            mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
-            mock_summarize.return_value = None  # No summarization since no job post
-            result = gradio.process_inputs(None, "", "", "")
-            self.assertIn("❌ No LinkedIn resume PDF file uploaded", result)
-            self.assertIn("❌ No GitHub profile URL provided", result)
-            self.assertIn("❌ Job post not provided", result)
-            self.assertIn("ℹ️ No additional instructions provided", result)
-            self.assertIn("ℹ️ No additional instructions provided", result)
-            self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
-    def test_all_inputs_provided_success(self):
-        """Test when all inputs are provided and successful."""
-        # Mock LinkedIn PDF file
-        mock_pdf = MagicMock()
-        mock_pdf.name = "test_resume.pdf"
-        # Mock successful extraction results
-        mock_linkedin_result = {
-            "status": "success",
-            "structured_text": {"sections": {}, "llm_formatted": "test content"},
-            "metadata": {"filename": "test_resume.pdf"}
-        }
-        mock_github_result = {
-            "status": "success",
-            "repositories": [{"name": "test-repo"}],
-            "metadata": {"username": "testuser"}
-        }
-        with patch('functions.gradio.extract_text_from_linkedin_pdf') as mock_linkedin, \
-             patch('functions.gradio.get_github_repositories') as mock_github, \
-             patch('functions.gradio.write_resume') as mock_write_resume, \
-             patch('functions.gradio.summarize_job_call') as mock_summarize: #, \
-             #patch('functions.gradio.shutil.copy2') as mock_copy:
-            mock_linkedin.return_value = mock_linkedin_result
-            mock_github.return_value = mock_github_result
-            mock_write_resume.return_value = "Generated resume content"
-            mock_summarize.return_value = "Job summary content\n"
-            result = gradio.process_inputs(
-                mock_pdf,
-                "https://github.com/testuser",
-                "Job posting text here",
-                "Please emphasize technical skills"
-            )
-            self.assertIn("✅ LinkedIn Resume PDF provided", result)
-            self.assertIn("✅ Text extraction successful", result)
-            self.assertIn("✅ GitHub Profile URL provided", result)
-            self.assertIn("✅ GitHub list download successful", result)
-            self.assertIn("✅ Job post text provided", result)
-            self.assertIn("✅ Job post summary generated", result)
-            self.assertIn("✅ Additional instructions provided", result)
-            self.assertIn("✅ Resume generated successfully", result)
-            # Verify write_resume was called with user instructions and job summary
-            mock_write_resume.assert_called_with(
-                mock_linkedin_result,
-                "Please emphasize technical skills",
-                "Job summary content\n"
-            )
-    @patch('functions.gradio.extract_text_from_linkedin_pdf')
-    @patch('functions.gradio.write_resume')
-    def test_linkedin_extraction_failure(self, mock_write_resume, mock_extract):
-        """Test LinkedIn PDF extraction failure."""
-        mock_pdf = MagicMock()
-        mock_pdf.name = "test_resume.pdf"
         mock_extract.return_value = {
-            "status": "error",
-            "message": "Failed to read PDF"
-        }
-        mock_write_resume.return_value = "Generated resume content"
-        with patch('functions.gradio.get_github_repositories') as mock_github:
-            mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
-            result = gradio.process_inputs(mock_pdf, "", "", "")
-            self.assertIn("✅ LinkedIn Resume PDF provided", result)
-            self.assertIn("❌ Text extraction failed: Failed to read PDF", result)
-            self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
-            # Verify write_resume was NOT called since extraction failed
-            mock_write_resume.assert_not_called()
-    @patch('functions.gradio.extract_text_from_linkedin_pdf')
-    @patch('functions.gradio.write_resume')
-    def test_linkedin_extraction_warning(self, mock_write_resume, mock_extract):
-        """Test LinkedIn PDF extraction warning."""
-        mock_pdf = MagicMock()
-        mock_pdf.name = "test_resume.pdf"
-        mock_extract.return_value = {
-            "status": "warning",
-            "message": "No text found in PDF"
-        }
-        mock_write_resume.return_value = "Generated resume content"
-        with patch('functions.gradio.get_github_repositories') as mock_github:
-            mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
-            result = gradio.process_inputs(mock_pdf, "", "", "")
-            self.assertIn("✅ LinkedIn Resume PDF provided", result)
-            self.assertIn("⚠️  Text extraction: No text found in PDF", result)
-            self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
-            # Verify write_resume was NOT called since extraction had warning status
-            mock_write_resume.assert_not_called()
-    @patch('functions.gradio.get_github_repositories')
-    @patch('functions.gradio.write_resume')
-    def test_github_retrieval_failure(self, mock_write_resume, mock_github):
-        """Test GitHub repository retrieval failure."""
-        mock_github.return_value = {
-            "status": "error",
-            "message": "User not found"
-        }
-        mock_write_resume.return_value = "Generated resume content"
-        result = gradio.process_inputs(None, "https://github.com/nonexistent", "", "")
-        self.assertIn("✅ GitHub Profile URL provided", result)
-        self.assertIn("❌ GitHub extraction failed: User not found", result)
-        self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
-        # Verify write_resume was NOT called since no LinkedIn data
-        mock_write_resume.assert_not_called()
-    def test_whitespace_only_inputs(self):
-        """Test inputs with only whitespace."""
-        with patch('functions.gradio.get_github_repositories') as mock_github, \
-             patch('functions.gradio.summarize_job_call') as mock_summarize:
-            mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
-            mock_summarize.return_value = "Mocked job summary"
-            result = gradio.process_inputs(None, "   ", "   ", "   ")
-            self.assertIn("❌ No LinkedIn resume PDF file uploaded", result)
-            self.assertIn("❌ No GitHub profile URL provided", result)
-            self.assertIn("❌ Job post not provided", result)
-            self.assertIn("ℹ️ No additional instructions provided", result)
-            self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
-    def test_whitespace_only_inputs_no_default(self):
-        """Test inputs with only whitespace (same as test_whitespace_only_inputs now)."""
-        with patch('functions.gradio.get_github_repositories') as mock_github, \
-             patch('functions.gradio.summarize_job_call') as mock_summarize:
-            mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
-            mock_summarize.return_value = None  # No summarization since no job post
-            result = gradio.process_inputs(None, "   ", "   ", "   ")
-            self.assertIn("❌ No LinkedIn resume PDF file uploaded", result)
-            self.assertIn("❌ No GitHub profile URL provided", result)
-            self.assertIn("❌ Job post not provided", result)
-            self.assertIn("ℹ️ No additional instructions provided", result)
-            self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
-    @patch('functions.gradio.write_resume')
-    @patch('functions.gradio.summarize_job_call')
-    def test_job_post_with_content(self, mock_summarize, mock_write_resume):
-        """Test job post with actual content."""
-        job_text = "Software Engineer position at Tech Company"
-        mock_write_resume.return_value = "Generated resume content"
-        mock_summarize.return_value = "Job summary content\n"
-        with patch('functions.gradio.get_github_repositories') as mock_github:
-            mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
-            result = gradio.process_inputs(None, "", job_text, "")
-            self.assertIn("✅ Job post text provided", result)
-            self.assertIn("✅ Job post summary generated", result)
-            self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
-            # Verify write_resume was NOT called since no LinkedIn data
-            mock_write_resume.assert_not_called()
-    @patch('functions.gradio.logger')
-    @patch('functions.gradio.write_resume')
-    def test_logging_calls(self, mock_write_resume, mock_logger):
-        """Test that appropriate logging calls are made."""
-        mock_pdf = MagicMock()
-        mock_pdf.name = "test.pdf"
-        mock_write_resume.return_value = "Generated resume content"
-        with patch('functions.gradio.extract_text_from_linkedin_pdf') as mock_extract, \
-             patch('functions.gradio.get_github_repositories') as mock_github, \
-             patch('functions.gradio.summarize_job_call') as mock_summarize:
-            mock_extract.return_value = {"status": "success"}
-            mock_github.return_value = {"status": "success", "metadata": {"username": "test"}}
-            mock_summarize.return_value = "Job summary\n"
-            gradio.process_inputs(
-                mock_pdf,
-                "https://github.com/test",
-                "job text",
-                "custom instructions"
-            )
-            # Verify logging calls were made
-            mock_logger.info.assert_called()
-    @patch('functions.gradio.write_resume')
-    def test_user_instructions_with_content(self, mock_write_resume):
-        """Test user instructions with actual content."""
-        instructions = "Please emphasize leadership skills and highlight remote work experience"
-        mock_write_resume.return_value = "Generated resume content"
-        with patch('functions.gradio.get_github_repositories') as mock_github:
-            mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
-            result = gradio.process_inputs(None, "", "", instructions)
-            self.assertIn("✅ Additional instructions provided", result)
-            self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
-            # Verify write_resume was NOT called since no valid extraction result
-            mock_write_resume.assert_not_called()
-    @patch('functions.gradio.write_resume')
-    def test_user_instructions_empty(self, mock_write_resume):
-        """Test user instructions when empty."""
-        mock_write_resume.return_value = "Generated resume content"
-        with patch('functions.gradio.get_github_repositories') as mock_github:
-            mock_github.return_value = {"status": "success", "metadata": {"username": "gperdrizet"}}
-            result = gradio.process_inputs(None, "", "", "")
-            self.assertIn("ℹ️ No additional instructions provided", result)
-            self.assertIn("❌ Cannot generate resume: No valid LinkedIn data extracted", result)
-            # Verify write_resume was NOT called since no valid extraction result
-            mock_write_resume.assert_not_called()
-class TestGetProcessedData(unittest.TestCase):
-    """Test cases for the get_processed_data function."""
-    def test_no_inputs(self):
-        """Test with no inputs provided."""
-        result = gradio.get_processed_data(None, "", "", "")
-        self.assertIsNone(result["linkedin"])
-        self.assertIsNone(result["github"])
-        self.assertIsNone(result["job_post"])
-        self.assertIsNone(result["user_instructions"])
-        self.assertEqual(len(result["errors"]), 0)
-    def test_no_inputs_no_default_job(self):
-        """Test with no inputs provided (same as test_no_inputs now)."""
-        result = gradio.get_processed_data(None, "", "", "")
-        self.assertIsNone(result["linkedin"])
-        self.assertIsNone(result["github"])
-        self.assertIsNone(result["job_post"])
-        self.assertIsNone(result["user_instructions"])
-        self.assertEqual(len(result["errors"]), 0)
-    def test_all_successful_inputs(self):
-        """Test with all successful inputs."""
-        mock_pdf = MagicMock()
-        mock_pdf.name = "test.pdf"
-        mock_linkedin_result = {
-            "status": "success",
-            "structured_text": {"sections": {}, "llm_formatted": "content"}
         }
-        mock_github_result = {
-            "status": "success",
-            "repositories": [{"name": "repo"}],
-            "metadata": {"username": "user"}
-        }
-        with patch('functions.gradio.extract_text_from_linkedin_pdf') as mock_linkedin, \
-             patch('functions.gradio.get_github_repositories') as mock_github:
-            mock_linkedin.return_value = mock_linkedin_result
-            mock_github.return_value = mock_github_result
-            result = gradio.get_processed_data(
-                mock_pdf,
-                "https://github.com/user",
-                "Job posting content",
-                "Custom instructions"
-            )
-            self.assertEqual(result["linkedin"], mock_linkedin_result)
-            self.assertEqual(result["github"], mock_github_result)
-            self.assertEqual(result["job_post"], "Job posting content")
-            self.assertEqual(result["user_instructions"], "Custom instructions")
-            self.assertEqual(len(result["errors"]), 0)
-    def test_linkedin_error(self):
-        """Test with LinkedIn processing error."""
-        mock_pdf = MagicMock()
-        mock_pdf.name = "test.pdf"
-        with patch('functions.gradio.extract_text_from_linkedin_pdf') as mock_extract:
-            mock_extract.return_value = {
-                "status": "error",
-                "message": "PDF read failed"
-            }
-            result = gradio.get_processed_data(mock_pdf, "", "", "")
-            self.assertIsNone(result["linkedin"])
-            self.assertEqual(len(result["errors"]), 1)
-            self.assertIn("LinkedIn: PDF read failed", result["errors"])
-    def test_github_error(self):
-        """Test with GitHub processing error."""
-        with patch('functions.gradio.get_github_repositories') as mock_github:
-            mock_github.return_value = {
-                "status": "error",
-                "message": "User not found"
-            }
-            result = gradio.get_processed_data(None, "https://github.com/invalid", "", "")
-            self.assertIsNone(result["github"])
-            self.assertEqual(len(result["errors"]), 1)
-            self.assertIn("GitHub: User not found", result["errors"])
-    def test_multiple_errors(self):
-        """Test with multiple processing errors."""
-        mock_pdf = MagicMock()
-        mock_pdf.name = "test.pdf"
-        with patch('functions.gradio.extract_text_from_linkedin_pdf') as mock_linkedin, \
-             patch('functions.gradio.get_github_repositories') as mock_github:
-            mock_linkedin.return_value = {
-                "status": "error",
-                "message": "LinkedIn error"
-            }
-            mock_github.return_value = {
-                "status": "error",
-                "message": "GitHub error"
-            }
-            result = gradio.get_processed_data(
-                mock_pdf,
-                "https://github.com/user",
-                "",
-                ""
-            )
-            self.assertIsNone(result["linkedin"])
-            self.assertIsNone(result["github"])
-            self.assertEqual(len(result["errors"]), 2)
-            self.assertIn("LinkedIn: LinkedIn error", result["errors"])
-            self.assertIn("GitHub: GitHub error", result["errors"])
-    def test_job_post_whitespace_handling(self):
-        """Test job post whitespace handling."""
-        # Test with leading/trailing whitespace
-        result = gradio.get_processed_data(None, "", "  Job content  ", "")
-        self.assertEqual(result["job_post"], "Job content")
-        # Test with only whitespace - should be None
-        result = gradio.get_processed_data(None, "", "   ", "")
-        self.assertIsNone(result["job_post"])
-        # Test with empty string - should be None
-        result = gradio.get_processed_data(None, "", "", "")
-        self.assertIsNone(result["job_post"])
-    def test_github_url_whitespace_handling(self):
-        """Test GitHub URL whitespace handling."""
-        with patch('functions.gradio.get_github_repositories') as mock_github:
-            mock_github.return_value = {"status": "success", "repositories": []}
-            # Test with leading/trailing whitespace
-            _ = gradio.get_processed_data(None, "  https://github.com/user  ", "", "")
-            mock_github.assert_called_with("  https://github.com/user  ")
-            # Test with only whitespace - should not call function
-            mock_github.reset_mock()
-            _ = gradio.get_processed_data(None, "   ", "", "")
-            mock_github.assert_not_called()
-    def test_data_structure_consistency(self):
-        """Test that returned data structure is consistent."""
-        result = gradio.get_processed_data(None, "", "", "")
-        # Check all required keys exist
-        required_keys = ["linkedin", "github", "job_post", "user_instructions", "errors"]
-        for key in required_keys:
-            self.assertIn(key, result)
-        # Check data types
-        self.assertIsInstance(result["errors"], list)
-    @patch('functions.gradio.extract_text_from_linkedin_pdf')
-    def test_linkedin_warning_status(self, mock_extract):
-        """Test handling of LinkedIn warning status."""
-        mock_pdf = MagicMock()
-        mock_pdf.name = "test.pdf"
         mock_extract.return_value = {
-            "status": "warning",
-            "message": "Some warning"
         }
-        result = gradio.get_processed_data(mock_pdf, "", "", "")
-        # Warning status should not be treated as success
-        self.assertIsNone(result["linkedin"])
-        self.assertEqual(len(result["errors"]), 1)
-        self.assertIn("LinkedIn: Some warning", result["errors"])
-    def test_user_instructions_whitespace_handling(self):
-        """Test user instructions whitespace handling."""
-        # Test with leading/trailing whitespace
-        result = gradio.get_processed_data(None, "", "", "  Custom instructions  ")
-        self.assertEqual(result["user_instructions"], "Custom instructions")
-        # Test with only whitespace
-        result = gradio.get_processed_data(None, "", "", "   ")
-        self.assertIsNone(result["user_instructions"])
-        # Test with empty string
-        result = gradio.get_processed_data(None, "", "", "")
-        self.assertIsNone(result["user_instructions"])
 if __name__ == '__main__':
     unittest.main()

 """
 import unittest
+from pathlib import Path
+from unittest.mock import patch
 from functions import gradio
 class TestProcessInputs(unittest.TestCase):
     """Test cases for the process_inputs function."""
+    def test_process_inputs_with_real_pdf(self):
+        """Test process_inputs with the actual test PDF file."""
+        # Get path to the test PDF file
+        test_pdf_path = Path(__file__).parent / "test_data" / "linkedin_profile.pdf"
+        # Verify the test file exists
+        self.assertTrue(test_pdf_path.exists(), f"Test PDF file not found: {test_pdf_path}")
+        result = gradio.process_inputs(
+            linkedin_pdf_path=str(test_pdf_path),
+            github_url="https://github.com/user",
+            job_post_text="Software engineer position",
+            user_instructions="Custom instructions"
+        )
+        # Since most functionality is commented out, result should be empty string
+        self.assertEqual(result, "")
+    @patch('functions.gradio.extract_text')
+    def test_process_inputs_with_pdf_path_mocked(self, mock_extract):
+        """Test process_inputs with a PDF file path (mocked for controlled testing)."""
+        # Mock successful LinkedIn text extraction
         mock_extract.return_value = {
+            "contact_info": "John Doe, [email protected]",
+            "summary": "Experienced software engineer",
+            "experience": "Software Engineer at Company"
         }
+        result = gradio.process_inputs(
+            linkedin_pdf_path="/path/to/resume.pdf",
+            github_url="https://github.com/user",
+            job_post_text="Software engineer position",
+            user_instructions="Custom instructions"
+        )
+        # Verify extract_text was called with the correct path
+        mock_extract.assert_called_once_with("/path/to/resume.pdf")
+        # Since most functionality is commented out, result should be empty string
+        self.assertEqual(result, "")
+    @patch('functions.gradio.extract_text')
+    def test_process_inputs_extraction_failure(self, mock_extract):
+        """Test process_inputs when LinkedIn extraction fails."""
+        # Mock failed LinkedIn text extraction
+        mock_extract.return_value = None
+        result = gradio.process_inputs(
+            linkedin_pdf_path="/path/to/resume.pdf",
+            github_url="https://github.com/user",
+            job_post_text="Software engineer position",
+            user_instructions="Custom instructions"
+        )
+        # Verify extract_text was called
+        mock_extract.assert_called_once_with("/path/to/resume.pdf")
+        # Result should be empty string since functionality is commented out
+        self.assertEqual(result, "")
+    @patch('functions.gradio.extract_text')
+    def test_process_inputs_no_pdf_path(self, mock_extract):
+        """Test process_inputs with no PDF path provided."""
+        result = gradio.process_inputs(
+            linkedin_pdf_path=None,
+            github_url="https://github.com/user",
+            job_post_text="Software engineer position",
+            user_instructions="Custom instructions"
+        )
+        # extract_text should be called with None
+        mock_extract.assert_called_once_with(None)
+        # Result should be empty string
+        self.assertEqual(result, "")
+    @patch('functions.gradio.extract_text')
+    def test_process_inputs_with_long_job_post(self, mock_extract):
+        """Test process_inputs with a long job post text (for logging truncation)."""
         mock_extract.return_value = {
+            "summary": "Test summary"
         }
+        long_job_post = "This is a very long job posting " * 50  # Make it longer than 100 chars
+        result = gradio.process_inputs(
+            linkedin_pdf_path="/path/to/resume.pdf",
+            github_url="https://github.com/user",
+            job_post_text=long_job_post,
+            user_instructions="Custom instructions"
+        )
+        # Verify extract_text was called
+        mock_extract.assert_called_once_with("/path/to/resume.pdf")
+        # Result should be empty string
+        self.assertEqual(result, "")
 if __name__ == '__main__':
     unittest.main()

tests/test_linkedin_resume.py CHANGED Viewed

@@ -1,215 +1,246 @@
 """
-Unit tests for the context_acquisition module.
 """
 import unittest
 import tempfile
 import os
-from unittest.mock import patch, MagicMock
-from functions import linkedin_resume as ca
 # pylint: disable=protected-access
-class TestCleanExtractedText(unittest.TestCase):
-    """Test cases for the _clean_extracted_text function."""
-    def test_normalize_multiple_newlines(self):
-        """Test normalization of multiple newlines."""
-        raw = "Line 1\n\nLine 2\n\n\nLine 3"
-        expected = "Line 1\nLine 2\nLine 3"
-        self.assertEqual(ca._clean_extracted_text(raw), expected)
-    def test_remove_artifacts(self):
-        """Test removal of PDF artifacts."""
-        raw = "  123  \n|---|\nSome text\n"
-        expected = "Some text"
-        self.assertEqual(ca._clean_extracted_text(raw), expected)
-    def test_normalize_spaces(self):
-        """Test normalization of multiple spaces."""
-        raw = "A  B   C"
-        expected = "A B C"
-        self.assertEqual(ca._clean_extracted_text(raw), expected)
-    def test_empty_string(self):
-        """Test handling of empty string."""
-        self.assertEqual(ca._clean_extracted_text(""), "")
-    def test_none_input(self):
-        """Test handling of None input."""
-        self.assertEqual(ca._clean_extracted_text(None), "")
-class TestStructureResumeText(unittest.TestCase):
-    """Test cases for the _structure_resume_text function."""
-    def test_basic_structure(self):
-        """Test basic resume text structuring."""
-        text = "Contact Info\nJohn Doe\nSummary\nExperienced dev" + \
-               "\nExperience\nCompany X\nEducation\nMIT\nSkills\nPython, C++"
-        result = ca._structure_resume_text(text)
-        self.assertIn("contact_info", result["sections"])
-        self.assertIn("summary", result["sections"])
-        self.assertIn("experience", result["sections"])
-        self.assertIn("education", result["sections"])
-        self.assertIn("skills", result["sections"])
-        self.assertGreater(result["word_count"], 0)
-        self.assertGreaterEqual(result["section_count"], 5)
-    def test_empty_text(self):
-        """Test handling of empty text."""
-        result = ca._structure_resume_text("")
-        self.assertEqual(result["sections"], {})
-        self.assertEqual(result["full_text"], "")
-        self.assertEqual(result["word_count"], 0)
-        self.assertEqual(result["section_count"], 0)
-    def test_contains_required_fields(self):
-        """Test that result contains all required fields."""
-        text = "Some basic text"
-        result = ca._structure_resume_text(text)
-        required_fields = ["sections", "full_text", "llm_formatted", "summary",
-                          "format", "word_count", "section_count"]
-        for field in required_fields:
-            self.assertIn(field, result)
-class TestFormatForLLM(unittest.TestCase):
-    """Test cases for the _format_for_llm function."""
-    def test_section_formatting(self):
-        """Test proper formatting of sections for LLM."""
-        sections = {
-            "summary": "A summary.",
-            "contact_info": "Contact details.",
-            "experience": "Work exp.",
-            "education": "School info.",
-            "skills": "Python, C++"
-        }
-        formatted = ca._format_for_llm(sections)
-        self.assertIn("[SUMMARY]", formatted)
-        self.assertIn("[CONTACT INFO]", formatted)
-        self.assertIn("[EXPERIENCE]", formatted)
-        self.assertIn("[EDUCATION]", formatted)
-        self.assertIn("[SKILLS]", formatted)
-        self.assertTrue(formatted.startswith("=== RESUME CONTENT ==="))
-        self.assertTrue(formatted.endswith("=== END RESUME ==="))
-    def test_empty_sections(self):
-        """Test handling of empty sections."""
-        sections = {}
-        formatted = ca._format_for_llm(sections)
-        self.assertTrue(formatted.startswith("=== RESUME CONTENT ==="))
-        self.assertTrue(formatted.endswith("=== END RESUME ==="))
-class TestGetLLMContextFromResume(unittest.TestCase):
-    """Test cases for the get_llm_context_from_resume function."""
-    def test_success_with_llm_formatted(self):
-        """Test successful extraction with LLM formatted text."""
-        extraction_result = {
-            "status": "success",
-            "structured_text": {"llm_formatted": "LLM text", "full_text": "Full text"}
-        }
-        result = ca.get_llm_context_from_resume(extraction_result)
-        self.assertEqual(result, "LLM text")
-    def test_fallback_to_full_text(self):
-        """Test fallback to full text when LLM formatted not available."""
-        extraction_result = {
-            "status": "success",
-            "structured_text": {"full_text": "Full text"}
-        }
-        result = ca.get_llm_context_from_resume(extraction_result)
-        self.assertEqual(result, "Full text")
-    def test_error_status(self):
-        """Test handling of error status."""
-        extraction_result = {"status": "error"}
-        result = ca.get_llm_context_from_resume(extraction_result)
-        self.assertEqual(result, "")
-    def test_missing_structured_text(self):
-        """Test handling of missing structured_text."""
-        extraction_result = {"status": "success"}
-        result = ca.get_llm_context_from_resume(extraction_result)
-        self.assertEqual(result, "")
-class TestExtractTextFromLinkedInPDF(unittest.TestCase):
-    """Test cases for the extract_text_from_linkedin_pdf function."""
-    def test_none_input(self):
-        """Test handling of None input."""
-        result = ca.extract_text_from_linkedin_pdf(None)
-        self.assertEqual(result["status"], "error")
-        self.assertIn("No PDF file provided", result["message"])
-    @patch('PyPDF2.PdfReader')
-    @patch('builtins.open')
-    def test_successful_extraction(self, mock_open, mock_pdf_reader):
-        """Test successful PDF text extraction with mocked PyPDF2."""
-        # Create a temporary file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
-            tmp_path = tmp.name
-        try:
-            # Mock file reading
-            mock_file = MagicMock()
-            mock_file.read.return_value = b"fake pdf content"
-            mock_open.return_value.__enter__.return_value = mock_file
-            # Mock PDF reader and page
-            mock_page = MagicMock()
-            mock_page.extract_text.return_value = "Contact Info\nJohn Doe\nSummary" + \
-                                                   "\nDeveloper\nExperience\nCompany X"
-            mock_reader_instance = MagicMock()
-            mock_reader_instance.pages = [mock_page]
-            mock_pdf_reader.return_value = mock_reader_instance
-            # Test the function
-            result = ca.extract_text_from_linkedin_pdf(tmp_path)
-            self.assertEqual(result["status"], "success")
-            self.assertIn("structured_text", result)
-            self.assertIn("metadata", result)
-            self.assertIn("contact_info", result["structured_text"]["sections"])
-        finally:
-            # Clean up
-            if os.path.exists(tmp_path):
-                os.remove(tmp_path)
-    def test_nonexistent_file(self):
-        """Test handling of non-existent file."""
-        result = ca.extract_text_from_linkedin_pdf("/nonexistent/path.pdf")
-        self.assertEqual(result["status"], "error")
-        self.assertIn("Failed to extract text from PDF", result["message"])
 if __name__ == '__main__':

 """
+Unit tests for the linkedin_resume module.
 """
 import unittest
 import tempfile
 import os
+from pathlib import Path
+from functions import linkedin_resume
 # pylint: disable=protected-access
+class TestExtractText(unittest.TestCase):
+    """Test cases for the extract_text function."""
+    def test_extract_text_with_real_pdf(self):
+        """Test text extraction using the actual test PDF file."""
+        # Get path to the test PDF file
+        test_pdf_path = Path(__file__).parent / "test_data" / "linkedin_profile.pdf"
+        # Verify the test file exists
+        self.assertTrue(test_pdf_path.exists(), f"Test PDF file not found: {test_pdf_path}")
+        # Call extract_text with the real PDF
+        result = linkedin_resume.extract_text(str(test_pdf_path))
+        # Verify we get a result (should be a dict with sections)
+        if result is not None:
+            self.assertIsInstance(result, dict)
+            # Check that we have at least some content
+            self.assertGreater(len(result), 0)
+            # Each value should be a string
+            for _, content in result.items():
+                self.assertIsInstance(content, str)
+        else:
+            # If result is None, it means the PDF couldn't be processed
+            # This might happen with some PDF formats, which is acceptable
+            self.assertIsNone(result)
+    def test_extract_text_success(self):
+        """Test successful text extraction from the actual test PDF file."""
+        # Get path to the test PDF file
+        test_pdf_path = Path(__file__).parent / "test_data" / "linkedin_profile.pdf"
+        # Verify the test file exists
+        self.assertTrue(test_pdf_path.exists(), f"Test PDF file not found: {test_pdf_path}")
+        # Call extract_text with the real PDF
+        result = linkedin_resume.extract_text(str(test_pdf_path))
+        # Verify we get a result (should be a dict with sections)
+        if result is not None:
+            self.assertIsInstance(result, dict)
+            # Check that we have at least some content
+            self.assertGreater(len(result), 0)
+            # Each value should be a string
+            for section_name, content in result.items():
+                self.assertIsInstance(content, str)
+                self.assertGreater(
+                    len(content.strip()),
+                    0,
+                    f"Section {section_name} should have content"
+                )
+        else:
+            # If result is None, it means the PDF couldn't be processed
+            # This might happen with some PDF formats, which is acceptable
+            self.assertIsNone(result)
+    def test_extract_text_with_invalid_pdf(self):
+        """Test handling of invalid PDF content by creating a temporary invalid file."""
+        # Create a temporary file with invalid content
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdf', delete=False) as temp_file:
+            temp_file.write("This is not a valid PDF file")
+            temp_path = temp_file.name
+        try:
+            # This should return None due to invalid PDF format
+            result = linkedin_resume.extract_text(temp_path)
+            self.assertIsNone(result)
+        finally:
+            # Clean up the temporary file
+            os.unlink(temp_path)
+    def test_extract_text_parsing_behavior(self):
+        """Test text extraction and parsing with the real PDF file."""
+        # Get path to the test PDF file
+        test_pdf_path = Path(__file__).parent / "test_data" / "linkedin_profile.pdf"
+        # Verify the test file exists
+        self.assertTrue(test_pdf_path.exists(), f"Test PDF file not found: {test_pdf_path}")
+        # Call extract_text with the real PDF
+        result = linkedin_resume.extract_text(str(test_pdf_path))
+        # Test the parsing behavior - if we get a result, it should be structured properly
+        if result is not None:
+            self.assertIsInstance(result, dict)
+            # If we have content, verify it's been parsed into logical sections
+            for _, content in result.items():
+                self.assertIsInstance(content, str)
+                # Content should be cleaned (no excessive whitespace at start/end)
+                self.assertEqual(content, content.strip())
+    def test_extract_text_file_not_found(self):
+        """Test handling when file doesn't exist."""
+        result = linkedin_resume.extract_text("/nonexistent/file.pdf")
+        # Should return None when file not found
+        self.assertIsNone(result)
+class TestParseResumeText(unittest.TestCase):
+    """Test cases for the _parse_resume_text function."""
+    def test_parse_with_sections(self):
+        """Test parsing text with recognizable sections."""
+        text = """
+        Contact Information
+        John Doe
+        [email protected]
+        Summary
+        Experienced software engineer with 5 years experience
+        Experience
+        Software Engineer at Tech Company
+        Built web applications
+        Skills
+        Python, JavaScript, React
+        Education
+        Bachelor's in Computer Science
+        University of Technology
+        """
+        result = linkedin_resume._parse_resume_text(text)
+        self.assertIsInstance(result, dict)
+        self.assertIn("contact_info", result)
+        self.assertIn("summary", result)
+        self.assertIn("experience", result)
+        self.assertIn("skills", result)
+        self.assertIn("education", result)
+    def test_parse_empty_text(self):
+        """Test parsing empty or None text."""
+        self.assertIsNone(linkedin_resume._parse_resume_text(""))
+        self.assertIsNone(linkedin_resume._parse_resume_text(None))
+    def test_parse_text_no_sections(self):
+        """Test parsing text without recognizable sections."""
+        text = "Just some random text without any section headers"
+        result = linkedin_resume._parse_resume_text(text)
+        self.assertIsInstance(result, dict)
+        # Should still return a dict with at least the general section
+        self.assertIn("general", result)
+    def test_parse_calls_clean_section(self):
+        """Test that parsing calls _clean_section on each section using real text processing."""
+        text = """
+        Summary
+        Some summary text with   extra    spaces
+        Experience
+        Some experience text
+        """
+        result = linkedin_resume._parse_resume_text(text)
+        # Should be called and content should be cleaned
+        if result:
+            for _, content in result.items():
+                # Verify that cleaning has occurred (no excessive spaces)
+                self.assertNotIn("   ", content)  # No triple spaces should remain
+                self.assertEqual(content, content.strip())  # Should be stripped
+class TestCleanSection(unittest.TestCase):
+    """Test cases for the _clean_section function."""
+    def test_clean_unicode_normalization(self):
+        """Test unicode normalization."""
+        text = "Café résumé naïve"  # Text with accented characters
+        result = linkedin_resume._clean_section(text)
+        # Should normalize unicode characters
+        self.assertIsInstance(result, str)
+        self.assertNotEqual(result, "")
+    def test_clean_remove_page_numbers(self):
+        """Test removal of LinkedIn page numbers."""
+        text = "Some content\nPage 1 of 3\nMore content"
+        result = linkedin_resume._clean_section(text)
+        # Should remove page indicators
+        self.assertNotIn("Page 1 of 3", result)
+        self.assertIn("Some content", result)
+        self.assertIn("More content", result)
+    def test_clean_calls_whitespace_cleaner(self):
+        """Test that _clean_section properly cleans whitespace."""
+        text = "Some  text  with   spaces"
+        result = linkedin_resume._clean_section(text)
+        # Should clean multiple spaces to single spaces
+        self.assertNotIn("  ", result)  # No double spaces should remain
+        self.assertIn("Some text with spaces", result)  # Should have single spaces
+    def test_clean_strip_whitespace(self):
+        """Test stripping leading/trailing whitespace."""
+        text = "   Some content   "
+        result = linkedin_resume._clean_section(text)
+        # Should strip leading and trailing whitespace
+        self.assertFalse(result.startswith(" "))
+        self.assertFalse(result.endswith(" "))
+    def test_clean_empty_input(self):
+        """Test handling of empty input."""
+        self.assertEqual(linkedin_resume._clean_section(""), "")
+        self.assertEqual(linkedin_resume._clean_section("   "), "")
 if __name__ == '__main__':