Spaces:
Sleeping
Sleeping
Create cv_analyzer.py
Browse files- cv_analyzer.py +76 -0
cv_analyzer.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import logging
|
3 |
+
from cv_prompt import (
|
4 |
+
ResumeQualityEvaluation,
|
5 |
+
get_section_detection_prompt,
|
6 |
+
get_content_quality_prompt,
|
7 |
+
calculate_section_detection_score,
|
8 |
+
calculate_overall_score,
|
9 |
+
get_completeness_prompt
|
10 |
+
)
|
11 |
+
from openai_utils import get_ai_response
|
12 |
+
from ocr_extractor import process_file
|
13 |
+
from langchain.output_parsers import PydanticOutputParser
|
14 |
+
from spelling_grammar_checker import evaluate_cv_text
|
15 |
+
from personal_information import analyze_personal_info
|
16 |
+
|
# Configure the root logger once at import time: INFO level with timestamped,
# leveled messages. NOTE(review): module-level basicConfig affects the whole
# process; presumably intentional for this single-entry-point app.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def analyze_cv(file_content, weights_path='ocr_weights.json'):
    """Run the full CV analysis pipeline on an uploaded file.

    Pipeline stages: OCR text extraction, AI section detection,
    AI content-quality evaluation (parsed into ``ResumeQualityEvaluation``),
    spelling/grammar scoring, personal-information extraction, and
    AI completeness analysis.

    Args:
        file_content: Raw uploaded file content accepted by ``process_file``
            and the other extraction helpers.
        weights_path: Path to the OCR/scoring weights JSON file. Added as a
            parameter (default preserves the previously hard-coded value) so
            callers can supply alternative weight files.

    Returns:
        dict: Aggregated analysis results on success, or ``{"error": ...}``
        describing the first failure encountered (AI call returning ``None``,
        or any exception raised by a pipeline stage).
    """
    try:
        extracted_text = process_file(file_content, weights_path)
        logging.info("Text extracted successfully")

        # Section Detection
        sections_response = _ask_ai(get_section_detection_prompt(extracted_text))
        if sections_response is None:
            return {"error": "Failed to get AI response for sections"}

        sections_data = json.loads(sections_response)
        detected_sections = sections_data.get('present_sections', [])
        section_detection_score = calculate_section_detection_score(detected_sections)
        logging.info(f"Detected sections: {detected_sections}")
        logging.info(f"Section detection score: {section_detection_score}")

        # Content Quality Analysis — the AI reply is parsed into a
        # ResumeQualityEvaluation pydantic model, then scored.
        quality_response = _ask_ai(get_content_quality_prompt(extracted_text))
        if quality_response is None:
            return {"error": "Failed to get AI response for content quality"}

        parser = PydanticOutputParser(pydantic_object=ResumeQualityEvaluation)
        evaluation_result = parser.parse(quality_response)
        overall_score = calculate_overall_score(evaluation_result)

        # Spelling and Grammar Check — takes the raw file (the helper does
        # its own extraction), not the already-extracted text.
        spelling_grammar_score = evaluate_cv_text(file_content, weights_path)

        # Personal Information Analysis — helper returns a JSON string.
        personal_info = json.loads(analyze_personal_info(file_content))

        # Completeness Analysis — conditioned on the sections detected above.
        completeness_response = _ask_ai(
            get_completeness_prompt(extracted_text, detected_sections)
        )
        if completeness_response is None:
            return {"error": "Failed to get AI response for completeness analysis"}
        completeness_analysis = json.loads(completeness_response)

        logging.info("All analyses completed")
        logging.info(f"Overall score: {overall_score}")

        return {
            "extracted_text": extracted_text,
            "detected_sections": detected_sections,
            "section_detection_score": section_detection_score,
            "content_analysis": evaluation_result.dict(),
            "overall_score": overall_score,
            "spelling_grammar_score": spelling_grammar_score,
            "personal_info": personal_info,
            "completeness_analysis": completeness_analysis
        }
    except Exception as e:
        # Top-level boundary: log with traceback and return a uniform error
        # payload rather than propagating to the caller/UI.
        logging.error(f"Error in CV analysis: {str(e)}", exc_info=True)
        return {"error": str(e)}


def _ask_ai(prompt):
    """Send *prompt* as a single user message to the LLM.

    Returns the raw string reply, or None when the AI call failed
    (``get_ai_response`` signals failure by returning None).
    """
    return get_ai_response([{"role": "user", "content": prompt}])