feat: main processing pipeline (#4)
Browse files* feat: main processing pipeline
* refactor: Code restructuring
* Resolved CR
* Resolved CR
---------
Co-authored-by: yxxx <[email protected]>
- .env.example +3 -0
- .gitignore +3 -0
- ENV_SETUP.md +33 -0
- requirements.txt +18 -1
- src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml +4 -0
- src/configs/llm/openai-gpt-3.5-turbo.yaml +4 -0
- src/configs/llm/openai-gpt-4o-mini.yaml +4 -0
- src/configs/parser/llamaparse_en.yaml +7 -0
- src/domain/emotion_metrics.py +0 -15
- src/domain/enums/emotion_types.py +8 -6
- src/domain/resume.py +0 -13
- src/llm/base_llm_provider.py +16 -0
- src/llm/enums.py +3 -0
- src/llm/llm.py +32 -0
- src/llm/nvidia_llm.py +29 -0
- src/llm/openai_llm.py +29 -0
- src/main_test.py +97 -0
- src/sample_inputs.py +98 -0
- src/service/emotion_recognition.py +136 -0
- src/service/interview_analyzer.py +0 -32
- src/service/resume_parser.py +42 -0
- src/service/utils.py +103 -0
- src/template/grading_prompt.py +111 -0
- src/template/parser_prompt.py +21 -0
.env.example
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
LLAMA_CLOUD_API_KEY=''
|
2 |
+
OPENAI_API_KEY=''
|
3 |
+
NVIDIA_API_KEY=''
|
.gitignore
CHANGED
@@ -160,3 +160,6 @@ cython_debug/
|
|
160 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
161 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
162 |
#.idea/
|
|
|
|
|
|
|
|
160 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
161 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
162 |
#.idea/
|
163 |
+
|
164 |
+
# Mac cache file
|
165 |
+
.DS_Store
|
ENV_SETUP.md
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Setting Up the Environment
|
2 |
+
|
3 |
+
<code>conda create -n automated_interview_filtering python=3.10.14</code>
|
4 |
+
|
5 |
+
<code>pip install -r requirements.txt</code>
|
6 |
+
|
7 |
+
<code>brew install ffmpeg</code>
|
8 |
+
|
9 |
+
<br>
|
10 |
+
|
11 |
+
# Creating a .env file
|
12 |
+
|
13 |
+
Create a <code>.env</code> file at the same directory level as this <code>ENV_SETUP.md</code> file, with the required fields listed in <code>.env.example</code>. You may visit the following to create free trial accounts and obtain your API keys:
|
14 |
+
|
15 |
+
- LlamaParse: <a href='https://cloud.llamaindex.ai/login'>https://cloud.llamaindex.ai/login</a>
|
16 |
+
- OpenAI: <a href='https://platform.openai.com/playground'>https://platform.openai.com/playground</a>
|
17 |
+
- Nvidia NIMs: <a href='https://build.nvidia.com/nvidia'>https://build.nvidia.com/nvidia</a>
|
18 |
+
|
19 |
+
<br>
|
20 |
+
|
21 |
+
# Running the Sample Code
|
22 |
+
|
23 |
+
<code>conda activate automated_interview_filtering</code>
|
24 |
+
|
25 |
+
<code>python -m src.main_test</code>
|
26 |
+
|
27 |
+
You can choose to either use NVIDIA-NIMs or OpenAI as the LLM Provider. This can be changed by selecting the YAML config files in <code>src/main_test.py</code>
|
28 |
+
|
29 |
+
<br>
|
30 |
+
|
31 |
+
# NOTE
|
32 |
+
|
33 |
+
<code>src/main_test.py</code> is a sample usage of the backend code. Please refer to <code>src/sample_inputs.py</code> for an example of the required input fields.
|
requirements.txt
CHANGED
@@ -1,5 +1,22 @@
|
|
|
|
|
|
1 |
gradio>=4.0.0
|
2 |
numpy
|
3 |
pandas
|
4 |
python-dotenv
|
5 |
-
black==24.10.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# running on python 3.10.14
|
2 |
+
|
3 |
gradio>=4.0.0
|
4 |
numpy
|
5 |
pandas
|
6 |
python-dotenv
|
7 |
+
black==24.10.0
|
8 |
+
|
9 |
+
PyYAML==6.0.2
|
10 |
+
tf-keras==2.18.0
|
11 |
+
deepface==0.0.93
|
12 |
+
python-docx==1.1.2
|
13 |
+
llama-index==0.12.2
|
14 |
+
opencv-python==4.10.0.84
|
15 |
+
SpeechRecognition==3.11.0
|
16 |
+
|
17 |
+
moviepy==2.1.1
|
18 |
+
# brew install ffmpeg
|
19 |
+
|
20 |
+
llama-index-llms-openai==0.3.2
|
21 |
+
llama-index-llms-nvidia==0.3.0
|
22 |
+
llama-index-llms-openai-like==0.3.0
|
src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PROVIDER: nvidia
|
2 |
+
BASE_URL: https://integrate.api.nvidia.com/v1
|
3 |
+
MODEL: nvidia/llama-3.1-nemotron-70b-instruct
|
4 |
+
TEMPERATURE: 0
|
src/configs/llm/openai-gpt-3.5-turbo.yaml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PROVIDER: openai
|
2 |
+
BASE_URL: default
|
3 |
+
MODEL: gpt-3.5-turbo
|
4 |
+
TEMPERATURE: 0
|
src/configs/llm/openai-gpt-4o-mini.yaml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PROVIDER: openai
|
2 |
+
BASE_URL: default
|
3 |
+
MODEL: gpt-4o-mini
|
4 |
+
TEMPERATURE: 0
|
src/configs/parser/llamaparse_en.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
LANGUAGE: en
|
2 |
+
DISABLE_OCR: false
|
3 |
+
PAGE_ROC_BBOX:
|
4 |
+
TOP: 0
|
5 |
+
RIGHT: 0
|
6 |
+
BOTTOM: 0
|
7 |
+
LEFT: 0
|
src/domain/emotion_metrics.py
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
from dataclasses import dataclass
|
2 |
-
from typing import List, Dict
|
3 |
-
|
4 |
-
|
5 |
-
@dataclass
|
6 |
-
class EmotionMetrics:
|
7 |
-
confidence_score: float
|
8 |
-
engagement_level: float
|
9 |
-
emotional_stability: float
|
10 |
-
stress_indicators: List[str]
|
11 |
-
dominant_emotions: Dict[str, float]
|
12 |
-
|
13 |
-
def calculate_overall_score(self) -> float:
|
14 |
-
# Implementation for calculating overall emotional score
|
15 |
-
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/domain/enums/emotion_types.py
CHANGED
@@ -2,18 +2,20 @@ from enum import Enum
|
|
2 |
|
3 |
|
4 |
class EmotionType(Enum):
|
5 |
-
|
6 |
SAD = "sad"
|
|
|
7 |
ANGRY = "angry"
|
|
|
|
|
|
|
8 |
NEUTRAL = "neutral"
|
9 |
-
|
10 |
-
FEARFUL = "fearful"
|
11 |
-
DISGUSTED = "disgusted"
|
12 |
|
13 |
@classmethod
|
14 |
def get_positive_emotions(cls):
|
15 |
-
return [cls.HAPPY, cls.NEUTRAL]
|
16 |
|
17 |
@classmethod
|
18 |
def get_negative_emotions(cls):
|
19 |
-
return [cls.SAD, cls.
|
|
|
2 |
|
3 |
|
4 |
class EmotionType(Enum):
    """Closed set of emotion labels, grouped into negative and positive ones.

    Each member's value is the lowercase label string for that emotion.
    """

    # negative emotions
    SAD = "sad"
    FEAR = "fear"
    ANGRY = "angry"
    DISGUST = "disgust"

    # positive / neutral emotions
    HAPPY = "happy"
    NEUTRAL = "neutral"
    SURPRISE = "surprise"

    @classmethod
    def get_positive_emotions(cls):
        """Return the members treated as positive: HAPPY, NEUTRAL, SURPRISE."""
        positive = [cls.HAPPY, cls.NEUTRAL, cls.SURPRISE]
        return positive

    @classmethod
    def get_negative_emotions(cls):
        """Return the members treated as negative: SAD, FEAR, ANGRY, DISGUST."""
        negative = [cls.SAD, cls.FEAR, cls.ANGRY, cls.DISGUST]
        return negative
|
src/domain/resume.py
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
from dataclasses import dataclass
|
2 |
-
from typing import List, Dict
|
3 |
-
|
4 |
-
|
5 |
-
@dataclass
|
6 |
-
class Resume:
|
7 |
-
id: str
|
8 |
-
candidate_id: str
|
9 |
-
file_path: str
|
10 |
-
parsed_content: Dict
|
11 |
-
skills: List[str]
|
12 |
-
experience: List[Dict]
|
13 |
-
education: List[Dict]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/llm/base_llm_provider.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Base class for LLM providers"""
|
2 |
+
|
3 |
+
from abc import abstractmethod
|
4 |
+
from typing import Dict, Optional
|
5 |
+
|
6 |
+
|
7 |
+
class BaseLLMProvider:
    """Interface that every concrete LLM provider wrapper must implement.

    NOTE(review): this class does not derive from ``abc.ABC``, so the
    ``@abstractmethod`` markers below are not enforced at instantiation
    time; the explicit ``NotImplementedError`` raises are what prevent the
    base class from being used directly.
    """

    @abstractmethod
    def __init__(self):
        """Set up the provider client; concrete providers must override this."""
        raise NotImplementedError

    @abstractmethod
    def complete(self, prompt: str = "") -> str:
        """Return the completion text for ``prompt``; providers must override this."""
        raise NotImplementedError
|
src/llm/enums.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
OPENAI_LLM = "openai"
|
2 |
+
NVIDIA_LLM = "nvidia"
|
3 |
+
DEFAULT_LLM_API_BASE = "default"
|
src/llm/llm.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import yaml
|
2 |
+
|
3 |
+
from src.llm.enums import OPENAI_LLM, NVIDIA_LLM
|
4 |
+
from src.llm.base_llm_provider import BaseLLMProvider
|
5 |
+
from src.llm.openai_llm import OpenAILLM
|
6 |
+
from src.llm.nvidia_llm import NvidiaLLM
|
7 |
+
|
8 |
+
|
9 |
+
def get_llm(config_file_path: str = "config.yaml") -> BaseLLMProvider:
    """
    Initiates an LLM client from a YAML config file.

    The config file must define the keys PROVIDER, MODEL, TEMPERATURE and
    BASE_URL.

    Args:
        config_file_path: path to the YAML config file.

    Returns:
        An ``OpenAILLM`` or ``NvidiaLLM`` instance, depending on PROVIDER.

    Raises:
        ValueError: if PROVIDER is not a supported provider name.
    """

    # load config
    with open(config_file_path, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # pick the provider class
    provider = config["PROVIDER"]
    if provider == OPENAI_LLM:
        llm_cls = OpenAILLM
    elif provider == NVIDIA_LLM:
        llm_cls = NvidiaLLM
    else:
        # report the offending provider (not the model) so the config error
        # is actionable
        raise ValueError(
            f"Unsupported LLM provider {provider!r}; "
            f"expected {OPENAI_LLM!r} or {NVIDIA_LLM!r}"
        )

    # init & return llm
    return llm_cls(
        model=config["MODEL"],
        temperature=config["TEMPERATURE"],
        base_url=config["BASE_URL"],
    )
|
src/llm/nvidia_llm.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""NVIDIA LLM Implementation"""
|
2 |
+
|
3 |
+
from llama_index.llms.nvidia import NVIDIA
|
4 |
+
|
5 |
+
from src.llm.base_llm_provider import BaseLLMProvider
|
6 |
+
from src.llm.enums import DEFAULT_LLM_API_BASE
|
7 |
+
|
8 |
+
|
9 |
+
class NvidiaLLM(BaseLLMProvider):
    """LLM provider that wraps the llama-index ``NVIDIA`` client."""

    def __init__(
        self,
        model: str = "nvidia/llama-3.1-nemotron-70b-instruct",
        temperature: float = 0.0,
        base_url: str = "https://integrate.api.nvidia.com/v1",
    ):
        """Initiate NVIDIA client.

        When ``base_url`` equals ``DEFAULT_LLM_API_BASE`` no ``base_url`` is
        forwarded, so the client library chooses its own endpoint.
        """
        client_kwargs = {"model": model, "temperature": temperature}
        if base_url != DEFAULT_LLM_API_BASE:
            client_kwargs["base_url"] = base_url

        self._client = NVIDIA(**client_kwargs)

    def complete(self, prompt: str = "") -> str:
        """Run a completion for ``prompt`` and return the response as a string."""
        response = self._client.complete(prompt)
        return str(response)
|
src/llm/openai_llm.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""OpenAI LLM Implementation"""
|
2 |
+
|
3 |
+
from llama_index.llms.openai import OpenAI
|
4 |
+
|
5 |
+
from src.llm.base_llm_provider import BaseLLMProvider
|
6 |
+
from src.llm.enums import DEFAULT_LLM_API_BASE
|
7 |
+
|
8 |
+
|
9 |
+
class OpenAILLM(BaseLLMProvider):
    """LLM provider that wraps the llama-index ``OpenAI`` client."""

    def __init__(
        self,
        model: str = "gpt-4o-mini",
        temperature: float = 0.0,
        base_url: str = DEFAULT_LLM_API_BASE,
    ):
        """Initiate OpenAI client.

        When ``base_url`` equals ``DEFAULT_LLM_API_BASE`` no ``base_url`` is
        forwarded, so the client library chooses its own endpoint.
        """
        client_kwargs = {"model": model, "temperature": temperature}
        if base_url != DEFAULT_LLM_API_BASE:
            client_kwargs["base_url"] = base_url

        self._client = OpenAI(**client_kwargs)

    def complete(self, prompt: str = "") -> str:
        """Run a completion for ``prompt`` and return the response as a string."""
        response = self._client.complete(prompt)
        return str(response)
|
src/main_test.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
from docx import Document
|
3 |
+
|
4 |
+
from src.llm.llm import get_llm
|
5 |
+
from src.service.resume_parser import ResumeParser
|
6 |
+
from src.service.emotion_recognition import EmotionRecognition
|
7 |
+
from src.service.utils import (
|
8 |
+
extract_audio,
|
9 |
+
audio2text,
|
10 |
+
sample_frames,
|
11 |
+
parse_yaml_string,
|
12 |
+
)
|
13 |
+
from src.template.grading_prompt import (
|
14 |
+
GRADE_RESPONSE_PROMPT,
|
15 |
+
RANKING_AND_FEEDBACK_PROMPT,
|
16 |
+
)
|
17 |
+
|
18 |
+
# sample input values
|
19 |
+
from src.sample_inputs import (
|
20 |
+
VIDEO_PATH,
|
21 |
+
RESUME_PATH,
|
22 |
+
INTERVIEW_QUESTION,
|
23 |
+
JOB_REQUIREMENTS,
|
24 |
+
)
|
25 |
+
|
26 |
+
|
27 |
+
# customise this part
|
28 |
+
LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-3.5-turbo.yaml"
|
29 |
+
# LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-4o-mini.yaml"
|
30 |
+
# LLM_CONFIG_FILE = "./src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml"
|
31 |
+
|
32 |
+
RESUME_PARSER_CONFIG_FILE = "./src/configs/parser/llamaparse_en.yaml"
|
33 |
+
OUTPUT_AUDIO_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/audio_output.wav" # only supports .wav
|
34 |
+
OUTPUT_REPORT_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/report.docx"
|
35 |
+
|
36 |
+
# init API keys as env variables
|
37 |
+
load_dotenv()
|
38 |
+
|
39 |
+
# init LLM & resume parser
|
40 |
+
llm = get_llm(LLM_CONFIG_FILE)
|
41 |
+
parser = ResumeParser(RESUME_PARSER_CONFIG_FILE)
|
42 |
+
|
43 |
+
|
44 |
+
# 1. extract audio from video
|
45 |
+
OUTPUT_AUDIO_FILE = extract_audio(VIDEO_PATH, OUTPUT_AUDIO_FILE)
|
46 |
+
assert OUTPUT_AUDIO_FILE is not None, f"Audio extraction failed."
|
47 |
+
|
48 |
+
# 2. audio to text
|
49 |
+
audio_text = audio2text(OUTPUT_AUDIO_FILE)
|
50 |
+
print(audio_text)
|
51 |
+
|
52 |
+
# 3. extract frames from video
|
53 |
+
frames = sample_frames(VIDEO_PATH, sample_rate=8)
|
54 |
+
print(frames)
|
55 |
+
|
56 |
+
# 4. deepface extract emotions & compute confidence scores
|
57 |
+
emotions = EmotionRecognition.detect_face_emotions(frames)
|
58 |
+
emotions_dict = EmotionRecognition.process_emotions(emotions)
|
59 |
+
conf_score = emotions_dict["conf"]
|
60 |
+
print(emotions_dict)
|
61 |
+
|
62 |
+
# 5. llamaparse parse resume into MD
|
63 |
+
resume_md = parser.parse_resume_to_markdown(RESUME_PATH)
|
64 |
+
print(resume_md)
|
65 |
+
|
66 |
+
# 6. llm grade question response
|
67 |
+
formatted_grading_prompt = GRADE_RESPONSE_PROMPT.format(
|
68 |
+
interview_question=INTERVIEW_QUESTION,
|
69 |
+
conf_score=conf_score,
|
70 |
+
response_text=audio_text,
|
71 |
+
)
|
72 |
+
grade = llm.complete(formatted_grading_prompt)
|
73 |
+
print(grade)
|
74 |
+
|
75 |
+
# 7. llm rank and output final feedback
|
76 |
+
formatted_ranking_prompt = RANKING_AND_FEEDBACK_PROMPT.format(
|
77 |
+
job_requirements=JOB_REQUIREMENTS, interview_feedback=grade, resume_text=resume_md
|
78 |
+
)
|
79 |
+
rank_and_feedback = llm.complete(formatted_ranking_prompt)
|
80 |
+
print(rank_and_feedback)
|
81 |
+
|
82 |
+
|
83 |
+
# 8. save to .docx report
|
84 |
+
expected_keys = ["name", "score", "feedback"]
|
85 |
+
rank_and_feedback_dict = parse_yaml_string(
|
86 |
+
yaml_string=rank_and_feedback, expected_keys=expected_keys, cleanup=True
|
87 |
+
)
|
88 |
+
print(rank_and_feedback_dict)
|
89 |
+
|
90 |
+
doc = Document()
|
91 |
+
doc.add_heading(f"{rank_and_feedback_dict['name']}", 0)
|
92 |
+
doc.add_heading(f"Overall Score: {rank_and_feedback_dict['score']}", 1)
|
93 |
+
doc.add_heading(f"Brief Overview", 1)
|
94 |
+
doc.add_paragraph(f"{rank_and_feedback_dict['feedback']}")
|
95 |
+
|
96 |
+
# Save the document
|
97 |
+
doc.save(OUTPUT_REPORT_FILE)
|
src/sample_inputs.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
RESUME_PATH = "/Users/gohyixian/Downloads/test_cases/CV_2024_24_JUN.pdf"
|
2 |
+
|
3 |
+
VIDEO_PATH = "/Users/gohyixian/Downloads/test_cases/test.mp4"
|
4 |
+
|
5 |
+
INTERVIEW_QUESTION = """
|
6 |
+
Can you describe a project where you fine-tuned a transformer-based model (e.g., BERT, GPT, or T5) for a specific application?
|
7 |
+
Walk us through your approach to dataset preparation, model optimization, and deployment.
|
8 |
+
How did you handle challenges like ensuring the model's performance, scalability, and fairness?
|
9 |
+
"""
|
10 |
+
|
11 |
+
JOB_REQUIREMENTS = """
|
12 |
+
Job Title: LLM Engineer
|
13 |
+
|
14 |
+
Job Description:
|
15 |
+
################
|
16 |
+
- We are seeking a skilled and innovative LLM Engineer to join our AI team. The ideal candidate will
|
17 |
+
have hands-on experience in developing, fine-tuning, and deploying large language models (LLMs) for
|
18 |
+
various applications. You will collaborate with cross-functional teams to deliver cutting-edge AI
|
19 |
+
solutions, leveraging your expertise in natural language processing (NLP), deep learning, and
|
20 |
+
large-scale systems.
|
21 |
+
|
22 |
+
|
23 |
+
Key Responsibilities
|
24 |
+
####################
|
25 |
+
1. Model Development:
|
26 |
+
- Design and fine-tune large language models (e.g., GPT, LLaMA, or similar) for tasks like text generation,
|
27 |
+
summarization, question answering, and classification.
|
28 |
+
- Implement advanced techniques for model optimization, including pruning, quantization, and distillation.
|
29 |
+
|
30 |
+
2. Data Management:
|
31 |
+
- Curate, preprocess, and manage large datasets for training and evaluation.
|
32 |
+
- Ensure data quality by cleaning, augmenting, and annotating datasets.
|
33 |
+
|
34 |
+
3. Infrastructure & Deployment:
|
35 |
+
- Build scalable pipelines for training and deploying LLMs using frameworks like PyTorch, TensorFlow, or JAX.
|
36 |
+
- Optimize inference speed and memory usage for production-grade applications.
|
37 |
+
|
38 |
+
4. Model Evaluation:
|
39 |
+
- Develop benchmarks to evaluate model performance, fairness, and safety.
|
40 |
+
- Implement guardrails to mitigate bias and ensure ethical use of AI systems.
|
41 |
+
|
42 |
+
5. Collaboration:
|
43 |
+
- Work closely with product managers, data scientists, and software engineers to align model capabilities with business requirements.
|
44 |
+
- Provide mentorship to junior team members and contribute to knowledge sharing within the team.
|
45 |
+
|
46 |
+
6. Research & Innovation:
|
47 |
+
- Stay updated on the latest research in NLP and deep learning.
|
48 |
+
- Contribute to academic papers, patents, or open-source projects where appropriate.
|
49 |
+
|
50 |
+
|
51 |
+
Requirements
|
52 |
+
############
|
53 |
+
1. Technical Skills:
|
54 |
+
- Strong programming skills in Python.
|
55 |
+
- Proficiency with deep learning frameworks (e.g., PyTorch, TensorFlow, JAX).
|
56 |
+
- Experience in training and fine-tuning transformer-based models (e.g., BERT, GPT, T5).
|
57 |
+
- Familiarity with distributed training techniques and tools like Horovod or DeepSpeed.
|
58 |
+
- Knowledge of vector databases and retrieval-augmented generation (RAG) techniques.
|
59 |
+
- Hands-on experience with MLOps tools (e.g., MLflow, Docker, Kubernetes) for deployment.
|
60 |
+
- Expertise in working with APIs for integrating LLMs into production systems.
|
61 |
+
|
62 |
+
2. Educational Background:
|
63 |
+
- Bachelor’s or Master’s degree in Computer Science, Artificial Intelligence, Data Science, or a related field. Ph.D. preferred but not required.
|
64 |
+
|
65 |
+
3. Experience:
|
66 |
+
- 3+ years of experience in NLP, machine learning, or a related field.
|
67 |
+
- Demonstrated success in building and deploying LLM-powered applications.
|
68 |
+
- Contributions to open-source projects or research publications in NLP are a plus.
|
69 |
+
|
70 |
+
4. Soft Skills:
|
71 |
+
- Strong problem-solving abilities and attention to detail.
|
72 |
+
- Excellent communication and collaboration skills to work with cross-functional teams.
|
73 |
+
- Adaptable, with a passion for continuous learning and innovation.
|
74 |
+
- A proactive and goal-oriented mindset.
|
75 |
+
|
76 |
+
5. Target Personalities:
|
77 |
+
- Innovative Thinker: Always exploring new ways to improve model performance and usability.
|
78 |
+
- Team Player: Collaborates effectively across diverse teams to deliver AI solutions.
|
79 |
+
- Ethically Minded: Committed to ensuring the ethical and fair use of AI technologies.
|
80 |
+
- Detail-Oriented: Meticulous in coding, data handling, and model evaluation.
|
81 |
+
- Resilient Learner: Thrives in a fast-paced environment, keeping up with advancements in AI research.
|
82 |
+
|
83 |
+
|
84 |
+
Preferred Qualifications:
|
85 |
+
#########################
|
86 |
+
- Experience with foundation model APIs (e.g., OpenAI, Hugging Face).
|
87 |
+
- Knowledge of reinforcement learning techniques, particularly RLHF (Reinforcement Learning with Human Feedback).
|
88 |
+
- Familiarity with multi-modal LLMs and their integration.
|
89 |
+
- Experience working in cloud environments like AWS, Azure, or GCP.
|
90 |
+
- Contributions to community forums, blogs, or conferences related to LLMs or NLP.
|
91 |
+
|
92 |
+
What We Offer
|
93 |
+
#############
|
94 |
+
- Competitive salary and benefits package.
|
95 |
+
- Opportunities to work on groundbreaking AI projects.
|
96 |
+
- Flexible work environment, including remote options.
|
97 |
+
- Access to cutting-edge resources and infrastructure for AI development.
|
98 |
+
"""
|
src/service/emotion_recognition.py
ADDED
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from deepface import DeepFace
|
3 |
+
|
4 |
+
from src.domain.enums.emotion_types import EmotionType
|
5 |
+
|
6 |
+
|
7 |
+
class EmotionRecognition:
    """Facial emotion detection (via DeepFace) and confidence scoring."""

    def __init__(self):
        pass

    @classmethod
    def detect_face_emotions(cls, frames: list[np.ndarray] = None) -> list:
        """
        Performs facial emotion detection using the DeepFace model.

        Args:
            frames: images to analyse; ``None`` is treated as "no frames".

        Returns:
            One ``DeepFace.analyze`` result per input frame, in input order.
        """
        if frames is None:
            return []

        return [
            DeepFace.analyze(frame, actions=["emotion"], enforce_detection=False)
            for frame in frames
        ]

    @classmethod
    def process_emotions(cls, emotions: list) -> dict:
        """
        Processes the emotions by calculating the overall confidence score using a
        custom weighted emotion balancing algorithm.

        Only the first entry of each non-empty frame result is used.

        Args:
            emotions: per-frame results as returned by ``detect_face_emotions``.

        Returns:
            A dict with the keys:
            - "mean": mean of the weighted, normalized scores
            - "result": mean of the signed, weighted, normalized scores
            - "conf": confidence score in the range [0, 100]; 50 when the
              scores carry no signal (e.g. no frames were supplied)
        """

        labels = [
            str(EmotionType.SAD.value),
            str(EmotionType.FEAR.value),
            str(EmotionType.ANGRY.value),
            str(EmotionType.DISGUST.value),
            str(EmotionType.HAPPY.value),
            str(EmotionType.NEUTRAL.value),
            str(EmotionType.SURPRISE.value),
        ]

        # accumulate the per-emotion scores over all usable frames
        count = 0
        emots = {label: 0 for label in labels}
        for frame_result in emotions:
            if len(frame_result) > 0:
                emot = frame_result[0]["emotion"]
                for label in labels:
                    emots[label] += emot[label]
                count += 1

        # prevent zero division
        if count == 0:
            count = 1

        # average per frame and rescale by 100
        # (presumably the raw scores are percentages - TODO confirm)
        for label in labels:
            emots[label] /= count * 100

        # refactor according to custom weightage
        sad_score = emots[str(EmotionType.SAD.value)] * 1.3
        fear_score = emots[str(EmotionType.FEAR.value)] * 1.3
        angry_score = emots[str(EmotionType.ANGRY.value)] * 1.3
        disgust_score = emots[str(EmotionType.DISGUST.value)] * 10
        happy_score = emots[str(EmotionType.HAPPY.value)] * 1.7
        neutral_score = emots[str(EmotionType.NEUTRAL.value)] / 1.2
        surprise_score = emots[str(EmotionType.SURPRISE.value)] * 1.4

        score_list = [
            sad_score,
            angry_score,
            surprise_score,
            fear_score,
            happy_score,
            disgust_score,
            neutral_score,
        ]
        normalized_scores = cls.__normalize_scores(score_list)
        mean = np.mean(normalized_scores)

        # same scores, with the negative emotions sign-flipped
        result_scores = [
            (-sad_score),
            (-angry_score),
            surprise_score,
            (-fear_score),
            happy_score,
            (-disgust_score),
            neutral_score,
        ]
        normalized_result_scores = cls.__normalize_scores(result_scores)
        result = np.mean(normalized_result_scores)

        # mean is 0 only when every weighted score is identical (e.g. all
        # zero); there is then no signal, so report no deviation
        if mean == 0:
            difference = 0.0
        else:
            difference = abs((mean - result) / mean) * 100

        # keep values in range of [0, 100]
        difference = min(difference, 50)

        if mean > result:
            conf = 50 - difference
        else:
            conf = 50 + difference

        return {"mean": mean, "result": result, "conf": conf}

    @classmethod
    def __normalize_scores(cls, scores: list) -> list:
        """
        Min-max scales ``scores`` into the range [0, 1].

        An empty list yields an empty list; when all scores are equal the
        range is zero, so every score maps to 0.0 instead of dividing by zero.
        """
        if not scores:
            return []

        min_val, max_val = min(scores), max(scores)
        span = max_val - min_val
        if span == 0:
            return [0.0 for _ in scores]

        return [(score - min_val) / span for score in scores]
|
src/service/interview_analyzer.py
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
# from src.domain.enums.file_types import VideoFileType
|
2 |
-
# from src.domain.enums.emotion_types import EmotionType
|
3 |
-
# from src.domain.entities.interview import Interview
|
4 |
-
# from typing import Dict, List
|
5 |
-
#
|
6 |
-
#
|
7 |
-
# class InterviewAnalyzer:
|
8 |
-
# def validate_video(self, video_path: str) -> bool:
|
9 |
-
# file_extension = video_path[video_path.rfind(".") :]
|
10 |
-
# return VideoFileType.validate_format(file_extension)
|
11 |
-
#
|
12 |
-
# def analyze_emotions(
|
13 |
-
# self, emotion_data: Dict[str, float]
|
14 |
-
# ) -> Dict[EmotionType, float]:
|
15 |
-
# analyzed_emotions = {}
|
16 |
-
# for emotion_name, score in emotion_data.items():
|
17 |
-
# try:
|
18 |
-
# emotion_type = EmotionType(emotion_name.lower())
|
19 |
-
# analyzed_emotions[emotion_type] = score
|
20 |
-
# except ValueError:
|
21 |
-
# continue
|
22 |
-
# return analyzed_emotions
|
23 |
-
#
|
24 |
-
# def get_dominant_emotion(
|
25 |
-
# self, emotion_scores: Dict[EmotionType, float]
|
26 |
-
# ) -> EmotionType:
|
27 |
-
# return max(emotion_scores.items(), key=lambda x: x[1])[0]
|
28 |
-
#
|
29 |
-
# def is_positive_response(self, emotion_scores: Dict[EmotionType, float]) -> bool:
|
30 |
-
# positive_emotions = EmotionType.get_positive_emotions()
|
31 |
-
# dominant_emotion = self.get_dominant_emotion(emotion_scores)
|
32 |
-
# return dominant_emotion in positive_emotions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/service/resume_parser.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import yaml
|
2 |
+
from llama_parse import LlamaParse
|
3 |
+
from llama_index.core import SimpleDirectoryReader
|
4 |
+
|
5 |
+
from src.template.parser_prompt import PARSE_RESUME_PROMPT
|
6 |
+
|
7 |
+
|
8 |
+
class ResumeParser:
    """Parses resume files into markdown text using LlamaParse."""

    def __init__(self, config_file_path: str = "config.yaml"):
        """
        Initiates a resume parser client from a YAML config file.

        The config must define LANGUAGE, DISABLE_OCR and PAGE_ROC_BBOX
        (a mapping with TOP, RIGHT, BOTTOM and LEFT entries).
        """

        # load config
        with open(config_file_path, "r") as f:
            config = yaml.safe_load(f)

        # set bbox size: a "top,right,bottom,left" comma-separated string
        margins = config["PAGE_ROC_BBOX"]
        bbox = ",".join(
            str(margins[side]) for side in ("TOP", "RIGHT", "BOTTOM", "LEFT")
        )

        self._parser = LlamaParse(
            language=config["LANGUAGE"],
            disable_ocr=config["DISABLE_OCR"],
            bounding_box=bbox,
            result_type="markdown",
            parsing_instruction=PARSE_RESUME_PROMPT,
            is_formatting_instruction=False,
        )

    def parse_resume_to_markdown(self, resume_path: str = "") -> str:
        """
        Parses the resume into markdown text.

        The text of every loaded document is joined with newlines.

        Supported filetypes:
        - .pdf
        """
        reader = SimpleDirectoryReader(
            input_files=[resume_path], file_extractor={".pdf": self._parser}
        )
        documents = reader.load_data()

        return "\n".join(str(doc.text) for doc in documents)
|
src/service/utils.py
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import yaml
|
3 |
+
import numpy as np
|
4 |
+
from pathlib import Path
|
5 |
+
import speech_recognition as sr
|
6 |
+
from moviepy import VideoFileClip
|
7 |
+
|
8 |
+
|
9 |
+
def extract_audio(
|
10 |
+
input_video_file: str = "",
|
11 |
+
output_audio_file: str = "",
|
12 |
+
) -> str:
|
13 |
+
"""
|
14 |
+
Extracts audio from input video file, and save it to the respective path.
|
15 |
+
Returns the path to the saved audio file if extraction is successful.
|
16 |
+
Supported input video file formats are:
|
17 |
+
- .mp4
|
18 |
+
- .mov
|
19 |
+
|
20 |
+
Supported output audio file formats are:
|
21 |
+
- .wav
|
22 |
+
"""
|
23 |
+
try:
|
24 |
+
input_video_file = str(Path(input_video_file))
|
25 |
+
output_audio_file = str(Path(output_audio_file))
|
26 |
+
|
27 |
+
# Load the video file
|
28 |
+
video = VideoFileClip(input_video_file)
|
29 |
+
|
30 |
+
# Extract audio and write to output file
|
31 |
+
video.audio.write_audiofile(output_audio_file)
|
32 |
+
|
33 |
+
print(f"[extract_audio()] : Audio extracted and saved to {output_audio_file}")
|
34 |
+
|
35 |
+
return output_audio_file
|
36 |
+
except Exception as e:
|
37 |
+
print(e)
|
38 |
+
return None
|
39 |
+
|
40 |
+
|
41 |
+
def audio2text(audio_file: str = "") -> str:
    """
    Converts audio to text (speech-to-text) and returns the transcript.

    The whole file is read into memory and transcribed with
    ``Recognizer.recognize_google``.

    NOTE(review): ``recognize_google`` sends the audio to Google's Web Speech
    API rather than running locally, so this presumably needs network
    access - confirm against the SpeechRecognition documentation.

    Args:
        audio_file: path to the audio file to transcribe.

    Returns:
        The recognized text.
    """
    r = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = r.record(source)
    text = r.recognize_google(audio)
    return text
|
51 |
+
|
52 |
+
|
53 |
+
def sample_frames(input_video_file: str = "", sample_rate: int = 2) -> list[np.ndarray]:
    """
    Samples one frame every 'sample_rate' frames from the video file and returns
    them in the form of a list of Numpy ndarray objects.

    Args:
        input_video_file: Path to the video to read.
        sample_rate: Sampling interval in frames; must be at least 1.
            The first frame read is always kept.

    Returns:
        The sampled frames, converted from BGR to RGB, in reading order.
        The list is empty when the video cannot be opened or yields no frames.

    Raises:
        ValueError: If 'sample_rate' is smaller than 1.
    """
    # Reject an unusable interval up front; a value of 0 would otherwise
    # surface as a ZeroDivisionError in the middle of reading the video.
    if sample_rate < 1:
        raise ValueError(
            f"[sample_frames()] : sample_rate must be >= 1, got {sample_rate}"
        )

    cap = cv2.VideoCapture(input_video_file)
    frames = []
    try:
        count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames (or the read failed): stop sampling
                break
            if count % sample_rate == 0:
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            count += 1
    finally:
        # Release the capture handle even if a frame conversion fails
        cap.release()

    return frames
|
72 |
+
|
73 |
+
|
74 |
+
def parse_yaml_string(
    yaml_string: str = "", expected_keys: list[str] = None, cleanup: bool = True
) -> dict | None:
    """
    Parses a YAML string into a Python dictionary based on a list of
    expected keys.

    Args:
        yaml_string: YAML text, optionally wrapped in a Markdown code fence
            (three backticks, with or without a yaml/yml language tag).
        expected_keys: Keys to copy into the result. Keys missing from the
            parsed document are reported and skipped. When None, every
            top-level key of the parsed document is returned.
        cleanup: When True, code-fence markers and stray backticks are
            removed before parsing.

    Returns:
        A dictionary holding the requested keys, or None when the text is
        not valid YAML or does not parse to a mapping.
    """
    import re  # local import keeps this block self-contained

    # removes ```YAML ``` heading and footers if present
    if cleanup:
        # Only the fence and its language tag are dropped; the word "yaml"
        # inside the document itself is left untouched.
        yaml_string = re.sub(
            r"```[ \t]*(?:ya?ml\b)?", "", yaml_string, flags=re.IGNORECASE
        )
        yaml_string = yaml_string.replace("`", "")

    try:
        parsed_data = yaml.safe_load(yaml_string)
    except yaml.YAMLError as e:
        print(e)
        return None

    # Empty input parses to None and bare scalars/lists are not mappings;
    # neither can be filtered by key.
    if not isinstance(parsed_data, dict):
        print(
            "[parse_yaml_string()] : Expected a YAML mapping, "
            f"got {type(parsed_data).__name__}"
        )
        return None

    if expected_keys is None:
        return dict(parsed_data)

    # Copy the expected keys, reporting the ones that are missing
    result = {}
    for key in expected_keys:
        if key in parsed_data:
            result[key] = parsed_data[key]
        else:
            print(f"[parse_yaml_string()] : Missing key {key}")

    return result
|
src/template/grading_prompt.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from llama_index.core.prompts import PromptTemplate
|
2 |
+
|
3 |
+
# Prompt asking the model to review one interview answer. It is filled with
# three placeholders: {interview_question}, {conf_score} and {response_text}.
_GRADE_RESPONSE_TEMPLATE = """
You are a Human Resource Manager and an interviewer.
Your task is to review an interviewee's overall performance based on multiple factors.
You will be provided with the interview question, the interviewee's facial confidence score, their response to the question in text form, and additional context on the interview.

The confidence score will range from 0 to 100, and you will also receive the text of their answers to the interview question.
Based on this information, evaluate the interviewee’s performance in the following areas:

1. **Answer Quality**:
Assess the clarity, relevance, and accuracy of their response to the interview question.
Did the interviewee address the key points effectively?

2. **Problem-Solving Skills**:
Evaluate how well the interviewee tackled any problem presented in the interview question.
Were they able to think critically, analyze the situation, and propose solutions?

3. **Confidence**:
Based on their facial confidence score (0 to 100) and their overall demeanor in the response, rate their confidence level and how it impacts their presentation and communication.

4. **Personality**:
Consider the tone, communication style, and interpersonal skills of the interviewee.
How well did they engage with the question and the interview process?
Do they demonstrate qualities like openness, empathy, or assertiveness?

5. **Overall Performance**:
Based on the combination of the above factors, provide a holistic evaluation of their performance in the interview.
Offer feedback on strengths and areas for improvement.

Ensure that your feedback is clear and actionable, so other HR professionals reviewing the interview can easily assess the interviewee's suitability for the position.


########################################
Interview Question:
{interview_question}

########################################
Interviewee's Facial Confidence Score:
{conf_score}

########################################
Interviewee's response in text:
{response_text}

########################################
output:
"""

GRADE_RESPONSE_PROMPT = PromptTemplate(_GRADE_RESPONSE_TEMPLATE)
|
51 |
+
|
52 |
+
|
53 |
+
# Prompt asking the model for a YAML verdict (name, score, feedback) on an
# interviewee. It is filled with three placeholders: {job_requirements},
# {interview_feedback} and {resume_text}.
_RANKING_AND_FEEDBACK_TEMPLATE = """
You are an HR specialist evaluating an interviewee for a specific role.
Your task is to assess the suitability of the interviewee based on the following information:

1. **Job Requirements**:
A list of skills, experiences, and qualifications required for the role.

2. **Interview Feedback**:
The feedback and review of the interviewee’s performance in the interview, which includes assessments on their answer quality, problem-solving skills, confidence, personality, and overall performance.

3. **Resume Text**:
A parsed version of the interviewee's resume, which includes their work experience, skills, education, and other relevant information.

Using these inputs, generate an output strictly in the following YAML format:

###########################
name: <name>
score: <score>
feedback: <feedback text>
###########################


Details for the output:
1. **name**:
Name of the interviewee.

2. **score**:
A score ranging from 0 to 100, where 0 means the interviewee is not recommended for the position, and 100 means they are a perfect match for the job.

3. **feedback**:
- A detailed breakdown explaining how the interviewee’s experience, skills, and performance align or do not align with the job requirements.
- Discuss whether the interviewee’s skills, experiences, and overall traits match or fail to meet the required qualifications.
- Provide a short, concise sentence summarizing the interviewee’s suitability for the role.

Ensure that the feedback is comprehensive yet concise, offering actionable insights for HR professionals to make a decision about the interviewee’s fit for the role.


########################################
Job Requirements:
{job_requirements}

########################################
Interview Feedback:
{interview_feedback}

########################################
Resume Text:
{resume_text}

########################################

Output strictly following the below YAML format:

name: <name>
score: <score>
feedback: <feedback text>
"""

RANKING_AND_FEEDBACK_PROMPT = PromptTemplate(_RANKING_AND_FEEDBACK_TEMPLATE)
|
src/template/parser_prompt.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from llama_index.core.prompts import PromptTemplate
|
2 |
+
|
3 |
+
# Prompt text instructing a model to reproduce a resume as markdown.
# It is a raw string so that the `\n` sequences in the instructions stay as
# the two literal characters backslash + n; in a normal string they would
# turn into real line breaks and garble the examples.
PARSE_RESUME_PROMPT = r"""
You are tasked with parsing a resume.

**Your Focus**:
- Reproduce only the main body text, including section headers and bullet points, exactly as received.
- Do not skip section numbers in the format DIGIT.DIGIT (e.g., 10.1, 3.1), you must apply a markdown header level based on the depth (e.g., # for main sections, ## for subsections) to reflect the appropriate hierarchy, and output them.
- Do make sure that section numbers are always followed by the corresponding section title without a '\n' character in between or separating them into different headers. Valid examples are as below:
- '# 14 Experience'
- '# 2 Education'
Invalid examples are as below:
- '# 14\n # Experience'
- '# 2\n # Education'
- You may only add markdown header symbols (#, ##, ###, etc.) to denote the hierarchical levels of section headers.
- Do not make up any text and headers that are not present in the original text.

**Expected Output**:
- Text, section headers, and bullet points must be reproduced without any text edits, additions, or deletions, other than adding markdown header symbols (#, ##, ###, etc.).
- Use markdown headers to denote additional hierarchy (e.g., # for main sections, ## for subsections) based on the best interpretation of the document’s structure.
"""
|