root committed on
Commit 6433e25 · 1 Parent(s): df2d9bf
Files changed (2):
  1. app.py +28 -49
  2. requirements.txt +5 -3
app.py CHANGED
@@ -15,10 +15,11 @@ from docx import Document
 import csv
 from datasets import load_dataset
 import gc
-from huggingface_hub import InferenceClient
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import time
 import faiss
 import re
+import openai
 
 # Download NLTK resources
 try:
@@ -46,7 +47,7 @@ with st.sidebar:
 
     # Advanced options
     st.subheader("Advanced Options")
-    top_k = st.number_input("Number of results to display", min_value=1, max_value=50, value=10, step=1)
+    top_k = st.selectbox("Number of results to display", options=[1, 5, 10, 20, 50], index=2)
 
     # LLM Settings
     st.subheader("LLM Settings")
@@ -80,12 +81,14 @@ if 'resume_texts' not in st.session_state:
     st.session_state.resume_texts = []
 if 'file_names' not in st.session_state:
     st.session_state.file_names = []
-if 'llm_client' not in st.session_state:
-    st.session_state.llm_client = None
 if 'explanations_generated' not in st.session_state:
     st.session_state.explanations_generated = False
 if 'current_job_description' not in st.session_state:
     st.session_state.current_job_description = ""
+if 'vllm_4b_endpoint' not in st.session_state:
+    st.session_state.vllm_4b_endpoint = "http://localhost:8001/v1"  # Qwen3-4B vLLM endpoint
+if 'vllm_14b_endpoint' not in st.session_state:
+    st.session_state.vllm_14b_endpoint = "http://localhost:8002/v1"  # Qwen3-14B vLLM endpoint
 
 @st.cache_resource
 def load_embedding_model():
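The two defaults assume OpenAI-compatible vLLM servers already listening on ports 8001 and 8002, e.g. started with "vllm serve Qwen/Qwen3-4B --port 8001" and "vllm serve Qwen/Qwen3-14B --port 8002". A minimal reachability sketch, not part of the commit (endpoint_is_up is an illustrative name, and requests is not in the pinned requirements):

# Sketch: verify the assumed vLLM endpoints respond before the app calls them.
import requests

def endpoint_is_up(base_url: str) -> bool:
    """True if the server answers the OpenAI-compatible /models route."""
    try:
        return requests.get(f"{base_url}/models", timeout=2).ok
    except requests.RequestException:
        return False

for ep in ("http://localhost:8001/v1", "http://localhost:8002/v1"):
    print(ep, "reachable" if endpoint_is_up(ep) else "unreachable")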
@@ -112,28 +115,23 @@ def load_cross_encoder():
         st.error(f"❌ Error loading Cross-Encoder model: {str(e)}")
         return None
 
-def initialize_llm_client(hf_token=None):
-    """Initialize the LLM client for Qwen3-14B"""
-    try:
-        client = InferenceClient(
-            model="Qwen/Qwen3-14B",
-            token=hf_token if hf_token else None
-        )
-        return client
-    except Exception as e:
-        st.error(f"❌ Error initializing LLM client: {str(e)}")
-        return None
+def vllm_chat_completion(prompt, endpoint, max_tokens=200, temperature=0.7):
+    openai.api_base = endpoint
+    openai.api_key = "EMPTY"  # vLLM does not require a real key
+    response = openai.ChatCompletion.create(
+        model="Qwen/Qwen3-4B" if "8001" in endpoint else "Qwen/Qwen3-14B",  # match on the port: the endpoint URLs contain no "4b"
+        messages=[{"role": "user", "content": prompt}],
+        max_tokens=max_tokens,
+        temperature=temperature,
+        stream=False
+    )
+    return response.choices[0].message.content.strip()
 
 class ResumeScreener:
     def __init__(self):
         # Load models
         self.embedding_model = load_embedding_model()
         self.cross_encoder = load_cross_encoder()
-        self.llm_client = None
-
-    def set_llm_client(self, client):
-        """Set the LLM client"""
-        self.llm_client = client
 
     def extract_text_from_file(self, file_path, file_type):
         """Extract text from various file types"""
@@ -363,10 +361,6 @@ class ResumeScreener:
     def add_intent_scores(self, resume_texts, job_description, top_20_with_bm25):
         """Stage 4: Add LLM intent analysis scores"""
         try:
-            if not self.llm_client:
-                st.warning("LLM client not available. Using default intent scores.")
-                return [(idx, cross_score, bm25_score, 0.1) for idx, cross_score, bm25_score in top_20_with_bm25]
-
             results_with_intent = []
             progress_bar = st.progress(0)
 
@@ -402,12 +396,10 @@ Response format:
 Intent: [Yes/Maybe/No]
 Reason: [Brief justification]"""
 
-                response = self.llm_client.text_generation(
+                response = vllm_chat_completion(
                     prompt,
-                    max_new_tokens=100,
-                    temperature=0.3,
-                    top_p=0.9,
-                    do_sample=True
+                    st.session_state.vllm_4b_endpoint,
+                    max_tokens=100
                 )
 
                 # Parse response
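The parse step itself falls outside this hunk; a hypothetical version follows, where the Yes/Maybe/No score mapping is invented for illustration (only the 0.1 default appears in the removed guard above):

# Hypothetical parser for the "Intent: [Yes/Maybe/No]" reply; the mapping
# values other than the 0.1 default are assumptions, not taken from app.py.
import re

def parse_intent_score(response: str) -> float:
    match = re.search(r"Intent:\s*(Yes|Maybe|No)", response, re.IGNORECASE)
    label = match.group(1).lower() if match else "no"
    return {"yes": 0.3, "maybe": 0.2, "no": 0.1}[label]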
@@ -516,7 +508,7 @@ Reason: [Brief justification]"""
 
     def generate_llm_explanation(self, resume_text, job_description, score, skills, max_retries=3):
         """Generate detailed explanation using Qwen3-14B"""
-        if not self.llm_client:
+        if not st.session_state.vllm_14b_endpoint:
             return self.generate_simple_explanation(score, score, score, skills)
 
         # Truncate texts to manage token limits
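Note: vllm_14b_endpoint is initialized above to a non-empty default URL, so this guard only trips if the endpoint is explicitly cleared; an unreachable server instead surfaces as a request error inside the retry loop below.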
@@ -544,12 +536,10 @@ Write a professional, detailed 150-word analysis for THIS INDIVIDUAL CANDIDATE:"
 
         for attempt in range(max_retries):
             try:
-                response = self.llm_client.text_generation(
+                response = vllm_chat_completion(
                     prompt,
-                    max_new_tokens=200,
-                    temperature=0.7,
-                    top_p=0.9,
-                    do_sample=True
+                    st.session_state.vllm_14b_endpoint,
+                    max_tokens=200
                 )
 
                 # Extract the response and ensure it's about 150 words
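The post-processing after the call is also outside the hunk; a hypothetical trim toward the 150-word target:

# Hypothetical helper; app.py's actual post-processing is not shown in the diff.
def trim_to_words(text: str, limit: int = 150) -> str:
    words = text.split()
    return text if len(words) <= limit else " ".join(words[:limit]) + "..."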
@@ -600,17 +590,6 @@ st.markdown("---")
 # Initialize screener
 screener = ResumeScreener()
 
-# Initialize LLM client if enabled
-if use_llm_explanations:
-    if 'hf_token' in locals() and hf_token:
-        if st.session_state.llm_client is None:
-            st.session_state.llm_client = initialize_llm_client(hf_token)
-    else:
-        if st.session_state.llm_client is None:
-            st.session_state.llm_client = initialize_llm_client()
-
-    screener.set_llm_client(st.session_state.llm_client)
-
 # Job Description Input
 st.header("📝 Step 1: Enter Job Description")
 job_description = st.text_area(
@@ -864,7 +843,7 @@ with col2:
     show_explanation_button = (
         st.session_state.results and
         use_llm_explanations and
-        st.session_state.llm_client and
+        st.session_state.vllm_14b_endpoint and
        not st.session_state.explanations_generated
     )
 
@@ -907,8 +886,8 @@ with col2:
     elif st.session_state.results and not use_llm_explanations:
         st.info("💡 Enable 'Generate AI Explanations' in sidebar to use this feature")
 
-    elif st.session_state.results and not st.session_state.llm_client:
-        st.warning("⚠️ LLM client not available. Check your Hugging Face token.")
+    elif st.session_state.results and not st.session_state.vllm_14b_endpoint:
+        st.warning("⚠️ LLM endpoint not available. Check the vLLM server.")
 
 # Display Results
 if st.session_state.results:
requirements.txt CHANGED
@@ -1,5 +1,5 @@
 streamlit==1.31.0
-transformers==4.51.0
+transformers>=4.51.0
 torch==2.1.2
 pdfplumber==0.10.1
 PyPDF2==3.0.1
@@ -10,10 +10,12 @@ rank-bm25==0.2.2
 pandas==2.1.3
 numpy==1.24.3
 tqdm==4.66.1
-huggingface-hub>=0.30.0
+huggingface-hub==0.30.0
 bitsandbytes==0.44.1
 accelerate==0.27.2
 datasets==2.18.0
 sentence-transformers==2.7.0
 plotly==5.18.0
-einops
+einops
+vllm>=0.8.5
+openai>=1.0.0
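Note: openai>=1.0.0 removed the module-level openai.api_base / openai.ChatCompletion interface that the new vllm_chat_completion helper calls, so as committed the app needs either an openai<1.0 pin here or the 1.x-client rewrite sketched above.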