Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -82,6 +82,9 @@ jobs_data_summarized = batch_summarize(jobs_data_final, 'processed_description',
|
|
82 |
# Summarize all 'processed_resume' in resume_data_final
|
83 |
resume_data_summarized = batch_summarize(resume_data_final, 'processed_resume', summarizer, batch_size=10, output_col='summarized_resume')
|
84 |
|
|
|
|
|
|
|
85 |
|
86 |
# Example Usage
|
87 |
encoder = SentenceTransformerEncoder(model_name='all-MiniLM-L6-v2')
|
@@ -92,6 +95,9 @@ jobs_data_summarized_and_encoded = encoder.encode_column(jobs_data_summarized, '
|
|
92 |
# Encoding the summarized resumes
|
93 |
resume_data_summarized_and_encoded = encoder.encode_column(resume_data_summarized, 'summarized_resume')
|
94 |
|
|
|
|
|
|
|
95 |
|
96 |
# Combine the jobs data
|
97 |
jobs_combined = pd.merge(
|
@@ -110,6 +116,11 @@ jobs_combined.reset_index(drop=True, inplace=True)
|
|
110 |
resume_combined.reset_index(drop=True, inplace=True)
|
111 |
|
112 |
|
|
|
|
|
|
|
|
|
|
|
113 |
#QDRANT VECTORIZER
|
114 |
|
115 |
vector_dimension = encoder.model.get_sentence_embedding_dimension()
|
@@ -126,6 +137,12 @@ def ensure_list_format(df, vector_col):
|
|
126 |
jobs_combined = ensure_list_format(jobs_combined, 'summarized_description_encoded')
|
127 |
resume_combined = ensure_list_format(resume_combined, 'summarized_resume_encoded')
|
128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
given_job_vector = jobs_combined['summarized_description_encoded'].iloc[0]
|
130 |
|
131 |
# Now upload to Qdrant
|
@@ -135,10 +152,18 @@ qdrant_interface.save_to_qdrant(resume_combined, 'resumes', 'summarized_resume_e
|
|
135 |
# Retrieve specific records by IDs from the 'jobs' collection
|
136 |
specific_jobs_records = qdrant_interface.retrieve_specific_records('jobs', ids=[1])
|
137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
# Find top 5 matching resumes for the example job
|
139 |
matched_resumes = qdrant_interface.match_jobs_to_resumes(given_job_vector, top_k=5)
|
140 |
for resume, score in matched_resumes:
|
141 |
-
|
|
|
|
|
142 |
|
143 |
|
144 |
|
|
|
82 |
# Summarize all 'processed_resume' in resume_data_final
|
83 |
resume_data_summarized = batch_summarize(resume_data_final, 'processed_resume', summarizer, batch_size=10, output_col='summarized_resume')
|
84 |
|
85 |
+
st.write("SUMMARISED")
|
86 |
+
st.write(jobs_data_summarized)
|
87 |
+
st.write(resume_data_summarized)
|
88 |
|
89 |
# Example Usage
|
90 |
encoder = SentenceTransformerEncoder(model_name='all-MiniLM-L6-v2')
|
|
|
95 |
# Encoding the summarized resumes
|
96 |
resume_data_summarized_and_encoded = encoder.encode_column(resume_data_summarized, 'summarized_resume')
|
97 |
|
98 |
+
st.write("SUMMARISED AND ENCODED")
|
99 |
+
st.write(jobs_data_summarized_and_encoded)
|
100 |
+
st.write(resume_data_summarized_and_encoded)
|
101 |
|
102 |
# Combine the jobs data
|
103 |
jobs_combined = pd.merge(
|
|
|
116 |
resume_combined.reset_index(drop=True, inplace=True)
|
117 |
|
118 |
|
119 |
+
st.write("SUMMARISED AND ENCODED")
|
120 |
+
st.write(jobs_combined)
|
121 |
+
st.write(resume_combined)
|
122 |
+
|
123 |
+
|
124 |
#QDRANT VECTORIZER
|
125 |
|
126 |
vector_dimension = encoder.model.get_sentence_embedding_dimension()
|
|
|
137 |
jobs_combined = ensure_list_format(jobs_combined, 'summarized_description_encoded')
|
138 |
resume_combined = ensure_list_format(resume_combined, 'summarized_resume_encoded')
|
139 |
|
140 |
+
|
141 |
+
st.write("LIST FORMAT")
|
142 |
+
st.write(jobs_combined)
|
143 |
+
st.write(resume_combined)
|
144 |
+
|
145 |
+
|
146 |
given_job_vector = jobs_combined['summarized_description_encoded'].iloc[0]
|
147 |
|
148 |
# Now upload to Qdrant
|
|
|
152 |
# Retrieve specific records by IDs from the 'jobs' collection
|
153 |
specific_jobs_records = qdrant_interface.retrieve_specific_records('jobs', ids=[1])
|
154 |
|
155 |
+
|
156 |
+
st.write("SPECIFIC JOB RECS")
|
157 |
+
st.write(specific_jobs_records)
|
158 |
+
|
159 |
+
|
160 |
+
|
161 |
# Find top 5 matching resumes for the example job
|
162 |
matched_resumes = qdrant_interface.match_jobs_to_resumes(given_job_vector, top_k=5)
|
163 |
for resume, score in matched_resumes:
|
164 |
+
st.write(f"Matched Resume: {resume['summarized_resume']}, Score: {score}")
|
165 |
+
|
166 |
+
|
167 |
|
168 |
|
169 |
|