mbosse99 commited on
Commit
9a804ac
·
1 Parent(s): 9536a3c

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +362 -0
  3. cvdb.db +3 -0
  4. requirements.txt +5 -0
  5. sys_prompt_frontend.txt +15 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ cvdb.db filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io
import os
import openai
import re
import sqlite3
import streamlit as st
from streamlit_js_eval import streamlit_js_eval
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch
from PyPDF2 import PdfReader

# SECURITY NOTE(review): real-looking API keys are hard-coded and committed
# below. They should be revoked and loaded from the environment or a secrets
# manager; kept verbatim here because the running app reads them as-is.
os.environ["OPENAI_API_KEY"] = "201b389eda7b48a496fa81c091f8e51e"
os.environ["OPENAI_API_BASE"] = "https://tensora-oai.openai.azure.com/"
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
os.environ["AZURE_SEARCH_ENDPOINT"] = "https://tensora-search.search.windows.net"
os.environ["AZURE_SEARCH_KEY"] = "LABhDdbb8NPPilxOwPpZ4nXRyHzABsKyXdMiSQ50CKAzSeB1fy1x"

# Configure the openai module for Azure OpenAI.
# NOTE(review): api_version here ("2023-05-15") differs from the
# OPENAI_API_VERSION env var above ("2023-03-15-preview") -- confirm which
# one is intended; the openai module uses the value set on the module.
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = "https://tensora-oai.openai.azure.com/"
openai.api_type = "azure"
openai.api_version = "2023-05-15"
24
# Inject page-level CSS: center the contents of Streamlit columns and
# left-align h3 headings (used by st.subheader).
st.markdown(
    """
    <style>
    [data-testid=column]{
        text-align: center;
        display: flex;
        align-items: center;
        justify-content: center;
    }
    h3{
        text-align: left;
    }
    </style>
    """,
    unsafe_allow_html=True,
)

# System-prompt template for interview-question generation; it is formatted
# later with {n} (question count), {job} (job-description text) and
# {resume} (candidate data).
with open("sys_prompt_frontend.txt") as f:
    sys_prompt = f.read()
43
+
44
def adjust_numbering(lst):
    """Renumber a list of "N. text" question strings to run 1..len(lst).

    Each item is expected to start with a numeric prefix such as "3. " (as
    produced by the question-accept flow); the old prefix is stripped and
    replaced with the item's new 1-based position.

    Unlike the previous ``item.split('. ', 1)[1]`` implementation, an item
    without a recognizable prefix no longer raises IndexError -- its full
    text is kept and only the new number is prepended.

    Args:
        lst: list of question strings, possibly carrying stale numbering.

    Returns:
        A new list with consecutive "1. ", "2. ", ... prefixes.
    """
    renumbered = []
    for position, item in enumerate(lst, start=1):
        # Strip an optional leading "<digits>." plus surrounding whitespace.
        text = re.sub(r"^\s*\d+\.\s*", "", item)
        renumbered.append(f"{position}. {text}")
    return renumbered
46
+
47
def check_keywords_in_content(database_path, table_name, input_id, keywords):
    """Return True iff row ``input_id`` contains every keyword (case-insensitive).

    Opens the SQLite database at ``database_path``, fetches the row of
    ``table_name`` whose ``id`` column equals ``input_id``, and checks that
    each string in ``keywords`` occurs as a substring of the row's content
    (column index 1 -- assumed to be the CV text; TODO confirm schema).

    Args:
        database_path: path to the SQLite database file.
        table_name: name of the table to query. SECURITY NOTE: identifiers
            cannot be bound as SQL parameters, so this is interpolated into
            the statement -- it must come from trusted code, never from
            user input.
        input_id: value matched against the ``id`` column.
        keywords: iterable of keyword strings; an empty iterable matches
            any existing row (``all`` of nothing is True).

    Returns:
        True when the row exists and contains every keyword, else False
        (including when the id is not found, which is also logged).
    """
    conn = sqlite3.connect(database_path)
    try:
        cursor = conn.cursor()
        # input_id is bound safely; only the table name is interpolated.
        cursor.execute(f'SELECT * FROM {table_name} WHERE id = ?', (input_id,))
        row = cursor.fetchone()

        if not row:
            print("ID not found")
            return False

        # Case-insensitive substring match for every keyword.
        content = row[1].lower()
        keywords_lower = [keyword.lower() for keyword in keywords]
        return all(keyword in content for keyword in keywords_lower)
    finally:
        # Close on every path -- the original leaked the connection when
        # the query itself raised.
        conn.close()
74
+
75
# ---------------------------------------------------------------------------
# Streamlit page flow (executed top-to-bottom on every rerun).
# The UI is a small state machine driven by st.session_state:
#   1. upload a job-description PDF and optional comma-separated keywords
#   2. similarity-search CVs in Azure Search, filter them through the local
#      SQLite keyword check (check_keywords_in_content)
#   3. let the user remove/accept candidates
#   4. generate interview questions with GPT-4, then edit/delete/reorder them
# ---------------------------------------------------------------------------

# Initialise every session-state slot on first run so later reads are safe.
if "similarity_search_string" not in st.session_state:
    # NOTE(review): this slot is never read or written again in this file.
    st.session_state["similarity_search_string"] = None
if "job_string" not in st.session_state:
    # Extracted plain text of the uploaded job-description PDF.
    st.session_state["job_string"] = None
if "docs_res" not in st.session_state:
    # Candidate documents that passed the keyword filter.
    st.session_state["docs_res"] = None
if "final_candidates" not in st.session_state:
    # Candidates explicitly accepted by the user.
    st.session_state["final_candidates"] = None
if "final_question_string" not in st.session_state:
    # Accepted, ordered interview questions ("N. text" strings).
    st.session_state["final_question_string"] = []
if "ai_questions" not in st.session_state:
    # Raw question list proposed by GPT-4, before user editing.
    st.session_state["ai_questions"] = None
if "db" not in st.session_state:
    # Build the Azure Cognitive Search vector store once per session.
    embedder = OpenAIEmbeddings(deployment="text-embedding-ada-002", chunk_size=1)
    embedding_function = embedder.embed_query

    db = AzureSearch(
        index_name="wg-cvs",
        azure_search_endpoint=os.environ.get("AZURE_SEARCH_ENDPOINT"),
        azure_search_key=os.environ.get("AZURE_SEARCH_KEY"),
        embedding_function=embedding_function,
    )
    st.session_state["db"] = db


# --- Header -----------------------------------------------------------------
col1, col2 = st.columns([2, 1])

col1.title("Candidate Search")
col2.image("https://www.workgenius.com/wp-content/uploads/2023/03/WorkGenius_navy-1.svg")

st.write("Please upload the job description for which you would like candidates to be proposed.")
col_file, col_clear = st.columns([6,1])

with col_file:
    # key="job" exposes the uploaded file as st.session_state["job"].
    uploaded_file_jobdescription = st.file_uploader("Upload the job description:", type=["pdf"], key="job")
with col_clear:
    if st.button("Clear", use_container_width=True):
        # Full browser reload -- wipes the entire session state.
        streamlit_js_eval(js_expressions="parent.window.location.reload()")

text_area_params = st.text_area(label="Add additional search parameters, which are separated by commas (e.g. master, phd, web developer, spanish)")

# Searching is disabled once the candidate list has been accepted.
submit = st.button("Search candidates",disabled= True if st.session_state["final_candidates"] else False)
if not st.session_state["job"] and submit:
    st.error("Please upload a job description to search for candidates")
# Re-run the search when the button is pressed while results already exist.
# NOTE(review): this whole search loop is duplicated below for the first
# search -- consider extracting it into a helper function.
if st.session_state["docs_res"] and submit:
    with st.spinner("Load the candidates, this may take a moment..."):
        query_string = "The following keywords must be included: " + text_area_params + " " + st.session_state["job_string"]
        checked_candidates = []
        db_path = 'cvdb.db'
        table_name = 'files'
        candidates_per_search = 100   # window size fetched from Azure per round
        target_candidates_count = 10  # stop once this many pass the keyword filter
        current_offset = 0

        while len(checked_candidates) < target_candidates_count:
            # Run a similarity search over a growing window and skip the
            # prefix that was already inspected in earlier rounds.
            raw_candidates = st.session_state["db"].similarity_search(query_string, k=candidates_per_search+current_offset)

            for candidate in raw_candidates[current_offset:]:
                # The candidate id is the file-name part of the source path.
                candidates_id = candidate.metadata["source"].split("/")[-1]
                # NOTE(review): keywords are split on ',' without stripping,
                # so " phd" keeps its leading space -- confirm intended.
                keyword_bool = check_keywords_in_content(db_path, table_name, candidates_id, text_area_params.split(','))

                if keyword_bool:
                    checked_candidates.append(candidate)

                # Stop as soon as the target count is reached.
                if len(checked_candidates) >= target_candidates_count:
                    break

            current_offset += candidates_per_search
            if current_offset == 600:
                # Hard cap: give up after inspecting 600 search hits.
                break

        st.session_state["docs_res"] = checked_candidates
        if len(checked_candidates) == 0:
            st.error("No candidates can be found with these keywords. Please adjust the keywords and try again.", icon="🚨")
if (st.session_state["job"] and submit) or st.session_state["docs_res"]:
    if not st.session_state["job_string"]:
        # Extract plain text from every page of the uploaded PDF (once).
        pdf_data_jobdescription = st.session_state["job"].read()
        pdf_data_jobdescription_string = ""
        pdf_reader_job = PdfReader(io.BytesIO(pdf_data_jobdescription))
        for page_num in range(len(pdf_reader_job.pages)):
            page = pdf_reader_job.pages[page_num]
            pdf_data_jobdescription_string += page.extract_text()
        # st.session_state["pdf_data_jobdescription"] = pdf_data_jobdescription  # activate and add session state if the raw bytes are needed
        st.session_state["job_string"] = pdf_data_jobdescription_string
    if not st.session_state["docs_res"]:
        # First search -- identical to the re-search loop above.
        with st.spinner("Load the candidates, this may take a moment..."):
            query_string = "The following keywords must be included: " + text_area_params + " " + st.session_state["job_string"]
            checked_candidates = []
            db_path = 'cvdb.db'
            table_name = 'files'
            candidates_per_search = 100
            target_candidates_count = 10
            current_offset = 0

            while len(checked_candidates) < target_candidates_count:
                # Similarity search over a growing window; skip what was
                # already inspected.
                raw_candidates = st.session_state["db"].similarity_search(query_string, k=candidates_per_search+current_offset)

                for candidate in raw_candidates[current_offset:]:
                    candidates_id = candidate.metadata["source"].split("/")[-1]
                    keyword_bool = check_keywords_in_content(db_path, table_name, candidates_id, text_area_params.split(','))

                    if keyword_bool:
                        checked_candidates.append(candidate)

                    # Stop as soon as the target count is reached.
                    if len(checked_candidates) >= target_candidates_count:
                        break

                current_offset += candidates_per_search
                if current_offset == 600:
                    break

            st.session_state["docs_res"] = checked_candidates
            if len(checked_candidates) == 0:
                st.error("No candidates can be found with these keywords. Please adjust the keywords and try again.", icon="🚨")
            # --- Alternative selection strategies (dead code, condensed) ---
            # The original file kept three commented-out experiments here:
            #  * a single similarity_search with k=100 plus keyword
            #    post-filtering (no paging),
            #  * building a temporary Azure index ("wg-cvs-temp") from the
            #    raw candidates and re-searching it,
            #  * asking gpt-4 per candidate ("answer '1' or '0'") whether it
            #    fulfils the requirements.

    if not st.session_state["final_candidates"]:
        # Review stage: list every found candidate with a Remove button.
        for i,doc in enumerate(st.session_state["docs_res"]):
            cols_final = st.columns([6,1])
            with cols_final[1]:
                if st.button("Remove",use_container_width=True,key="btn_rm_cv_row_"+str(i)):
                    st.session_state["docs_res"].pop(i)
                    st.rerun()
            with cols_final[0]:
                with st.expander(doc.metadata["source"]):
                    st.write(doc.page_content)
        if st.button("Accept candidates", key="accept_candidates_btn"):
            print("hello")  # NOTE(review): leftover debug output
            st.session_state["final_candidates"] = st.session_state["docs_res"].copy()
            st.rerun()
    else:
        # Question stage: candidates are fixed; generate / edit questions.
        print("Now Questions")  # NOTE(review): leftover debug output
        st.subheader("Your Candidates:")
        st.write(", ".join(candidate.metadata["source"] for candidate in st.session_state["final_candidates"]))
        # NOTE(review): cv_strings is computed but never used -- presumably it
        # was meant to be passed as resume= below instead of the raw first
        # candidate object; verify.
        cv_strings = "; Next CV: ".join(candidate.page_content for candidate in st.session_state["final_candidates"])
        system = sys_prompt.format(job=st.session_state["job_string"], resume=st.session_state["final_candidates"][0], n=15)
        if not st.session_state["ai_questions"]:
            try:
                st.info("The questions are generated. This may take a short moment.", icon="ℹ️")
                with st.spinner("Loading..."):
                    res = openai.ChatCompletion.create(
                        engine="gpt-4",
                        temperature=0.2,
                        messages=[
                            {
                                "role": "system",
                                "content": system,
                            },
                        ],
                    )
                # Split the completion into one question per non-empty line.
                st.session_state["ai_questions"] = [item for item in res.choices[0]["message"]["content"].split("\n") if len(item) > 0]
                for i,q in enumerate(res.choices[0]["message"]["content"].split("\n")):
                    st.session_state["disable_row_"+str(i)] = False
                st.rerun()
            except Exception as e:
                print(f"Fehler beim generieren der Fragen: {str(e)}")
                st.error("An error has occurred. Please reload the page or contact the admin.", icon="🚨")
        else:
            if len(st.session_state["final_question_string"]) <= 0:
                # Editable question list, one text area + Delete per question.
                for i,question in enumerate(st.session_state["ai_questions"]):
                    cols = st.columns([5,1])
                    with cols[1]:
                        if st.button("Delete",use_container_width=True,key="btn_del_row_"+str(i)):
                            print("delete")  # NOTE(review): leftover debug output
                            # NOTE(review): removes by value while enumerating;
                            # works because rerun() follows immediately.
                            st.session_state["ai_questions"].remove(question)
                            st.rerun()
                    with cols[0]:
                        st.text_area(label="Question "+str(i+1)+":",value=question,label_visibility="collapsed",key="text_area_"+str(i),disabled=st.session_state["disable_row_"+str(i)])
                st.write("If you are satisfied with the questions, then accept them. You can still sort them afterwards.")
                if st.button("Accept all questions",use_container_width=True,key="accept_all_questions"):
                    # Freeze the (possibly user-edited) questions, renumbering
                    # them consecutively and disabling further editing.
                    for i,question in enumerate(st.session_state["ai_questions"]):
                        pattern = re.compile(r"^[1-9][0-9]?\.")
                        questions_length = len(st.session_state["final_question_string"])
                        question_from_text_area = st.session_state["text_area_"+str(i)]
                        question_to_append = str(questions_length+1)+"."+re.sub(pattern, "", question_from_text_area)
                        st.session_state["final_question_string"].append(question_to_append)
                        st.session_state["disable_row_"+str(i)] = True
                    st.rerun()
            # Reordering UI for the accepted questions (Up/Down swap
            # neighbours, then adjust_numbering renumbers the prefixes).
            for i,final_q in enumerate(st.session_state["final_question_string"]):
                cols_final = st.columns([5,1])
                with cols_final[1]:
                    if st.button("Up",use_container_width=True,key="btn_up_row_"+str(i),disabled=True if i == 0 else False):
                        if i > 0:
                            # Swap the current element with the previous one.
                            st.session_state.final_question_string[i], st.session_state.final_question_string[i - 1] = \
                                st.session_state.final_question_string[i - 1], st.session_state.final_question_string[i]
                            st.session_state.final_question_string = adjust_numbering(st.session_state.final_question_string)
                            st.rerun()
                    if st.button("Down",use_container_width=True,key="btn_down_row_"+str(i), disabled=True if i == len(st.session_state["final_question_string"])-1 else False):
                        if i < len(st.session_state.final_question_string) - 1:
                            # Swap the current element with the next one.
                            st.session_state.final_question_string[i], st.session_state.final_question_string[i + 1] = \
                                st.session_state.final_question_string[i + 1], st.session_state.final_question_string[i]
                            st.session_state.final_question_string = adjust_numbering(st.session_state.final_question_string)
                            st.rerun()
                with cols_final[0]:
                    st.write(final_q)
            if st.button("Submit", use_container_width=True):
                st.success('Successful search for candidates and generation of questions')
361
+
362
+
cvdb.db ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a37e1cd92c164e099e5e935c776458f4771f4f9b1eb8ad899e8124df64a1dd2
3
+ size 395046912
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai==0.28.1
2
+ streamlit
3
+ langchain
4
+ PyPDF2
5
+ streamlit_js_eval
sys_prompt_frontend.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a professional recruiter specialized in conducting interviews. Your task is to generate {n} questions for an interview and collect as much relevant information from the applicant (the user) as possible. As context you will be given the job description. You will also receive one or more resumes from potential candidates to get an overview of the applicants.
2
+
3
+ Please follow these rules:
4
+
5
+ - Try to ask open-ended questions to collect more information from the applicant.
6
+ - Concentrate on questions that the resume alone cannot answer. Aim to fill in the gaps.
7
+ - Don't give feedback, don't summarize and don't explain yourself. Your role is investigative.
8
+ - Use the {n} questions wisely to get an overall impression of the applicant.
9
+ - Just generate the {n} questions, nothing else.
10
+
11
+ JOB DESCRIPTION:
12
+ {job}
13
+
14
+ RESUME(S):
15
+ {resume}