Spaces:
Paused
Paused
Tao Wu
committed on
Commit
·
6fc2fca
1
Parent(s):
a5dc95f
add skills query
Browse files - app/app.py +6 -6
app/app.py
CHANGED
@@ -20,7 +20,7 @@ def retrieve_documents(occupation,skills):
|
|
20 |
output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
|
21 |
oc_uri = occupations.get(occupation, "")
|
22 |
skill_query = ''
|
23 |
-
|
24 |
if isinstance(oc_uri, int):
|
25 |
df = pd.read_csv("/app/data/berufe_info.csv")
|
26 |
target_occupation = df[df['id'] == oc_uri]
|
@@ -33,20 +33,20 @@ def retrieve_documents(occupation,skills):
|
|
33 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
34 |
for german_label in skills:
|
35 |
skill_query += german_label + ' '
|
36 |
-
|
37 |
-
|
38 |
query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
|
39 |
llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
|
40 |
print(query)
|
41 |
docs = retriever.get_relevant_documents(query)
|
42 |
-
|
43 |
|
44 |
#remove duplicates
|
45 |
seen_course_ids = set()
|
46 |
candidate_doc_unique = []
|
47 |
|
48 |
-
for doc in
|
49 |
-
course_id = doc.metadata.get('
|
50 |
if course_id not in seen_course_ids:
|
51 |
candidate_doc_unique.append(doc)
|
52 |
seen_course_ids.add(course_id)
|
|
|
20 |
output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
|
21 |
oc_uri = occupations.get(occupation, "")
|
22 |
skill_query = ''
|
23 |
+
candidate_docs = []
|
24 |
if isinstance(oc_uri, int):
|
25 |
df = pd.read_csv("/app/data/berufe_info.csv")
|
26 |
target_occupation = df[df['id'] == oc_uri]
|
|
|
33 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
34 |
for german_label in skills:
|
35 |
skill_query += german_label + ' '
|
36 |
+
skills_docs = retriever.get_relevant_documents(german_label)
|
37 |
+
candidate_docs.extend(skills_docs[:2])
|
38 |
query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
|
39 |
llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
|
40 |
print(query)
|
41 |
docs = retriever.get_relevant_documents(query)
|
42 |
+
candidate_docs.extend(docs[:5])
|
43 |
|
44 |
#remove duplicates
|
45 |
seen_course_ids = set()
|
46 |
candidate_doc_unique = []
|
47 |
|
48 |
+
for doc in candidate_docs:
|
49 |
+
course_id = doc.metadata.get('id','')
|
50 |
if course_id not in seen_course_ids:
|
51 |
candidate_doc_unique.append(doc)
|
52 |
seen_course_ids.add(course_id)
|