Spaces:
Paused
Paused
Tao Wu
committed on
Commit
·
12baee3
1
Parent(s):
d0487d6
update
Browse files
- app/app.py +1 -1
- app/config.py +6 -2
- app/embedding_setup.py +3 -3
app/app.py
CHANGED
@@ -32,7 +32,7 @@ def retrieve_documents(occupation,skills):
|
|
32 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
33 |
for german_label in skills:
|
34 |
skill_query += german_label + ' '
|
35 |
-
query = target_occupation_query + ' ' + skill_query
|
36 |
llama_query = 'info:' + target_occupation_name + ' ' + 'skills gap:' + skill_query
|
37 |
print(query)
|
38 |
docs = retriever.get_relevant_documents(query)
|
|
|
32 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
33 |
for german_label in skills:
|
34 |
skill_query += german_label + ' '
|
35 |
+
query = 'target occupation: ' + target_occupation_query + ' skills gap:' + skill_query
|
36 |
llama_query = 'info:' + target_occupation_name + ' ' + 'skills gap:' + skill_query
|
37 |
print(query)
|
38 |
docs = retriever.get_relevant_documents(query)
|
app/config.py
CHANGED
@@ -8,15 +8,19 @@ REDIS_DB = int(os.getenv('REDIS_DB', 0))
|
|
8 |
|
9 |
# Model and embedding configuration
|
10 |
#MODEL_NAME = os.getenv('MODEL_NAME', "intfloat/multilingual-e5-large-instruct")
|
11 |
-
MODEL_NAME = os.getenv('MODEL_NAME', "
|
12 |
ENCODE_KWARGS = {
|
13 |
'normalize_embeddings': os.getenv('NORMALIZE_EMBEDDINGS', 'True') == 'True',
|
14 |
'convert_to_tensor': os.getenv('CONVERT_TO_TENSOR', 'True') == 'True'
|
15 |
}
|
16 |
-
QUERY_INSTRUCTION = os.getenv('QUERY_INSTRUCTION', '')
|
17 |
|
18 |
# Other configurations
|
19 |
TOP_K = int(os.getenv('TOP_K', 10))
|
20 |
#PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/course_emb_db")
|
21 |
PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/EduGBERT_cos_escoai")
|
22 |
CSV_FILE_PATH = os.getenv('CSV_FILE_PATH', '/app/data/occupations_de.csv')
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# Model and embedding configuration
|
10 |
#MODEL_NAME = os.getenv('MODEL_NAME', "intfloat/multilingual-e5-large-instruct")
|
11 |
+
MODEL_NAME = os.getenv('MODEL_NAME', "intfloat/multilingual-e5-large-instruct")
|
12 |
ENCODE_KWARGS = {
|
13 |
'normalize_embeddings': os.getenv('NORMALIZE_EMBEDDINGS', 'True') == 'True',
|
14 |
'convert_to_tensor': os.getenv('CONVERT_TO_TENSOR', 'True') == 'True'
|
15 |
}
|
16 |
+
QUERY_INSTRUCTION = os.getenv('QUERY_INSTRUCTION', 'Find the course that relates to the given occupation and cover the skills gap')
|
17 |
|
18 |
# Other configurations
|
19 |
TOP_K = int(os.getenv('TOP_K', 10))
|
20 |
#PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/course_emb_db")
|
21 |
PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/EduGBERT_cos_escoai")
|
22 |
CSV_FILE_PATH = os.getenv('CSV_FILE_PATH', '/app/data/occupations_de.csv')
|
23 |
+
|
24 |
+
REC_LORA_MODEL = os.getenv('REC_LORA_MODEL', 'wt3639/Llama-3-8B-Instruct_CourseRec_lora')
|
25 |
+
EXP_LORA_MODEL = os.getenv('EXP_LORA_MODEL', 'wt3639/alpaca_german_english')
|
26 |
+
LLM_MODEL = os.getenv('LLM_MODEL', '"meta-llama/Meta-Llama-3-8B-Instruct"')
|
app/embedding_setup.py
CHANGED
@@ -30,9 +30,9 @@ db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embedding_in
|
|
30 |
retriever = db.as_retriever(search_kwargs={"k": TOP_K})
|
31 |
|
32 |
|
33 |
-
|
34 |
-
lora_weights_rec =
|
35 |
-
lora_weights_exp =
|
36 |
hf_auth = os.environ.get("hf_token")
|
37 |
|
38 |
|
|
|
30 |
retriever = db.as_retriever(search_kwargs={"k": TOP_K})
|
31 |
|
32 |
|
33 |
+
|
34 |
+
lora_weights_rec = REC_LORA_MODEL
|
35 |
+
lora_weights_exp = EXP_LORA_MODEL
|
36 |
hf_auth = os.environ.get("hf_token")
|
37 |
|
38 |
|