Tao Wu committed
Commit 12baee3 · Parent: d0487d6
Files changed (3)
  1. app/app.py +1 -1
  2. app/config.py +6 -2
  3. app/embedding_setup.py +3 -3
app/app.py CHANGED
@@ -32,7 +32,7 @@ def retrieve_documents(occupation,skills):
     target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
     for german_label in skills:
         skill_query += german_label + ' '
-    query = target_occupation_query + ' ' + skill_query
+    query = 'target occupation: ' + target_occupation_query + ' skills gap:' + skill_query
     llama_query = 'info:' + target_occupation_name + ' ' + 'skills gap:' + skill_query
     print(query)
     docs = retriever.get_relevant_documents(query)
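The change to app/app.py only affects how the retrieval query string is assembled: the target-occupation text and the skills-gap text are now labelled explicitly, mirroring the format already used for llama_query. A minimal sketch of the before/after strings (the occupation and skill values below are made-up examples, not data from the repository):

```python
# Hypothetical inputs, for illustration only
target_occupation_query = "Datenanalyst: wertet Daten aus und erstellt Berichte"
skills = ["Python", "SQL"]

skill_query = ""
for german_label in skills:
    skill_query += german_label + ' '

# Before (parent d0487d6): plain concatenation
old_query = target_occupation_query + ' ' + skill_query

# After (commit 12baee3): labelled sections, matching the style of llama_query
query = 'target occupation: ' + target_occupation_query + ' skills gap:' + skill_query
print(query)
# target occupation: Datenanalyst: wertet Daten aus und erstellt Berichte skills gap:Python SQL
```

The labelled form also lines up with the new QUERY_INSTRUCTION default in app/config.py, which asks the embedding model to treat the occupation and the skills gap as distinct parts of the query.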
app/config.py CHANGED
@@ -8,15 +8,19 @@ REDIS_DB = int(os.getenv('REDIS_DB', 0))
 
 # Model and embedding configuration
 #MODEL_NAME = os.getenv('MODEL_NAME', "intfloat/multilingual-e5-large-instruct")
-MODEL_NAME = os.getenv('MODEL_NAME', "wt3639/EduGBERT_CourseRec")
+MODEL_NAME = os.getenv('MODEL_NAME', "intfloat/multilingual-e5-large-instruct")
 ENCODE_KWARGS = {
     'normalize_embeddings': os.getenv('NORMALIZE_EMBEDDINGS', 'True') == 'True',
     'convert_to_tensor': os.getenv('CONVERT_TO_TENSOR', 'True') == 'True'
 }
-QUERY_INSTRUCTION = os.getenv('QUERY_INSTRUCTION', '')
+QUERY_INSTRUCTION = os.getenv('QUERY_INSTRUCTION', 'Find the course that relates to the given occupation and cover the skills gap')
 
 # Other configurations
 TOP_K = int(os.getenv('TOP_K', 10))
 #PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/course_emb_db")
 PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/EduGBERT_cos_escoai")
 CSV_FILE_PATH = os.getenv('CSV_FILE_PATH', '/app/data/occupations_de.csv')
+
+REC_LORA_MODEL = os.getenv('REC_LORA_MODEL', 'wt3639/Llama-3-8B-Instruct_CourseRec_lora')
+EXP_LORA_MODEL = os.getenv('EXP_LORA_MODEL', 'wt3639/alpaca_german_english')
+LLM_MODEL = os.getenv('LLM_MODEL', '"meta-llama/Meta-Llama-3-8B-Instruct"')
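Every value in app/config.py follows the same os.getenv pattern, so the new REC_LORA_MODEL, EXP_LORA_MODEL and LLM_MODEL entries can be overridden per deployment without touching the code. A minimal sketch, assuming the module is importable as app.config (the override value below is hypothetical):

```python
import os

# Set before the config module is imported; otherwise the default from config.py applies.
os.environ["REC_LORA_MODEL"] = "my-org/my-courserec-lora"  # hypothetical adapter repo

from app import config

print(config.REC_LORA_MODEL)  # -> my-org/my-courserec-lora (from the environment)
print(config.EXP_LORA_MODEL)  # -> wt3639/alpaca_german_english (default from config.py)
```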
app/embedding_setup.py CHANGED
@@ -30,9 +30,9 @@ db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embedding_in
 retriever = db.as_retriever(search_kwargs={"k": TOP_K})
 
 
-LLM_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
-lora_weights_rec = "wt3639/Llama-3-8B-Instruct_CourseRec_lora"
-lora_weights_exp = "wt3639/alpaca_german_english"
+
+lora_weights_rec = REC_LORA_MODEL
+lora_weights_exp = EXP_LORA_MODEL
 hf_auth = os.environ.get("hf_token")
 
 
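In app/embedding_setup.py the hard-coded model identifiers are replaced with the constants from app/config.py. The snippet below is a minimal sketch of how those constants can be consumed to load the base Llama-3 model and attach a LoRA adapter; it is not the repository's actual loading code, which may add quantization, device placement, or the second (explanation) adapter. Note that the committed default for LLM_MODEL still contains literal quote characters ('"meta-llama/Meta-Llama-3-8B-Instruct"'), so the environment variable needs to be set (or the default cleaned up) before the name resolves to a valid Hub repo id.

```python
import os

from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

from app.config import LLM_MODEL, REC_LORA_MODEL, EXP_LORA_MODEL

hf_auth = os.environ.get("hf_token")  # Hugging Face access token, as in embedding_setup.py

# Base model name now comes from config instead of a hard-coded string.
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, token=hf_auth)
base_model = AutoModelForCausalLM.from_pretrained(LLM_MODEL, token=hf_auth)

# Attach the recommendation LoRA adapter; the explanation adapter
# (EXP_LORA_MODEL) would be loaded the same way.
rec_model = PeftModel.from_pretrained(base_model, REC_LORA_MODEL)
```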