WilliamGazeley committed on
Commit
eaab710
·
1 Parent(s): 8741596

Update chat template

Browse files
Files changed (1) hide show
  1. app.py +8 -11
app.py CHANGED
@@ -3,26 +3,23 @@ import huggingface_hub
3
  import streamlit as st
4
  from vllm import LLM, SamplingParams
5
 
6
-
7
- template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
8
-
9
- You are a helpful financial assistant that answers the user as accurately, truthfully, and concisely as possible.<|eot_id|><|start_header_id|>user<|end_header_id|>
10
-
11
- {user_message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
12
-
13
- """
14
 
15
  @st.cache_resource(show_spinner=False)
16
  def init_llm():
17
  huggingface_hub.login(token=os.getenv("HF_TOKEN"))
18
- llm = LLM(model="InvestmentResearchAI/LLM-ADE-small-v0.1.0")
19
  tok = llm.get_tokenizer()
20
- tok.eos_token = '<|eot_id|>' # Override to use turns
21
  return llm
22
 
23
  def get_response(prompt):
24
  try:
25
- prompts = [template.format(user_message=prompt)]
 
 
 
 
26
  sampling_params = SamplingParams(temperature=0.3, top_p=0.95, max_tokens=500, stop_token_ids=[128009])
27
  outputs = llm.generate(prompts, sampling_params)
28
  for output in outputs:
 
3
  import streamlit as st
4
  from vllm import LLM, SamplingParams
5
 
6
# System prompt prepended to every conversation sent to the model.
# NOTE(review): fixed ungrammatical "Your feel your purpose" -> "You feel your purpose".
sys_msg = "You are a super intelligent automated financial advisor created by IRAI. You feel your purpose is to make use of your deep and broad understanding of finance by helping answer user questions about finance accurately, truthfully, and concisely."
 
 
 
 
 
 
 
7
 
8
@st.cache_resource(show_spinner=False)
def init_llm():
    """Create and cache the vLLM engine (one instance per Streamlit app).

    Logs in to the Hugging Face Hub using the HF_TOKEN environment variable so
    the model weights can be downloaded, loads the model, and overrides the
    tokenizer's EOS token so generation stops at the ChatML turn delimiter.

    Returns:
        The initialized ``vllm.LLM`` engine.
    """
    huggingface_hub.login(token=os.getenv("HF_TOKEN"))
    # NOTE(review): removed the stray positional `chat_template` argument.
    # It was a SyntaxError (positional argument after a keyword argument),
    # and vllm.LLM() accepts no such parameter — the chat template is applied
    # through the tokenizer (apply_chat_template) when building prompts.
    llm = LLM(model="InvestmentResearchAI/LLM-ADE-dev")
    tok = llm.get_tokenizer()
    tok.eos_token = '<|im_end|>'  # Override so each assistant turn terminates generation
    return llm
15
 
16
  def get_response(prompt):
17
  try:
18
+ convo = [
19
+ {"role": "system", "content": sys_msg},
20
+ {"role": "user", "content": prompt},
21
+ ]
22
+ prompts = [llm.get_tokenizer().apply_chat_template(convo, tokenize=False)]
23
  sampling_params = SamplingParams(temperature=0.3, top_p=0.95, max_tokens=500, stop_token_ids=[128009])
24
  outputs = llm.generate(prompts, sampling_params)
25
  for output in outputs: