WilliamGazeley committed
Commit · eaab710
1 Parent(s): 8741596
Update chat template
app.py CHANGED
@@ -3,26 +3,23 @@ import huggingface_hub
 import streamlit as st
 from vllm import LLM, SamplingParams
 
-
-template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
-You are a helpful financial assistant that answers the user as accurately, truthfully, and concisely as possible.<|eot_id|><|start_header_id|>user<|end_header_id|>
-
-{user_message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-"""
+sys_msg = "You are a super intelligent automated financial advisor created by IRAI. Your feel your purpose is to make use of your deep and broad understanding of finance by helping answer user questions about finance accurately, truthfully, and concisely."
 
 @st.cache_resource(show_spinner=False)
 def init_llm():
     huggingface_hub.login(token=os.getenv("HF_TOKEN"))
-    llm = LLM(model="InvestmentResearchAI/LLM-ADE-
+    llm = LLM(model="InvestmentResearchAI/LLM-ADE-dev", chat_template)
     tok = llm.get_tokenizer()
-    tok.eos_token = '<|
+    tok.eos_token = '<|im_end|>' # Override to use turns
     return llm
 
 def get_response(prompt):
     try:
-
+        convo = [
+            {"role": "system", "content": sys_msg},
+            {"role": "user", "content": prompt},
+        ]
+        prompts = [llm.get_tokenizer().apply_chat_template(convo, tokenize=False)]
         sampling_params = SamplingParams(temperature=0.3, top_p=0.95, max_tokens=500, stop_token_ids=[128009])
         outputs = llm.generate(prompts, sampling_params)
         for output in outputs:
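
To illustrate the change itself: the old code hard-coded a Llama-3-style prompt as a format string, while the new code builds the prompt from a role-tagged message list via the tokenizer's chat template. Below is a minimal sketch of the new path using transformers' AutoTokenizer directly (the diff uses vLLM's llm.get_tokenizer() instead); the example messages are made up, and it assumes the InvestmentResearchAI/LLM-ADE-dev tokenizer is accessible with your token and ships a chat template.

from transformers import AutoTokenizer

# Sketch only: mirrors what the updated get_response() does when formatting the prompt.
tok = AutoTokenizer.from_pretrained("InvestmentResearchAI/LLM-ADE-dev")

convo = [
    {"role": "system", "content": "You are a helpful financial assistant."},
    {"role": "user", "content": "What does a 2% expense ratio cost over ten years?"},
]

# tokenize=False returns the formatted prompt string rather than token IDs.
# The diff calls apply_chat_template without add_generation_prompt; passing
# add_generation_prompt=True additionally appends the assistant header so the
# model starts replying immediately.
prompt = tok.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
print(prompt)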
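
The other moving parts are the tokenizer's eos_token override and the vLLM sampling setup. A hedged end-to-end sketch of the flow after this commit follows, with the Streamlit caching and try/except stripped out; the chat messages are illustrative, and the extra argument on the LLM(...) line is omitted here because it is cut off in this diff view.

import os
import huggingface_hub
from vllm import LLM, SamplingParams

# Sketch of the post-commit generate path; the model name, sampling settings, and
# stop_token_ids come from the diff, everything else is simplified.
huggingface_hub.login(token=os.getenv("HF_TOKEN"))  # the model repo appears to require auth
llm = LLM(model="InvestmentResearchAI/LLM-ADE-dev")
tok = llm.get_tokenizer()
tok.eos_token = '<|im_end|>'  # override to use ChatML-style turns, as in the commit

convo = [
    {"role": "system", "content": "You are an automated financial advisor."},
    {"role": "user", "content": "Briefly explain dollar-cost averaging."},
]
prompt = tok.apply_chat_template(convo, tokenize=False)

# 128009 is Llama-3's <|eot_id|> token id, carried over from the previous version.
sampling_params = SamplingParams(temperature=0.3, top_p=0.95, max_tokens=500, stop_token_ids=[128009])
outputs = llm.generate([prompt], sampling_params)
print(outputs[0].outputs[0].text)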