import os
import huggingface_hub
import streamlit as st
from vllm import LLM, SamplingParams
sys_msg = """#Context:
You are an expert financial advisor named IRAI. You have a comprehensive understanding of finance and investing with experience and expertise in all areas of finance.
#Objective:
Answer questions as best as possible given your current knowledge. You do not have access to up-to-date current market data; if necessary please mention that this will be available in the future. Demonstrate analytical depth and showcase ability to integrated complex data into practical advice, but answer the question directly.
Style and tone:
Answer in a friendly and engaging manner representing a top female investment professional working at a leading investment bank.
#Audience:
The questions will be asked by top executives and managers of successful startups. Assume the audience is composed of 40 year old males with high wealth and income, high risk appetite with high threshold for volatility.
#Response:
Direct answer to question, concise yet insightful."""
@st.cache_resource(show_spinner="Loading model..")
def init_llm():
huggingface_hub.login(token=os.getenv("HF_TOKEN"))
llm = LLM(model="InvestmentResearchAI/LLM-ADE-dev")
tok = llm.get_tokenizer()
tok.eos_token = '<|im_end|>' # Override to use turns
return llm
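
# A minimal debugging sketch, not part of the original app: it renders the exact
# prompt string that apply_chat_template produces so the turn markers (e.g. the
# <|im_end|> token that init_llm sets as EOS) can be inspected. The helper name
# _debug_render_prompt is a hypothetical addition.
def _debug_render_prompt(prompt):
    convo = [
        {"role": "system", "content": sys_msg},
        {"role": "user", "content": prompt},
    ]
    return init_llm().get_tokenizer().apply_chat_template(convo, tokenize=False)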
def get_response(prompt):
    try:
        convo = [
            {"role": "system", "content": sys_msg},
            {"role": "user", "content": prompt},
        ]
        llm = init_llm()  # Cached by st.cache_resource, so this reuses the already-loaded model
        prompts = [llm.get_tokenizer().apply_chat_template(convo, tokenize=False)]
        # stop_token_ids adds an explicit stop id (128009 is <|eot_id|> in the Llama-3 vocabulary).
        sampling_params = SamplingParams(temperature=0.3, top_p=0.95, max_tokens=500, stop_token_ids=[128009])
        outputs = llm.generate(prompts, sampling_params)
        # Only a single prompt is submitted, so return its first completion.
        return outputs[0].outputs[0].text
    except Exception as e:
        return f"An error occurred: {str(e)}"
def main():
    st.title("LLM-ADE 9B Demo")
    input_text = st.text_area("Enter your text here:", value="", height=200)
    if st.button("Generate"):
        if input_text:
            with st.spinner('Generating response...'):
                response_text = get_response(input_text)
                st.write(response_text)
        else:
            st.warning("Please enter some text to generate a response.")

llm = init_llm()  # Eagerly load the model at startup so the first "Generate" click is fast
if __name__ == "__main__":
main()
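
# Usage sketch (assuming the file is saved as app.py, the conventional entry
# point for a Streamlit Space, and that HF_TOKEN is set in the environment):
#   HF_TOKEN=<your token> streamlit run app.py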