NvidiaLlama31 / app.py
Ley_Fill7
Modified to take user input
8e08e98
raw
history blame
1.46 kB
import streamlit as st
import os
from openai import OpenAI
# The NVIDIA NIM credential is read from the environment so it never lives in source.
api_key = os.getenv("NVIDIANIM_API_KEY")
if not api_key:
    # os.getenv returns None when the variable is unset; stop with a readable
    # message instead of handing None to the client below.
    st.error("NVIDIANIM_API_KEY is not set. Add it to the environment and reload the app.")
    st.stop()

# OpenAI-compatible client pointed at the NVIDIA endpoint.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=api_key,
)
model_name = "meta/llama-3.1-405b-instruct"

# The conversation is kept in session state so it is still available the next
# time the script runs; only create the list when it does not exist yet.
if "messages" not in st.session_state:
    st.session_state.messages = []
def get_llama_response(question: str) -> str:
    """Send *question* to the model along with the stored history and return the reply.

    Side effects: the user turn is appended to ``st.session_state.messages``
    before the request is made, and the assistant turn is appended once the
    stream has been fully consumed, so callers do not need to record either
    turn themselves.

    Args:
        question: The user's message text.

    Returns:
        The complete assistant reply, assembled from the streamed chunks.
    """
    st.session_state.messages.append({"role": "user", "content": question})
    stream = client.chat.completions.create(
        model=model_name,
        messages=st.session_state.messages,
        temperature=0.2,
        top_p=0.7,
        max_tokens=1024,
        stream=True,
    )

    pieces: list[str] = []
    for chunk in stream:
        # A chunk with an empty ``choices`` list carries no text; indexing
        # [0] on it would raise IndexError and abort the whole reply.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text is not None:
            pieces.append(delta_text)

    response_text = "".join(pieces)
    st.session_state.messages.append({"role": "assistant", "content": response_text})
    return response_text
st.title("Ask Llama 3.1 405B on Nvidia NIM")

# Replay the stored conversation so earlier turns stay visible.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# st.chat_input yields the text only on the rerun caused by its own submission.
# Gating on a separate st.button could never succeed: the button is True only
# on the rerun caused by its click, when chat_input has nothing to return.
user_input = st.chat_input("Your message")
if user_input:
    with st.chat_message("user"):
        st.markdown(user_input)
    # get_llama_response appends both the user turn and the assistant reply to
    # st.session_state.messages; appending the user turn here as well would
    # duplicate it in the history sent to the model.
    response = get_llama_response(user_input)
    with st.chat_message("assistant"):
        st.markdown(response)