File size: 1,458 Bytes
07f7e2b
420b173
07f7e2b
17d0417
 
d3f031e
5057f72
 
17d0417
5057f72
 
 
 
07f7e2b
 
5057f72
07f7e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420b173
07f7e2b
 
 
 
 
8e08e98
07f7e2b
420b173
8e08e98
 
420b173
8e08e98
 
07f7e2b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import streamlit as st
import os
from openai import OpenAI

# Credential is read from the environment; the value is None when the variable is unset.
api_key = os.environ.get("NVIDIANIM_API_KEY")

# OpenAI SDK client pointed at the base URL below instead of the SDK default.
client = OpenAI(api_key=api_key, base_url="https://integrate.api.nvidia.com/v1")

# Model identifier passed with every chat completion request.
model_name = "meta/llama-3.1-405b-instruct"

# Create the conversation history only when it is absent from the session state.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

def get_llama_response(question):
    """Send ``question`` to the chat model and return the complete reply text.

    The question is appended to ``st.session_state.messages`` as a user turn,
    the whole history is sent as a streaming chat completion request, and the
    streamed fragments are collected into one string. The reply is then
    appended to the history as an assistant turn.

    Args:
        question: Text of the user's message.

    Returns:
        The assistant reply assembled from all streamed chunks; an empty
        string if no chunk carried any content.
    """
    history = st.session_state.messages
    history.append({"role": "user", "content": question})

    stream = client.chat.completions.create(
        model=model_name,
        messages=history,
        temperature=0.2,
        top_p=0.7,
        max_tokens=1024,
        stream=True,
    )

    pieces = []
    for chunk in stream:
        # A streamed chunk can arrive with an empty ``choices`` list; indexing
        # it would raise IndexError, so such chunks are skipped.
        if not chunk.choices:
            continue
        fragment = chunk.choices[0].delta.content
        if fragment is not None:
            pieces.append(fragment)

    response_text = "".join(pieces)
    history.append({"role": "assistant", "content": response_text})
    return response_text

st.title("Ask Llama 3.1 405B on Nvidia NIM")

# Replay the stored conversation so earlier turns are shown on every run.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

user_input = st.chat_input("Your message")

# The chat input yields the submitted text only on the run triggered by that
# submission, so it is handled directly here. Gating it behind a separate
# button click (which triggers a different run) would never see the text.
if user_input:
    with st.chat_message("user"):
        st.markdown(user_input)
    # get_llama_response records both the user turn and the assistant turn in
    # the session history, so the message is not appended here as well.
    response = get_llama_response(user_input)
    with st.chat_message("assistant"):
        st.markdown(response)