File size: 982 Bytes
49ca5bb
07f7e2b
420b173
17d0417
 
d3f031e
5057f72
 
17d0417
5057f72
 
 
 
07f7e2b
49ca5bb
 
 
 
 
 
 
 
 
 
 
 
 
 
07f7e2b
420b173
49ca5bb
07f7e2b
420b173
49ca5bb
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from openai import OpenAI
import streamlit as st
import os

# Read the NVIDIA NIM API key from the environment. Fail fast with a clear
# message if it is missing, instead of letting the client raise an opaque
# authentication error on the first request.
api_key = os.getenv("NVIDIANIM_API_KEY")
if not api_key:
  st.error("NVIDIANIM_API_KEY environment variable is not set.")
  st.stop()

# OpenAI-compatible client pointed at NVIDIA's NIM endpoint.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=api_key,
)

# NIM model identifier for Llama 3.1 405B Instruct.
model_name = "meta/llama-3.1-405b-instruct"

def get_llama_response(question):
  """Send *question* to the chat completions endpoint and return the reply.

  The request is made in streaming mode; the streamed chunks are collected
  and joined into a single string, which is returned with surrounding
  whitespace stripped.
  """
  stream = client.chat.completions.create(
      model=model_name,
      messages=[{"role": "user", "content": question}],
      temperature=0.2,
      top_p=0.7,
      max_tokens=1024,
      stream=True
  )

  # Accumulate the streamed text fragments and join them once at the end.
  parts = []
  for piece in stream:
    delta = piece.choices[0].delta.content
    if delta is not None:
      parts.append(delta)
  return "".join(parts).strip()

# --- Streamlit UI: title, question input, and submit handling ---
st.title("Ask Llama 3.1 405B on Nvidia NIM")
user_question = st.text_input("Enter your question:")

if st.button("Submit"):
  # Guard clause: warn and do nothing further when the input is empty.
  if not user_question:
    st.warning("Please enter a question.")
  else:
    answer = get_llama_response(user_question)
    st.write("**Llama 3.1 405B Response:**")
    st.write(answer)