from openai import OpenAI
import streamlit as st
import os

# Read the NVIDIA NIM API key from the environment.
api_key = os.getenv("NVIDIANIM_API_KEY")

# Create an OpenAI-compatible client pointed at the NVIDIA NIM endpoint.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=api_key,
)

model_name = "meta/llama-3.1-405b-instruct"

def get_llama_response(question):
    """Send the question to Llama 3.1 405B and collect the streamed reply."""
    completion = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": question}],
        temperature=0.2,
        top_p=0.7,
        max_tokens=1024,
        stream=True,
    )
    # Accumulate the streamed chunks into a single string.
    response = ""
    for chunk in completion:
        if chunk.choices[0].delta.content is not None:
            response += chunk.choices[0].delta.content
    return response.strip()

# Streamlit UI: a text box plus a submit button.
st.title("Ask Llama 3.1 405B on Nvidia NIM")
user_question = st.text_input("Enter your question:")

if st.button("Submit"):
    if user_question:
        llama_response = get_llama_response(user_question)
        st.write("**Llama 3.1 405B Response:**")
        st.write(llama_response)
    else:
        st.warning("Please enter a question.")