from openai import OpenAI
import streamlit as st
import os

# Read the NVIDIA NIM API key from the environment rather than hardcoding it.
api_key = os.getenv("NVIDIANIM_API_KEY")

# NVIDIA NIM exposes an OpenAI-compatible API, so the standard OpenAI client
# works once it is pointed at the NIM base URL.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=api_key,
)

model_name = "meta/llama-3.1-405b-instruct"

def get_llama_response(question):
    # Request a streamed completion and accumulate the deltas into one string.
    completion = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": question}],
        temperature=0.2,
        top_p=0.7,
        max_tokens=1024,
        stream=True,
    )
    response = ""
    for chunk in completion:
        # A chunk's delta content can be None (e.g. the final chunk),
        # so guard before appending.
        if chunk.choices[0].delta.content is not None:
            response += chunk.choices[0].delta.content
    return response.strip()
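# Variant sketch: get_llama_response buffers the full stream before returning,
# so the page only updates once generation finishes. Assuming a Streamlit
# version that ships st.write_stream (added around 1.31), a generator can
# render tokens incrementally instead. This is an illustrative alternative,
# not part of the original app.
def stream_llama_response(question):
    # Yield each text delta as it arrives from the NIM endpoint.
    completion = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": question}],
        temperature=0.2,
        top_p=0.7,
        max_tokens=1024,
        stream=True,
    )
    for chunk in completion:
        if chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

# Usage inside the button handler (commented out to keep the original flow):
# llama_response = st.write_stream(stream_llama_response(user_question))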
st.title("Ask Llama 3.1 405B on Nvidia NIM") | |
user_question = st.text_input("Enter your question:") | |
if st.button("Submit"): | |
if user_question: | |
llama_response = get_llama_response(user_question) | |
st.write("**Llama 3.1 405B Response:**") | |
st.write(llama_response) | |
else: | |
st.warning("Please enter a question.") |