""" | |
@author: idoia lerchundi | |
""" | |
import os
import random

import streamlit as st
from huggingface_hub import InferenceClient
# Load the API token from an environment variable
api_key = os.getenv("HF_TOKEN")
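# Minimal guard, assuming the HF_TOKEN secret may be missing in some
# deployments: stop early with a readable message instead of letting the
# first API call fail at runtime.
if not api_key:
    st.error("HF_TOKEN is not set. Add it as a secret in the Space settings.")
    st.stop()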
# Instantiate the InferenceClient
client = InferenceClient(api_key=api_key)

# Streamlit app title
st.title("Serverless Inference API")

# Ensure the full_text key is initialized in session state
if "full_text" not in st.session_state:
    st.session_state["full_text"] = ""
# Create a text input area for user prompts
with st.form("my_form"):
    text = st.text_area(
        "Enter text (using model TinyLlama/TinyLlama-1.1B-Chat-v1.0):",
        "Tell me a 4 sentence joke to make me laugh. A short joke, not a long one. "
        "With a random subject. You can not repeat the subject or the joke, so be creative.",
    )
    submitted = st.form_submit_button("Submit")
# Initialize the full_text variable
full_text = ""

# Fixed top_p; Streamlit reruns the whole script on every interaction, so
# incrementing it between runs would have no effect
top_p = 0.9

# Generate a random temperature between 0.5 and 1.0 to get different jokes
temperature = random.uniform(0.5, 1.0)
if submitted:
    messages = [
        {"role": "user", "content": text}
    ]

    # Create a new stream for each submission, passing the randomized
    # temperature so repeated prompts yield different jokes
    stream = client.chat.completions.create(
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        messages=messages,
        temperature=temperature,
        max_tokens=300,
        top_p=top_p,
        stream=True,
    )
    # Concatenate chunks to form the full response; the final chunk's
    # delta may carry no content, so skip empty pieces
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            full_text += content

    # Update session state with the full response
    st.session_state["full_text"] = full_text
# Display the full response
if st.session_state["full_text"]:
    st.info(st.session_state["full_text"])