"""Streamlit front end for a Hugging Face serverless chat-completion call.

The page shows a prompt form; on submit the prompt is sent to the chat
model, the streamed reply is assembled into one string, stored in
``st.session_state["full_text"]`` and displayed.

@author: idoia lerchundi
"""
import os
import random

import streamlit as st
from huggingface_hub import InferenceClient

# Model used both in the form label and in the inference request.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Prompt pre-filled in the text area.
DEFAULT_PROMPT = (
    "Tell me a 4 sentence joke to make me laugh. "
    "A short joke, not a long one. "
    "With a random subject. "
    "You can not repeat the subject or the joke, so be creative."
)

# Sampling settings for the request. The original script started top_p at
# 0.7 and added 0.2 before every call, so the value sent was always ~0.9.
TOP_P = 0.9
MAX_TOKENS = 300
TEMPERATURE_RANGE = (0.5, 1.0)


def _collect_stream_text(stream):
    """Join the text deltas of a streamed chat completion into one string.

    Chunks with no ``choices`` or whose ``delta.content`` is ``None`` or
    empty are skipped instead of raising.

    Args:
        stream: Iterable of chunk objects exposing
            ``choices[0].delta.content``.

    Returns:
        The concatenated response text; ``""`` if nothing was produced.
    """
    pieces = []
    for chunk in stream:
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            pieces.append(piece)
    return "".join(pieces)


# Load the API token from an environment variable.
api_key = os.getenv("HF_TOKEN")

# Instantiate the InferenceClient.
client = InferenceClient(api_key=api_key)

# Streamlit app title.
st.title("Serverless Inference API")

# Ensure the full_text key is initialized in session state.
if "full_text" not in st.session_state:
    st.session_state["full_text"] = ""

# Create a text input area for user prompts.
with st.form("my_form"):
    text = st.text_area(f"Enter text (using model {MODEL_ID}):", DEFAULT_PROMPT)
    submitted = st.form_submit_button("Submit")

if submitted:
    # Draw a fresh temperature per submission so repeated prompts can yield
    # different jokes; previously this value was computed but never sent.
    temperature = random.uniform(*TEMPERATURE_RANGE)

    # Create a new stream for each submission.
    stream = client.chat.completions.create(
        model=MODEL_ID,
        messages=[{"role": "user", "content": text}],
        temperature=temperature,
        max_tokens=MAX_TOKENS,
        top_p=TOP_P,
        stream=True,
    )

    # Update session state with the full response.
    st.session_state["full_text"] = _collect_stream_text(stream)

# Display the full response.
if st.session_state["full_text"]:
    st.info(st.session_state["full_text"])