""" @author: idoia lerchundi """ import os import streamlit as st from huggingface_hub import InferenceClient # Load the API token from an environment variable api_key = os.getenv("HF_TOKEN") # Instantiate the InferenceClient client = InferenceClient(api_key=api_key) # Streamlit app title st.title("Serverless Inference API") # Ensure the full_text key is initialized in session state if "full_text" not in st.session_state: st.session_state["full_text"] = "" # Create a text input area for user prompts with st.form("my_form"): text = st.text_area("Enter text:", "Tell me a 4 sentence joke to make me laugh. A short joke, not a long one.") submitted = st.form_submit_button("Submit") # Initialize the full_text variable full_text = " " # to get different jokes top_p_init = 0.7 if submitted: top_p = top_p_init+0.1 messages = [ {"role": "user", "content": text} ] # Create a new stream for each submission stream = client.chat.completions.create( model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", messages=messages, temperature=0.5, max_tokens=300, top_p== float(top_p_init), stream=True ) top_p_init +=0.1 # Concatenate chunks to form the full response for chunk in stream: full_text += chunk.choices[0].delta.content # Update session state with the full response st.session_state["full_text"] = full_text # Display the full response if st.session_state["full_text"]: st.info(st.session_state["full_text"])