""" @author: idoia lerchundi """ import os import streamlit as st from huggingface_hub import InferenceClient # Load the API token from an environment variable api_key = os.getenv("HF_TOKEN") # Instantiate the InferenceClient client = InferenceClient(api_key=api_key) # Streamlit app title st.title("Hugging Face Inference with Streamlit") # Create a text input area for user prompts with st.form("my_form"): text = st.text_area("Enter text:", "Tell me a joke to make me laugh.") submitted = st.form_submit_button("Submit") # Initialize the full_text variable full_text = "" if submitted: messages = [ {"role": "user", "content": text} ] # Create a new stream for each submission stream = client.chat.completions.create( model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", messages=messages, temperature=0.5, max_tokens=100, top_p=0.7, stream=True ) # Concatenate chunks to form the full response for chunk in stream: full_text += chunk.choices[0].delta.content # Display the full response st.info(full_text)