"""
@author: idoia lerchundi
"""
import os
import time
import streamlit as st
from huggingface_hub import InferenceClient
import random

# Load the API token from an environment variable
api_key = os.getenv("HF_TOKEN")
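# NOTE: this assumes HF_TOKEN is set in the environment (e.g. as a Space secret);
# without it, the Inference API calls below will fail with an authentication error.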

# Instantiate the InferenceClient
client = InferenceClient(api_key=api_key)

# Function to simulate some process and return the elapsed time
def process_with_timing():
    start_time = time.time()
    # Simulate a process with sleep; change this value to simulate different processing times
    time.sleep(2.345)
    elapsed_time = time.time() - start_time
    minutes, seconds = divmod(elapsed_time, 60)
    milliseconds = (seconds - int(seconds)) * 1000
    return minutes, int(seconds), milliseconds

# Streamlit app title
st.title("Text generation with the Hugging Face Inference API (serverless) and Streamlit")

# Ensure the full_text key is initialized in session state
if "full_text" not in st.session_state:
    st.session_state["full_text"] = ""

# Model selection dropdown
model_options = [
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "gpt2",
    "facebook/opt-1.3b",
    "EleutherAI/gpt-neo-2.7B",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "Qwen/Qwen2.5-1.5B-Instruct",
    "openai-community/gpt2",
    "google/gemma-1.1-7b-it",
    "google/gemma-1.1-2b-it",
    "google/gemma-2-2b-it",
    "google/gemma-2-9b-it",
    "google/gemma-2-27b-it",
    "HuggingFaceH4/starchat-beta",
    "distilbert/distilgpt2",
    "distributed/optimized-gpt2-1b",
]
selected_model = st.selectbox("Choose a model:", model_options)

# Create a text input area for user prompts
with st.form("my_form"):
    text = st.text_area("JOKER (note: small models like TinyLlama/TinyLlama-1.1B-Chat-v1.0 are not great at telling jokes):", "Tell me a clever and funny joke in exactly 4 sentences. It should make me laugh really hard. Don't repeat the topic in your joke. Be creative and concise.")
    submitted = st.form_submit_button("Submit")

# Initialize the full_text accumulator
full_text = ""

# Generate a random temperature between 0.5 and 1.0
temperature = random.uniform(0.5, 1.0)

if submitted:
    messages = [
        {"role": "user", "content": text}
    ]
    
    # Create a new stream for each submission
    stream = client.chat.completions.create(
        model=selected_model,
        messages=messages,
        temperature=temperature,  # random value generated above
        max_tokens=300,
        top_p=random.uniform(0.7, 1.0),
        stream=True
    )
    # Report the elapsed time of the simulated process above (not the API latency)
    minutes, seconds, milliseconds = process_with_timing()
    st.write(f"Elapsed Time: {int(minutes)} minutes, {seconds} seconds, and {milliseconds:.2f} milliseconds")
    
    # Concatenate chunks to form the full response
    # (delta.content can be None on some chunks, so guard before appending)
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            full_text += content
        
    # Update session state with the full response
    st.session_state["full_text"] = full_text
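    # (Alternative sketch: Streamlit's st.write_stream could render the chunks
    # incrementally here instead of buffering the full reply before display.)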
    
# Display the full response
if st.session_state["full_text"]:
    st.info(st.session_state["full_text"])
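
# To run locally (a minimal sketch, assuming this file is saved as app.py):
#   export HF_TOKEN=hf_...   # a Hugging Face access token with inference permissions
#   streamlit run app.py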