# NOTE(review): lines above this comment were Hugging Face Spaces page-scrape
# residue (status banner, file size, commit hashes, line-number gutter) that
# made the file invalid Python; replaced with this comment so the module parses.
import os
import gradio as gr
import pandas as pd
from huggingface_hub import InferenceClient
from threading import Timer
# Hugging Face access token read from the environment (None when unset);
# passed to every per-model InferenceClient probe below.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
def _error_flags(exc):
    """Classify an inference error: return (is_pro_only, is_rate_limited)."""
    msg = str(exc)
    return ("Model requires a Pro subscription" in msg,
            "Rate limit reached" in msg)

def get_available_free():
    """Probe every deployed text-generation model for free API availability.

    For each deployed model, attempts a 1-token text completion and a 1-token
    chat completion. Returns a dict of equal-length lists (columns: "Model",
    "API", "Text Completion", "Chat Completion") and caches it to data.csv.
    On a rate limit, returns the cached data.csv contents if present,
    otherwise whatever rows were collected so far (always a dict of lists —
    the original returned a bare [] here, breaking downstream DataFrame use).
    """
    models = InferenceClient().list_deployed_models("text-generation-inference")['text-generation']
    models_conclusion = {
        "Model": [],
        "API": [],
        "Text Completion": [],
        "Chat Completion": []
    }
    for m in models:
        text_available = False
        chat_available = False
        pro_sub = False
        rate_limited = False
        # Probe each endpoint in its own try so a text-generation failure
        # does not mask a working chat endpoint (the original chained both
        # probes in a single try, so chat was never tested after a text error).
        try:
            InferenceClient(m, timeout=10, token=HUGGINGFACE_TOKEN).text_generation("Hi.", max_new_tokens=1)
            text_available = True
        except Exception as e:
            print(e)
            pro, limited = _error_flags(e)
            pro_sub = pro_sub or pro
            rate_limited = rate_limited or limited
        try:
            InferenceClient(m, timeout=10, token=HUGGINGFACE_TOKEN).chat_completion(messages=[{'role': 'user', 'content': 'Hi.'}], max_tokens=1)
            chat_available = True
        except Exception as e:
            print(e)
            pro, limited = _error_flags(e)
            pro_sub = pro_sub or pro
            rate_limited = rate_limited or limited
        if rate_limited:
            print("Rate Limited!!")
            if os.path.exists("data.csv"):
                print("Loading data from file...")
                return pd.read_csv("data.csv").to_dict(orient='list')
            # Keep the return type consistent (dict of lists) on this path.
            return models_conclusion
        models_conclusion["Model"].append(m)
        models_conclusion["API"].append("Free" if chat_available or text_available else ("Pro Subscription" if pro_sub else "Not Responding"))
        models_conclusion["Chat Completion"].append("---" if (pro_sub or (not chat_available and not text_available)) else ("✓" if chat_available else "⌀"))
        models_conclusion["Text Completion"].append("---" if (pro_sub or (not chat_available and not text_available)) else ("✓" if text_available else "⌀"))
    pd.DataFrame(models_conclusion).to_csv("data.csv", index=False)
    return models_conclusion
def update_data():
    """Re-probe model availability and return the result as a DataFrame."""
    return pd.DataFrame(get_available_free())
def display_table(search_query=""):
    """Build the styled availability table, optionally filtered by model name.

    An empty query shows every row; otherwise a case-insensitive substring
    match against the "Model" column is applied before styling.
    """
    table = update_data()
    if search_query:
        table = table[table["Model"].str.contains(search_query, case=False)]
    return table.style.apply(
        apply_row_styles,
        axis=1,
        subset=["Model", "API", "Text Completion", "Chat Completion"],
    )
def apply_row_styles(row):
    """Return one CSS string per displayed column, keyed off the row's API status."""
    api = row["API"]
    columns = ("Model", "API", "Text Completion", "Chat Completion")
    return [color_status(api, row[col]) for col in columns]
def color_status(api_value, cell_value):
    """Pick a CSS background for one cell.

    "---" cells inherit a color from the row's API status; any other cell is
    colored by its own value (status label or availability mark). Unknown
    values get no styling.
    """
    status_css = {
        "Free": 'background-color: green',
        "Pro Subscription": 'background-color: blue',
        "Not Responding": 'background-color: red',
    }
    mark_css = {
        "✓": 'background-color: green',
        "⌀": 'background-color: red',
    }
    if cell_value == "---":
        return status_css.get(api_value, '')
    return status_css.get(cell_value) or mark_css.get(cell_value, '')
def search_models(query):
    """Gradio callback: re-render the availability table filtered by *query*."""
    return display_table(query)
# Blurb rendered under the page title in the Gradio UI.
description = "This is a space that retrieves the status of all supported HF LLM Serverless Inference APIs.\nUpdates every 2 hours!"
with gr.Blocks() as demo:
    gr.Markdown("## HF Serverless LLM Inference API Status")
    gr.Markdown(description)
    search_box = gr.Textbox(label="Search for a model", placeholder="Type model name here...")
    table = gr.Dataframe(value=display_table(), headers="keys")
    search_box.change(fn=search_models, inputs=search_box, outputs=table)

    def _schedule_refresh():
        """Arm a one-shot 2-hour timer that re-runs the availability probe."""
        # threading.Timer threads are non-daemon by default, so a pending
        # refresh would keep the process alive after the app shuts down;
        # mark it daemon so interpreter exit is never blocked.
        t = Timer(7200, update_every_two_hours)
        t.daemon = True
        t.start()

    def update_every_two_hours():
        # Refresh the probe results (side effect: get_available_free rewrites
        # data.csv); the returned styled table is discarded here — only the
        # on-disk cache is updated. Timer fires once, so re-arm each run.
        search_models(search_box.value)
        _schedule_refresh()

    _schedule_refresh()

demo.launch()
|