"""Gradio app that reports the availability of HF serverless LLM inference APIs.

For every deployed text-generation model it probes both the text-completion
and chat-completion endpoints and renders a color-coded status table.
"""

import os
from threading import Timer

import gradio as gr
import pandas as pd
from huggingface_hub import InferenceClient

# Token for authenticated probes; may be None (anonymous, stricter rate limits).
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

# Cache of the last successful probe, used as a fallback when rate limited.
DATA_FILE = "data.csv"


def _probe(call, flags):
    """Run one endpoint probe, updating availability/error flags in place.

    call:  zero-argument callable performing the API request.
    flags: dict with 'pro_sub' and 'rate_limited' booleans (mutated).
    Returns True when the call succeeded.
    """
    try:
        call()
        return True
    except Exception as e:
        print(e)
        msg = str(e)
        if "Model requires a Pro subscription" in msg:
            flags["pro_sub"] = True
        if "Rate limit reached" in msg:
            flags["rate_limited"] = True
        return False


def get_available_free():
    """Probe every deployed text-generation model and classify its API status.

    Returns a dict of parallel lists ("Model", "API", "Text Completion",
    "Chat Completion") suitable for ``pd.DataFrame``. When the probe loop
    itself gets rate limited, falls back to the cached DATA_FILE snapshot
    (or the partial results gathered so far when no cache exists).
    """
    models = InferenceClient().list_deployed_models(
        "text-generation-inference"
    )["text-generation"]

    models_conclusion = {
        "Model": [],
        "API": [],
        "Text Completion": [],
        "Chat Completion": [],
    }
    for model in models:
        # One client per model, reused for both probes (the original built two).
        client = InferenceClient(model, timeout=10, token=HUGGINGFACE_TOKEN)
        flags = {"pro_sub": False, "rate_limited": False}

        # Probe the two endpoints independently so a failure in one does not
        # mask availability of the other (a single try-block skipped the chat
        # probe whenever the text probe raised).
        text_available = _probe(
            lambda: client.text_generation("Hi.", max_new_tokens=1), flags
        )
        chat_available = _probe(
            lambda: client.chat_completion(
                messages=[{"role": "user", "content": "Hi."}], max_tokens=1
            ),
            flags,
        )

        if flags["rate_limited"]:
            print("Rate Limited!!")
            if os.path.exists(DATA_FILE):
                print("Loading data from file...")
                return pd.read_csv(DATA_FILE).to_dict(orient="list")
            # FIX: previously returned [] here, which had a different shape
            # from every other return path; an (empty) dict of lists keeps
            # the contract consistent for DataFrame construction.
            return models_conclusion

        unreachable = flags["pro_sub"] or not (chat_available or text_available)
        models_conclusion["Model"].append(model)
        models_conclusion["API"].append(
            "Free"
            if chat_available or text_available
            else ("Pro Subscription" if flags["pro_sub"] else "Not Responding")
        )
        models_conclusion["Chat Completion"].append(
            "---" if unreachable else ("✓" if chat_available else "⌀")
        )
        models_conclusion["Text Completion"].append(
            "---" if unreachable else ("✓" if text_available else "⌀")
        )

    # Persist the fresh snapshot so the rate-limit fallback above has data.
    pd.DataFrame(models_conclusion).to_csv(DATA_FILE, index=False)
    return models_conclusion


def update_data():
    """Fetch the latest status data and return it as a DataFrame."""
    return pd.DataFrame(get_available_free())


def display_table(search_query=""):
    """Build the styled status table, optionally filtered by model name."""
    df = update_data()
    if search_query:
        df = df[df["Model"].str.contains(search_query, case=False)]
    return df.style.apply(
        apply_row_styles,
        axis=1,
        subset=["Model", "API", "Text Completion", "Chat Completion"],
    )


def apply_row_styles(row):
    """Return per-cell CSS for one row, keyed off the row's API status."""
    api_value = row["API"]
    return [
        color_status(api_value, row[col])
        for col in ("Model", "API", "Text Completion", "Chat Completion")
    ]


# Shared color mapping for API status values.
_API_COLORS = {
    "Free": "green",
    "Pro Subscription": "blue",
    "Not Responding": "red",
}
# Cell-level mapping: status values plus the per-endpoint check marks.
_CELL_COLORS = {**_API_COLORS, "✓": "green", "⌀": "red"}


def color_status(api_value, cell_value):
    """Map a cell value (and the row's API status) to a CSS background color.

    '---' cells (unreachable models) inherit the color of the row's API
    status; every other cell is colored by its own value. Unknown values
    yield an empty style string.
    """
    if cell_value == "---":
        color = _API_COLORS.get(api_value)
    else:
        color = _CELL_COLORS.get(cell_value)
    return f"background-color: {color}" if color else ""


def search_models(query):
    """Gradio callback: return the table filtered by the search query."""
    return display_table(query)


description = (
    "This is a space that retrieves the status of all supported HF LLM "
    "Serverless Inference APIs.\nUpdates every 2 hours!"
)

with gr.Blocks() as demo:
    gr.Markdown("## HF Serverless LLM Inference API Status")
    gr.Markdown(description)
    search_box = gr.Textbox(
        label="Search for a model", placeholder="Type model name here..."
    )
    table = gr.Dataframe(value=display_table(), headers="keys")
    search_box.change(fn=search_models, inputs=search_box, outputs=table)

    def update_every_two_hours():
        # Re-run the probe, then re-arm the one-shot Timer so the refresh
        # repeats every 2 hours.
        # NOTE(review): the return value is discarded, so this refreshes the
        # on-disk cache but does not push an update to connected clients —
        # presumably intentional; verify against the Space's requirements.
        search_models(search_box.value)
        Timer(7200, update_every_two_hours).start()

    # Kick off the first scheduled refresh.
    Timer(7200, update_every_two_hours).start()

demo.launch()