# app.py — HF Space by pandora-s, commit 4544d80 (4.17 kB).
# (Web-page chrome from the file viewer — "raw", "history", "blame" — removed.)
import os
import gradio as gr
import pandas as pd
from huggingface_hub import InferenceClient
from threading import Timer
# HF API token read from the environment; os.environ.get returns None when unset.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
def get_available_free():
    """Probe every deployed text-generation model on the HF serverless API.

    For each deployed model, attempt one tiny text-generation call and one
    tiny chat-completion call, and record whether the model responded, needs
    a Pro subscription, or did not answer at all.

    Returns:
        dict[str, list]: columns "Model", "API", "Text Completion" and
        "Chat Completion", ready for ``pd.DataFrame``. On a rate limit, the
        cached ``data.csv`` (if present) or the partial results collected so
        far are returned instead.
    """
    models = InferenceClient().list_deployed_models("text-generation-inference")['text-generation']
    models_conclusion = {
        "Model": [],
        "API": [],
        "Text Completion": [],
        "Chat Completion": []
    }
    for m in models:
        text_available = False
        chat_available = False
        pro_sub = False
        # One client per model; both probes share it instead of constructing two.
        client = InferenceClient(m, timeout=10, token=HUGGINGFACE_TOKEN)
        try:
            client.text_generation("Hi.", max_new_tokens=1)
            text_available = True
            client.chat_completion(messages=[{'role': 'user', 'content': 'Hi.'}], max_tokens=1)
            chat_available = True
        except Exception as e:
            print(e)
            # The bound exception is always truthy, so no `if e` guard is needed.
            msg = str(e)
            if "Model requires a Pro subscription" in msg:
                pro_sub = True
            if "Rate limit reached" in msg:
                print("Rate Limited!!")
                if os.path.exists("data.csv"):
                    print("Loading data from file...")
                    return pd.read_csv("data.csv").to_dict(orient='list')
                # No cache available: return the partial results dict so the
                # downstream DataFrame still has the expected columns
                # (returning [] produced a column-less frame that broke
                # display_table's df["Model"] access).
                return models_conclusion
        models_conclusion["Model"].append(m)
        models_conclusion["API"].append("Free" if chat_available or text_available else ("Pro Subscription" if pro_sub else "Not Responding"))
        models_conclusion["Chat Completion"].append("---" if (pro_sub or (not chat_available and not text_available)) else ("βœ“" if chat_available else "βŒ€"))
        models_conclusion["Text Completion"].append("---" if (pro_sub or (not chat_available and not text_available)) else ("βœ“" if text_available else "βŒ€"))
    # Persist the full snapshot as the rate-limit fallback cache.
    pd.DataFrame(models_conclusion).to_csv("data.csv", index=False)
    return models_conclusion
def update_data():
    """Fetch the latest model-status snapshot and wrap it in a DataFrame."""
    return pd.DataFrame(get_available_free())
def display_table(search_query=""):
    """Refresh the status data and return it as a styled DataFrame.

    When *search_query* is non-empty, rows are filtered to models whose
    name contains the query (case-insensitive).
    """
    df = update_data()
    if search_query:
        df = df[df["Model"].str.contains(search_query, case=False)]
    return df.style.apply(
        apply_row_styles,
        axis=1,
        subset=["Model", "API", "Text Completion", "Chat Completion"],
    )
def apply_row_styles(row):
    """Compute per-cell CSS for one row, keyed off the row's API status."""
    api = row["API"]
    columns = ("Model", "API", "Text Completion", "Chat Completion")
    return [color_status(api, row[name]) for name in columns]
def color_status(api_value, cell_value):
    """Return the CSS background for one table cell.

    A "---" cell inherits its color from the row's API status; any other
    cell is colored by its own value. Unknown values get no styling.
    """
    api_colors = {
        "Free": "background-color: green",
        "Pro Subscription": "background-color: blue",
        "Not Responding": "background-color: red",
    }
    mark_colors = {
        "βœ“": "background-color: green",
        "βŒ€": "background-color: red",
    }
    if cell_value == "---":
        return api_colors.get(api_value, "")
    return api_colors.get(cell_value) or mark_colors.get(cell_value, "")
def search_models(query):
    """Gradio callback: rebuild the status table filtered by *query*."""
    return display_table(query)
# Header text shown above the table in the UI.
description = "This is a space that retrieves the status of all supported HF LLM Serverless Inference APIs.\nUpdates every 2 hours!"
# Build the Gradio UI: a search box wired to a live-filtered status table.
with gr.Blocks() as demo:
    gr.Markdown("## HF Serverless LLM Inference API Status")
    gr.Markdown(description)
    search_box = gr.Textbox(label="Search for a model", placeholder="Type model name here...")
    # display_table() here probes the API at app start to populate the table.
    table = gr.Dataframe(value=display_table(), headers="keys")
    search_box.change(fn=search_models, inputs=search_box, outputs=table)
    # Self-rescheduling background refresh every 2 hours (7200 s).
    # NOTE(review): search_models' return value is discarded here, so this
    # refreshes the on-disk cache but does not push new data into the UI —
    # confirm whether a UI update was intended.
    def update_every_two_hours():
        search_models(search_box.value)
        Timer(7200, update_every_two_hours).start()
    Timer(7200, update_every_two_hours).start()
demo.launch()