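# app.py: a Gradio Space that probes Hugging Face's serverless Inference API
# and reports which text-generation / chat / vision models currently respond for free.
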
import os
import gradio as gr
import pandas as pd
from huggingface_hub import InferenceClient
from threading import Timer
from tqdm import tqdm
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
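
# Probe every deployed model with a 1-token text-generation and chat-completion
# request, classifying each as Free, Pro Subscription, or Not Responding.
# Results are cached to data.csv so a rate-limited run can fall back to the last snapshot.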
def get_available_free(use_cache=False):
    if use_cache:
        if os.path.exists(str(os.getcwd()) + "/data.csv"):
            # Reuse the cached snapshot instead of re-probing every model.
            return pd.read_csv("data.csv").to_dict(orient='list')
    models_dict = InferenceClient(token=HUGGINGFACE_TOKEN).list_deployed_models("text-generation-inference")
    models = models_dict['text-generation'] + models_dict['text2text-generation']
    models_vision = models_dict['image-text-to-text']
    models_others = InferenceClient(token=HUGGINGFACE_TOKEN).list_deployed_models(frameworks="all")["text-generation"]
    models_conclusion = {
        "Model": [],
        "API": [],
        "Text Completion": [],
        "Chat Completion": [],
        "Vision": []
    }
    all_models = list(set(models + models_vision + models_others))
    for m in tqdm(all_models):
        text_available = False
        chat_available = False
        vision_available = False
        if m in models_vision:
            vision_available = True
        pro_sub = False
        try:
            InferenceClient(m, timeout=10, token=HUGGINGFACE_TOKEN).text_generation("Hi.", max_new_tokens=1)
            text_available = True
        except Exception as e:
            if "Model requires a Pro subscription" in str(e):
                pro_sub = True
            if "Rate limit reached" in str(e):
                # Rate limited: fall back to the cached snapshot if one exists.
                if os.path.exists(str(os.getcwd()) + "/data.csv"):
                    return pd.read_csv(str(os.getcwd()) + "/data.csv").to_dict(orient='list')
                return []
        try:
            InferenceClient(m, timeout=10, token=HUGGINGFACE_TOKEN).chat_completion(messages=[{'role': 'user', 'content': 'Hi.'}], max_tokens=1)
            chat_available = True
        except Exception as e:
            if "Model requires a Pro subscription" in str(e):
                pro_sub = True
            if "Rate limit reached" in str(e):
                if os.path.exists("data.csv"):
                    return pd.read_csv(str(os.getcwd()) + "/data.csv").to_dict(orient='list')
                return []
        models_conclusion["Model"].append(m)
        models_conclusion["API"].append("Free" if chat_available or text_available else ("Pro Subscription" if pro_sub else "Not Responding"))
        models_conclusion["Chat Completion"].append("---" if (pro_sub or (not chat_available and not text_available)) else ("✓" if chat_available else "⌀"))
        models_conclusion["Text Completion"].append("---" if (pro_sub or (not chat_available and not text_available)) else ("✓" if text_available else "⌀"))
        models_conclusion["Vision"].append("✓" if vision_available else "⌀")
    pd.DataFrame(models_conclusion).to_csv(str(os.getcwd()) + "/data.csv", index=False)
    return models_conclusion
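
# Sort the table so free, working models float to the top: within each API tier,
# "✓" sorts before "⌀", which sorts before "---".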
def update_data(use_cache=False):
    data = get_available_free(use_cache)
    df = pd.DataFrame(data)
    status_mapping = {"✓": 0, "⌀": 1, "---": 2}
    df['Text Completion'] = df['Text Completion'].map(status_mapping)
    df['Chat Completion'] = df['Chat Completion'].map(status_mapping)
    df = df.sort_values(by=['API', 'Text Completion', 'Chat Completion', 'Vision'])
    df['Text Completion'] = df['Text Completion'].map({v: k for k, v in status_mapping.items()})
    df['Chat Completion'] = df['Chat Completion'].map({v: k for k, v in status_mapping.items()})
    return df
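
# Apply the search box and checkbox filters, then style the rows for display.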
def display_table(search_query="", filters=[], use_cache=False):
    df = update_data(use_cache)
    search_query = str(search_query)
    if search_query:
        # regex=False: treat the query as a literal string so characters
        # like "+" in model names don't break the search.
        filtered_df = df[df["Model"].str.contains(search_query, case=False, regex=False)]
    else:
        filtered_df = df
    if filters:
        if "Free" in filters:
            filtered_df = filtered_df[filtered_df["API"] == "Free"]
        if "Text Completion" in filters:
            filtered_df = filtered_df[filtered_df["Text Completion"] == "✓"]
        if "Chat Completion" in filters:
            filtered_df = filtered_df[filtered_df["Chat Completion"] == "✓"]
    styled_df = filtered_df.style.apply(apply_row_styles, axis=1, subset=["Model", "API", "Text Completion", "Chat Completion", "Vision"])
    return styled_df
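
# Color every cell in a row according to the row's API tier and the cell's own status.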
def apply_row_styles(row):
    api_value = row["API"]
    return [
        color_status(api_value, row["Model"]),
        color_status(api_value, row["API"]),
        color_status(api_value, row["Text Completion"]),
        color_status(api_value, row["Chat Completion"]),
        color_status(api_value, row["Vision"])
    ]
def color_status(api_value, cell_value):
    # "---" cells inherit the color of the row's API tier; all other cells
    # are colored by their own value.
    if cell_value == "---":
        if api_value == "Free":
            return 'background-color: green'
        elif api_value == "Pro Subscription":
            return 'background-color: blue'
        elif api_value == "Not Responding":
            return 'background-color: red'
    else:
        if cell_value == "Free":
            return 'background-color: green'
        elif cell_value == "Pro Subscription":
            return 'background-color: blue'
        elif cell_value == "Not Responding":
            return 'background-color: red'
        elif cell_value == "✓":
            return 'background-color: green'
        elif cell_value == "⌀":
            return 'background-color: red'
    return ''
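
# Wrapper used by the Gradio callbacks; defaults to the cached CSV for fast UI updates.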
def search_models(query, filters=[], use_cache=True):
    return display_table(query, filters, use_cache)
description = """
This space retrieves the status of all supported HF LLM Serverless Inference APIs.
*Updates every 2 hours!*
If you are a student, or you simply want to see which models you can experiment with for free, you are probably very interested in the free API Hugging Face provides... but, like me, you struggle to find out which models are actually available!
That's why I made this space: every 2 hours it checks and updates the status of the LLMs that are in theory supported, using the list returned by `InferenceClient().list_deployed_models()`.
All you then need to do is plug in:
```py
from huggingface_hub import InferenceClient
inf = InferenceClient(model="MODEL", token="TOKEN")
response = inf.text_generation("And play !!")
print(response)
```
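
Models marked ✓ under *Chat Completion* also work through the chat API. A minimal sketch, using the same placeholder `MODEL` and `TOKEN` as above:
```py
from huggingface_hub import InferenceClient
inf = InferenceClient(model="MODEL", token="TOKEN")
response = inf.chat_completion(messages=[{"role": "user", "content": "And play !!"}], max_tokens=64)
print(response.choices[0].message.content)
```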
"""
first_run = True

with gr.Blocks() as demo:
    gr.Markdown("## HF Serverless LLM Inference API Status")
    gr.Markdown(description)
    search_box = gr.Textbox(label="Search for a model", placeholder="Type model name here...")
    filter_box = gr.CheckboxGroup(choices=["Free", "Text Completion", "Chat Completion"], label="Filters")
    table = gr.Dataframe(value=display_table(use_cache=True), headers="keys")

    def update_filters(query, filters):
        return search_models(query, filters, use_cache=True)

    search_box.change(fn=update_filters, inputs=[search_box, filter_box], outputs=table)
    filter_box.change(fn=update_filters, inputs=[search_box, filter_box], outputs=table)

    def update_every_two_hours(first_run):
        # Refresh the data (use the cache only on the very first run), then re-arm the timer.
        search_models(search_box.value, use_cache=first_run)
        Timer(7200, update_every_two_hours, args=(False,)).start()

    Timer(0, update_every_two_hours, args=(first_run,)).start()

demo.launch()