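# app.py -- Gradio UI for managing Hugging Face Inference Endpoints.
# File-viewer metadata captured with this copy: uploaded by "ahmedghani",
# last commit "Update app.py" (7476383), file size 18.3 kB.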
import gradio as gr
import json
import requests
import pandas as pd
def avaliable_providers():
    """Return the vendor names of every provider reported as available.

    Issues a GET request against the ``/provider`` route and keeps the
    ``vendor`` field of each entry in the response's ``items`` list whose
    ``status`` equals ``'available'``.

    Returns:
        list: vendor names, in the order the API listed them.
    """
    listing = requests.get(
        "https://api.endpoints.huggingface.cloud/provider",
        headers={"Content-Type": "application/json"},
    )
    return [
        entry['vendor']
        for entry in listing.json()['items']
        if entry['status'] == 'available'
    ]
def update_regions(provider):
    """Build a dropdown update listing the available regions of ``provider``.

    Each choice is rendered as ``"<region>/<label>"``. The first choice is
    pre-selected; when the provider has no available region the value is
    set to ``None``.

    Args:
        provider: vendor name interpolated into the request URL.

    Returns:
        The result of ``gr.Dropdown.update`` carrying the new choices/value.
    """
    url = f"https://api.endpoints.huggingface.cloud/provider/{provider}/region"
    reply = requests.get(url, headers={"Content-Type": "application/json"})
    region_labels = [
        f"{item['region']}/{item['label']}"
        for item in reply.json()['items']
        if item['status'] == 'available'
    ]
    first_choice = region_labels[0] if region_labels else None
    return gr.Dropdown.update(choices=region_labels, value=first_choice)
def update_compute_options(provider, region):
    """Build a dropdown update listing the compute options of a region.

    Every available compute entry is rendered as
    ``"<ACCELERATOR> [<instanceSize>] · <spec> · <instanceType>"``; ``submit``
    and ``update_endpoint`` parse this exact label format back apart.

    Args:
        provider: vendor name interpolated into the request URL.
        region: dropdown value of the form ``"<region>/<label>"``; only the
            part before the first ``/`` is sent to the API.

    Returns:
        The result of ``gr.Dropdown.update``. The first compute option is
        pre-selected, or ``None`` when nothing is available.
    """
    # The region dropdowns start out as "" and update_regions() resets them
    # to None when a provider has no available region; calling .split on
    # None would raise, so answer with an empty dropdown instead.
    if not region:
        return gr.Dropdown.update(choices=[], value=None)

    region = region.split("/")[0]
    endpoint_url = f"https://api.endpoints.huggingface.cloud/provider/{provider}/region/{region}/compute"
    print(endpoint_url)
    response = requests.get(
        endpoint_url,
        headers={"Content-Type": "application/json"},
    )

    available_compute_choices = []
    for compute in response.json()['items']:
        if compute['status'] != 'available':
            continue
        accelerator = compute['accelerator']
        num_accelerators = compute['numAccelerators']
        memory_gb = compute['memoryGb'].replace("Gi", "GB")
        architecture = compute['architecture']
        instance_type = compute['instanceType']
        # CPU rows show core count and memory; every other accelerator shows
        # the device count and its architecture. (Named ``spec`` so the
        # builtin ``type`` is not shadowed.)
        if accelerator == "cpu":
            spec = f"{num_accelerators}vCPU {memory_gb} · {architecture}"
        else:
            spec = f"{num_accelerators}x {architecture}"
        available_compute_choices.append(
            f"{accelerator.upper()} [{compute['instanceSize']}] · {spec} · {instance_type}"
        )

    return gr.Dropdown.update(
        choices=available_compute_choices,
        value=available_compute_choices[0] if available_compute_choices else None,
    )
def submit(
    hf_token_input,
    endpoint_name_input,
    provider_selector,
    region_selector,
    repository_selector,
    revision_selector,
    task_selector,
    framework_selector,
    compute_selector,
    min_node_selector,
    max_node_selector,
    security_selector
):
    """Create an endpoint from the "Deploy Endpoint" form values.

    Args:
        hf_token_input: access token, sent as a Bearer token (stripped).
        endpoint_name_input: name of the endpoint to create (stripped).
        provider_selector: vendor name; lower-cased in the payload.
        region_selector: ``"<region>/<label>"``; only ``<region>`` is sent.
        repository_selector: ``"<account>/<model>"``; the part before the
            first ``/`` is used as ``accountId``.
        revision_selector: model revision, sent unchanged.
        task_selector: task name; lower-cased in the payload.
        framework_selector: accepted for interface compatibility but not
            used; the payload always sends ``"framework": "custom"``.
        compute_selector: label of the form
            ``"<ACCELERATOR> [<size>] · ... · <instanceType>"`` as produced by
            ``update_compute_options``.
        min_node_selector: minimum replica count (converted with ``int``).
        max_node_selector: maximum replica count (converted with ``int``).
        security_selector: endpoint type; lower-cased in the payload.

    Returns:
        str: a human-readable status message for the status textbox.
    """
    # The compute dropdown defaults to "", in which case there is no
    # "[size]" section to parse; report that instead of raising ValueError.
    label_parts = (compute_selector or "").split("·")
    head = label_parts[0]
    if "[" not in head or "]" not in head:
        return "Please select a compute instance type."

    # head looks like "GPU [medium] ": accelerator before "[", size inside
    # the brackets. Taking everything before "[" (rather than a fixed
    # three-character slice) keeps longer accelerator names intact.
    accelerator, _, bracketed = head.partition("[")
    accelerator = accelerator.strip()
    size = bracketed.partition("]")[0].strip()
    instance_type = label_parts[-1].strip()

    payload = {
        "accountId": repository_selector.split("/")[0],
        "compute": {
            "accelerator": accelerator.lower(),
            "instanceSize": size,
            "instanceType": instance_type,
            "scaling": {
                "maxReplica": int(max_node_selector),
                "minReplica": int(min_node_selector)
            }
        },
        "model": {
            "framework": "custom",
            "image": {
                "huggingface": {}
            },
            "repository": repository_selector.lower(),
            "revision": revision_selector,
            "task": task_selector.lower()
        },
        "name": endpoint_name_input.strip(),
        "provider": {
            "region": region_selector.split("/")[0].lower(),
            "vendor": provider_selector.lower()
        },
        "type": security_selector.lower()
    }
    payload = json.dumps(payload)
    print(f"Payload: {payload}")

    headers = {
        "Authorization": f"Bearer {hf_token_input.strip()}",
        "Content-Type": "application/json",
    }
    endpoint_url = "https://api.endpoints.huggingface.cloud/endpoint"
    print(f"Endpoint: {endpoint_url}")

    response = requests.post(endpoint_url, headers=headers, data=payload)

    if response.status_code == 400:
        return f"{response.text}. Malformed data in {payload}"
    elif response.status_code == 401:
        return "Invalid token"
    elif response.status_code == 409:
        return f"Error: {response.text}"
    elif response.status_code == 202:
        return f"Endpoint {endpoint_name_input} created successfully on {provider_selector.lower()} using {repository_selector.lower()}@{revision_selector}. \n Please check out the progress at https://ui.endpoints.huggingface.co/endpoints."
    else:
        return f"something went wrong {response.status_code} = {response.text}"
def delete_endpoint(hf_token_input, endpoint_name_input):
    """Send a DELETE request for the named endpoint and describe the outcome.

    Args:
        hf_token_input: access token, sent as a Bearer token (stripped).
        endpoint_name_input: endpoint name appended to the request URL.

    Returns:
        str: a status message chosen from the HTTP status code
        (202 success, 401 invalid token, 404 error text, otherwise a
        generic message with the code and response body).
    """
    target = f"https://api.endpoints.huggingface.cloud/endpoint/{endpoint_name_input}"
    auth_headers = {
        "Authorization": f"Bearer {hf_token_input.strip()}",
        "Content-Type": "application/json",
    }
    result = requests.delete(target, headers=auth_headers)

    code = result.status_code
    if code == 202:
        return f"Endpoint {endpoint_name_input} deleted successfully."
    if code == 401:
        return "Invalid token"
    if code == 404:
        return f"Error: {result.text}"
    return f"something went wrong {code} = {result.text}"
def get_all_endpoints(
    hf_token_input,
):
    """Fetch every endpoint visible to the token and tabulate it.

    Args:
        hf_token_input: access token, sent as a Bearer token (stripped).

    Returns:
        On HTTP 200, a ``gr.Dataframe.update`` whose value has the columns
        ``name, model, provider, compute, status, minReplica, maxReplica,
        createdAt, updatedAt`` (an empty table with those columns when the
        API returns no items). On HTTP 401 the string ``"Invalid token"``.
        For any other status code, ``None``.
    """
    response = requests.get(
        "https://api.endpoints.huggingface.cloud/endpoint",
        headers={
            "Authorization": f"Bearer {hf_token_input.strip()}",
            "Content-Type": "application/json",
        },
    )

    if response.status_code == 401:
        # NOTE(review): this string is routed to a Dataframe output by the
        # UI; kept as-is for interface compatibility -- confirm it renders.
        return "Invalid token"
    elif response.status_code == 200:
        endpoints_json = response.json()
        print(endpoints_json)

        columns = [
            "name", "model", "provider", "compute", "status",
            "minReplica", "maxReplica", "createdAt", "updatedAt",
        ]
        # Each row is flattened from the raw nested dicts in one pass.
        # Previously the "compute" and "status" columns were overwritten
        # with strings first and then indexed as dicts, which raised
        # TypeError on every successful response.
        rows = [_flatten_endpoint(item) for item in endpoints_json["items"]]
        # Passing ``columns`` fixes the order and yields an empty, correctly
        # labelled table when there are no endpoints.
        endpoints_df = pd.DataFrame(rows, columns=columns)

        return gr.Dataframe.update(
            value=endpoints_df
        )


def _flatten_endpoint(endpoint):
    """Turn one raw endpoint dict from the API into a flat table row."""
    model = endpoint["model"]
    provider = endpoint["provider"]
    compute = endpoint["compute"]
    status = endpoint["status"]
    return {
        "name": endpoint["name"],
        "model": model["repository"] + "@" + model["revision"],
        "provider": provider["vendor"] + "/" + provider["region"],
        "compute": compute["instanceType"] + "·" + compute["instanceSize"] + " [" + compute["accelerator"] + "]",
        "status": status["state"],
        "minReplica": compute["scaling"]["minReplica"],
        "maxReplica": compute["scaling"]["maxReplica"],
        # assumes the API nests the timestamps under "status", as the
        # original column derivation did -- TODO confirm against the API.
        "createdAt": status["createdAt"],
        "updatedAt": status["updatedAt"],
    }
def update_endpoint(
    hf_token_input,
    endpoint_name_input,
    min_node_selector,
    max_node_selector,
    instance_type,
):
    """Change the instance size/type and scaling of an existing endpoint.

    Args:
        hf_token_input: access token, sent as a Bearer token (stripped).
        endpoint_name_input: endpoint name appended to the request URL.
        min_node_selector: minimum replica count (converted with ``int``).
        max_node_selector: maximum replica count (converted with ``int``).
        instance_type: compute label of the form
            ``"<ACCELERATOR> [<size>] · ... · <instanceType>"`` as produced by
            ``update_compute_options``.

    Returns:
        str: a human-readable status message for the status textbox.
    """
    # The compute dropdown defaults to "", which has no "[size]" section;
    # report that instead of raising IndexError while parsing.
    label_parts = (instance_type or "").split("·")
    head = label_parts[0]
    if "[" not in head or "]" not in head:
        return "Please select a compute instance type."

    payload = {
        "compute": {
            "instanceSize": head.split("[")[1].split("]")[0],
            "instanceType": label_parts[-1].strip(),
            "scaling": {
                # Cast to int, matching submit(), so the JSON body carries
                # integers rather than whatever numeric type the UI passes.
                "maxReplica": int(max_node_selector),
                "minReplica": int(min_node_selector)
            }
        }}

    response = requests.put(
        f"https://api.endpoints.huggingface.cloud/endpoint/{endpoint_name_input}",
        headers={
            "Authorization": f"Bearer {hf_token_input.strip()}",
            "Content-Type": "application/json",
        },
        # Serialize explicitly: handing requests a dict via ``data=``
        # form-encodes it, which contradicts the JSON Content-Type above.
        data=json.dumps(payload),
    )

    if response.status_code == 401:
        return "Invalid token"
    elif response.status_code == 404:
        return f"Error: {response.text}"
    elif response.status_code == 202:
        return f"Endpoint {endpoint_name_input} updated successfully."
    else:
        return f"something went wrong {response.status_code} = {response.text}"
# Top-level UI definition: one Blocks app with a shared token field and five
# tabs (Info, Deploy Endpoint, Update Endpoint, Delete Endpoint, Pricing Table).
with gr.Blocks() as demo2:
    gr.Markdown("""
#### Your 🤗 Access Token <span style="color:red;">(Required)</span>
""")
    # Shared by every tab: each click handler below receives this textbox as
    # its first input.
    hf_token_input = gr.Textbox(
        show_label=False,
        type="password"
    )
    with gr.Tab("Info"):
        gr.Markdown("""
### All Deployed Endpoints
""")
        # Nine display headers, matching the nine columns that
        # get_all_endpoints selects for its table.
        endpoints_table = gr.Dataframe(
            headers=["Endpoint Name", "Revision", "Provider", "Instance Type", "Status", "Min Replica", "Max Replica", "Created At", "Updated At"],
            col_count=(9, "fixed"),
        )
        endpoint_info_button = gr.Button(value="Get Info")
        endpoint_info_button.click(
            get_all_endpoints,
            inputs=hf_token_input,
            outputs=endpoints_table
        )
    # Deploy Endpoint
    with gr.Tab("Deploy Endpoint"):
        gr.Markdown(
            """
### <br><center style="color:green">(Deploy Your Model on 🤗 Endpoint)</center>
""")
        gr.Markdown("""
#### Endpoint Name
""")
        endpoint_name_input = gr.Textbox(
            show_label=False
        )
        # Heading row followed by the matching input row.
        with gr.Row():
            gr.Markdown("""
#### Cloud Provider
""")
            gr.Markdown("""
#### Cloud Region
""")
        with gr.Row():
            # Provider choices are fetched once, while the UI is being built.
            provider_selector = gr.Dropdown(
                choices=avaliable_providers(),
                interactive=True,
                show_label=False,
            )
            # Starts empty; filled in when a provider is chosen.
            region_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )
        # Choosing a provider repopulates the region dropdown.
        provider_selector.change(update_regions, inputs=provider_selector, outputs=region_selector)
        with gr.Row():
            gr.Markdown("""
#### Target Model
""")
            gr.Markdown("""
#### Target Model Version(branch commit hash)
""")
        with gr.Row():
            repository_selector = gr.Textbox(
                value="",
                interactive=True,
                show_label=False,
            )
            revision_selector = gr.Textbox(
                value="",
                interactive=True,
                show_label=False,
            )
        with gr.Row():
            gr.Markdown("""
#### Task
""")
            gr.Markdown("""
#### Framework
""")
        with gr.Row():
            task_selector = gr.Textbox(
                value="Custom",
                interactive=True,
                show_label=False,
            )
            # Passed to submit(), which currently ignores it and always sends
            # "custom" as the framework.
            framework_selector = gr.Textbox(
                value="Custom",
                interactive=True,
                show_label=False,
            )
        gr.Markdown("""
#### Select Compute Instance Type
""")
        # Starts empty; filled in when a region is chosen.
        compute_selector = gr.Dropdown(
            [],
            value="",
            interactive=True,
            show_label=False,
        )
        # Choosing a region repopulates the compute dropdown for that
        # provider/region pair.
        region_selector.change(update_compute_options, inputs=[provider_selector, region_selector], outputs=compute_selector)
        with gr.Row():
            gr.Markdown("""
#### Min Number of Nodes
""")
            gr.Markdown("""
#### Max Number of Nodes
""")
            gr.Markdown("""
#### Security Level
""")
        with gr.Row():
            min_node_selector = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            max_node_selector = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            # submit() lower-cases the selected value and sends it as the
            # endpoint "type".
            security_selector = gr.Radio(
                choices=["Protected", "Public"],
                value="Protected",
                interactive=True,
                show_label=False,
            )
        submit_button = gr.Button(
            value="Submit",
        )
        # Read-only box that shows the message returned by submit().
        status_txt = gr.Textbox(
            value="status",
            interactive=False
        )
        # The order of this list must match submit()'s positional parameters.
        submit_button.click(
            submit,
            inputs=[
                hf_token_input,
                endpoint_name_input,
                provider_selector,
                region_selector,
                repository_selector,
                revision_selector,
                task_selector,
                framework_selector,
                compute_selector,
                min_node_selector,
                max_node_selector,
                security_selector],
            outputs=status_txt)
    # Update Endpoint
    with gr.Tab("Update Endpoint"):
        gr.Markdown("""
### <br><center style="color:green">(Update 🔁 Endpoint)</center>
""")
        with gr.Row():
            gr.Markdown("""
#### Cloud Provider
""")
            gr.Markdown("""
#### Cloud Region
""")
        with gr.Row():
            # Second call to avaliable_providers(): this tab keeps its own
            # provider/region/compute dropdowns, separate from the Deploy tab.
            update_provider_selector = gr.Dropdown(
                choices=avaliable_providers(),
                interactive=True,
                show_label=False,
            )
            update_region_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )
        with gr.Row():
            gr.Markdown("""
#### Endpoint Name
""")
            gr.Markdown("""
#### Instance Type
""")
        with gr.Row():
            update_endpoint_name_input = gr.Textbox(
                show_label=False
            )
            update_compute_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )
        with gr.Row():
            gr.Markdown("""
#### Min Number of Nodes
""")
            gr.Markdown("""
#### Max Number of Nodes
""")
        with gr.Row():
            update_min_node_input = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            update_max_node_input = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
        update_button = gr.Button(
            value="Update",
        )
        # Read-only box that shows the message returned by update_endpoint().
        update_status_txt = gr.Textbox(
            value="status",
            interactive=False
        )
        # Same provider -> region -> compute cascade as the Deploy tab, wired
        # to this tab's own dropdowns.
        update_provider_selector.change(update_regions, inputs=update_provider_selector, outputs=update_region_selector)
        update_region_selector.change(update_compute_options, inputs=[update_provider_selector, update_region_selector], outputs=update_compute_selector)
        # The order of this list must match update_endpoint()'s parameters.
        update_button.click(
            update_endpoint,
            inputs=[
                hf_token_input,
                update_endpoint_name_input,
                update_min_node_input,
                update_max_node_input,
                update_compute_selector
            ],
            outputs=update_status_txt
        )
    # Delete Endpoint
    with gr.Tab("Delete Endpoint"):
        gr.Markdown("""
### <br><center style="color:green">(Delete 🗑️ Endpoint)</center>
""")
        gr.Markdown("""
#### Endpoint Name
""")
        delete_endpoint_name_input = gr.Textbox(
            show_label=False
        )
        delete_button = gr.Button(
            value="Delete",
        )
        # Read-only box that shows the message returned by delete_endpoint().
        delete_status_txt = gr.Textbox(
            value="status",
            interactive=False
        )
        delete_button.click(
            delete_endpoint,
            inputs=[
                hf_token_input,
                delete_endpoint_name_input
            ],
            outputs=delete_status_txt
        )
    # Pricing Table
    # Both tables below are hard-coded values, not fetched from the API; the
    # headings carry the date they were recorded.
    with gr.Tab("Pricing Table"):
        gr.Markdown("""
### <br><center style="color:green">(Instance Pricing Table)</center>
#### Pricing Table(CPU) - 2023/2/22
""")
        gr.Dataframe(
            headers=["provider", "size", "$/h", "vCPUs", "Memory", "Architecture"],
            datatype=["str", "str", "str", "number", "str", "str"],
            row_count=8,
            col_count=(6, "fixed"),
            value=[
                ["aws", "small", "$0.06", 1, "2GB", "Intel Xeon - Ice Lake"],
                ["aws", "medium", "$0.12", 2, "4GB", "Intel Xeon - Ice Lake"],
                ["aws", "large", "$0.24", 4, "8GB", "Intel Xeon - Ice Lake"],
                ["aws", "xlarge", "$0.48", 8, "16GB", "Intel Xeon - Ice Lake"],
                ["azure", "small", "$0.06", 1, "2GB", "Intel Xeon"],
                ["azure", "medium", "$0.12", 2, "4GB", "Intel Xeon"],
                ["azure", "large", "$0.24", 4, "8GB", "Intel Xeon"],
                ["azure", "xlarge", "$0.48", 8, "16GB", "Intel Xeon"],
            ]
        )
        gr.Markdown("""
#### Pricing Table(GPU) - 2023/2/22
""")
        gr.Dataframe(
            headers=["provider", "size", "$/h", "GPUs", "Memory", "Architecture"],
            datatype=["str", "str", "str", "number", "str", "str"],
            row_count=6,
            col_count=(6, "fixed"),
            value=[
                ["aws", "small", "$0.60", 1, "14GB", "NVIDIA T4"],
                ["aws", "medium", "$1.30", 1, "24GB", "NVIDIA A10G"],
                ["aws", "large", "$4.50", 4, "56GB", "NVIDIA T4"],
                ["aws", "xlarge", "$6.50", 1, "80GB", "NVIDIA A100"],
                ["aws", "xxlarge", "$7.00", 4, "96GB", "NVIDIA A10G"],
                ["aws", "xxxlarge", "$45.0", 8, "640GB", "NVIDIA A100"],
            ]
        )
# Start the app; runs as soon as this file is executed.
demo2.launch()