"""Gradio front-end for managing Hugging Face Inference Endpoints.

The app exposes five tabs (info, deploy, update, delete, pricing) and talks to
the REST API rooted at ``API_BASE_URL`` using the access token typed by the
user.
"""

import json

import gradio as gr
import pandas as pd
import requests

API_BASE_URL = "https://api.endpoints.huggingface.cloud"

# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would block the Gradio worker indefinitely.
REQUEST_TIMEOUT = 30

# Column order of the table shown in the "Info" tab.
ENDPOINT_COLUMNS = [
    "name",
    "model",
    "provider",
    "compute",
    "status",
    "minReplica",
    "maxReplica",
    "createdAt",
    "updatedAt",
]


def _auth_headers(hf_token):
    """Return JSON request headers carrying *hf_token* as a bearer token."""
    return {
        "Authorization": f"Bearer {hf_token.strip()}",
        "Content-Type": "application/json",
    }


def _fetch_available_items(url):
    """GET *url* and return the entries of ``items`` whose status is 'available'.

    Raises ``requests.HTTPError`` on a non-success status so the failure is
    reported as such rather than as a missing ``items`` key.
    """
    response = requests.get(
        url,
        headers={"Content-Type": "application/json"},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return [
        item for item in response.json()["items"]
        if item["status"] == "available"
    ]


def _dropdown_update(choices):
    """Build a dropdown update that preselects the first choice, if any."""
    return gr.Dropdown.update(
        choices=choices,
        value=choices[0] if choices else None,
    )


def _parse_compute_label(label):
    """Split a compute dropdown label into its API fields.

    Labels are produced by ``update_compute_options`` and look like
    ``"GPU [small] · 1x NVIDIA T4 · g4dn.xlarge"``.

    Returns:
        ``(accelerator, instance_size, instance_type)`` with the accelerator
        lowercased.

    Raises:
        ValueError: if *label* is empty or has no ``[size]`` section.
    """
    if not label:
        raise ValueError("no compute instance type selected")

    head = label.split("·")[0]
    accelerator, opening, remainder = head.partition("[")
    size, closing, _ = remainder.partition("]")
    if not opening or not closing:
        raise ValueError(f"unrecognised compute instance type: {label!r}")

    instance_type = label.split("·")[-1].strip()
    return accelerator.strip().lower(), size.strip(), instance_type


def update_regions(provider):
    """Refresh the region dropdown with the regions available for *provider*.

    Each choice is formatted as ``"<region>/<label>"``; the first one is
    preselected. An empty provider yields an empty dropdown.
    """
    if not provider:
        return _dropdown_update([])

    endpoint_url = f"{API_BASE_URL}/provider/{provider}/region"
    available_regions = [
        f"{region['region']}/{region['label']}"
        for region in _fetch_available_items(endpoint_url)
    ]
    return _dropdown_update(available_regions)


def update_compute_options(provider, region):
    """Refresh the compute dropdown for the given provider and region.

    *region* is a ``"<region>/<label>"`` choice from the region dropdown; only
    the part before the slash is sent to the API. When either argument is
    empty (for example no region is available) the dropdown is cleared.
    """
    if not provider or not region:
        return _dropdown_update([])

    region = region.split("/")[0]
    endpoint_url = f"{API_BASE_URL}/provider/{provider}/region/{region}/compute"
    print(endpoint_url)

    available_compute_choices = []
    for compute in _fetch_available_items(endpoint_url):
        accelerator = compute['accelerator']
        num_accelerators = compute['numAccelerators']
        memory = compute['memoryGb'].replace("Gi", "GB")
        architecture = compute['architecture']
        instance_type = compute['instanceType']

        if accelerator == "cpu":
            spec = f"{num_accelerators}vCPU {memory} · {architecture}"
        else:
            spec = f"{num_accelerators}x {architecture}"

        available_compute_choices.append(
            f"{accelerator.upper()} [{compute['instanceSize']}] · {spec} · {instance_type}"
        )

    return _dropdown_update(available_compute_choices)


def submit(
    hf_token_input,
    endpoint_name_input,
    provider_selector,
    region_selector,
    repository_selector,
    revision_selector,
    task_selector,
    framework_selector,
    compute_selector,
    min_node_selector,
    max_node_selector,
    security_selector
):
    """Create a new endpoint and return a human-readable status message.

    The compute fields are recovered from the dropdown label in
    *compute_selector*. *framework_selector* is accepted for interface
    compatibility but the payload always sends ``"custom"``.
    """
    try:
        accelerator, instance_size, instance_type = _parse_compute_label(
            compute_selector
        )
    except ValueError as err:
        return f"Error: {err}"

    payload = json.dumps({
        "accountId": repository_selector.split("/")[0],
        "compute": {
            "accelerator": accelerator,
            "instanceSize": instance_size,
            "instanceType": instance_type,
            "scaling": {
                # gr.Number yields floats; the API payload carries integers.
                "maxReplica": int(max_node_selector),
                "minReplica": int(min_node_selector)
            }
        },
        "model": {
            "framework": "custom",
            "image": {
                "huggingface": {}
            },
            "repository": repository_selector.lower(),
            "revision": revision_selector,
            "task": task_selector.lower()
        },
        "name": endpoint_name_input.strip(),
        "provider": {
            "region": region_selector.split("/")[0].lower(),
            "vendor": provider_selector.lower()
        },
        "type": security_selector.lower()
    })
    print(f"Payload: {payload}")

    endpoint_url = f"{API_BASE_URL}/endpoint"
    print(f"Endpoint: {endpoint_url}")

    response = requests.post(
        endpoint_url,
        headers=_auth_headers(hf_token_input),
        data=payload,
        timeout=REQUEST_TIMEOUT,
    )

    if response.status_code == 400:
        return f"{response.text}. Malformed data in {payload}"
    elif response.status_code == 401:
        return "Invalid token"
    elif response.status_code == 409:
        return f"Error: {response.text}"
    elif response.status_code == 202:
        return f"Endpoint {endpoint_name_input} created successfully on {provider_selector.lower()} using {repository_selector.lower()}@{revision_selector}. \n Please check out the progress at https://ui.endpoints.huggingface.co/endpoints."
    else:
        return f"something went wrong {response.status_code} = {response.text}"


def delete_endpoint(
    hf_token_input,
    endpoint_name_input
):
    """Delete the named endpoint and return a status message.

    Surrounding whitespace is removed from the name before it is placed in
    the URL, matching how ``submit`` normalises names on creation.
    """
    response = requests.delete(
        f"{API_BASE_URL}/endpoint/{endpoint_name_input.strip()}",
        headers=_auth_headers(hf_token_input),
        timeout=REQUEST_TIMEOUT,
    )

    if response.status_code == 401:
        return "Invalid token"
    elif response.status_code == 404:
        return f"Error: {response.text}"
    elif response.status_code == 202:
        return f"Endpoint {endpoint_name_input} deleted successfully."
    else:
        return f"something went wrong {response.status_code} = {response.text}"


def get_all_endpoints(
    hf_token_input,
):
    """Fetch every endpoint of the account and return a table update.

    Each endpoint becomes one row with the columns in ``ENDPOINT_COLUMNS``.
    All nested fields are read from the raw JSON item before anything is
    flattened, so the scaling and timestamp values remain accessible.

    Raises:
        gr.Error: on an invalid token or any other non-200 response, because
            the output component is a table and cannot display a message.
    """
    response = requests.get(
        f"{API_BASE_URL}/endpoint",
        headers=_auth_headers(hf_token_input),
        timeout=REQUEST_TIMEOUT,
    )

    if response.status_code == 401:
        raise gr.Error("Invalid token")
    if response.status_code != 200:
        raise gr.Error(
            f"something went wrong {response.status_code} = {response.text}"
        )

    endpoints_json = response.json()
    print(endpoints_json)

    rows = []
    for item in endpoints_json.get("items", []):
        model = item["model"]
        provider = item["provider"]
        compute = item["compute"]
        status = item["status"]
        rows.append({
            "name": item["name"],
            "model": f'{model["repository"]}@{model["revision"]}',
            "provider": f'{provider["vendor"]}/{provider["region"]}',
            "compute": f'{compute["instanceType"]}·{compute["instanceSize"]} [{compute["accelerator"]}]',
            "status": status["state"],
            "minReplica": compute["scaling"]["minReplica"],
            "maxReplica": compute["scaling"]["maxReplica"],
            "createdAt": status["createdAt"],
            "updatedAt": status["updatedAt"],
        })

    # Passing the column list keeps the table well-formed when there are no
    # endpoints at all.
    endpoints_df = pd.DataFrame(rows, columns=ENDPOINT_COLUMNS)
    return gr.Dataframe.update(
        value=endpoints_df
    )


def update_endpoint(
    hf_token_input,
    endpoint_name_input,
    min_node_selector,
    max_node_selector,
    instance_type,
):
    """Change the compute size/type and scaling of an existing endpoint.

    *instance_type* is a compute dropdown label; its size and instance type
    are extracted and sent together with the replica bounds. Returns a status
    message.
    """
    try:
        _, instance_size, parsed_instance_type = _parse_compute_label(
            instance_type
        )
    except ValueError as err:
        return f"Error: {err}"

    payload = {
        "compute": {
            "instanceSize": instance_size,
            "instanceType": parsed_instance_type,
            "scaling": {
                "maxReplica": int(max_node_selector),
                "minReplica": int(min_node_selector)
            }
        }
    }

    response = requests.put(
        f"{API_BASE_URL}/endpoint/{endpoint_name_input.strip()}",
        headers=_auth_headers(hf_token_input),
        # Serialise explicitly: a dict passed to ``data=`` would be
        # form-encoded, contradicting the JSON content type header.
        data=json.dumps(payload),
        timeout=REQUEST_TIMEOUT,
    )

    if response.status_code == 401:
        return "Invalid token"
    elif response.status_code == 404:
        return f"Error: {response.text}"
    elif response.status_code == 202:
        return f"Endpoint {endpoint_name_input} updated successfully."
    else:
        return f"something went wrong {response.status_code} = {response.text}"


with gr.Blocks() as interface:
    gr.Markdown(""" #### Your 🤗 Access Token (Required) """)
    hf_token_input = gr.Textbox(
        show_label=False,
        type="password"
    )

    # Get All Endpoints Info
    with gr.Tab("Info"):
        gr.Markdown(""" ### All Deployed Endpoints """)
        endpoints_table = gr.Dataframe(
            headers=["Endpoint Name", "Revision", "Provider", "Instance Type", "Status", "Min Replica", "Max Replica", "Created At", "Updated At"],
            col_count=(9, "fixed"),
        )
        endpoint_info_button = gr.Button(value="Get Info")

    # Deploy Endpoint
    with gr.Tab("Deploy Endpoint"):
        gr.Markdown(" ###\n(Deploy Your Model on 🤗 Endpoint)\n")

        gr.Markdown(""" #### Endpoint Name """)
        endpoint_name_input = gr.Textbox(
            show_label=False
        )

        with gr.Row():
            gr.Markdown(""" #### Cloud Provider """)
            gr.Markdown(""" #### Cloud Region """)

        with gr.Row():
            provider_selector = gr.Dropdown(
                choices=["aws", "azure"],
                value="",
                interactive=True,
                show_label=False,
            )
            region_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )

        with gr.Row():
            gr.Markdown(""" #### Target Model """)
            gr.Markdown(""" #### Target Model Version(branch commit hash) """)

        with gr.Row():
            repository_selector = gr.Textbox(
                value="",
                interactive=True,
                show_label=False,
            )
            revision_selector = gr.Textbox(
                value="",
                interactive=True,
                show_label=False,
            )

        with gr.Row():
            gr.Markdown(""" #### Task """)
            gr.Markdown(""" #### Framework """)

        with gr.Row():
            task_selector = gr.Textbox(
                value="Custom",
                interactive=True,
                show_label=False,
            )
            framework_selector = gr.Textbox(
                value="Custom",
                interactive=True,
                show_label=False,
            )

        gr.Markdown(""" #### Select Compute Instance Type """)
        compute_selector = gr.Dropdown(
            [],
            value="",
            interactive=True,
            show_label=False,
        )

        with gr.Row():
            gr.Markdown(""" #### Min Number of Nodes """)
            gr.Markdown(""" #### Max Number of Nodes """)
            gr.Markdown(""" #### Security Level """)

        with gr.Row():
            min_node_selector = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            max_node_selector = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            security_selector = gr.Radio(
                choices=["Protected", "Public"],
                value="Protected",
                interactive=True,
                show_label=False,
            )

        submit_button = gr.Button(
            value="Submit",
        )
        status_txt = gr.Textbox(
            value="status",
            interactive=False
        )

    # Update Endpoint
    with gr.Tab("Update Endpoint"):
        gr.Markdown(" ###\n(Update 🔁 Endpoint)\n")

        with gr.Row():
            gr.Markdown(""" #### Cloud Provider """)
            gr.Markdown(""" #### Cloud Region """)

        with gr.Row():
            update_provider_selector = gr.Dropdown(
                choices=["aws", "azure"],
                value="",
                interactive=True,
                show_label=False,
            )
            update_region_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )

        with gr.Row():
            gr.Markdown(""" #### Endpoint Name """)
            gr.Markdown(""" #### Instance Type """)

        with gr.Row():
            update_endpoint_name_input = gr.Textbox(
                show_label=False
            )
            update_compute_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )

        with gr.Row():
            gr.Markdown(""" #### Min Number of Nodes """)
            gr.Markdown(""" #### Max Number of Nodes """)

        with gr.Row():
            update_min_node_input = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            update_max_node_input = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )

        update_button = gr.Button(
            value="Update",
        )
        update_status_txt = gr.Textbox(
            value="status",
            interactive=False
        )

    # Delete Endpoint
    with gr.Tab("Delete Endpoint"):
        gr.Markdown(" ###\n(Delete 🗑️ Endpoint)\n")

        gr.Markdown(""" #### Endpoint Name """)
        delete_endpoint_name_input = gr.Textbox(
            show_label=False
        )

        delete_button = gr.Button(
            value="Delete",
        )
        delete_status_txt = gr.Textbox(
            value="status",
            interactive=False
        )

    # Pricing Table
    with gr.Tab("Pricing Table"):
        gr.Markdown(
            " ###\n(Instance Pricing Table)\n#### Pricing Table(CPU) - 2023/2/22 "
        )
        gr.Dataframe(
            headers=["provider", "size", "$/h", "vCPUs", "Memory", "Architecture"],
            datatype=["str", "str", "str", "number", "str", "str"],
            row_count=8,
            col_count=(6, "fixed"),
            value=[
                ["aws", "small", "$0.06", 1, "2GB", "Intel Xeon - Ice Lake"],
                ["aws", "medium", "$0.12", 2, "4GB", "Intel Xeon - Ice Lake"],
                ["aws", "large", "$0.24", 4, "8GB", "Intel Xeon - Ice Lake"],
                ["aws", "xlarge", "$0.48", 8, "16GB", "Intel Xeon - Ice Lake"],
                ["azure", "small", "$0.06", 1, "2GB", "Intel Xeon"],
                ["azure", "medium", "$0.12", 2, "4GB", "Intel Xeon"],
                ["azure", "large", "$0.24", 4, "8GB", "Intel Xeon"],
                ["azure", "xlarge", "$0.48", 8, "16GB", "Intel Xeon"],
            ]
        )

        gr.Markdown(""" #### Pricing Table(GPU) - 2023/2/22 """)
        gr.Dataframe(
            headers=["provider", "size", "$/h", "GPUs", "Memory", "Architecture"],
            datatype=["str", "str", "str", "number", "str", "str"],
            row_count=6,
            col_count=(6, "fixed"),
            value=[
                ["aws", "small", "$0.60", 1, "14GB", "NVIDIA T4"],
                ["aws", "medium", "$1.30", 1, "24GB", "NVIDIA A10G"],
                ["aws", "large", "$4.50", 4, "56GB", "NVIDIA T4"],
                ["aws", "xlarge", "$6.50", 1, "80GB", "NVIDIA A100"],
                ["aws", "xxlarge", "$7.00", 4, "96GB", "NVIDIA A10G"],
                ["aws", "xxxlarge", "$45.0", 8, "640GB", "NVIDIA A100"],
            ]
        )

    # Info Tab Events
    endpoint_info_button.click(
        get_all_endpoints,
        inputs=hf_token_input,
        outputs=endpoints_table
    )

    # Deploy Tab Events
    provider_selector.change(update_regions, inputs=provider_selector, outputs=region_selector)
    region_selector.change(update_compute_options, inputs=[provider_selector, region_selector], outputs=compute_selector)

    submit_button.click(
        submit,
        inputs=[
            hf_token_input,
            endpoint_name_input,
            provider_selector,
            region_selector,
            repository_selector,
            revision_selector,
            task_selector,
            framework_selector,
            compute_selector,
            min_node_selector,
            max_node_selector,
            security_selector],
        outputs=status_txt)

    # Update Tab Events
    update_provider_selector.change(update_regions, inputs=update_provider_selector, outputs=update_region_selector)
    update_region_selector.change(update_compute_options, inputs=[update_provider_selector, update_region_selector], outputs=update_compute_selector)

    update_button.click(
        update_endpoint,
        inputs=[
            hf_token_input,
            update_endpoint_name_input,
            update_min_node_input,
            update_max_node_input,
            update_compute_selector
        ],
        outputs=update_status_txt
    )

    # Delete Tab Events
    delete_button.click(
        delete_endpoint,
        inputs=[
            hf_token_input,
            delete_endpoint_name_input
        ],
        outputs=delete_status_txt
    )

interface.launch()