File size: 1,203 Bytes
a01c0de
65e0c01
4888114
 
 
 
 
 
 
 
 
b5c1c64
4888114
 
 
 
5086c79
4888114
 
 
5086c79
86e8bd8
 
849b34b
4888114
 
86e8bd8
5086c79
849b34b
4888114
5086c79
a374c78
4888114
5086c79
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import time, json
import gradio as gr
import http.client

# Endpoint host (passed to HTTPSConnection) and API key (sent as the
# 'Cnvrg-Api-Key' header) come from the environment; a missing variable
# raises KeyError as soon as the module is loaded.
url, token = os.environ['cnvrg_url'], os.environ['cnvrg_token']

def generate_output(prompt, maxtokens=20):
    """Send a prompt to the serving endpoint and return its prediction.

    The request body is a JSON object with a single ``input_params`` string
    of the form ``"<maxtokens>#<prompt>"``.

    Args:
        prompt: Text placed after the ``#`` separator in the request string.
        maxtokens: Value placed before the ``#`` separator.

    Returns:
        A ``(prediction, elapsed)`` tuple: the ``"prediction"`` field of the
        JSON response, and the wall-clock time of the request/response
        round trip in seconds, rounded to three decimals, with an ``"s"``
        suffix.

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.
        KeyError: If the response JSON has no ``"prediction"`` field.
    """
    # json.dumps escapes quotes, backslashes, control characters and
    # non-ASCII text in the prompt. Hand-concatenating the body produced
    # invalid JSON for such prompts, and http.client encodes str bodies as
    # ISO-8859-1, which fails on characters outside Latin-1.
    body = json.dumps({"input_params": f"{maxtokens}#{prompt}"})
    headers = {
        'Cnvrg-Api-Key': token,
        'Content-Type': "application/json",
    }
    conn = http.client.HTTPSConnection(url, 443)
    try:
        tic = time.time()
        conn.request("POST", "/api/v1/endpoints/nkjpkfaw14saekgsvudy", body, headers)
        data = conn.getresponse().read()
        toc = time.time()
    finally:
        # Always release the socket, even when the request or read fails.
        conn.close()
    json_data = json.loads(data.decode("utf-8"))
    return json_data["prediction"], str(round(toc - tic, 3)) + "s"

# Header text shown on the demo page.
title = "SPR - Cnvrg.io | Serving"
description = "Model - Falcon 7B (Instruct) - FP32"

# Input widgets, listed in the positional order generate_output(prompt, maxtokens) takes.
input_text = gr.inputs.Textbox(label="Enter Prompt")
slider = gr.inputs.Slider(
    minimum=1,
    maximum=75,
    step=1,
    default=20,
    label="Max New Tokens",
)

# Output widgets, one per element of the tuple generate_output returns.
output_text = gr.outputs.Textbox(label="Response")
time_text = gr.outputs.Label(label="Time Taken")

# Build the interface and start serving it immediately.
gr.Interface(
    fn=generate_output,
    inputs=[input_text, slider],
    outputs=[output_text, time_text],
    title=title,
    description=description,
).launch()