# Page-capture header (not part of the program):
#   Spaces status: Runtime error
#   File size: 3,631 bytes
#   Revision gutter: c166553 378cf08 8352cfc c166553 03c472b c166553 03c472b c166553 a37fc8e c166553 a37fc8e c166553 5b6465e c166553 a37fc8e c166553 136552b c166553
#   Line-number gutter: 1-141
import json
import os
import shutil
import requests
import gradio as gr
from huggingface_hub import Repository, InferenceClient
# API token read from the environment; None when the variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")

# Inference endpoint that the client is pointed at.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "WizardLM/WizardCoder-Python-34B-V1.0"
)

# Name of the assistant persona.
BOT_NAME = "Wizard"

# Markers sent to the endpoint as stop sequences and trimmed from the end of
# the streamed text.
STOP_SEQUENCES = [
    "\nUser:",
    "<|endoftext|>",
    " User:",
    "###",
]

# Example prompts handed to the chat interface (one single-item row each).
EXAMPLES = [
    ["what are the benefits of programming in python?"],
    ["explain binary search in java?"],
]
# Shared client for the text-generation endpoint. The Authorization header is
# attached only when a token is configured; formatting an unset token would
# otherwise send the literal string "Bearer None".
client = InferenceClient(
    API_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {},
)
def format_prompt(message, history, system_prompt):
    """Build the plain-text prompt for one chat turn.

    The prompt is a newline-separated transcript: an optional ``System:``
    line, one ``User:``/``Wizard:`` pair per past exchange, the new user
    message, and a trailing ``Wizard:`` cue for the model to complete.

    Args:
        message: The new user message.
        history: Iterable of ``(user_prompt, bot_response)`` pairs for the
            earlier turns; ``None`` is treated as no history.
        system_prompt: Optional system instruction; skipped when falsy.

    Returns:
        The assembled prompt string, ending with ``"Wizard:"`` and no
        trailing newline.
    """
    lines = []
    if system_prompt:
        lines.append(f"System: {system_prompt}")

    for user_prompt, bot_response in history or ():
        # A turn without a reply is rendered as an empty answer rather than
        # the text "None".
        reply = "" if bot_response is None else bot_response
        lines.append(f"User: {user_prompt}")
        lines.append(f"Wizard: {reply}")

    lines.append(f"User: {message}")
    lines.append("Wizard:")
    return "\n".join(lines)
# Sampling seed shared by every request; generate() increments it after each
# call so consecutive requests use different seeds.
seed = 42


def generate(
    prompt,
    history,
    system_prompt="",
    temperature=0.4,
    max_new_tokens=800,
    top_p=0.95,
    repetition_penalty=1.5,
):
    """Stream the model's reply to ``prompt``, yielding the text so far.

    Args:
        prompt: The new user message.
        history: Earlier ``(user, bot)`` exchanges, passed to
            ``format_prompt``.
        system_prompt: Optional system instruction.
        temperature: Sampling temperature; raised to at least 0.01.
        max_new_tokens: Generation budget; coerced to an int of at least 1.
        top_p: Nucleus-sampling mass; clamped into [0.01, 0.99].
        repetition_penalty: Forwarded to the endpoint unchanged.

    Yields:
        The accumulated reply after each streamed token. When the text ends
        with one of ``STOP_SEQUENCES`` the marker and trailing whitespace are
        removed, the cleaned text is yielded once, and streaming stops.
    """
    global seed

    # The sliders allow values (temperature 0, top_p 0 or 1, 0 new tokens)
    # that the text-generation backend is documented to reject, so clamp them.
    temperature = max(float(temperature), 1e-2)
    top_p = min(max(float(top_p), 1e-2), 0.99)
    max_new_tokens = max(int(max_new_tokens), 1)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        stop_sequences=STOP_SEQUENCES,
        do_sample=True,
        seed=seed,
    )
    seed = seed + 1

    formatted_prompt = format_prompt(prompt, history, system_prompt)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )

    output = ""
    for response in stream:
        output += response.token.text

        matched_stop = next(
            (stop for stop in STOP_SEQUENCES if output.endswith(stop)), None
        )
        if matched_stop is not None:
            # Drop the marker, emit the cleaned text once, and end the reply
            # instead of yielding the same text twice and reading on.
            output = output[: -len(matched_stop)].rstrip()
            yield output
            return output

        yield output
    return output
def _slider(label, value, minimum, maximum, step, info):
    """Return an interactive ``gr.Slider`` with the given range and help text."""
    return gr.Slider(
        label=label,
        value=value,
        minimum=minimum,
        maximum=maximum,
        step=step,
        interactive=True,
        info=info,
    )


# Extra controls for the chat interface, listed in the order of generate()'s
# parameters after (prompt, history): system prompt, temperature,
# max new tokens, top-p, repetition penalty.
additional_inputs = [
    gr.Textbox("", label="Optional system prompt"),
    _slider(
        label="Temperature",
        value=0.4,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        info="Higher values produce more diverse outputs",
    ),
    _slider(
        label="Max new tokens",
        value=800,
        minimum=0,
        maximum=8192,
        step=64,
        info="The maximum numbers of new tokens",
    ),
    _slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        info="Higher values sample more low-probability tokens",
    ),
    _slider(
        label="Repetition penalty",
        value=1.5,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        info="Penalize repeated tokens",
    ),
]
def vote(data: gr.LikeData):
    """Print whether a chatbot response was upvoted or downvoted.

    Args:
        data: Like-event payload; ``data.liked`` selects the wording and
            ``data.value`` is the rated content.
    """
    verdict = "upvoted" if data.liked else "downvoted"
    # Format instead of concatenating so a non-string value (for example a
    # dict or tuple) is printed rather than raising TypeError.
    print(f"You {verdict} this response: {data.value}")
# Chat display with custom avatar images for the user and the bot.
chatbot = gr.Chatbot(
    avatar_images=("user.png", "bot.png"),
    bubble_full_width=False,
)

# Chat interface that streams replies from generate() into the chatbot and
# exposes the example prompts and the extra controls.
chat_interface = gr.ChatInterface(
    generate,
    chatbot=chatbot,
    examples=EXAMPLES,
    additional_inputs=additional_inputs,
)

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # The heading must be passed as a string; written as a bare
            # "# Code bot" line inside the call it is a Python comment and
            # nothing is rendered.
            gr.Markdown("# Code bot")

    # Report like/dislike clicks on chatbot messages to vote().
    chatbot.like(vote, None, None)
    chat_interface.render()

# Enable the request queue, then start the app with the API page hidden.
demo.queue(concurrency_count=100, api_open=False).launch(show_api=False)