Spaces:
Runtime error
Runtime error
File size: 3,882 Bytes
ec0c335 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
"""Call API providers."""
import os
import random
import time
from fastchat.utils import build_logger
from fastchat.constants import WORKER_API_TIMEOUT
logger = build_logger("gradio_web_server", "gradio_web_server.log")
def openai_api_stream_iter(
model_name,
messages,
temperature,
top_p,
max_new_tokens,
api_base=None,
api_key=None,
):
import openai
is_azure = False
if "azure" in model_name:
is_azure = True
openai.api_type = "azure"
openai.api_version = "2023-07-01-preview"
else:
openai.api_type = "open_ai"
openai.api_version = None
openai.api_base = api_base or "https://api.openai.com/v1"
openai.api_key = api_key or os.environ["OPENAI_API_KEY"]
if model_name == "gpt-4-turbo":
model_name = "gpt-4-1106-preview"
# Make requests
gen_params = {
"model": model_name,
"prompt": messages,
"temperature": temperature,
"top_p": top_p,
"max_new_tokens": max_new_tokens,
}
logger.info(f"==== request ====\n{gen_params}")
if is_azure:
res = openai.ChatCompletion.create(
engine=model_name,
messages=messages,
temperature=temperature,
max_tokens=max_new_tokens,
stream=True,
)
else:
res = openai.ChatCompletion.create(
model=model_name,
messages=messages,
temperature=temperature,
max_tokens=max_new_tokens,
stream=True,
)
text = ""
for chunk in res:
if len(chunk["choices"]) > 0:
text += chunk["choices"][0]["delta"].get("content", "")
data = {
"text": text,
"error_code": 0,
}
yield data
def anthropic_api_stream_iter(model_name, prompt, temperature, top_p, max_new_tokens):
import anthropic
c = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
# Make requests
gen_params = {
"model": model_name,
"prompt": prompt,
"temperature": temperature,
"top_p": top_p,
"max_new_tokens": max_new_tokens,
}
logger.info(f"==== request ====\n{gen_params}")
res = c.completions.create(
prompt=prompt,
stop_sequences=[anthropic.HUMAN_PROMPT],
max_tokens_to_sample=max_new_tokens,
temperature=temperature,
top_p=top_p,
model=model_name,
stream=True,
)
text = ""
for chunk in res:
text += chunk.completion
data = {
"text": text,
"error_code": 0,
}
yield data
def init_palm_chat(model_name):
import vertexai # pip3 install google-cloud-aiplatform
from vertexai.preview.language_models import ChatModel
project_id = os.environ["GCP_PROJECT_ID"]
location = "us-central1"
vertexai.init(project=project_id, location=location)
chat_model = ChatModel.from_pretrained(model_name)
chat = chat_model.start_chat(examples=[])
return chat
def palm_api_stream_iter(chat, message, temperature, top_p, max_new_tokens):
parameters = {
"temperature": temperature,
"top_p": top_p,
"max_output_tokens": max_new_tokens,
}
gen_params = {
"model": "palm-2",
"prompt": message,
}
gen_params.update(parameters)
logger.info(f"==== request ====\n{gen_params}")
response = chat.send_message(message, **parameters)
content = response.text
pos = 0
while pos < len(content):
# This is a fancy way to simulate token generation latency combined
# with a Poisson process.
pos += random.randint(10, 20)
time.sleep(random.expovariate(50))
data = {
"text": content[:pos],
"error_code": 0,
}
yield data
|