Spaces:
Build error
Build error
import copy | |
import json | |
import re | |
import requests | |
import uuid | |
# from curl_cffi import requests | |
from tclogger import logger | |
from transformers import AutoTokenizer | |
from constants.models import ( | |
MODEL_MAP, | |
STOP_SEQUENCES_MAP, | |
TOKEN_LIMIT_MAP, | |
TOKEN_RESERVED, | |
) | |
from constants.envs import PROXIES | |
from constants.headers import ( | |
REQUESTS_HEADERS, | |
HUGGINGCHAT_POST_HEADERS, | |
HUGGINGCHAT_SETTINGS_POST_DATA, | |
) | |
from messagers.message_outputer import OpenaiStreamOutputer | |
from messagers.message_composer import MessageComposer | |
class TokenChecker:
    """Count the tokens of a prompt and compare them with the model's limit.

    The tokenizer is loaded in the constructor via
    ``AutoTokenizer.from_pretrained``. Model names that are not keys of
    ``MODEL_MAP`` fall back to ``"mixtral-8x7b"``.
    """

    def __init__(self, input_str: str, model: str):
        self.input_str = input_str
        self.model = model if model in MODEL_MAP else "mixtral-8x7b"
        self.model_fullname = MODEL_MAP[self.model]

        # The upstream llama3 repo is gated and needs authentication, so the
        # NousResearch copy is used for that model; every other model loads
        # the tokenizer of its own full repo name.
        tokenizer_repo = (
            "NousResearch/Meta-Llama-3-70B"
            if self.model == "llama3-70b"
            else self.model_fullname
        )
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_repo)

    def count_tokens(self):
        """Return the number of token ids the tokenizer produces for the prompt."""
        encoded = self.tokenizer.encode(self.input_str)
        token_count = len(encoded)
        logger.note(f"Prompt Token Count: {token_count}")
        return token_count

    def check_token_limit(self):
        """Return True if the prompt leaves room under the model's token limit.

        Raises:
            ValueError: if limit - TOKEN_RESERVED - prompt tokens is not
                strictly positive.
        """
        token_limit = TOKEN_LIMIT_MAP[self.model]
        used = self.count_tokens()
        remaining = int(token_limit - TOKEN_RESERVED - used)
        if remaining > 0:
            return True
        raise ValueError(f"Prompt exceeded token limit: {token_limit}")
class HuggingchatRequester:
    """Issue the HTTP requests needed to send one prompt to HuggingChat.

    The flow implemented by ``chat_completions`` is: obtain an ``hf-chat``
    cookie, create a conversation, look up the id of its last message, then
    POST the prompt and return the streaming response.

    Model names that are not keys of ``MODEL_MAP`` fall back to
    ``"mixtral-8x7b"``.
    """

    def __init__(self, model: str):
        self.model = model if model in MODEL_MAP else "mixtral-8x7b"
        self.model_fullname = MODEL_MAP[self.model]

    def _cookie_headers(self):
        """Return a private copy of the shared POST headers plus the hf-chat cookie.

        A copy is required: updating ``HUGGINGCHAT_POST_HEADERS`` in place
        would leave this session's cookie in the module-level constant, where
        every later request (including the next ``get_hf_chat_id``) would
        pick it up.
        """
        headers = copy.deepcopy(HUGGINGCHAT_POST_HEADERS)
        headers["Cookie"] = f"hf-chat={self.hf_chat_id}"
        return headers

    def get_hf_chat_id(self):
        """POST the settings payload and store the ``hf-chat`` cookie on ``self``.

        Raises:
            ValueError: if the response carries no ``hf-chat`` cookie.
        """
        request_url = "https://huggingface.co/chat/settings"
        request_body = copy.deepcopy(HUGGINGCHAT_SETTINGS_POST_DATA)
        request_body.update({"activeModel": self.model_fullname})
        logger.note("> hf-chat ID:", end=" ")

        res = requests.post(
            request_url,
            headers=HUGGINGCHAT_POST_HEADERS,
            json=request_body,
            proxies=PROXIES,
            timeout=10,
        )
        self.hf_chat_id = res.cookies.get("hf-chat")
        if self.hf_chat_id:
            logger.success(f"[{self.hf_chat_id}]")
        else:
            logger.warn(f"[{res.status_code}]")
            logger.warn(res.text)
            raise ValueError("Failed to get hf-chat ID!")

    def get_conversation_id(self, system_prompt: str = ""):
        """Create a conversation and return (and store) its id.

        Args:
            system_prompt: sent as the ``preprompt`` field of the request.

        Raises:
            ValueError: if the response status is not 200.
        """
        request_url = "https://huggingface.co/chat/conversation"
        request_body = {
            "model": self.model_fullname,
            "preprompt": system_prompt,
        }
        logger.note("> Conversation ID:", end=" ")

        res = requests.post(
            request_url,
            headers=self._cookie_headers(),
            json=request_body,
            proxies=PROXIES,
            timeout=10,
        )
        if res.status_code != 200:
            logger.warn(f"[{res.status_code}]")
            raise ValueError("Failed to get conversation ID!")

        conversation_id = res.json()["conversationId"]
        logger.success(f"[{conversation_id}]")
        self.conversation_id = conversation_id
        return conversation_id

    def get_last_message_id(self):
        """Return the last uuid-shaped string in the conversation data, or None.

        Raises:
            ValueError: if the response status is not 200.
        """
        request_url = f"https://huggingface.co/chat/conversation/{self.conversation_id}/__data.json?x-sveltekit-invalidated=11"
        logger.note("> Message ID:", end=" ")

        res = requests.post(
            request_url,
            headers=self._cookie_headers(),
            proxies=PROXIES,
            timeout=10,
        )
        if res.status_code != 200:
            logger.warn(f"[{res.status_code}]")
            raise ValueError("Failed to get message ID!")

        data = res.json()["nodes"][1]["data"]
        # The message id is taken to be the last element that matches the
        # textual format of a uuid4.
        uuid_pattern = re.compile(
            r"^[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}$"
        )
        message_id = None
        for item in data:
            if isinstance(item, str) and uuid_pattern.match(item):
                message_id = item
        logger.success(f"[{message_id}]")
        return message_id

    def log_request(self, url, method="GET"):
        """Log the HTTP method and URL on one line (no trailing newline)."""
        logger.note(f"> {method}:", end=" ")
        logger.mesg(f"{url}", end=" ")

    def _log_stream_lines(self, res, logger_func):
        """Log streamed tokens of ``res`` until a ``finalAnswer`` event arrives."""
        for line in res.iter_lines():
            line = line.decode("utf-8")
            line = re.sub(r"^data:\s*", "", line)
            line = line.strip()
            if not line:
                continue
            try:
                data = json.loads(line, strict=False)
                msg_type = data.get("type")
                if msg_type == "stream":
                    logger_func(data.get("token", ""), end="")
                elif msg_type == "finalAnswer":
                    logger.success("\n[Finished]")
                    break
            except Exception as e:
                # Logging is best-effort: a bad line must not abort the request.
                logger.warn(e)

    def log_response(
        self, res: requests.Response, stream=False, iter_lines=False, verbose=False
    ):
        """Log the status of ``res`` and, optionally, its body.

        Args:
            res: the response to log.
            stream: if False, the JSON body is logged; if True, the body is
                only read when ``iter_lines`` is also True.
            iter_lines: when streaming, consume the response line by line and
                log each token. Note that this drains the stream.
            verbose: passed (negated) to the logger's quiet mode.
        """
        status_code = res.status_code
        logger_func = logger.success if status_code == 200 else logger.warn

        logger.enter_quiet(not verbose)
        try:
            logger_func(f"[{status_code}]")
            if status_code != 200:
                logger_func(res.text)

            if not stream:
                logger_func(res.json())
            elif iter_lines:
                self._log_stream_lines(res, logger_func)
        finally:
            # Always leave quiet mode, including on the streaming early exit
            # and when logging the body raises.
            logger.exit_quiet(not verbose)

    def chat_completions(self, messages: list[dict], iter_lines=False, verbose=False):
        """Send ``messages`` to HuggingChat and return the streaming response.

        Args:
            messages: chat messages, split by ``MessageComposer`` into a
                system prompt and an input prompt.
            iter_lines: forwarded to ``log_response``; when True the stream is
                consumed for logging before being returned.
            verbose: when False the logger is put in quiet mode while the
                requests are made.

        Returns:
            The ``requests.Response`` of the prompt POST (``stream=True``).

        Raises:
            ValueError: from the token check or from any of the setup requests.
        """
        composer = MessageComposer(model=self.model)
        system_prompt, input_prompt = composer.decompose_to_system_and_input_prompt(
            messages
        )

        checker = TokenChecker(input_str=system_prompt + input_prompt, model=self.model)
        checker.check_token_limit()

        logger.enter_quiet(not verbose)
        try:
            self.get_hf_chat_id()
            self.get_conversation_id(system_prompt=system_prompt)
            message_id = self.get_last_message_id()

            request_url = (
                f"https://huggingface.co/chat/conversation/{self.conversation_id}"
            )
            request_headers = copy.deepcopy(HUGGINGCHAT_POST_HEADERS)
            extra_headers = {
                "Content-Type": "text/event-stream",
                "Referer": request_url,
                "Cookie": f"hf-chat={self.hf_chat_id}",
            }
            request_headers.update(extra_headers)
            request_body = {
                "files": [],
                "id": message_id,
                "inputs": input_prompt,
                "is_continue": False,
                "is_retry": False,
                "web_search": False,
            }
            self.log_request(request_url, method="POST")
        finally:
            # Restore the logger even if one of the setup requests raised.
            logger.exit_quiet(not verbose)

        res = requests.post(
            request_url,
            headers=request_headers,
            json=request_body,
            proxies=PROXIES,
            stream=True,
        )
        self.log_response(res, stream=True, iter_lines=iter_lines, verbose=verbose)
        return res
class HuggingchatStreamer:
    """Turn a HuggingChat event stream into chunks built by ``OpenaiStreamOutputer``.

    Model names that are not keys of ``MODEL_MAP`` fall back to
    ``"mixtral-8x7b"``.
    """

    def __init__(self, model: str):
        self.model = model if model in MODEL_MAP else "mixtral-8x7b"
        self.model_fullname = MODEL_MAP[self.model]
        self.message_outputer = OpenaiStreamOutputer(model=self.model)

    def chat_response(self, messages: list[dict], verbose=False):
        """Send ``messages`` through a new ``HuggingchatRequester``.

        Returns whatever ``chat_completions`` returns; it is called with
        ``iter_lines=False``.
        """
        requester = HuggingchatRequester(model=self.model)
        return requester.chat_completions(
            messages=messages, iter_lines=False, verbose=verbose
        )

    def chat_return_generator(self, stream_response: requests.Response, verbose=False):
        """Yield one output chunk per streamed token, then one "Finished" chunk.

        Each non-empty line of ``stream_response`` is stripped of a leading
        ``data:`` prefix and parsed as JSON. ``stream`` events yield a
        "Completions" chunk carrying the event's ``token``; a ``finalAnswer``
        event stops reading; every other event type is skipped. Lines that do
        not parse are logged and skipped rather than re-emitting the previous
        token.

        Exactly one "Finished" chunk is yielded at the end, whether the stream
        ended with ``finalAnswer`` or simply ran out of lines.

        Args:
            stream_response: object whose ``iter_lines()`` yields ``bytes``.
            verbose: when True, tokens and the end marker are also logged.
        """
        for line in stream_response.iter_lines():
            line = line.decode("utf-8")
            line = re.sub(r"^data:\s*", "", line)
            line = line.strip()
            if not line:
                continue

            try:
                data = json.loads(line, strict=False)
                msg_type = data.get("type")
            except (ValueError, AttributeError) as e:
                # Malformed JSON, or JSON that is not an object: nothing to emit.
                logger.warn(e)
                continue

            if msg_type == "stream":
                content = data.get("token", "")
                if verbose:
                    logger.success(content, end="")
                yield self.message_outputer.output(
                    content=content, content_type="Completions"
                )
            elif msg_type == "finalAnswer":
                if verbose:
                    logger.success("\n[Finished]")
                break

        # Emitted unconditionally so consumers always receive the end marker.
        yield self.message_outputer.output(content="", content_type="Finished")

    def chat_return_dict(self, stream_response: requests.Response):
        """Consume the whole stream and return a single non-streaming result.

        The result is a shallow copy of the outputer's ``default_data`` whose
        ``choices`` holds one assistant message. Its content is the
        concatenation of every chunk's ``choices[0].delta.content``, stripped
        of surrounding whitespace. Chunks that cannot be parsed are logged and
        ignored.
        """
        final_output = self.message_outputer.default_data.copy()
        final_output["choices"] = [
            {
                "index": 0,
                "finish_reason": "stop",
                "message": {"role": "assistant", "content": ""},
            }
        ]

        pieces = []
        for item in self.chat_return_generator(stream_response):
            try:
                delta = json.loads(item)["choices"][0]["delta"]
                delta_content = delta.get("content", "")
            except (ValueError, LookupError, TypeError, AttributeError) as e:
                logger.warn(e)
                continue
            if delta_content:
                pieces.append(delta_content)

        final_output["choices"][0]["message"]["content"] = "".join(pieces).strip()
        return final_output
if __name__ == "__main__":
    # Manual smoke test: send a short conversation to one model.
    # Other model names that have been tried here: "command-r-plus", "zephyr-141b".
    model = "llama3-70b"

    messages = [
        {
            "role": "system",
            "content": "You are an LLM developed by CloseAI.\nYour name is Hansimov-Copilot.",
        },
        {"role": "user", "content": "Hello, what is your role?"},
        {"role": "assistant", "content": "I am an LLM."},
        {"role": "user", "content": "What is your name?"},
    ]

    streamer = HuggingchatStreamer(model=model)
    streamer.chat_response(messages=messages)

    # Run with:
    # HF_ENDPOINT=https://hf-mirror.com python -m networks.huggingchat_streamer