# steamship/utils/huggingface_helper.py
"""This class is a helper for plugins to use models hosted on Hugging Face.
It uses asyncio parallelism to make many http requests simultaneously.
"""
import asyncio
import logging
import time
from http import HTTPStatus
from typing import List, Optional

import aiohttp
from aiohttp import ClientTimeout

from steamship import Block, SteamshipError


async def _model_call(
    session,
    text: str,
    api_url: str,
    headers: dict,
    additional_params: Optional[dict] = None,
    use_gpu: bool = False,
) -> Optional[list]:
    additional_params = additional_params or {}
    json_input = {
        "inputs": text or "",
        "parameters": additional_params,
        "options": {"use_gpu": use_gpu, "wait_for_model": False},
    }
    ok_response, nok_response = None, None
    max_error_retries = 3

    # Hugging Face answers with a "model is currently loading" error when it believes you
    # have too many simultaneous requests. In that case we wait a second and retry without
    # counting the attempt; any other error is retried up to max_error_retries times
    # before failing.
    tries = 0
    while tries <= max_error_retries:
        async with session.post(api_url, headers=headers, json=json_input) as response:
            if response.status == HTTPStatus.OK and response.content_type == "application/json":
                ok_response = await response.json()
                logging.info(ok_response)
                return ok_response
            nok_response = await response.text()
            if "is currently loading" not in nok_response:
                logging.info(
                    f'Received text response "{nok_response}" for input text "{text}" '
                    f"[attempt {tries}/{max_error_retries}]"
                )
                tries += 1
            else:
                await asyncio.sleep(1)
    # The loop only exits once the retries are exhausted, so this is always an error.
    raise SteamshipError(
        message="Unable to query Hugging Face model",
        internal_message=f"HF returned error: {nok_response} after {tries} attempts",
    )


async def _model_calls(
    texts: List[str],
    api_url: str,
    headers: dict,
    timeout_seconds: int,
    additional_params: Optional[dict] = None,
    use_gpu: bool = False,
) -> List[list]:
    async with aiohttp.ClientSession(timeout=ClientTimeout(total=timeout_seconds)) as session:
        tasks = []
        for text in texts:
            tasks.append(
                asyncio.ensure_future(
                    _model_call(
                        session,
                        text,
                        api_url,
                        headers=headers,
                        additional_params=additional_params,
                        use_gpu=use_gpu,
                    )
                )
            )
        return await asyncio.gather(*tasks)
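

# Design note on _model_calls: asyncio.ensure_future() wraps each coroutine in a Task,
# and asyncio.gather() awaits them all, so every request is in flight concurrently.
# Since gather() also accepts bare coroutines, an equivalent sketch (not in the original
# module) could drop the explicit task list:
#
#     return await asyncio.gather(
#         *(
#             _model_call(session, text, api_url, headers=headers,
#                         additional_params=additional_params, use_gpu=use_gpu)
#             for text in texts
#         )
#     )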


def get_huggingface_results(
    blocks: List[Block],
    hf_model_path: str,
    hf_bearer_token: str,
    additional_params: Optional[dict] = None,
    timeout_seconds: int = 30,
    use_gpu: bool = False,
) -> List[list]:
    api_url = f"https://api-inference.huggingface.co/models/{hf_model_path}"
    headers = {"Authorization": f"Bearer {hf_bearer_token}"}
    start_time = time.perf_counter()
    results = asyncio.run(
        _model_calls(
            [block.text for block in blocks],
            api_url,
            headers,
            timeout_seconds=timeout_seconds,
            additional_params=additional_params,
            use_gpu=use_gpu,
        )
    )
    total_time = time.perf_counter() - start_time
    logging.info(
        f"Completed {len(blocks)} blocks in {total_time:.2f} seconds "
        f"({len(blocks) / total_time:.2f} blocks/sec)."
    )
    return results
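

# Minimal usage sketch, not part of the original module. The model path, token value,
# and Block construction below are illustrative assumptions; substitute real values.
if __name__ == "__main__":
    demo_blocks = [Block(text="I love this product!"), Block(text="This was a letdown.")]
    demo_results = get_huggingface_results(
        demo_blocks,
        hf_model_path="distilbert-base-uncased-finetuned-sst-2-english",  # assumed model
        hf_bearer_token="hf_your_token_here",  # placeholder; supply a real API token
    )
    # Each entry is the parsed JSON the model returned for the corresponding block.
    print(demo_results)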