"""Steamship Python SDK package root.

Resolves the installed distribution version and re-exports the public
client surface (`Steamship`, `Configuration`, data and plugin models).
"""
from importlib.metadata import PackageNotFoundError, version  # pragma: no cover

try:
    # Change here if project is renamed and does not equal the package name
    dist_name = __name__
    __version__ = version(dist_name)
except PackageNotFoundError:  # pragma: no cover
    # Distribution metadata is unavailable (e.g. running from a source checkout).
    __version__ = "unknown"
finally:
    # Keep the module namespace clean: these helpers are not part of the public API.
    del version, PackageNotFoundError

from .base import (
    Configuration,
    MimeTypes,
    RuntimeEnvironments,
    SteamshipError,
    Task,
    TaskState,
    check_environment,
)
from .data import (
    Block,
    DocTag,
    EmbeddingIndex,
    File,
    Package,
    PackageInstance,
    PackageVersion,
    PluginInstance,
    PluginVersion,
    Tag,
    Workspace,
)

# Imported last (and excluded from isort) because `.client` depends on the
# names imported above; reordering would create a circular-import problem.
from .client import Steamship  # isort:skip

__all__ = [
    "Steamship",
    "Configuration",
    "SteamshipError",
    "MimeTypes",
    "Package",
    "PackageInstance",
    "PackageVersion",
    "File",
    "Task",
    "TaskState",
    "Block",
    "Tag",
    "Workspace",
    "PluginInstance",
    "PluginVersion",
    "DocTag",
    "EmbeddingIndex",
    "check_environment",
    "RuntimeEnvironments",
]
"""Re-export surface for the `steamship.base` package.

Collects the configuration, runtime-environment, error, MIME-type, and
task primitives into a single import location.
"""
from .configuration import Configuration
from .environments import RuntimeEnvironments, check_environment
from .error import SteamshipError
from .mime_types import MimeTypes
from .tasks import Task, TaskState

__all__ = [
    "Configuration",
    "SteamshipError",
    "Task",
    "TaskState",
    "MimeTypes",
    "RuntimeEnvironments",
    "check_environment",
]
a/steamship/base/__pycache__/mime_types.cpython-39.pyc b/steamship/base/__pycache__/mime_types.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f8aa597b2c3863146510b9c54239d07564aed190 Binary files /dev/null and b/steamship/base/__pycache__/mime_types.cpython-39.pyc differ diff --git a/steamship/base/__pycache__/model.cpython-39.pyc b/steamship/base/__pycache__/model.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0465d03b48c5421c422bbcaa3252cba1b7fd2808 Binary files /dev/null and b/steamship/base/__pycache__/model.cpython-39.pyc differ diff --git a/steamship/base/__pycache__/package_spec.cpython-39.pyc b/steamship/base/__pycache__/package_spec.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24b4c991017024bfdfea5c6f1c6c38d676eebfa2 Binary files /dev/null and b/steamship/base/__pycache__/package_spec.cpython-39.pyc differ diff --git a/steamship/base/__pycache__/request.cpython-39.pyc b/steamship/base/__pycache__/request.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef0a5fa8d7e64c0bd37aeb5e9e9e5c3b2bc3de48 Binary files /dev/null and b/steamship/base/__pycache__/request.cpython-39.pyc differ diff --git a/steamship/base/__pycache__/response.cpython-39.pyc b/steamship/base/__pycache__/response.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..90180ab75e94a50fafd8ae3fb6d9b66171652f6e Binary files /dev/null and b/steamship/base/__pycache__/response.cpython-39.pyc differ diff --git a/steamship/base/__pycache__/tasks.cpython-39.pyc b/steamship/base/__pycache__/tasks.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2970bdb2fbaee033160eb499e834cfa87c0fd069 Binary files /dev/null and b/steamship/base/__pycache__/tasks.cpython-39.pyc differ diff --git a/steamship/base/__pycache__/utils.cpython-39.pyc b/steamship/base/__pycache__/utils.cpython-39.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..597032093ada76e2c54e28d7218bcbc3edf21026 Binary files /dev/null and b/steamship/base/__pycache__/utils.cpython-39.pyc differ diff --git a/steamship/base/client.py b/steamship/base/client.py new file mode 100644 index 0000000000000000000000000000000000000000..f299cd0f6f15cdca4376aaa9190935d07da227c1 --- /dev/null +++ b/steamship/base/client.py @@ -0,0 +1,635 @@ +from __future__ import annotations + +import logging +import typing +from abc import ABC +from inspect import isclass +from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union + +import inflection +from pydantic import BaseModel, PrivateAttr +from requests import Session + +from steamship.base.configuration import Configuration +from steamship.base.error import SteamshipError +from steamship.base.mime_types import MimeTypes +from steamship.base.model import CamelModel, to_camel +from steamship.base.request import Request +from steamship.base.tasks import Task, TaskState +from steamship.utils.url import Verb, is_local + +_logger = logging.getLogger(__name__) + +T = TypeVar("T") # TODO (enias): Do we need this? + + +def _multipart_name(path: str, val: Any) -> List[Tuple[Optional[str], str, Optional[str]]]: + """Decode any object into a series of HTTP Multi-part segments that Vapor will consume. 
class Client(CamelModel, ABC):
    """Base HTTP client for the Steamship Engine API.

    Separated primarily as a hack to prevent circular imports.
    """

    config: Configuration
    _session: Session = PrivateAttr()

    def __init__(
        self,
        api_key: str = None,
        api_base: str = None,
        app_base: str = None,
        web_base: str = None,
        workspace: str = None,
        fail_if_workspace_exists: bool = False,
        profile: str = None,
        config_file: str = None,
        config: Configuration = None,
        trust_workspace_config: bool = False,  # For use by lambda_handler; don't fetch the workspace
        **kwargs,
    ):
        """Create a new client.

        If `workspace` is provided, it will anchor the client in a workspace by that name, creating it if necessary.
        Otherwise the `default` workspace will be used.
        """
        if config is not None and not isinstance(config, Configuration):
            config = Configuration.parse_obj(config)

        self._session = Session()
        config = config or Configuration(
            api_key=api_key,
            api_base=api_base,
            app_base=app_base,
            web_base=web_base,
            workspace_handle=workspace,
            profile=profile,
            config_file=config_file,
        )

        super().__init__(config=config)
        # The lambda_handler will pass in the workspace via the workspace_id, so we need to plumb this through to make
        # sure the workspace switch performed doesn't mistake `workspace=None` as a request for the default workspace.
        self.switch_workspace(
            workspace_handle=workspace or config.workspace_handle,
            workspace_id=config.workspace_id,
            fail_if_workspace_exists=fail_if_workspace_exists,
            trust_workspace_config=trust_workspace_config,
        )

    def switch_workspace(  # noqa: C901
        self,
        workspace_handle: str = None,
        workspace_id: str = None,
        fail_if_workspace_exists: bool = False,
        trust_workspace_config: bool = False,  # For use by lambda_handler; don't fetch the workspace
    ):
        """Switches this client to the requested workspace, possibly creating it. If all arguments are None, the client
        actively switches into the default workspace.

        - API calls are performed manually to not result in circular imports.
        - Note that the default workspace is technically not necessary for API usage; it will be assumed by the Engine
          in the absence of a Workspace ID or Handle being manually specified in request headers.

        :raises SteamshipError: if a trusted switch lacks handle or id, or if the Engine returns an empty workspace.
        """
        workspace = None

        if workspace_handle is None and workspace_id is None:
            # Switch to the default workspace since no named or ID'ed workspace was provided
            workspace_handle = "default"

        if fail_if_workspace_exists:
            logging.info(
                f"[Client] Creating workspace with handle/id: {workspace_handle}/{workspace_id}."
            )
        else:
            logging.info(
                f"[Client] Creating/Fetching workspace with handle/id: {workspace_handle}/{workspace_id}."
            )

        # Zero out the workspace_handle on the config block in case we're being invoked from
        # `init` (otherwise we'll attempt to create the space IN that non-existent workspace)
        old_workspace_handle = self.config.workspace_handle
        self.config.workspace_handle = None

        if trust_workspace_config:
            if workspace_handle is None or workspace_id is None:
                raise SteamshipError(
                    message="Attempted a trusted workspace switch without providing both workspace handle and workspace id."
                )
            return_id = workspace_id
            return_handle = workspace_handle
        else:
            try:
                if workspace_handle is not None and workspace_id is not None:
                    get_params = {
                        "handle": workspace_handle,
                        "id": workspace_id,
                        "fetchIfExists": False,
                    }
                    workspace = self.post("workspace/get", get_params)
                elif workspace_handle is not None:
                    get_params = {
                        "handle": workspace_handle,
                        "fetchIfExists": not fail_if_workspace_exists,
                    }
                    workspace = self.post("workspace/create", get_params)
                elif workspace_id is not None:
                    get_params = {"id": workspace_id}
                    workspace = self.post("workspace/get", get_params)

            except SteamshipError as e:
                # Restore the prior handle so a failed switch doesn't leave the config half-updated.
                self.config.workspace_handle = old_workspace_handle
                raise e

            if workspace is None:
                raise SteamshipError(
                    message="Was unable to switch to new workspace: server returned empty Workspace."
                )

            return_id = workspace.get("workspace", {}).get("id")
            return_handle = workspace.get("workspace", {}).get("handle")

        if return_id is None or return_handle is None:
            raise SteamshipError(
                message="Was unable to switch to new workspace: server returned empty ID and Handle."
            )

        # Finally, set the new workspace.
        self.config.workspace_id = return_id
        self.config.workspace_handle = return_handle
        logging.info(f"[Client] Switched to workspace {return_handle}/{return_id}")

    def dict(self, **kwargs) -> dict:
        """Serialize the client, always excluding dynamic helpers and the API key secret."""
        # Because of the trick we do to hack these in as both static and member methods (with different
        # implementations), Pydantic will try to include them by default. So we have to suppress that otherwise
        # downstream serialization into JSON will fail.
        if "exclude" not in kwargs:
            kwargs["exclude"] = {
                "use": True,
                "use_plugin": True,
                "_instance_use": True,
                "_instance_use_plugin": True,
                "config": {"api_key"},
            }
        elif isinstance(kwargs["exclude"], set):
            kwargs["exclude"].add("use")
            kwargs["exclude"].add("use_plugin")
            kwargs["exclude"].add("_instance_use")
            kwargs["exclude"].add("_instance_use_plugin")
            kwargs["exclude"].add(
                "config"
            )  # the set version cannot exclude subcomponents; we must remove all config
        elif isinstance(kwargs["exclude"], dict):
            kwargs["exclude"]["use"] = True
            kwargs["exclude"]["use_plugin"] = True
            kwargs["exclude"]["_instance_use"] = True
            kwargs["exclude"]["_instance_use_plugin"] = True
            kwargs["exclude"]["config"] = {"api_key"}

        return super().dict(**kwargs)

    def _url(
        self,
        is_package_call: bool = False,
        package_owner: str = None,
        operation: str = None,
    ):
        """Build the full request URL for an API or package-invocation call."""
        if not is_package_call:
            # Regular API call
            base = self.config.api_base
        else:
            # Do the invocable version
            if package_owner is None:
                # BUGFIX: this used to `return` the SteamshipError, which callers would then
                # interpolate straight into the request URL. Raise it instead.
                raise SteamshipError(
                    code="UserMissing",
                    message="Cannot invoke a package endpoint without the package owner's user handle.",
                    suggestion="Provide the package_owner option, or initialize your package with a lookup.",
                )

            base = self.config.app_base
            if not is_local(base):
                # We want to prepend the user handle, e.g. https://{owner}.steamship.run
                parts = base.split("//")
                base = f"{parts[0]}//{package_owner}.{'//'.join(parts[1:])}"

        # Clean trailing "/" on the base and leading "/" on the operation before joining.
        if base.endswith("/"):
            base = base[:-1]
        if operation.startswith("/"):
            operation = operation[1:]

        return f"{base}/{operation}"

    def _headers(  # noqa: C901
        self,
        is_package_call: bool = False,
        package_owner: str = None,
        package_id: str = None,
        package_instance_id: str = None,
        as_background_task: bool = False,
        wait_on_tasks: List[Union[str, Task]] = None,
    ):
        """Build request headers: auth, workspace scoping, package routing, and task scheduling."""
        headers = {"Authorization": f"Bearer {self.config.api_key.get_secret_value()}"}

        if self.config.workspace_id:
            headers["X-Workspace-Id"] = self.config.workspace_id
        elif self.config.workspace_handle:
            headers["X-Workspace-Handle"] = self.config.workspace_handle

        if is_package_call:
            if package_owner:
                headers["X-Package-Owner-Handle"] = package_owner
            if package_id:
                headers["X-Package-Id"] = package_id
            if package_instance_id:
                headers["X-Package-Instance-Id"] = package_instance_id

        if wait_on_tasks:
            # Will result in the engine persisting the inbound HTTP request as a Task for deferred
            # execution. Additionally, the task will be scheduled to first wait on the other tasks
            # provided in the list of IDs. Accepts a list of EITHER Task objects OR task_id strings.
            as_background_task = True
            task_ids = []
            for task_or_id in wait_on_tasks:
                if isinstance(task_or_id, str):
                    task_ids.append(task_or_id)
                elif isinstance(task_or_id, Task):
                    task_ids.append(task_or_id.task_id)
                else:
                    raise SteamshipError(
                        message=f"`wait_on_tasks` should only contain Task or str objects. Got a {type(task_or_id)}."
                    )

            headers["X-Task-Dependency"] = ",".join(task_ids)

        if as_background_task:
            # Will result in the engine persisting the inbound HTTP request as a Task for deferred
            # execution. The client will receive task information back instead of the synchronous API response.
            # That task can be polled for eventual completion.
            headers["X-Task-Background"] = "true"

        return headers

    @staticmethod
    def _prepare_data(payload: Union[Request, dict]):
        """Convert a payload (None, dict, or pydantic model) into a plain dict for transmission."""
        if payload is None:
            data = {}
        elif isinstance(payload, dict):
            data = payload
        elif isinstance(payload, BaseModel):
            data = payload.dict(by_alias=True)
        else:
            raise RuntimeError(f"Unable to parse payload of type {type(payload)}")

        return data

    @staticmethod
    def _response_data(resp, raw_response: bool = False):
        """Decode a response body based on its Content-Type (text, JSON, or raw bytes)."""
        if resp is None:
            return None

        if raw_response:
            return resp.content

        if resp.headers:
            ct = None
            if "Content-Type" in resp.headers:
                ct = resp.headers["Content-Type"]
            if "content-type" in resp.headers:
                ct = resp.headers["content-type"]
            if ct is not None:
                ct = ct.split(";")[0]  # application/json; charset=utf-8
                if ct in [MimeTypes.TXT, MimeTypes.MKD, MimeTypes.HTML]:
                    return resp.text
                elif ct == MimeTypes.JSON:
                    return resp.json()
                else:
                    return resp.content

    @staticmethod
    def _prepare_multipart_data(data, file):
        """Flatten `data` into Vapor-style multipart segments and attach `file`."""
        # Note: requests seems to have a bug passing boolean (and maybe numeric?)
        # values in the midst of multipart form data. You need to manually convert
        # it to a string; otherwise it will pass as False or True (with the capital),
        # which is not standard notation outside of Python.
        for key in data:
            if data[key] is False:
                data[key] = "false"
            elif data[key] is True:
                data[key] = "true"

        result = {}
        for key, val in data.items():
            for t in _multipart_name(key, val):
                result[t[0]] = t
        result["file"] = file
        return result

    def _add_client_to_response(self, expect: Type, response_data: Any):
        """Recursively attach this client to model dicts nested inside `response_data`."""
        if isinstance(response_data, dict):
            self._add_client_to_object(expect, response_data)
        elif isinstance(response_data, list):
            for el in response_data:
                typing_parameters = typing.get_args(expect)
                self._add_client_to_response(
                    typing_parameters[0] if typing_parameters else None, el
                )

        return response_data

    def _add_client_to_object(self, expect, response_data):
        """Attach this client to one dict, unwrapping single-key envelopes the Engine emits."""
        if expect and isclass(expect):
            if len(response_data.keys()) == 1 and list(response_data.keys())[0] in (
                to_camel(expect.__name__),
                to_camel(expect.__name__).replace("package", "invocable"),
                # Hack since engine uses "App" instead of "Package"
                "index",
                "pluginInstance",  # Inlined here since `expect` may be a subclass of pluginInstance
            ):
                # TODO (enias): Hack since the engine responds with inconsistent formats e.g. {"plugin" : {plugin_fields}}
                for _, v in response_data.items():
                    self._add_client_to_response(expect, v)
            elif issubclass(expect, BaseModel):
                response_data["client"] = self
                try:
                    key_to_type = typing.get_type_hints(expect)
                    for k, v in response_data.items():
                        self._add_client_to_response(key_to_type.get(inflection.underscore(k)), v)
                except NameError:
                    # typing.get_type_hints fails for Workspace
                    pass

    def call(  # noqa: C901
        self,
        verb: Verb,
        operation: str,
        payload: Union[Request, dict] = None,
        file: Any = None,
        expect: Type[T] = None,
        debug: bool = False,
        raw_response: bool = False,
        is_package_call: bool = False,
        package_owner: str = None,
        package_id: str = None,
        package_instance_id: str = None,
        as_background_task: bool = False,
        wait_on_tasks: List[Union[str, Task]] = None,
        timeout_s: Optional[float] = None,
    ) -> Union[
        Any, Task
    ]:  # TODO (enias): I would like to list all possible return types using interfaces instead of Any
        """Post to the Steamship API.

        All responses have the format::

        .. code-block:: json

           {
               "data": "",
               "error": {"reason": ""}
           }  # noqa: RST203

        For the Python client we return the contents of the `data` field if present, and we raise an exception
        if the `error` field is filled in.
        """
        # TODO (enias): Review this codebase
        url = self._url(
            is_package_call=is_package_call,
            package_owner=package_owner,
            operation=operation,
        )

        headers = self._headers(
            is_package_call=is_package_call,
            package_owner=package_owner,
            package_id=package_id,
            package_instance_id=package_instance_id,
            as_background_task=as_background_task,
            wait_on_tasks=wait_on_tasks,
        )

        data = self._prepare_data(payload=payload)

        logging.debug(
            f"Making {verb} to {url} in workspace {self.config.workspace_handle}/{self.config.workspace_id}"
        )
        if verb == Verb.POST:
            if file is not None:
                files = self._prepare_multipart_data(data, file)
                resp = self._session.post(url, files=files, headers=headers, timeout=timeout_s)
            else:
                resp = self._session.post(url, json=data, headers=headers, timeout=timeout_s)
        elif verb == Verb.GET:
            resp = self._session.get(url, params=data, headers=headers, timeout=timeout_s)
        else:
            raise Exception(f"Unsupported verb: {verb}")

        logging.debug(f"From {verb} to {url} got HTTP {resp.status_code}")

        if debug:
            logging.debug(f"Got response {resp}")

        response_data = self._response_data(resp, raw_response=raw_response)

        logging.debug(f"Response JSON {response_data}")

        task = None
        error = None

        if isinstance(response_data, dict):
            if "status" in response_data:
                try:
                    task = Task.parse_obj(
                        {**response_data["status"], "client": self, "expect": expect}
                    )
                    if "state" in response_data["status"]:
                        if response_data["status"]["state"] == "failed":
                            error = SteamshipError.from_dict(response_data["status"])
                            logging.warning(f"Client received error from server: {error}")
                except TypeError as e:
                    # There's an edge case here -- if a Steamship package returns the JSON dictionary
                    #
                    #   { "status": "status string" }
                    #
                    # Then the above handler will attempt to parse it and throw... But we don't actually want to throw
                    # since we don't take a strong opinion on what the response type of a package endpoint ought to be.
                    # It *may* choose to conform to the SteamshipResponse type, but it doesn't have to.
                    if not is_package_call:
                        raise e

            if task is not None and task.state == TaskState.failed:
                error = task.as_error()

            if "data" in response_data:
                if expect is not None:
                    if issubclass(expect, SteamshipError):
                        data = expect.from_dict({**response_data["data"], "client": self})
                    elif issubclass(expect, BaseModel):
                        data = expect.parse_obj(
                            self._add_client_to_response(expect, response_data["data"])
                        )
                    else:
                        raise RuntimeError(f"obj of type {expect} does not have a from_dict method")
                else:
                    data = response_data["data"]

                if task:
                    task.output = data
            else:
                data = response_data

        else:
            data = response_data

        if error is not None:
            logging.warning(f"Client received error from server: {error}", exc_info=error)
            raise error

        if not resp.ok:
            raise SteamshipError(
                f"API call did not complete successfully. Server returned: {response_data}"
            )

        elif task is not None:
            return task
        elif data is not None and expect is not None:
            # if we have data AND we expect it to be of a certain type,
            # we should probably make sure that expectation is met.
            if not isinstance(data, expect):
                raise SteamshipError(
                    message=f"Inconsistent response from server (data does not match expected type: {expect}.)",
                    suggestion="Please contact support via hello@steamship.com and report what caused this error.",
                )
            return data
        elif data is not None:
            return data
        else:
            raise SteamshipError("Inconsistent response from server. Please contact support.")

    def post(
        self,
        operation: str,
        payload: Union[Request, dict, BaseModel] = None,
        file: Any = None,
        expect: Any = None,
        debug: bool = False,
        raw_response: bool = False,
        is_package_call: bool = False,
        package_owner: str = None,
        package_id: str = None,
        package_instance_id: str = None,
        as_background_task: bool = False,
        wait_on_tasks: List[Union[str, Task]] = None,
        timeout_s: Optional[float] = None,
    ) -> Union[
        Any, Task
    ]:  # TODO (enias): I would like to list all possible return types using interfaces instead of Any
        """Convenience wrapper for `call` with the POST verb."""
        return self.call(
            verb=Verb.POST,
            operation=operation,
            payload=payload,
            file=file,
            expect=expect,
            debug=debug,
            raw_response=raw_response,
            is_package_call=is_package_call,
            package_owner=package_owner,
            package_id=package_id,
            package_instance_id=package_instance_id,
            as_background_task=as_background_task,
            wait_on_tasks=wait_on_tasks,
            timeout_s=timeout_s,
        )

    def get(
        self,
        operation: str,
        payload: Union[Request, dict] = None,
        file: Any = None,
        expect: Any = None,
        debug: bool = False,
        raw_response: bool = False,
        is_package_call: bool = False,
        package_owner: str = None,
        package_id: str = None,
        package_instance_id: str = None,
        as_background_task: bool = False,
        wait_on_tasks: List[Union[str, Task]] = None,
        timeout_s: Optional[float] = None,
    ) -> Union[
        Any, Task
    ]:  # TODO (enias): I would like to list all possible return types using interfaces instead of Any
        """Convenience wrapper for `call` with the GET verb."""
        return self.call(
            verb=Verb.GET,
            operation=operation,
            payload=payload,
            file=file,
            expect=expect,
            debug=debug,
            raw_response=raw_response,
            is_package_call=is_package_call,
            package_owner=package_owner,
            package_id=package_id,
            package_instance_id=package_instance_id,
            as_background_task=as_background_task,
            wait_on_tasks=wait_on_tasks,
            timeout_s=timeout_s,
        )

    def logs(
        self,
        offset: int = 0,
        number: int = 50,
        invocable_handle: Optional[str] = None,
        instance_handle: Optional[str] = None,
        invocable_version_handle: Optional[str] = None,
        path: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Return generated logs for a client.

        The logs will be workspace-scoped. You will only receive logs
        for packages and plugins that you own.

        :param offset: The index of the first log entry to return. This can be used with `number` to page through logs.
        :param number: The number of log entries to return. This can be used with `offset` to page through logs.
        :param invocable_handle: Enables optional filtering based on the handle of package or plugin. Example: `my-package`
        :param instance_handle: Enables optional filtering based on the handle of package instance or plugin instance. Example: `my-instance`
        :param invocable_version_handle: Enables optional filtering based on the version handle of package or plugin. Example: `0.0.2`
        :param path: Enables optional filtering based on request path. Example: `/generate`.
        :return: Returns a dictionary containing the offset and number of log entries as well as a list of `entries` that match the specified filters.
        """
        args = {"from": offset, "size": number}
        if invocable_handle:
            args["invocableHandle"] = invocable_handle
        if instance_handle:
            args["invocableInstanceHandle"] = instance_handle
        if invocable_version_handle:
            args["invocableVersionHandle"] = invocable_version_handle
        if path:
            args["invocablePath"] = path

        return self.post("logs/list", args)
Sadly the `exclude` option in Pydantic doesn't +# cascade down nested objects, so we have to use this structure to catch all the possible combinations +EXCLUDE_FROM_DICT = { + "client": True, + "blocks": {"__all__": {"client": True, "tags": {"__all__": {"client": True}}}}, + "tags": {"__all__": {"client": True}}, +} + + +class Configuration(CamelModel): + api_key: SecretStr + api_base: HttpUrl = DEFAULT_API_BASE + app_base: HttpUrl = DEFAULT_APP_BASE + web_base: HttpUrl = DEFAULT_WEB_BASE + workspace_id: str = None + workspace_handle: str = None + profile: Optional[str] = None + + def __init__( + self, + config_file: Optional[Path] = None, + **kwargs, + ): + # First set the profile + kwargs["profile"] = profile = kwargs.get("profile") or os.getenv("STEAMSHIP_PROFILE") + + # Then load configuration from a file if provided + config_dict = self._load_from_file( + config_file or DEFAULT_CONFIG_FILE, + profile, + raise_on_exception=config_file is not None, + ) + config_dict.update(self._get_config_dict_from_environment()) + kwargs.update({k: v for k, v in config_dict.items() if kwargs.get(k) is None}) + + kwargs["api_base"] = format_uri(kwargs.get("api_base")) + kwargs["app_base"] = format_uri(kwargs.get("app_base")) + kwargs["web_base"] = format_uri(kwargs.get("web_base")) + + if not kwargs.get("api_key") and not kwargs.get("apiKey"): + api_key = login( + kwargs.get("api_base") or DEFAULT_API_BASE, + kwargs.get("web_base") or DEFAULT_WEB_BASE, + ) + Configuration._save_api_key_to_file( + api_key, profile, config_file or DEFAULT_CONFIG_FILE + ) + kwargs["api_key"] = api_key + + super().__init__(**kwargs) + + @staticmethod + def _load_from_file( + file: Path, profile: str = None, raise_on_exception: bool = False + ) -> Optional[dict]: + try: + with file.open() as f: + config_file = json.load(f) + if profile: + if "profiles" not in config_file or profile not in config_file["profiles"]: + raise RuntimeError(f"Profile {profile} requested but not found in {file}") + config = 
config_file["profiles"][profile] + else: + config = config_file + return {inflection.underscore(k): v for k, v in config.items()} + except FileNotFoundError: + if raise_on_exception: + raise Exception(f"Tried to load configuration file at {file} but it did not exist.") + except Exception as err: + if raise_on_exception: + raise err + return {} + + @staticmethod + def _get_config_dict_from_environment(): + """Overrides configuration with environment variables.""" + return { + property_name: os.getenv(environment_variable_name, None) + for environment_variable_name, property_name in ENVIRONMENT_VARIABLES_TO_PROPERTY.items() + if environment_variable_name in os.environ + } + + @staticmethod + def _save_api_key_to_file(new_api_key: Optional[str], profile: Optional[str], file_path: Path): + # Minimally rewrite config file, adding api key + try: + with file_path.open() as f: + config_file = json.load(f) + if profile: + if "profiles" not in config_file or profile not in config_file["profiles"]: + raise RuntimeError(f"Could not update API key for {profile} in {file_path}") + config = config_file["profiles"][profile] + else: + config = config_file + except FileNotFoundError: + config_file = {} + config = config_file + + config["apiKey"] = new_api_key + + with file_path.open("w") as f: + json.dump(config_file, f, indent="\t") + + @staticmethod + def default_config_file_has_api_key() -> bool: + return Configuration._load_from_file(DEFAULT_CONFIG_FILE).get("api_key") is not None + + @staticmethod + def remove_api_key_from_default_config(): + Configuration._save_api_key_to_file(None, None, DEFAULT_CONFIG_FILE) diff --git a/steamship/base/environments.py b/steamship/base/environments.py new file mode 100644 index 0000000000000000000000000000000000000000..f5d7662fdb2f3c16b52a1b4190166691a3cd8579 --- /dev/null +++ b/steamship/base/environments.py @@ -0,0 +1,99 @@ +import os +from enum import Enum + +from steamship.base.configuration import Configuration +from steamship.base.error 
import SteamshipError + + +class RuntimeEnvironments(str, Enum): + REPLIT = "replit" + LOCALHOST = "localhost" + + +def _interactively_get_key(env: RuntimeEnvironments): + print( + """Get your free API key here: https://steamship.com/account/api + +You'll get immediate access to our SDK for AI models, including OpenAI, GPT, Cohere, and more. +""" + ) + + api_key = input("Paste your API key to run: ") + + while len(api_key.strip()) == 0: + api_key = input("API Key: ") + + os.environ["STEAMSHIP_API_KEY"] = api_key + + if env == RuntimeEnvironments.REPLIT: + print( + """ +This key is set temporarily. In the future, you can: +- Set the STEAMSHIP_API_KEY Replit Secret +- Close and re-open any Replit shells to make sure secrets are refreshed. + +""" + ) + elif env == RuntimeEnvironments.LOCALHOST: + print( + """ +This key is set temporarily. In the future, you can: +- Set the STEAMSHIP_API_KEY environment variable +- Run `ship login` to create a ~/.steamship.json credential file + +""" + ) + + +def _report_error_and_exit(env: RuntimeEnvironments): + if env == RuntimeEnvironments.REPLIT: + print( + """To run this Replit, you will need a Steamship API Key. + +1) If you're viewing someone else's Replit, clone it + +2) Visit https://steamship.com/account/api to get a key + +3) Add your key as a Replit secret named STEAMSHIP_API_KEY + +4) Close and re-open any shells to make sure your new secret is available + +Then try running again!""" + ) + elif env == RuntimeEnvironments.LOCALHOST: + print( + """To run this script, you will need a Steamship API Key. + +1) Visit https://steamship.com/account/api to get a key + +2) Set your key as the environment variable STEAMSHIP_API_KEY + +Then try running again! + +If you have pip-installed `steamship`, you can also try setting your key by simply running `ship login`. 
+""" + ) + exit(-1) + + +def check_environment(env: RuntimeEnvironments, interactively_set_key: bool = True): + # This will try loading from STEAMSHIP_API_KEY and also ~/.steamship.json + try: + config = Configuration() + + # If an API key is set, we're good to go! + if config.api_key: + return + except SteamshipError: + # The Configuration object will throw an error if there is no API Key found. + # Since that error is expected from the context of this function, we pass on it to handle it in a more + # user-interactive way. + pass + + # If we're hot-loading config, do it here! + if interactively_set_key: + _interactively_get_key(env) + return + + # If we're still here, we're not interactively setting the key. Display an error message and exit. + _report_error_and_exit(env) diff --git a/steamship/base/error.py b/steamship/base/error.py new file mode 100644 index 0000000000000000000000000000000000000000..7bc294895a2d26afa35f59f70d6144549cde1237 --- /dev/null +++ b/steamship/base/error.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import logging +from typing import Any, Union + +DEFAULT_ERROR_MESSAGE = "Undefined remote error" + + +class SteamshipError(Exception): + message: str = None + internal_message: str = None + suggestion: str = None + code: str = None + error: str = None + + def __init__( + self, + message: str = DEFAULT_ERROR_MESSAGE, + internal_message: str = None, + suggestion: str = None, + code: str = None, + error: Union[Exception, str] = None, + ): + super().__init__() + self.message = message + self.suggestion = suggestion + self.internal_message = internal_message + self.code = code + if error is not None: + self.error = str(error) + + parts = [] + if code is not None: + parts.append(f"[{code}]") + if message is not None: + parts.append(message) + if internal_message is not None: + parts.append(f"Internal Message: {internal_message}") + if suggestion is not None: + parts.append(f"Suggestion: {suggestion}") + + 
super().__init__("\n".join(parts)) + + def log(self): + logging.error( + f"[{self.code}] {self.message}. [Internal: {self.internal_message}] [Suggestion: {self.suggestion}]", + exc_info=self, + ) + if self.error: + logging.error(self.error) + + def to_dict(self) -> dict: + # Required since Exception cannot be combined with pydantic.BaseModel + return { + "message": self.message, + "internalMessage": self.internal_message, + "suggestion": self.suggestion, + "code": self.code, + "error": self.error, + } + + @staticmethod + def from_dict(d: Any) -> SteamshipError: + """Last resort if subclass doesn't override: pass through.""" + # Required since Exception cannot be combined with pydantic.BaseModel + return SteamshipError( + message=d.get("statusMessage", d.get("message")), + internal_message=d.get("internalMessage"), + suggestion=d.get("statusSuggestion", d.get("suggestion")), + code=d.get("statusCode", d.get("code")), + error=d.get("error", d.get("error")), + ) diff --git a/steamship/base/mime_types.py b/steamship/base/mime_types.py new file mode 100644 index 0000000000000000000000000000000000000000..9b3c94ac2dc3aab12e402c5588c56f56769ef59f --- /dev/null +++ b/steamship/base/mime_types.py @@ -0,0 +1,42 @@ +from enum import Enum + + +class MimeTypes(str, Enum): + UNKNOWN = "unknown" + TXT = "text/plain" + JSON = "application/json" + MKD = "text/markdown" + EPUB = "application/epub+zip" + PDF = "application/pdf" + JPG = "image/jpeg" + PNG = "image/png" + TIFF = "image/tiff" + GIF = "image/gif" + HTML = "text/html" + DOC = "application/msword" + DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + PPT = "applicatino/ms-powerpoint" + PPTX = "application/vnd.openxmlformats-officedocument.presentationml.presentation" + RTF = "application/rtf" + BINARY = "application/octet-stream" + STEAMSHIP_BLOCK_JSON = "application/vnd.steamship-block.json.v1" + WAV = "audio/wav" + MP3 = "audio/mp3" + MP4_VIDEO = "video/mp4" + MP4_AUDIO = "audio/mp4" + 
WEBM_VIDEO = "video/webm" + WEBM_AUDIO = "audio/webm" + FILE_JSON = "fileJson" + + +class ContentEncodings: + BASE64 = "base64" + + +TEXT_MIME_TYPES = [ + MimeTypes.TXT, + MimeTypes.MKD, + MimeTypes.HTML, + MimeTypes.DOCX, + MimeTypes.PPTX, +] diff --git a/steamship/base/model.py b/steamship/base/model.py new file mode 100644 index 0000000000000000000000000000000000000000..bdb8b681a82de74207b6471cb85da9113ecd2336 --- /dev/null +++ b/steamship/base/model.py @@ -0,0 +1,29 @@ +import re +from typing import TypeVar + +import inflection +from pydantic import BaseModel +from pydantic.generics import GenericModel + +T = TypeVar("T") # Declare type variable + + +def to_camel(s: str) -> str: + s = re.sub("_(url)$", lambda m: f"_{m.group(1).upper()}", s) + return inflection.camelize(s, uppercase_first_letter=False) + + +class CamelModel(BaseModel): + def __init__(self, **kwargs): + kwargs = {k: v for k, v in kwargs.items() if v is not None} + super().__init__(**kwargs) + + class Config: + alias_generator = to_camel + allow_population_by_field_name = True + # Populate enum values with their value, rather than the raw enum. Important to serialise model.dict() + use_enum_values = True + + +class GenericCamelModel(CamelModel, GenericModel): + pass diff --git a/steamship/base/package_spec.py b/steamship/base/package_spec.py new file mode 100644 index 0000000000000000000000000000000000000000..8e51416419e32e4b3f55c0611ec1bb53f6099caf --- /dev/null +++ b/steamship/base/package_spec.py @@ -0,0 +1,150 @@ +"""Objects for recording and reporting upon the introspected interface of a Steamship Package.""" +import inspect +from enum import Enum +from typing import Dict, List, Optional, Union, get_args, get_origin + +from steamship import SteamshipError +from steamship.base.configuration import CamelModel +from steamship.utils.url import Verb + + +class ArgSpec(CamelModel): + """An argument passed to a method.""" + + # The name of the argument. 
+ name: str + # The kind of the argument, reported by str(annotation) via the `inspect` library. E.g. + kind: str + # Possible values, if the kind is an enum type + values: Optional[List[str]] + + def __init__(self, name: str, parameter: inspect.Parameter): + if name == "self": + raise SteamshipError( + message="Attempt to interpret the `self` object as a method parameter." + ) + values = None + if isinstance(parameter.annotation, type): + if issubclass(parameter.annotation, Enum): + values = [choice.value for choice in parameter.annotation] + elif get_origin(parameter.annotation) is Union: + args = get_args(parameter.annotation) + # For now, only deal with the case where the Union is an Optional[Enum] + if len(args) == 2 and type(None) in args: + optional_arg = [t for t in args if t != type(None)][0] # noqa: E721 + if issubclass(optional_arg, Enum): + values = [choice.value for choice in optional_arg] + + super().__init__(name=name, kind=str(parameter.annotation), values=values) + + def pprint(self, name_width: Optional[int] = None, prefix: str = "") -> str: + """Returns a pretty printable representation of this argument.""" + width = name_width or len(self.name) + ret = f"{prefix}{self.name.ljust(width)} - {self.kind}" + return ret + + +class MethodSpec(CamelModel): + """A method, callable remotely, on an object.""" + + # The HTTP Path at which the method is callable. + path: str + + # The HTTP Verb at which the method is callable. Defaults to POST + verb: str + + # The return type. Reported by str(annotation) via the `inspect` library. E.g. + returns: str + + # The docstring of the method. + doc: Optional[str] = None + + # The named arguments of the method. Positional arguments are not permitted. + args: Optional[List[ArgSpec]] = None + + # Additional configuration around this endpoint. + # Note: The actual type of this is Optional[Dict[str, Union[str, bool, int, float]]] + # But if Pydantic sees that, it attempts to force all values to be str, which is wrong. 
+ config: Optional[Dict] = None + + @staticmethod + def clean_path(path: str = "") -> str: + """Ensure that the path always starts with /, and at minimum must be at least /.""" + if not path: + path = "/" + elif path[0] != "/": + path = f"/{path}" + return path + + def __init__( + self, + cls: object, + name: str, + path: str = None, + verb: Verb = Verb.POST, + config: Dict[str, Union[str, bool, int, float]] = None, + ): + # Set the path + if path is None and name is not None: + path = f"/{name}" + path = MethodSpec.clean_path(path) + + # Get the function on the class so that we can inspect it + func = getattr(cls, name) + sig = inspect.signature(func) + + # Set the return type + returns = str(sig.return_annotation) + + # Set the docstring + doc = func.__doc__ + + # Set the arguments + args = [] + for p in sig.parameters: + if p == "self": + continue + args.append(ArgSpec(p, sig.parameters[p])) + + super().__init__(path=path, verb=verb, returns=returns, doc=doc, args=args, config=config) + + def pprint(self, name_width: Optional[int] = None, prefix: str = " ") -> str: + """Returns a pretty printable representation of this method.""" + + width = name_width or len(self.path) + ret = f"{self.verb.ljust(4)} {self.path.lstrip('/').ljust(width)} -> {self.returns}" + if self.args: + name_width = max([(len(arg.name) if arg.name else 0) for arg in self.args]) + for arg in self.args: + arg_doc_string = arg.print(name_width, prefix) + ret += f"\n{arg_doc_string}" + return ret + + +class PackageSpec(CamelModel): + """A package, representing a remotely instantiable service.""" + + # The name of the package + name: str + + # The docstring of the package + doc: Optional[str] = None + + # The list of methods the package exposes remotely + methods: Optional[List[MethodSpec]] = None + + def pprint(self, prefix: str = " ") -> str: + """Returns a pretty printable representation of this package.""" + underline = "=" * len(self.name) + ret = f"{self.name}\n{underline}\n" + if self.doc: 
+ ret += f"{self.doc}\n\n" + else: + ret += "\n" + + if self.methods: + name_width = max([len(method.path) or 0 for method in self.methods]) + for method in self.methods: + method_doc_string = method.pprint(name_width, prefix) + ret += f"\n{method_doc_string}" + return ret diff --git a/steamship/base/request.py b/steamship/base/request.py new file mode 100644 index 0000000000000000000000000000000000000000..2228acc0cea777ea6bfa91f9c633fef51cc7cfab --- /dev/null +++ b/steamship/base/request.py @@ -0,0 +1,33 @@ +from steamship.base.model import CamelModel + + +class Request(CamelModel): + pass + + +class GetRequest(Request): + id: str = None + handle: str = None + + +class CreateRequest(Request): + id: str = None + handle: str = None + + +class UpdateRequest(Request): + id: str = None + handle: str = None + + +class IdentifierRequest(Request): + id: str = None + handle: str = None + + +class ListRequest(Request): + pass + + +class DeleteRequest(Request): + id: str diff --git a/steamship/base/response.py b/steamship/base/response.py new file mode 100644 index 0000000000000000000000000000000000000000..af5537cf14e8a7bd025a5ea0d9c1f752b59e789f --- /dev/null +++ b/steamship/base/response.py @@ -0,0 +1,5 @@ +from steamship.base.model import CamelModel + + +class Response(CamelModel): + pass diff --git a/steamship/base/tasks.py b/steamship/base/tasks.py new file mode 100644 index 0000000000000000000000000000000000000000..ae47a771726551b18d7881c0086b577854609bda --- /dev/null +++ b/steamship/base/tasks.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +import time +from typing import Any, Callable, Dict, Generic, List, Optional, Set, Type, TypeVar + +from pydantic import BaseModel, Field + +from steamship.base.error import SteamshipError +from steamship.base.model import CamelModel, GenericCamelModel +from steamship.base.request import DeleteRequest, IdentifierRequest, Request +from steamship.utils.metadata import metadata_to_str, str_to_metadata + +T = TypeVar("T") 
+ + +class CreateTaskCommentRequest(Request): + task_id: str + external_id: str = None + external_type: str = None + external_group: str = None + metadata: str = None + + +class ListTaskCommentRequest(Request): + task_id: str = None + external_id: str = None + external_type: str = None + external_group: str = None + + +class TaskComment(CamelModel): + client: Client = Field(None, exclude=True) + id: str = None + user_id: str = None + task_id: str = None + external_id: str = None + external_type: str = None + external_group: str = None + metadata: Any = None + created_at: str = None + + def __init__(self, **kwargs): + kwargs["metadata"] = str_to_metadata(kwargs.get("metadata")) + super().__init__(**kwargs) + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel: + # TODO (enias): This needs to be solved at the engine side + obj = obj["taskComment"] if "taskComment" in obj else obj + return super().parse_obj(obj) + + @staticmethod + def create( + client: Client, + task_id: str = None, + external_id: str = None, + external_type: str = None, + external_group: str = None, + metadata: Any = None, + ) -> TaskComment: + req = CreateTaskCommentRequest( + taskId=task_id, + external_id=external_id, + external_type=external_type, + externalGroup=external_group, + metadata=metadata_to_str(metadata), + ) + return client.post( + "task/comment/create", + req, + expect=TaskComment, + ) + + @staticmethod + def list( + client: Client, + task_id: str = None, + external_id: str = None, + external_type: str = None, + external_group: str = None, + ) -> TaskCommentList: + req = ListTaskCommentRequest( + taskId=task_id, + external_id=external_id, + external_type=external_type, + externalGroup=external_group, + ) + return client.post( + "task/comment/list", + req, + expect=TaskCommentList, + ) + + def delete(self) -> TaskComment: + req = DeleteRequest(id=self.id) + return self.client.post( + "task/comment/delete", + req, + expect=TaskComment, + ) + + +class 
TaskCommentList(CamelModel): + comments: List[TaskComment] + + +class TaskState: + waiting = "waiting" + running = "running" + succeeded = "succeeded" + failed = "failed" + + +class TaskType: + internal_api = "internalApi" + train = "train" + infer = "infer" + + +class TaskRunRequest(Request): + task_id: str + + +class TaskStatusRequest(Request): + task_id: str + + +class Task(GenericCamelModel, Generic[T]): + """Encapsulates a unit of asynchronously performed work.""" + + # Note: The Field object prevents this from being serialized into JSON (and causing a crash) + client: Client = Field(None, exclude=True) # Steamship client + + task_id: str = None # The id of this task + user_id: str = None # The user who requested this task + workspace_id: str = None # The workspace in which this task is executing + + # Note: The Field object prevents this from being serialized into JSON (and causing a crash) + expect: Type = Field( + None, exclude=True + ) # Type of the expected output once the output is complete. + + input: str = None # The input provided to the task + output: T = None # The output of the task + state: str = None # A value in class TaskState + + status_message: str = None # User-facing message concerning task status + status_suggestion: str = None # User-facing suggestion concerning error remediation + status_code: str = None # User-facing error code for support assistance + status_created_on: str = None # When the status fields were last set + + task_type: str = None # A value in class TaskType; for internal routing + task_executor: str = None # + task_created_on: str = None # When the task object was created + task_last_modified_on: str = None # When the task object was last modified + + # Long Running Plugin Support + # The `remote_status_*` fields govern how Steamship Plugins can communicate long-running work back to the engine. 
+ # If instead of sending data, the plugin sends a status with these fields set, the engine will begin polling for + # updates, echoing the contents of these fields back to the plugin to communicate, e.g., the jobId of the work + # being checked. When the work is complete, simply respond with the Response `data` field set as per usual. + remote_status_input: Optional[ + Dict + ] = None # For re-hydrating state in order to check remote status. + remote_status_output: Optional[ + Dict + ] = None # For reporting structured JSON state for error diagnostics. + remote_status_message: str = None # User facing message string to report on remote status. + + assigned_worker: str = None # The worker assigned to complete this task + started_at: str = None # When the work on this task began + + max_retries: int = None # The maximum number of retries allowed for this task + retries: int = None # The number of retries already used. + + def as_error(self) -> SteamshipError: + return SteamshipError( + message=self.status_message, suggestion=self.status_suggestion, code=self.status_code + ) + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> Task: + obj = obj["task"] if "task" in obj else obj + return super().parse_obj(obj) + + @staticmethod + def get( + client, + _id: str = None, + handle: str = None, + ) -> Task: + return client.post( + "task/get", + IdentifierRequest(id=_id, handle=handle), + expect=Task, + ) + + def update(self, other: Optional[Task] = None): + """Incorporates a `Task` into this object.""" + other = other or Task() + for k, v in other.__dict__.items(): + self.__dict__[k] = v + + def add_comment( + self, + external_id: str = None, + external_type: str = None, + external_group: str = None, + metadata: Any = None, + ) -> TaskComment: + return TaskComment.create( + client=self.client, + task_id=self.task_id, + external_id=external_id, + external_type=external_type, + external_group=external_group, + metadata=metadata, + ) + + def post_update(self, 
fields: Set[str] = None) -> Task: + """Updates this task in the Steamship Engine.""" + if not isinstance(fields, set): + raise RuntimeError(f'Unexpected type of "fields": {type(fields)}. Expected type set.') + body = self.dict(by_alias=True, include={*fields, "task_id"}) + return self.client.post("task/update", body, expect=Task) + + def wait( + self, + max_timeout_s: float = 180, + retry_delay_s: float = 1, + on_each_refresh: "Optional[Callable[[int, float, Task], None]]" = None, + ): + """Polls and blocks until the task has succeeded or failed (or timeout reached). + + Parameters + ---------- + max_timeout_s : int + Max timeout in seconds. Default: 180s. After this timeout, an exception will be thrown. + retry_delay_s : float + Delay between status checks. Default: 1s. + on_each_refresh : Optional[Callable[[int, float, Task], None]] + Optional call back you can get after each refresh is made, including success state refreshes. + The signature represents: (refresh #, total elapsed time, task) + + WARNING: Do not pass a long-running function to this variable. It will block the update polling. + """ + t0 = time.perf_counter() + refresh_count = 0 + while time.perf_counter() - t0 < max_timeout_s and self.state not in ( + TaskState.succeeded, + TaskState.failed, + ): + time.sleep(retry_delay_s) + self.refresh() + refresh_count += 1 + + # Possibly make a callback so the caller knows we've tried again + if on_each_refresh: + on_each_refresh(refresh_count, time.perf_counter() - t0, self) + + # If the task did not complete within the timeout, throw an error + if self.state not in (TaskState.succeeded, TaskState.failed): + raise SteamshipError( + message=f"Task {self.task_id} did not complete within requested timeout of {max_timeout_s}s. The task is still running on the server. You can retrieve its status via Task.get() or try waiting again with wait()." 
+ ) + + def refresh(self): + if self.task_id is None: + raise SteamshipError(message="Unable to refresh task because `task_id` is None") + + req = TaskStatusRequest(taskId=self.task_id) + # TODO (enias): A status call can return both data and task + # In this case both task and data will include the output (one is string serialized, the other is parsed) + # Ideally task status only returns the status, not the full output object + resp = self.client.post("task/status", payload=req, expect=self.expect) + self.update(resp) + + +from .client import Client # noqa: E402 + +Task.update_forward_refs() +TaskComment.update_forward_refs() diff --git a/steamship/base/utils.py b/steamship/base/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/steamship/cli/__init__.py b/steamship/cli/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/steamship/cli/__pycache__/__init__.cpython-39.pyc b/steamship/cli/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55f416475dfd2a53e646d99ba568d7d8c91548f1 Binary files /dev/null and b/steamship/cli/__pycache__/__init__.cpython-39.pyc differ diff --git a/steamship/cli/__pycache__/cli.cpython-39.pyc b/steamship/cli/__pycache__/cli.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8036c064be3995a52ec4a7c4adb86fa5e2ec6b68 Binary files /dev/null and b/steamship/cli/__pycache__/cli.cpython-39.pyc differ diff --git a/steamship/cli/__pycache__/deploy.cpython-39.pyc b/steamship/cli/__pycache__/deploy.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca235335bff57af81cd3f075ef2177e131e1a4f4 Binary files /dev/null and b/steamship/cli/__pycache__/deploy.cpython-39.pyc differ diff --git a/steamship/cli/__pycache__/login.cpython-39.pyc 
b/steamship/cli/__pycache__/login.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e75107840c9457b7f7c3549bf28173f6d85483af Binary files /dev/null and b/steamship/cli/__pycache__/login.cpython-39.pyc differ diff --git a/steamship/cli/__pycache__/manifest_init_wizard.cpython-39.pyc b/steamship/cli/__pycache__/manifest_init_wizard.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..61b0a6c97b85a413cc3a7fdec9728c9a6a98e0d2 Binary files /dev/null and b/steamship/cli/__pycache__/manifest_init_wizard.cpython-39.pyc differ diff --git a/steamship/cli/__pycache__/requirements_init_wizard.cpython-39.pyc b/steamship/cli/__pycache__/requirements_init_wizard.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c7f68f217820073e43e36daf20479268b429c23 Binary files /dev/null and b/steamship/cli/__pycache__/requirements_init_wizard.cpython-39.pyc differ diff --git a/steamship/cli/__pycache__/ship_spinner.cpython-39.pyc b/steamship/cli/__pycache__/ship_spinner.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34f1bc461c554178b461a8b4ff8d81081e0fdda1 Binary files /dev/null and b/steamship/cli/__pycache__/ship_spinner.cpython-39.pyc differ diff --git a/steamship/cli/cli.py b/steamship/cli/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..11170f254cef9c503e5cabfbfe2768b231270c62 --- /dev/null +++ b/steamship/cli/cli.py @@ -0,0 +1,195 @@ +import json +import logging +import sys +import time +from os import path +from typing import Optional + +import click + +import steamship +from steamship import Steamship, SteamshipError +from steamship.base.configuration import Configuration +from steamship.cli.deploy import ( + PackageDeployer, + PluginDeployer, + bundle_deployable, + update_config_template, +) +from steamship.cli.manifest_init_wizard import manifest_init_wizard +from steamship.cli.requirements_init_wizard import 
requirements_init_wizard +from steamship.cli.ship_spinner import ship_spinner +from steamship.data.manifest import DeployableType, Manifest +from steamship.data.user import User + + +@click.group() +def cli(): + pass + + +def initialize(suppress_message: bool = False): + logging.root.setLevel(logging.FATAL) + if not suppress_message: + click.echo(f"Steamship PYTHON cli version {steamship.__version__}") + + +@click.command() +def login(): + """Log in to Steamship, creating ~/.steamship.json""" + initialize() + click.echo("Logging into Steamship.") + if sys.argv[1] == "login": + if Configuration.default_config_file_has_api_key(): + overwrite = click.confirm( + text="You already have an API key in your .steamship.json file. Do you want to remove it and login?", + default=False, + ) + if not overwrite: + sys.exit(0) + Configuration.remove_api_key_from_default_config() + + # Carry on with login + client = Steamship() + user = User.current(client) + click.secho(f"🚢🚢🚢 Hooray! You're logged in with user handle: {user.handle} 🚢🚢🚢", fg="green") + + +@click.command() +def ships(): + """Ship some ships""" + initialize() + click.secho("Here are some ships:", fg="cyan") + with ship_spinner(): + time.sleep(5) + click.secho() + + +@click.command() +def deploy(): + """Deploy the package or plugin in this directory""" + initialize() + client = None + try: + client = Steamship() + except SteamshipError as e: + click.secho(e.message, fg="red") + click.get_current_context().abort() + + user = User.current(client) + if path.exists("steamship.json"): + manifest = Manifest.load_manifest() + else: + manifest = manifest_init_wizard(client) + manifest.save() + + if not path.exists("requirements.txt"): + requirements_init_wizard() + + deployable_type = manifest.type + + update_config_template(manifest) + + deployer = None + if deployable_type == DeployableType.PACKAGE: + deployer = PackageDeployer() + elif deployable_type == DeployableType.PLUGIN: + deployer = PluginDeployer() + else: + 
click.secho("Deployable must be of type package or plugin.", fg="red") + click.get_current_context().abort() + + deployable = deployer.create_or_fetch_deployable(client, user, manifest) + + click.echo("Bundling content... ", nl=False) + bundle_deployable(manifest) + click.echo("Done. 📦") + + _ = deployer.create_version(client, manifest, deployable.id) + + thing_url = f"{client.config.web_base}{deployable_type}s/{manifest.handle}" + click.echo( + f"Deployment was successful. View and share your new {deployable_type} here:\n\n{thing_url}\n" + ) + + # Common error conditions: + # - Package/plugin handle already taken. [handled; asks user for new] + # - Version handle already deployed. [handled; asks user for new] + # - Bad parameter configuration. [mitigated by deriving template from Config object] + # - Package content fails health check (ex. bad import) [Error caught while checking config object] + + +@click.command() +@click.option( + "--workspace", + "-w", + required=True, + type=str, + help="Workspace handle used for scoping logs request. All requests MUST be scoped by workspace.", +) +@click.option( + "--offset", + "-o", + default=0, + type=int, + help="Starting index from sorted logs to return a chunk. Used for paging. Defaults to 0.", +) +@click.option( + "--number", + "-n", + default=50, + type=int, + help="Number of logs to return in a single batch. Defaults to 50.", +) +@click.option( + "--package", + "-p", + type=str, + help="Package handle. Used to filter logs returend to a specific package (across all instances).", +) +@click.option( + "--instance", + "-i", + type=str, + help="Instance handle. Used to filter logs returned to a specific instance of a package.", +) +@click.option( + "--version", + "-v", + type=str, + help="Version handle. Used to filter logs returned to a specific version of a package.", +) +@click.option( + "--path", + "request_path", + type=str, + help="Path invoked by a client operation. 
Used to filter logs returned to a specific invocation path.", +) +def logs( + workspace: str, + offset: int, + number: int, + package: Optional[str] = None, + instance: Optional[str] = None, + version: Optional[str] = None, + request_path: Optional[str] = None, +): + initialize(suppress_message=True) + client = None + try: + client = Steamship(workspace=workspace) + except SteamshipError as e: + raise click.UsageError(message=e.message) + + click.echo(json.dumps(client.logs(offset, number, package, instance, version, request_path))) + + +cli.add_command(login) +cli.add_command(deploy) +cli.add_command(deploy, name="it") +cli.add_command(ships) +cli.add_command(logs) + + +if __name__ == "__main__": + deploy([]) diff --git a/steamship/cli/deploy.py b/steamship/cli/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..02cb7180a8a60da7bbe4839e872fd0165116e11d --- /dev/null +++ b/steamship/cli/deploy.py @@ -0,0 +1,285 @@ +import importlib.machinery as machinery +import os +import sys +import traceback +import zipfile +from abc import ABC, abstractmethod +from pathlib import Path + +import click +from semver import VersionInfo + +from steamship import Package, PackageVersion, PluginVersion, Steamship, SteamshipError +from steamship.cli.manifest_init_wizard import validate_handle, validate_version_handle +from steamship.cli.ship_spinner import ship_spinner +from steamship.data import Plugin +from steamship.data.manifest import Manifest +from steamship.data.user import User +from steamship.invocable.lambda_handler import get_class_from_module + +DEFAULT_BUILD_IGNORE = [ + "build", + ".git", + ".venv", + ".ipynb_checkpoints", + ".DS_Store", + "venv", + "tests", + "examples", + ".idea", + "__pycache__", +] + + +def update_config_template(manifest: Manifest): + + path = Path("src/api.py") + if not path.exists(): + path = Path("api.py") + if not path.exists(): + raise SteamshipError("Could not find api.py either in root directory or in src.") + + 
api_module = None + try: + sys.path.append(str(path.parent.absolute())) + + # load the API module to allow config inspection / generation + api_module = machinery.SourceFileLoader("api", str(path)).load_module() + except Exception: + click.secho( + "An error occurred while loading your api.py to check configuration parameters. Full stack trace below.", + fg="red", + ) + traceback.print_exc() + click.get_current_context().abort() + + invocable_type = get_class_from_module(api_module) + + config_parameters = invocable_type.config_cls().get_config_parameters() + + if manifest.configTemplate != config_parameters: + if len(config_parameters) > 0: + click.secho("Config parameters changed; updating steamship.json.", fg="cyan") + for param_name, param in config_parameters.items(): + click.echo(f"{param_name}:") + click.echo(f"\tType: {param.type}") + click.echo(f"\tDefault: {param.default}") + click.echo(f"\tDescription: {param.description}") + else: + click.secho("Config parameters removed; updating steamship.json.", fg="cyan") + + manifest.configTemplate = config_parameters + manifest.save() + + +def get_archive_path(manifest: Manifest) -> Path: + return Path(".") / "build" / "archives" / f"{manifest.handle}_v{manifest.version}.zip" + + +def bundle_deployable(manifest: Manifest): + archive_path = get_archive_path(manifest) + archive_path.parent.mkdir(parents=True, exist_ok=True) + excludes = DEFAULT_BUILD_IGNORE + manifest.build_config.get("ignore", []) + + archive_path.unlink(missing_ok=True) + + # This zipfile packaging is modeled after the typescript CLI. + # Items in non-excluded root folders are moved to the top-level. 
+ + with zipfile.ZipFile( + file=archive_path, mode="a", compression=zipfile.ZIP_DEFLATED, allowZip64=False + ) as zip_file: + + root = Path(".") + for file_path in root.iterdir(): + if file_path.name not in excludes: + if file_path.is_dir(): + for directory, _, files in os.walk(file_path): + subdirectory_path = Path(directory) + if Path(directory).name not in excludes: + for file in files: + pypi_file = subdirectory_path / file + relative_to = pypi_file.relative_to(file_path) + zip_file.write(pypi_file, relative_to) + + else: + zip_file.write(file_path) + + +class DeployableDeployer(ABC): + @abstractmethod + def _create_version(self, client: Steamship, manifest: Manifest, thing_id: str): + pass + + @abstractmethod + def create_object(self, client: Steamship, manifest: Manifest): + pass + + @abstractmethod + def update_object(self, deployable, client: Steamship, manifest: Manifest): + pass + + @abstractmethod + def deployable_type(self): + pass + + def create_or_fetch_deployable(self, client: Steamship, user: User, manifest: Manifest): + if not manifest.handle or len(manifest.handle) == 0: + self.ask_for_new_handle(manifest, was_missing=True) + + deployable = None + while deployable is None: + click.echo( + f"Creating / fetching {self.deployable_type()} with handle [{manifest.handle}]... ", + nl=False, + ) + try: + deployable = self.create_object(client, manifest) + if deployable.user_id != user.id: + self.ask_for_new_handle(manifest) + deployable = None + except SteamshipError as e: + if e.message == "Something went wrong.": + self.ask_for_new_handle(manifest) + else: + click.secho( + f"Unable to create / fetch {self.deployable_type()}. 
Server returned message: {e.message}" + ) + click.get_current_context().abort() + + self.update_object(deployable, client, manifest) + + click.echo("Done.") + return deployable + + def ask_for_new_handle(self, manifest: Manifest, was_missing: bool = False): + if not was_missing: + try_again = click.confirm( + click.style( + f"\nIt looks like that handle [{manifest.handle}] is already in use. Would you like to change the handle and try again?", + fg="yellow", + ), + default=True, + ) + if not try_again: + click.get_current_context().abort() + + new_handle = click.prompt( + f"What handle would you like to use for your {self.deployable_type()}? Valid characters are a-z and -", + value_proc=validate_handle, + ) + manifest.handle = new_handle + manifest.save() + + def create_version(self, client: Steamship, manifest: Manifest, thing_id: str): + version = None + + if not manifest.version or len(manifest.version) == 0: + self.ask_for_new_version_handle(manifest, was_missing=True) + + while version is None: + click.echo(f"Deploying version {manifest.version} of [{manifest.handle}]... ", nl=False) + try: + with ship_spinner(): + version = self._create_version(client, manifest, thing_id) + except SteamshipError as e: + if "The object you are trying to create already exists." in e.message: + self.ask_for_new_version_handle(manifest) + else: + click.secho(f"\nUnable to deploy {self.deployable_type()} version.", fg="red") + click.secho(f"Server returned message: {e.message}") + if "ModuleNotFoundError" in e.message: + click.secho( + "It looks like you may be missing a dependency in your requirements.txt.", + fg="yellow", + ) + click.get_current_context().abort() + click.echo("\nDone. 🚢") + + def ask_for_new_version_handle(self, manifest: Manifest, was_missing: bool = False): + if not was_missing: + try_again = click.confirm( + click.style( + f"\nIt looks like that version [{manifest.version}] has already been deployed. 
Would you like to change the version handle and try again?", + fg="yellow", + ), + default=True, + ) + if not try_again: + click.get_current_context().abort() + + default_new = "1.0.0" + try: + default_new = str(VersionInfo.parse(manifest.version).bump_prerelease()) + except ValueError: + pass + old_archive_path = get_archive_path(manifest) + new_version_handle = click.prompt( + "What should the new version be? Valid characters are a-z, 0-9, . and -", + value_proc=validate_version_handle, + default=default_new, + ) + manifest.version = new_version_handle + manifest.save() + new_archive_path = get_archive_path(manifest) + old_archive_path.rename(new_archive_path) + + +class PackageDeployer(DeployableDeployer): + def _create_version(self, client: Steamship, manifest: Manifest, thing_id: str): + return PackageVersion.create( + client=client, + config_template=manifest.config_template_as_dict(), + handle=manifest.version, + filename=f"build/archives/{manifest.handle}_v{manifest.version}.zip", + package_id=thing_id, + ) + + def create_object(self, client: Steamship, manifest: Manifest): + return Package.create( + client, + handle=manifest.handle, + profile=manifest, + is_public=manifest.public, + fetch_if_exists=True, + ) + + def update_object(self, deployable, client: Steamship, manifest: Manifest): + deployable.profile = manifest + + package = deployable.update(client) + return package + + def deployable_type(self): + return "package" + + +class PluginDeployer(DeployableDeployer): + def _create_version(self, client: Steamship, manifest: Manifest, thing_id: str): + return PluginVersion.create( + client=client, + config_template=manifest.config_template_as_dict(), + handle=manifest.version, + filename=f"build/archives/{manifest.handle}_v{manifest.version}.zip", + plugin_id=thing_id, + ) + + def create_object(self, client: Steamship, manifest: Manifest): + return Plugin.create( + client, + description=manifest.description, + is_public=manifest.public, + 
transport=manifest.plugin.transport, + type_=manifest.plugin.type, + handle=manifest.handle, + fetch_if_exists=True, + ) + + def update_object(self, deployable, client: Steamship, manifest: Manifest): + deployable.profile = manifest + + plugin = deployable.update(client) + return plugin + + def deployable_type(self): + return "plugin" diff --git a/steamship/cli/login.py b/steamship/cli/login.py new file mode 100644 index 0000000000000000000000000000000000000000..10920a17c8e3c31d4b2bdb339bf24fbd9be9e307 --- /dev/null +++ b/steamship/cli/login.py @@ -0,0 +1,62 @@ +import time +import webbrowser + +import requests + +from steamship.base.error import SteamshipError + + +def login(api_base: str, web_base: str) -> str: # noqa: C901 + + # create login token + try: + token_result = requests.post(api_base + "account/create_login_attempt") + token_data = token_result.json().get("data") + except Exception as e: + raise SteamshipError("Could not create login token when attempting login.", error=e) + + if token_data is None: + raise SteamshipError("Could not create login token when attempting login.") + token = token_data.get("token") + if token is None: + raise SteamshipError("Could not create login token when attempting login.") + + # Launch login attempt in browser + login_browser_url = ( + f"{web_base}account/client-login?attemptToken={token}&client=pycli&version=0.0.1" + ) + try: + opened_browser = webbrowser.open(login_browser_url) + except Exception as e: + raise SteamshipError("Exception attempting to launch browser for login.", error=e) + + if not opened_browser: + raise SteamshipError( + """Could not launch browser to log in to Steamship. 
+ +If you are in Replit: + +1) Get an API key at https://steamship.com/account/api +2) Set the STEAMSHIP_API_KEY Replit Secret +3) Close and reopen this shell so that secrets refresh + +If you are in a different headless environment, visit https://docs.steamship.com/configuration/authentication.html""" + ) + + # Wait on result + total_poll_time_s = 0 + time_between_polls_s = 1 + api_key = None + while total_poll_time_s < 300: # Five minutes + params = {"token": token} + login_response = requests.post(f"{api_base}account/poll_login_attempt", json=params).json() + if login_response.get("data", {}).get("status") == "done": + api_key = login_response.get("data", {}).get("apiKey") + break + time.sleep(time_between_polls_s) + total_poll_time_s += time_between_polls_s + + if api_key is None: + raise SteamshipError("Could not fetch api key after login attempt in allotted time.") + + return api_key diff --git a/steamship/cli/manifest_init_wizard.py b/steamship/cli/manifest_init_wizard.py new file mode 100644 index 0000000000000000000000000000000000000000..a71dc52c4350509ca722a5c1d5f95ad140cecc20 --- /dev/null +++ b/steamship/cli/manifest_init_wizard.py @@ -0,0 +1,96 @@ +import re + +import click +from click import BadParameter + +from steamship import Steamship +from steamship.data.manifest import Manifest, PluginConfig, SteamshipRegistry +from steamship.data.user import User + + +def validate_handle(handle: str) -> str: + if re.fullmatch(r"[a-z\-]+", handle) is not None: + return handle + else: + raise BadParameter("Handle must only include lowercase letters and -") + + +def validate_version_handle(handle: str) -> str: + if re.fullmatch(r"[a-z0-9\-.]+", handle) is not None: + return handle + else: + raise BadParameter("Handle must only include lowercase letters, numbers, . and -") + + +def manifest_init_wizard(client: Steamship): + click.secho( + "It looks like you don't yet have a steamship.json to deploy. 
Let's create one.", + fg="cyan", + ) + + deployable_type = click.prompt( + "Is this a package or a plugin?", + default="package", + type=click.Choice(["package", "plugin"]), + show_choices=False, + ) + + handle = click.prompt( + f"What handle would you like to use for your {deployable_type}? Valid characters are a-z and -", + value_proc=validate_handle, + ) + + # TODO: claim the handle right here! + + version_handle = "0.0.1" + + plugin_detail = None + if deployable_type == "plugin": + plugin_type = click.prompt( + "What type of plugin is this?", + default="tagger", + type=click.Choice( + ["tagger", "blockifier", "exporter", "fileImporter", "corpusImporter", "generator"] + ), + show_choices=True, + ) + if plugin_type == "tagger": + trainable = click.confirm("Is the plugin trainable?", default=False) + else: + trainable = False + plugin_detail = PluginConfig(isTrainable=trainable, type=plugin_type) + + public = click.confirm(f"Do you want this {deployable_type} to be public?", default=True) + + user = User.current(client) + + author = click.prompt("How should we list your author name?", default=user.handle) + + tagline = None + author_github = None + if public: + tagline = click.prompt(f"Want to give the {deployable_type} a tagline?", default="") + author_github = click.prompt( + "If you'd like this associated with your github account, please your github username", + default="", + ) + + tag_string = click.prompt( + f"Want to give the {deployable_type} some tags? 
(comma separated)", default="Prompt API" + ) + tags = [tag.strip() for tag in tag_string.split(",")] + + return Manifest( + type=deployable_type, + handle=handle, + version=version_handle, + description="", + author=author, + public=public, + plugin=plugin_detail, + build_config={"ignore": ["tests", "examples"]}, + configTemplate={}, + steamshipRegistry=SteamshipRegistry( + tagline=tagline, authorGithub=author_github, authorName=author, tags=tags + ), + ) diff --git a/steamship/cli/requirements_init_wizard.py b/steamship/cli/requirements_init_wizard.py new file mode 100644 index 0000000000000000000000000000000000000000..2162f11e3547d98a3c6517f0d52d27513a2a2b46 --- /dev/null +++ b/steamship/cli/requirements_init_wizard.py @@ -0,0 +1,20 @@ +import click + +import steamship + + +def requirements_init_wizard(): + click.secho( + "Steamship uses requirements.txt to specify dependencies. You do not currently have a requirements.txt in this directory.", + fg="yellow", + ) + if not click.confirm("Would you like to create one automatically?", default=True): + click.secho("Please manually create a requirements.txt and try again.") + click.get_current_context().abort() + + with open("requirements.txt", "w") as requirements_file: + requirements_file.write(f"steamship=={steamship.__version__}\n") + + click.secho( + "Created a requirements.txt with the steamship dependency. If you need others, they must be added manually." 
+ ) diff --git a/steamship/cli/ship_spinner.py b/steamship/cli/ship_spinner.py new file mode 100644 index 0000000000000000000000000000000000000000..bb17d81d48db9684cb873b46022ae4a589e925f4 --- /dev/null +++ b/steamship/cli/ship_spinner.py @@ -0,0 +1,48 @@ +import itertools +import threading + +import click + + +class Spinner(object): + # [" 🚢", " 🚢 ", " 🚢 ", "🚢 "] + # Unfortunately, backspacing doesn't seem to work correctly for emoji in iTerm, so leaving the "spinner" + # as adding ships for now + spinner_cycle = itertools.cycle(["🚢"]) + + def __init__(self): + self.stop_running = None + self.spin_thread = None + + def start(self): + self.stop_running = threading.Event() + self.spin_thread = threading.Thread(target=self.init_spin) + self.spin_thread.start() + + def stop(self): + if self.spin_thread: + self.stop_running.set() + self.spin_thread.join() + + def init_spin(self): + while not self.stop_running.is_set(): + click.echo(next(self.spinner_cycle), nl=False) + self.stop_running.wait(1) + # click.echo("\b", nl=False) + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + return False + + +def ship_spinner(): + """This function creates a context manager that is used to display a + spinner on stdout as long as the context has not exited. + The spinner is created only if stdout is not redirected, or if the spinner + is forced using the `force` parameter. 
+ """ + return Spinner() diff --git a/steamship/client/__init__.py b/steamship/client/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d5128bcd6258855d8720a391c43d32a4dce01af2 --- /dev/null +++ b/steamship/client/__init__.py @@ -0,0 +1,3 @@ +from .steamship import Steamship + +__all__ = ["Steamship"] diff --git a/steamship/client/__pycache__/__init__.cpython-39.pyc b/steamship/client/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c509d86e6eac46da05f24c9a9ede1a6ccf440fd Binary files /dev/null and b/steamship/client/__pycache__/__init__.cpython-39.pyc differ diff --git a/steamship/client/__pycache__/skill_to_provider.cpython-39.pyc b/steamship/client/__pycache__/skill_to_provider.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9b0c7781b97035a66754e319837dfc4bde9d4b4 Binary files /dev/null and b/steamship/client/__pycache__/skill_to_provider.cpython-39.pyc differ diff --git a/steamship/client/__pycache__/skills.cpython-39.pyc b/steamship/client/__pycache__/skills.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e57f1749865b125dd5040ca56c20715e28f50551 Binary files /dev/null and b/steamship/client/__pycache__/skills.cpython-39.pyc differ diff --git a/steamship/client/__pycache__/steamship.cpython-39.pyc b/steamship/client/__pycache__/steamship.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2398a1a2c1d4156cb8b625e4f513fd77d0671753 Binary files /dev/null and b/steamship/client/__pycache__/steamship.cpython-39.pyc differ diff --git a/steamship/client/__pycache__/vendors.cpython-39.pyc b/steamship/client/__pycache__/vendors.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27c8fe495fbdd8a580af3ef3a16880d11328bd30 Binary files /dev/null and b/steamship/client/__pycache__/vendors.cpython-39.pyc differ diff --git 
# --- steamship/client/skill_to_provider.py ---
from typing import Any, Dict, List

from pydantic import BaseModel

from steamship.client.skills import Skill
from steamship.client.vendors import Vendor


class SkillVendorConfig(BaseModel):
    """Pairs the handle of a Steamship plugin with the config that selects a vendor skill."""

    plugin_handle: str
    config: Dict[str, Any]


def _one_ai_tagger(skill_names: List[str]) -> SkillVendorConfig:
    """Build the OneAI tagger configuration enabling the given OneAI skill names."""
    return SkillVendorConfig(plugin_handle="oneai-tagger", config={"skills": skill_names})


# Maps each high-level Skill to the vendors that implement it, along with the
# plugin configuration needed to invoke that vendor's implementation.
SKILL_TO_PROVIDER: Dict[Skill, Dict[Vendor, SkillVendorConfig]] = {
    Skill.ENTITIES: {Vendor.OneAI: _one_ai_tagger(["names", "numbers", "business-entities"])},
    Skill.SUMMARY: {Vendor.OneAI: _one_ai_tagger(["summarize"])},
    Skill.SENTIMENTS: {Vendor.OneAI: _one_ai_tagger(["sentiments"])},
    Skill.EMOTIONS: {Vendor.OneAI: _one_ai_tagger(["emotions"])},
    Skill.TOPICS: {Vendor.OneAI: _one_ai_tagger(["article-topics"])},
    Skill.HIGHLIGHTS: {Vendor.OneAI: _one_ai_tagger(["highlights"])},
    Skill.KEYWORDS: {Vendor.OneAI: _one_ai_tagger(["keywords"])},
}


# --- steamship/client/skills.py ---
from enum import Enum


class Skill(str, Enum):
    """Named NLP capabilities that can be resolved to a vendor plugin via SKILL_TO_PROVIDER."""

    ENTITIES = "entities"
    SUMMARY = "summary"
    SENTIMENTS = "sentiments"
    EMOTIONS = "emotions"
    TOPICS = "topics"
    HIGHLIGHTS = "highlights"
    KEYWORDS = "keywords"
a/steamship/client/steamship.py b/steamship/client/steamship.py new file mode 100644 index 0000000000000000000000000000000000000000..c15c918842eef56cf72793a7df7157cfb2d86d7d --- /dev/null +++ b/steamship/client/steamship.py @@ -0,0 +1,327 @@ +from __future__ import annotations + +import logging +import uuid +from contextlib import contextmanager +from typing import Any, Dict, Generator, List, Optional + +from pydantic import BaseModel + +from steamship.base.client import Client +from steamship.base.configuration import Configuration +from steamship.base.error import SteamshipError +from steamship.client.skill_to_provider import SKILL_TO_PROVIDER +from steamship.client.skills import Skill +from steamship.client.vendors import Vendor +from steamship.data.embeddings import EmbedAndSearchRequest, QueryResults +from steamship.data.package.package_instance import PackageInstance +from steamship.data.plugin.index_plugin_instance import EmbeddingIndexPluginInstance +from steamship.data.plugin.plugin_instance import PluginInstance +from steamship.data.plugin.prompt_generation_plugin_instance import PromptGenerationPluginInstance +from steamship.data.workspace import Workspace +from steamship.utils.metadata import hash_dict + +_logger = logging.getLogger(__name__) + + +class Steamship(Client): + """Steamship Python Client.""" + + # Some plugin instances use special subclasses which provide helper methods and/or more complex + # behavior than typical PluginInstance subclass permits. 
Examples are: + # + # - Embedding indices (which much coordinate both embedding taggers & vector indices) + # - Prompt generators (which benefit from supporting, prompt-specific, methods) + _PLUGIN_INSTANCE_SUBCLASS_OVERRIDES = { + "prompt-generation-default": PromptGenerationPluginInstance, + "prompt-generation-trainable-default": PromptGenerationPluginInstance, + "gpt3": PromptGenerationPluginInstance, + "gpt-3": PromptGenerationPluginInstance, + "cerebrium": PromptGenerationPluginInstance, + "embedding-index": EmbeddingIndexPluginInstance, + } + + def __init__( + self, + api_key: str = None, + api_base: str = None, + app_base: str = None, + web_base: str = None, + workspace: str = None, + fail_if_workspace_exists: bool = False, + profile: str = None, + config_file: str = None, + config: Configuration = None, + trust_workspace_config: bool = False, # For use by lambda_handler; don't fetch the workspace + **kwargs, + ): + super().__init__( + api_key=api_key, + api_base=api_base, + app_base=app_base, + web_base=web_base, + workspace=workspace, + fail_if_workspace_exists=fail_if_workspace_exists, + profile=profile, + config_file=config_file, + config=config, + trust_workspace_config=trust_workspace_config, + **kwargs, + ) + # We use object.__setattr__ here in order to bypass Pydantic's overloading of it (which would block this + # set unless we were to add this as a field) + object.__setattr__(self, "use", self._instance_use) + object.__setattr__(self, "use_plugin", self._instance_use_plugin) + + def __repr_args__(self: BaseModel) -> Any: + """Because of the trick we've done with `use` and `use_plugin`, we need to exclude these from __repr__ + otherwise we'll get an infinite recursion.""" + return [ + (key, value) + for key, value in self.__dict__.items() + if key != "use" and key != "use_plugin" + ] + + def embed_and_search( + self, + query: str, + docs: List[str], + plugin_instance: str, + k: int = 1, + ) -> QueryResults: + req = EmbedAndSearchRequest(query=query, 
docs=docs, plugin_instance=plugin_instance, k=k) + return self.post( + "plugin/instance/embeddingSearch", + req, + expect=QueryResults, + ) + + @staticmethod + @contextmanager + def temporary_workspace(**kwargs) -> Generator["Steamship", None, None]: + """Create a client rooted in a temporary workspace that will be deleted after use.""" + # Create a new client and switch to a temporary workspace + client = Steamship(**kwargs) + temporary_handle = "temp-" + str(uuid.uuid4()) + client.switch_workspace(temporary_handle) + + # Safety check that we are now working form the new workspace. + if client.config.workspace_handle != temporary_handle: + raise SteamshipError( + message=f"Attempted to switch to temporary workspace {temporary_handle} but the client claimed to be working from {client.config.workspace_handle}" + ) + + yield client + + # Safely delete the temporary workspace. Here we re-fetch the workspace using the temporary_handle + # in case the user switched workspaces yet again upon the client. + workspace = Workspace.get(client, handle=temporary_handle) + if workspace.handle != temporary_handle: + raise SteamshipError( + message=f"Was about to delete temporary workspace {temporary_handle} but its handle is different: {workspace.handle}" + ) + else: + workspace.delete() + + @staticmethod + def use( + package_handle: str, + instance_handle: Optional[str] = None, + config: Optional[Dict[str, Any]] = None, + version: Optional[str] = None, + fetch_if_exists: bool = True, + workspace_handle: Optional[str] = None, + **kwargs, + ) -> PackageInstance: + """Creates/loads an instance of package `package_handle`. + + The instance is named `instance_handle` and located in the Workspace named `instance_handle`. If no + `instance_handle` is provided, the default is `package_handle`. 
+ + For example, one may write the following to always get back the same package instance, no matter how many + times you run it, scoped into its own workspace: + + ```python + instance = Steamship.use('package-handle', 'instance-handle') + ``` + + One may also write: + + ```python + instance = Steamship.use('package-handle') # Instance will also be named `package-handle` + ``` + + If you wish to override the usage of a workspace named `instance_handle`, you can provide the `workspace_handle` + parameter. + """ + if instance_handle is None: + instance_handle = package_handle + kwargs["workspace"] = workspace_handle or instance_handle + client = Steamship(**kwargs) + return client._instance_use( + package_handle=package_handle, + instance_handle=instance_handle, + config=config, + version=version, + fetch_if_exists=fetch_if_exists, + ) + + def _instance_use( + self, + package_handle: str, + instance_handle: Optional[str] = None, + config: Optional[Dict[str, Any]] = None, + version: Optional[str] = None, + fetch_if_exists: bool = True, + ) -> PackageInstance: + """Creates/loads an instance of package `package_handle`. + + The instance is named `instance_handle` and located in the workspace this client is anchored to. + If no `instance_handle` is provided, the default is `package_handle`. 
+ """ + + if instance_handle is None: + if config is None: + instance_handle = package_handle + else: + instance_handle = f"{package_handle}-{hash_dict(config)}" + + return PackageInstance.create( + self, + package_handle=package_handle, + package_version_handle=version, + handle=instance_handle, + config=config, + fetch_if_exists=fetch_if_exists, + ) + + @staticmethod + def use_plugin( + plugin_handle: str, + instance_handle: Optional[str] = None, + config: Optional[Dict[str, Any]] = None, + version: Optional[str] = None, + fetch_if_exists: bool = True, + workspace_handle: Optional[str] = None, + **kwargs, + ) -> PluginInstance: + """Creates/loads an instance of plugin `plugin_handle`. + + The instance is named `instance_handle` and located in the Workspace named `instance_handle`. + If no `instance_handle` is provided, the default is `plugin_handle`. + + For example, one may write the following to always get back the same plugin instance, no matter how many + times you run it, scoped into its own workspace: + + ```python + instance = Steamship.use_plugin('plugin-handle', 'instance-handle') + ``` + + One may also write: + + ```python + instance = Steamship.use('plugin-handle') # Instance will also be named `plugin-handle` + ``` + """ + if instance_handle is None: + instance_handle = plugin_handle + kwargs["workspace"] = workspace_handle or instance_handle + client = Steamship(**kwargs) + return client._instance_use_plugin( + plugin_handle=plugin_handle, + instance_handle=instance_handle, + config=config, + version=version, + fetch_if_exists=fetch_if_exists, + ) + + def use_skill( + self, + skill: Skill, + provider: Optional[Vendor] = None, + instance_handle: Optional[str] = None, + fetch_if_exists: Optional[bool] = True, + ) -> PluginInstance: + + if skill not in SKILL_TO_PROVIDER: + raise SteamshipError( + f"Unsupported skill provided. 
" + f"Use one of our supported skills: {','.join(SKILL_TO_PROVIDER)}" + ) + + if provider and provider not in SKILL_TO_PROVIDER[skill]: + raise SteamshipError( + f"The provider {provider} has no support for the skill {skill}." + f"Use one of the providers that support your skill: " + f"{','.join(SKILL_TO_PROVIDER[skill])}" + ) + + plugin_setup = ( + SKILL_TO_PROVIDER[skill][provider] + if provider + else list(SKILL_TO_PROVIDER[skill].values())[0] + ) + return self._instance_use_plugin( + plugin_handle=plugin_setup["plugin_handle"], + instance_handle=instance_handle, + config=plugin_setup["config"], + fetch_if_exists=fetch_if_exists, + ) + + def _instance_use_plugin( + self, + plugin_handle: str, + instance_handle: Optional[str] = None, + config: Optional[Dict[str, Any]] = None, + version: Optional[str] = None, + fetch_if_exists: Optional[bool] = True, + ) -> PluginInstance: + """Creates/loads an instance of plugin `plugin_handle`. + + The instance is named `instance_handle` and located in the workspace this client is anchored to. + If no `instance_handle` is provided, the default is `plugin_handle`. + """ + + if instance_handle is None: + if config is None: + instance_handle = plugin_handle + else: + instance_handle = f"{plugin_handle}-{hash_dict(config)}" + + if plugin_handle in Steamship._PLUGIN_INSTANCE_SUBCLASS_OVERRIDES: + return Steamship._PLUGIN_INSTANCE_SUBCLASS_OVERRIDES[plugin_handle].create( + self, + plugin_handle=plugin_handle, + plugin_version_handle=version, + handle=instance_handle, + config=config, + fetch_if_exists=fetch_if_exists, + ) + + return PluginInstance.create( + self, + plugin_handle=plugin_handle, + plugin_version_handle=version, + handle=instance_handle, + config=config, + fetch_if_exists=fetch_if_exists, + ) + + def get_workspace(self) -> Workspace: + # We should probably add a hard-coded way to get this. 
The client in a Steamship Plugin/App comes + # pre-configured with an API key and the Workspace in which this client should be operating. + # This is a way to load the model object for that workspace. + logging.info( + f"get_workspace() called on client with config workspace {self.config.workspace_handle}/{self.config.workspace_id}" + ) + workspace = Workspace.get( + self, id_=self.config.workspace_id, handle=self.config.workspace_handle + ) + if not workspace: + logging.error("Unable to get workspace.") + raise SteamshipError( + message="Error while retrieving the Workspace associated with this client config.", + internal_message=f"workspace_id={self.config.workspace_id} workspace_handle={self.config.workspace_handle}", + ) + logging.info(f"Got workspace: {workspace.handle}/{workspace.id}") + return workspace diff --git a/steamship/client/vendors.py b/steamship/client/vendors.py new file mode 100644 index 0000000000000000000000000000000000000000..0cdf380998839c7c8e6ee6ba4246bd7c21fe78c1 --- /dev/null +++ b/steamship/client/vendors.py @@ -0,0 +1,5 @@ +from enum import Enum + + +class Vendor(str, Enum): + OneAI = "one-ai" diff --git a/steamship/data/__init__.py b/steamship/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..69c0c4a91edeb3c67db8b7353b217fd55e95ad2b --- /dev/null +++ b/steamship/data/__init__.py @@ -0,0 +1,26 @@ +from .block import Block +from .embeddings import EmbeddingIndex +from .file import File +from .package import Package, PackageInstance, PackageVersion +from .plugin import Plugin, PluginInstance, PluginVersion +from .tags import DocTag, GenerationTag, Tag, TagKind, TagValueKey, TokenTag +from .workspace import Workspace + +__all__ = [ + "Package", + "PackageInstance", + "PackageVersion", + "Block", + "EmbeddingIndex", + "File", + "GenerationTag", + "Plugin", + "PluginInstance", + "PluginVersion", + "Workspace", + "DocTag", + "Tag", + "TagKind", + "TokenTag", + "TagValueKey", +] diff --git 
a/steamship/data/__pycache__/__init__.cpython-39.pyc b/steamship/data/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ec7a34a094a061a0f18d57724783122f04e04b08 Binary files /dev/null and b/steamship/data/__pycache__/__init__.cpython-39.pyc differ diff --git a/steamship/data/__pycache__/block.cpython-39.pyc b/steamship/data/__pycache__/block.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05b555a30a7b4860b7833c2e574257cfb0dfa60e Binary files /dev/null and b/steamship/data/__pycache__/block.cpython-39.pyc differ diff --git a/steamship/data/__pycache__/embeddings.cpython-39.pyc b/steamship/data/__pycache__/embeddings.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e0cf25cdedda36193e0629daac13761c5902f565 Binary files /dev/null and b/steamship/data/__pycache__/embeddings.cpython-39.pyc differ diff --git a/steamship/data/__pycache__/file.cpython-39.pyc b/steamship/data/__pycache__/file.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..47e53d851267587bf8cfcff0d1f9b1ba91d975b4 Binary files /dev/null and b/steamship/data/__pycache__/file.cpython-39.pyc differ diff --git a/steamship/data/__pycache__/manifest.cpython-39.pyc b/steamship/data/__pycache__/manifest.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0484f43888551f8c425a2576dbce5b6b32a6535d Binary files /dev/null and b/steamship/data/__pycache__/manifest.cpython-39.pyc differ diff --git a/steamship/data/__pycache__/search.cpython-39.pyc b/steamship/data/__pycache__/search.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8445f650f2c22a4640d9ab46439641b66d3c9389 Binary files /dev/null and b/steamship/data/__pycache__/search.cpython-39.pyc differ diff --git a/steamship/data/__pycache__/user.cpython-39.pyc b/steamship/data/__pycache__/user.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..6f27926803670f685e39077d5f29b8e1560609ec Binary files /dev/null and b/steamship/data/__pycache__/user.cpython-39.pyc differ diff --git a/steamship/data/__pycache__/workspace.cpython-39.pyc b/steamship/data/__pycache__/workspace.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2551ab9e91fdb2930954aa9b9510b136dcbf7663 Binary files /dev/null and b/steamship/data/__pycache__/workspace.cpython-39.pyc differ diff --git a/steamship/data/block.py b/steamship/data/block.py new file mode 100644 index 0000000000000000000000000000000000000000..b310e345619432bfd0ebddc3ce521d6177bd717c --- /dev/null +++ b/steamship/data/block.py @@ -0,0 +1,182 @@ +from __future__ import annotations + +from enum import Enum +from typing import Any, List, Optional, Type, Union + +import requests +from pydantic import BaseModel, Field + +from steamship import MimeTypes, SteamshipError +from steamship.base.client import Client +from steamship.base.model import CamelModel +from steamship.base.request import DeleteRequest, IdentifierRequest, Request +from steamship.base.response import Response +from steamship.data.tags.tag import Tag + + +class BlockQueryRequest(Request): + tag_filter_query: str + + +class BlockUploadType(str, Enum): + FILE = "file" # A file uploaded as bytes or a string + BLOCKS = "blocks" # Blocks are sent to create a file + URL = "url" # content will be fetched from a URL + NONE = "none" # No upload; plain text only + + +class Block(CamelModel): + """A Block is a chunk of content within a File. It can be plain text content, image content, + video content, etc. If the content is not text, the text value may be the empty string + for backwards compatibility. 
+ """ + + client: Client = Field(None, exclude=True) + id: str = None + file_id: str = None + text: str = None + tags: Optional[List[Tag]] = [] + index_in_file: Optional[int] = Field(alias="index") + mime_type: Optional[MimeTypes] + url: Optional[ + str + ] = None # Only for creation of blocks; used to fetch content from a public URL. + content_url: Optional[ + str + ] = None # For overriding the URL of the raw data for ephemeral blocks. Setting this will have no effect + upload_type: Optional[ + BlockUploadType + ] = None # for returning Blocks as the result of a generate request + + class ListRequest(Request): + file_id: str = None + + class ListResponse(Response): + blocks: List[Block] = [] + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel: + # TODO (enias): This needs to be solved at the engine side + obj = obj["block"] if "block" in obj else obj + return super().parse_obj(obj) + + @staticmethod + def get( + client: Client, + _id: str = None, + ) -> Block: + return client.post( + "block/get", + IdentifierRequest(id=_id), + expect=Block, + ) + + @staticmethod + def create( + client: Client, + file_id: str, + text: str = None, + tags: List[Tag] = None, + content: Union[str, bytes] = None, + url: Optional[str] = None, + mime_type: Optional[MimeTypes] = None, + ) -> Block: + """ + Create a new Block within a File specified by file_id. + + You can create a Block in several ways: + - Providing raw text as the text parameter; + - Providing the content of the block as string or bytes; + - Providing a publicly accessible URL where the content is stored. 
+ + """ + + if content is not None and url is not None: + raise SteamshipError("May provide content or URL, but not both when creating a Block") + + if content is not None: + upload_type = BlockUploadType.FILE + elif url is not None: + upload_type = BlockUploadType.URL + else: + upload_type = BlockUploadType.NONE + + req = { + "fileId": file_id, + "text": text, + "tags": [t.dict(by_alias=True) for t in tags] if tags else [], + "url": url, + "mimeType": mime_type, + "uploadType": upload_type, + } + + file_data = ( + ("file-part", content, "multipart/form-data") + if upload_type == BlockUploadType.FILE + else None + ) + + return client.post( + "block/create", + req, + expect=Block, + file=file_data, + ) + + def delete(self) -> Block: + return self.client.post( + "block/delete", + DeleteRequest(id=self.id), + expect=Tag, + ) + + @staticmethod + def query( + client: Client, + tag_filter_query: str, + ) -> BlockQueryResponse: + req = BlockQueryRequest(tag_filter_query=tag_filter_query) + res = client.post( + "block/query", + payload=req, + expect=BlockQueryResponse, + ) + return res + + def index(self, embedding_plugin_instance: Any = None): + """Index this block.""" + tags = [ + Tag( + text=self.text, + file_id=self.file_id, + block_id=self.id, + kind="block", + start_idx=0, + end_idx=len(self.text), + ) + ] + return embedding_plugin_instance.insert(tags) + + def raw(self): + if self.content_url is not None: + return requests.get(self.content_url).content + else: + return self.client.post( + "block/raw", + payload={ + "id": self.id, + }, + raw_response=True, + ) + + def is_text(self): + """Return whether this is a text Block.""" + return self.mime_type == MimeTypes.TXT + + +class BlockQueryResponse(Response): + blocks: List[Block] + + +Block.ListResponse.update_forward_refs() +Block.update_forward_refs() diff --git a/steamship/data/embeddings.py b/steamship/data/embeddings.py new file mode 100644 index 
0000000000000000000000000000000000000000..fab18ddc25f40a04d5bf98fd46af4aeddaca4ce2 --- /dev/null +++ b/steamship/data/embeddings.py @@ -0,0 +1,323 @@ +from __future__ import annotations + +import json +from typing import Any, Dict, List, Optional, Type, Union + +from pydantic import BaseModel, Field + +from steamship import SteamshipError +from steamship.base import Task +from steamship.base.client import Client +from steamship.base.model import CamelModel +from steamship.base.request import DeleteRequest, Request +from steamship.base.response import Response +from steamship.data.search import Hit +from steamship.utils.metadata import metadata_to_str + +MAX_RECOMMENDED_ITEM_LENGTH = 5000 + + +class EmbedAndSearchRequest(Request): + query: str + docs: List[str] + plugin_instance: str + k: int = 1 + + +class QueryResult(CamelModel): + value: Optional[Hit] = None + score: Optional[float] = None + index: Optional[int] = None + id: Optional[str] = None + + +class QueryResults(Request): + items: List[QueryResult] = None + + +class EmbeddedItem(CamelModel): + id: str = None + index_id: str = None + file_id: str = None + block_id: str = None + tag_id: str = None + value: str = None + external_id: str = None + external_type: str = None + metadata: Any = None + embedding: List[float] = None + + def clone_for_insert(self) -> EmbeddedItem: + """Produces a clone with a string representation of the metadata""" + ret = EmbeddedItem( + id=self.id, + index_id=self.index_id, + file_id=self.file_id, + block_id=self.block_id, + tag_id=self.tag_id, + value=self.value, + external_id=self.external_id, + external_type=self.external_type, + metadata=self.metadata, + embedding=self.embedding, + ) + if isinstance(ret.metadata, dict) or isinstance(ret.metadata, list): + ret.metadata = json.dumps(ret.metadata) + return ret + + +class IndexCreateRequest(Request): + handle: str = None + name: str = None + plugin_instance: str = None + fetch_if_exists: bool = True + external_id: str = None + 
external_type: str = None + metadata: Any = None + + +class IndexInsertRequest(Request): + index_id: str + items: List[EmbeddedItem] = None + value: str = None + file_id: str = None + block_type: str = None + external_id: str = None + external_type: str = None + metadata: Any = None + reindex: bool = True + + +class IndexItemId(CamelModel): + index_id: str = None + id: str = None + + +class IndexInsertResponse(Response): + item_ids: List[IndexItemId] = None + + +class IndexEmbedRequest(Request): + id: str + + +class IndexEmbedResponse(Response): + id: Optional[str] = None + + +class IndexSearchRequest(Request): + id: str + query: str = None + queries: List[str] = None + k: int = 1 + include_metadata: bool = False + + +class ListItemsRequest(Request): + id: str = None + file_id: str = None + block_id: str = None + span_id: str = None + + +class ListItemsResponse(Response): + items: List[EmbeddedItem] + + +class EmbeddingIndex(CamelModel): + """A persistent, read-optimized index over embeddings.""" + + client: Client = Field(None, exclude=True) + id: str = None + handle: str = None + name: str = None + plugin: str = None + external_id: str = None + external_type: str = None + metadata: str = None + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel: + # TODO (enias): This needs to be solved at the engine side + if "embeddingIndex" in obj: + obj = obj["embeddingIndex"] + elif "index" in obj: + obj = obj["index"] + return super().parse_obj(obj) + + def insert_file( + self, + file_id: str, + block_type: str = None, + external_id: str = None, + external_type: str = None, + metadata: Union[int, float, bool, str, List, Dict] = None, + reindex: bool = True, + ) -> IndexInsertResponse: + if isinstance(metadata, dict) or isinstance(metadata, list): + metadata = json.dumps(metadata) + + req = IndexInsertRequest( + index_id=self.id, + file_id=file_id, + blockType=block_type, + external_id=external_id, + external_type=external_type, + metadata=metadata, 
+ reindex=reindex, + ) + return self.client.post( + "embedding-index/item/create", + req, + expect=IndexInsertResponse, + ) + + def _check_input(self, request: IndexInsertRequest, allow_long_records: bool): + if not allow_long_records: + if request.value is not None and len(request.value) > MAX_RECOMMENDED_ITEM_LENGTH: + raise SteamshipError( + f"Inserted item of length {len(request.value)} exceeded maximum recommended length of {MAX_RECOMMENDED_ITEM_LENGTH} characters. You may insert it anyway by passing allow_long_records=True." + ) + if request.items is not None: + for i, item in enumerate(request.items): + if item is not None: + if isinstance(item, str) and len(item) > MAX_RECOMMENDED_ITEM_LENGTH: + raise SteamshipError( + f"Inserted item {i} of length {len(item)} exceeded maximum recommended length of {MAX_RECOMMENDED_ITEM_LENGTH} characters. You may insert it anyway by passing allow_long_records=True." + ) + if ( + isinstance(item, EmbeddedItem) + and item.value is not None + and len(item.value) > MAX_RECOMMENDED_ITEM_LENGTH + ): + raise SteamshipError( + f"Inserted item {i} of length {len(item.value)} exceeded maximum recommended length of {MAX_RECOMMENDED_ITEM_LENGTH} characters. You may insert it anyway by passing allow_long_records=True." 
+ ) + + def insert_many( + self, + items: List[Union[EmbeddedItem, str]], + reindex: bool = True, + allow_long_records=False, + ) -> IndexInsertResponse: + new_items = [] + for item in items: + if isinstance(item, str): + new_items.append(EmbeddedItem(value=item)) + else: + new_items.append(item) + + req = IndexInsertRequest( + index_id=self.id, + items=[item.clone_for_insert() for item in new_items], + reindex=reindex, + ) + self._check_input(req, allow_long_records) + return self.client.post( + "embedding-index/item/create", + req, + expect=IndexInsertResponse, + ) + + def insert( + self, + value: str, + external_id: str = None, + external_type: str = None, + metadata: Union[int, float, bool, str, List, Dict] = None, + reindex: bool = True, + allow_long_records=False, + ) -> IndexInsertResponse: + + req = IndexInsertRequest( + index_id=self.id, + value=value, + external_id=external_id, + external_type=external_type, + metadata=metadata_to_str(metadata), + reindex=reindex, + ) + self._check_input(req, allow_long_records) + return self.client.post( + "embedding-index/item/create", + req, + expect=IndexInsertResponse, + ) + + def embed( + self, + ) -> Task[IndexEmbedResponse]: + req = IndexEmbedRequest(id=self.id) + return self.client.post( + "embedding-index/embed", + req, + expect=IndexEmbedResponse, + ) + + def list_items( + self, + file_id: str = None, + block_id: str = None, + span_id: str = None, + ) -> ListItemsResponse: + req = ListItemsRequest(id=self.id, file_id=file_id, block_id=block_id, spanId=span_id) + return self.client.post( + "embedding-index/item/list", + req, + expect=ListItemsResponse, + ) + + def delete(self) -> EmbeddingIndex: + return self.client.post( + "embedding-index/delete", + DeleteRequest(id=self.id), + expect=EmbeddingIndex, + ) + + def search( + self, + query: Union[str, List[str]], + k: int = 1, + include_metadata: bool = False, + ) -> Task[QueryResults]: + if isinstance(query, list): + req = IndexSearchRequest( + id=self.id, 
queries=query, k=k, include_metadata=include_metadata + ) + else: + req = IndexSearchRequest( + id=self.id, query=query, k=k, include_metadata=include_metadata + ) + ret = self.client.post( + "embedding-index/search", + req, + expect=QueryResults, + ) + + return ret + + @staticmethod + def create( + client: Client, + handle: str = None, + name: str = None, + embedder_plugin_instance_handle: str = None, + fetch_if_exists: bool = True, + external_id: str = None, + external_type: str = None, + metadata: Any = None, + ) -> EmbeddingIndex: + req = IndexCreateRequest( + handle=handle, + name=name, + plugin_instance=embedder_plugin_instance_handle, + fetch_if_exists=fetch_if_exists, + external_id=external_id, + external_type=external_type, + metadata=metadata, + ) + return client.post( + "embedding-index/create", + req, + expect=EmbeddingIndex, + ) diff --git a/steamship/data/file.py b/steamship/data/file.py new file mode 100644 index 0000000000000000000000000000000000000000..180ed4083056b6200245d89cef946286ba803b39 --- /dev/null +++ b/steamship/data/file.py @@ -0,0 +1,290 @@ +from __future__ import annotations + +import io +from enum import Enum +from typing import TYPE_CHECKING, Any, List, Optional, Type, Union + +from pydantic import BaseModel, Field + +from steamship import MimeTypes, SteamshipError +from steamship.base.client import Client +from steamship.base.model import CamelModel +from steamship.base.request import GetRequest, IdentifierRequest, Request +from steamship.base.response import Response +from steamship.base.tasks import Task +from steamship.data.block import Block +from steamship.data.embeddings import EmbeddingIndex +from steamship.data.tags import Tag +from steamship.utils.binary_utils import flexi_create + +if TYPE_CHECKING: + from steamship.data.operations.tagger import TagResponse + + +class FileUploadType(str, Enum): + FILE = "file" # A file uploaded as bytes or a string + FILE_IMPORTER = "fileImporter" # A fileImporter will be used to create 
the file + BLOCKS = "blocks" # Blocks are sent to create a file + + +class FileClearResponse(Response): + id: str + + +class ListFileRequest(Request): + pass + + +class ListFileResponse(Response): + files: List[File] + + +class FileQueryRequest(Request): + tag_filter_query: str + + +class File(CamelModel): + """A file.""" + + client: Client = Field(None, exclude=True) + id: str = None + handle: str = None + mime_type: MimeTypes = None + workspace_id: str = None + blocks: List[Block] = [] + tags: List[Tag] = [] + filename: str = None + + class CreateResponse(Response): + data_: Any = None + mime_type: str = None + + def __init__( + self, + data: Any = None, + string: str = None, + _bytes: Union[bytes, io.BytesIO] = None, + json: io.BytesIO = None, + mime_type: str = None, + ): + super().__init__() + data, mime_type, encoding = flexi_create( + data=data, string=string, json=json, _bytes=_bytes, mime_type=mime_type + ) + self.data_ = data + self.mime_type = mime_type + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> Response: + obj["data"] = obj.get("data") or obj.get("data_") + if "data_" in obj: + del obj["data_"] + return super().parse_obj(obj) + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel: + # TODO (enias): This needs to be solved at the engine side + obj = obj["file"] if "file" in obj else obj + return super().parse_obj(obj) + + def delete(self) -> File: + return self.client.post( + "file/delete", + IdentifierRequest(id=self.id), + expect=File, + ) + + @staticmethod + def get( + client: Client, + _id: str = None, + handle: str = None, + ) -> File: + return client.post( + "file/get", + IdentifierRequest(id=_id, handle=handle), + expect=File, + ) + + @staticmethod + def create( + client: Client, + content: Union[str, bytes] = None, + mime_type: MimeTypes = None, + handle: str = None, + blocks: List[Block] = None, + tags: List[Tag] = None, + ) -> File: + + if content is None and blocks is None: + if tags is None: + 
raise SteamshipError(message="Either filename, content, or tags must be provided.") + else: + content = "" + if content is not None and blocks is not None: + raise SteamshipError( + message="Please provide only `blocks` or `content` to `File.create`." + ) + + if blocks is not None: + upload_type = FileUploadType.BLOCKS + elif content is not None: + upload_type = FileUploadType.FILE + else: + raise Exception("Unable to determine upload type.") + + req = { + "handle": handle, + "type": upload_type, + "mimeType": mime_type, + "blocks": [ + block.dict(by_alias=True, exclude_unset=True, exclude_none=True) + for block in blocks or [] + ], + "tags": [ + tag.dict(by_alias=True, exclude_unset=True, exclude_none=True) for tag in tags or [] + ], + } + + file_data = ( + ("file-part", content, "multipart/form-data") + if upload_type != FileUploadType.BLOCKS + else None + ) + + # Defaulting this here, as opposed to in the Engine, because it is processed by Vapor + return client.post( + "file/create", + payload=req, + file=file_data, + expect=File, + ) + + @staticmethod + def create_with_plugin( + client: Client, + plugin_instance: str, + url: str = None, + mime_type: str = None, + ) -> Task[File]: + + req = { + "type": FileUploadType.FILE_IMPORTER, + "url": url, + "mimeType": mime_type, + "pluginInstance": plugin_instance, + } + + return client.post("file/create", payload=req, expect=File, as_background_task=True) + + def refresh(self) -> File: + refreshed = File.get(self.client, self.id) + self.__init__(**refreshed.dict()) + self.client = refreshed.client + for block in self.blocks: + block.client = self.client + return self + + @staticmethod + def query( + client: Client, + tag_filter_query: str, + ) -> FileQueryResponse: + + req = FileQueryRequest(tag_filter_query=tag_filter_query) + res = client.post( + "file/query", + payload=req, + expect=FileQueryResponse, + ) + return res + + def raw(self): + return self.client.post( + "file/raw", + payload=GetRequest( + id=self.id, + ), 
+ raw_response=True, + ) + + def blockify(self, plugin_instance: str = None, wait_on_tasks: List[Task] = None) -> Task: + from steamship.data.operations.blockifier import BlockifyRequest + from steamship.plugin.outputs.block_and_tag_plugin_output import BlockAndTagPluginOutput + + req = BlockifyRequest(type="file", id=self.id, plugin_instance=plugin_instance) + + return self.client.post( + "plugin/instance/blockify", + payload=req, + expect=BlockAndTagPluginOutput, + wait_on_tasks=wait_on_tasks, + ) + + def tag( + self, + plugin_instance: str = None, + wait_on_tasks: List[Task] = None, + ) -> Task[TagResponse]: + from steamship.data.operations.tagger import TagRequest, TagResponse + from steamship.data.plugin import PluginTargetType + + req = TagRequest(type=PluginTargetType.FILE, id=self.id, plugin_instance=plugin_instance) + return self.client.post( + "plugin/instance/tag", payload=req, expect=TagResponse, wait_on_tasks=wait_on_tasks + ) + + def index(self, plugin_instance: Any = None) -> EmbeddingIndex: + """Index every block in the file. + + TODO(ted): Enable indexing the results of a tag query. + TODO(ted): It's hard to load the EmbeddingIndexPluginInstance with just a handle because of the chain + of things that need to be created to it to function.""" + + # Preserve the prior behavior of embedding the full text of each block. + tags = [ + Tag(text=block.text, file_id=self.id, block_id=block.id, kind="block") + for block in self.blocks or [] + ] + return plugin_instance.insert(tags) + + @staticmethod + def list(client: Client) -> ListFileResponse: + return client.post( + "file/list", + ListFileRequest(), + expect=ListFileResponse, + ) + + def append_block( + self, + text: str = None, + tags: List[Tag] = None, + content: Union[str, bytes] = None, + url: Optional[str] = None, + mime_type: Optional[MimeTypes] = None, + ) -> Block: + """Append a new block to this File. This is a convenience wrapper around + Block.create(). 
You should provide only one of text, content, or url. + + This is a server-side operation, saving the new Block to the file. The new block + is appended to this client-side File as well for convenience. + """ + block = Block.create( + self.client, + file_id=self.id, + text=text, + tags=tags, + content=content, + url=url, + mime_type=mime_type, + ) + self.blocks.append(block) + return block + + +class FileQueryResponse(Response): + files: List[File] + + +ListFileResponse.update_forward_refs() diff --git a/steamship/data/manifest.py b/steamship/data/manifest.py new file mode 100644 index 0000000000000000000000000000000000000000..efbe070e0997e356294d498db21660c4a1405d3a --- /dev/null +++ b/steamship/data/manifest.py @@ -0,0 +1,89 @@ +import json +from enum import Enum +from typing import Dict, List, Optional, Type, Union + +from pydantic import BaseModel, StrictBool, StrictFloat, StrictInt, StrictStr + +from steamship.base.error import SteamshipError + + +class ConfigParameterType(str, Enum): + NUMBER = "number" + STRING = "string" + BOOLEAN = "boolean" + + @staticmethod + def from_python_type(t: Type): + if issubclass(t, str): + return ConfigParameterType.STRING + elif issubclass(t, bool): # bool is a subclass of int, so must do this first! 
+ return ConfigParameterType.BOOLEAN + elif issubclass(t, float) or issubclass(t, int): + return ConfigParameterType.NUMBER + else: + raise SteamshipError(f"Unknown value type in Config: {t}") + + +class ConfigParameter(BaseModel): + type: ConfigParameterType + description: Optional[str] = None + + # Use strict so that Pydantic doesn't coerce values into the first one that fits + default: Optional[Union[StrictStr, StrictBool, StrictFloat, StrictInt]] = None + + +class DeployableType(str, Enum): + PLUGIN = "plugin" + PACKAGE = "package" + + +class SteamshipRegistry(BaseModel): + tagline: Optional[str] # noqa: N815 + tagline2: Optional[str] # noqa: N815 + usefulFor: Optional[str] # noqa: N815 + videoUrl: Optional[str] # noqa: N815 + githubUrl: Optional[str] # noqa: N815 + demoUrl: Optional[str] # noqa: N815 + blogUrl: Optional[str] # noqa: N815 + jupyterUrl: Optional[str] # noqa: N815 + authorGithub: Optional[str] # noqa: N815 + authorName: Optional[str] # noqa: N815 + authorEmail: Optional[str] # noqa: N815 + authorTwitter: Optional[str] # noqa: N815 + authorUrl: Optional[str] # noqa: N815 + tags: List[str] + + +class PluginConfig(BaseModel): + isTrainable: Optional[bool] = False # noqa: N815 + transport: str = "jsonOverHttp" + type: str # Does not use PluginType due to circular import + + +class Manifest(BaseModel): + type: DeployableType + handle: str + version: str + description: Optional[str] + author: Optional[str] + entrypoint: str = "Unused" + public: bool + plugin: Optional[PluginConfig] + build_config: Dict[str, List[str]] = {"ignore": []} + configTemplate: Optional[Dict[str, ConfigParameter]] # noqa: N815 + steamshipRegistry: SteamshipRegistry # noqa: N815 + + @staticmethod + def load_manifest() -> "Manifest": + return Manifest.parse_file("steamship.json", content_type="application/json") + + def save(self): + with open("steamship.json", "w") as file: + json.dump(self.dict(), file, indent="\t") + + def config_template_as_dict(self): + result = {} + for 
param, spec in self.configTemplate.items(): + result[param] = {k: v for k, v in spec.dict().items() if v is not None} + + return result diff --git a/steamship/data/operations/__init__.py b/steamship/data/operations/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/steamship/data/operations/__pycache__/__init__.cpython-39.pyc b/steamship/data/operations/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb9c395d58c0e2059be5e0bbc6b1d426a5c331b1 Binary files /dev/null and b/steamship/data/operations/__pycache__/__init__.cpython-39.pyc differ diff --git a/steamship/data/operations/__pycache__/blockifier.cpython-39.pyc b/steamship/data/operations/__pycache__/blockifier.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ef8427c3f2f5e6cdb0e6bf8e60472a8628a10c8 Binary files /dev/null and b/steamship/data/operations/__pycache__/blockifier.cpython-39.pyc differ diff --git a/steamship/data/operations/__pycache__/embedder.cpython-39.pyc b/steamship/data/operations/__pycache__/embedder.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c8f5db659bed94fbb2957205824a207c5afb9a6 Binary files /dev/null and b/steamship/data/operations/__pycache__/embedder.cpython-39.pyc differ diff --git a/steamship/data/operations/__pycache__/generator.cpython-39.pyc b/steamship/data/operations/__pycache__/generator.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2c0b4c660f0dd60f98a5572fe80551b5f4b4176 Binary files /dev/null and b/steamship/data/operations/__pycache__/generator.cpython-39.pyc differ diff --git a/steamship/data/operations/__pycache__/tagger.cpython-39.pyc b/steamship/data/operations/__pycache__/tagger.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..157b2bed0a03b0972aaa0d983c1e14ffe31ac05b Binary files 
/dev/null and b/steamship/data/operations/__pycache__/tagger.cpython-39.pyc differ diff --git a/steamship/data/operations/blockifier.py b/steamship/data/operations/blockifier.py new file mode 100644 index 0000000000000000000000000000000000000000..1837b6af000e4b26c7c295b3df2d3e67e31b1b1f --- /dev/null +++ b/steamship/data/operations/blockifier.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from steamship.base.request import Request + + +class BlockifyRequest(Request): + type: str = None + plugin_instance: str = None + id: str = None + handle: str = None + name: str = None diff --git a/steamship/data/operations/embedder.py b/steamship/data/operations/embedder.py new file mode 100644 index 0000000000000000000000000000000000000000..15a00817e97672f26a151857909e28fa805ce2a5 --- /dev/null +++ b/steamship/data/operations/embedder.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from typing import Dict, List + +from steamship.base.request import Request + + +class EmbedRequest(Request): + docs: List[str] + plugin_instance: str + metadata: Dict = None diff --git a/steamship/data/operations/generator.py b/steamship/data/operations/generator.py new file mode 100644 index 0000000000000000000000000000000000000000..1867794d4110df927e4feb33644c55b337f22220 --- /dev/null +++ b/steamship/data/operations/generator.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from typing import List, Optional + +from steamship.base.request import Request +from steamship.base.response import Response +from steamship.data.block import Block + + +class GenerateRequest(Request): + """This class provides the input for a request to a Generator. There are several ways to specify the input; see below""" + + # Input Specification + # You must select one of several ways to specify input for a generator. These are exclusive. 
+ # 1 - A span of Blocks on a File + # 2 - Raw text + # 3 - A query for Blocks + # 4 - (coming soon) A public URL of content + # 5 - (coming soon) Raw bytes of content + + # Must specify plugin instance to use + plugin_instance: str = None + + # May specify blocks by their file_id. If so, may specify start and end index + input_file_id: str = None + input_file_start_block_index: int = None + input_file_end_block_index: Optional[ + int + ] = None # EXCLUSIVE end index, like most programming languages + + # May specify raw text + text: Optional[str] = None + + # May specify raw bytes (ex. an image, audio) [Not yet implemented] + # bytes: Optional[bytes] = None + + # May specify a block query. This may produce input blocks from multiple files. + block_query: Optional[str] = None + + # May specify a public URL to fetch the data from. [Not yet implemented] + # url: Optional[str] = None + + # Desired output specification + + # Whether we want the output appended to a file + append_output_to_file: bool = False + + # May specify a file to which to append the results. This may be the same file as + # the input or not. If appendOutputToFile is true but the outputFileId is not set, + # create a new file. 
+ output_file_id: Optional[str] = None + + # Arbitrary runtime options which may be passed to a generator + options: Optional[dict] + + +class GenerateResponse(Response): + blocks: List[Block] diff --git a/steamship/data/operations/tagger.py b/steamship/data/operations/tagger.py new file mode 100644 index 0000000000000000000000000000000000000000..dc09ba833cb3871c86553b963810c72fd0abb048 --- /dev/null +++ b/steamship/data/operations/tagger.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from steamship.base.request import Request +from steamship.base.response import Response + +from ..file import File + + +class TagRequest(Request): + type: str = None + id: str = None + name: str = None + handle: str = None + plugin_instance: str = None + file: File = None + + +class TagResponse(Response): + file: File = None diff --git a/steamship/data/package/__init__.py b/steamship/data/package/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8bffdef4441ff2a0f38bd576ad8af41447879dd7 --- /dev/null +++ b/steamship/data/package/__init__.py @@ -0,0 +1,9 @@ +from .package import Package +from .package_instance import PackageInstance +from .package_version import PackageVersion + +__all__ = [ + "Package", + "PackageInstance", + "PackageVersion", +] diff --git a/steamship/data/package/__pycache__/__init__.cpython-39.pyc b/steamship/data/package/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a39f3d56ef65ece2dc1b0effa07f2aeaa90cbca Binary files /dev/null and b/steamship/data/package/__pycache__/__init__.cpython-39.pyc differ diff --git a/steamship/data/package/__pycache__/package.cpython-39.pyc b/steamship/data/package/__pycache__/package.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e1e6479d25c5b2c4ff9539c37290acc2d39b53e1 Binary files /dev/null and b/steamship/data/package/__pycache__/package.cpython-39.pyc differ diff --git 
a/steamship/data/package/__pycache__/package_instance.cpython-39.pyc b/steamship/data/package/__pycache__/package_instance.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c1a7e770fc124385b00b02063fc83a76cfc4019d Binary files /dev/null and b/steamship/data/package/__pycache__/package_instance.cpython-39.pyc differ diff --git a/steamship/data/package/__pycache__/package_version.cpython-39.pyc b/steamship/data/package/__pycache__/package_version.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be3032909f070e3092d908161a8a74d9300cc67a Binary files /dev/null and b/steamship/data/package/__pycache__/package_version.cpython-39.pyc differ diff --git a/steamship/data/package/package.py b/steamship/data/package/package.py new file mode 100644 index 0000000000000000000000000000000000000000..f9519342128cf48deabde2d0cbf94d90b34c3181 --- /dev/null +++ b/steamship/data/package/package.py @@ -0,0 +1,73 @@ +# +# This is the CLIENT-side abstraction for an invocable. 
+# +# If you are implementing a package, see: steamship.invocable.server.App +# + +from __future__ import annotations + +from typing import Any, Optional, Type + +from pydantic import BaseModel, Field + +from steamship.base.client import Client +from steamship.base.model import CamelModel +from steamship.base.request import CreateRequest, GetRequest, UpdateRequest +from steamship.data.manifest import Manifest + + +class PackageCreateRequest(CreateRequest): + is_public: bool = False + fetch_if_exists = False + profile: Optional[Manifest] = None + + +class PackageUpdateRequest(UpdateRequest): + id: Optional[str] = None + handle: Optional[str] = None + description: Optional[str] = None + profile: Optional[Manifest] = None + readme: Optional[str] = None + + +class Package(CamelModel): + client: Client = Field(None, exclude=True) + id: str = None + handle: str = None + user_id: str = None + profile: Optional[Manifest] = None + description: Optional[str] = None + readme: Optional[str] = None + is_public: bool = False + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel: + # TODO (enias): This needs to be solved at the engine side + obj = obj["package"] if "package" in obj else obj + return super().parse_obj(obj) + + @staticmethod + def create( + client: Client, + handle: str = None, + profile: Manifest = None, + is_public=False, + fetch_if_exists=False, + ) -> Package: + req = PackageCreateRequest( + handle=handle, profile=profile, is_public=is_public, fetch_if_exists=fetch_if_exists + ) + return client.post("package/create", payload=req, expect=Package) + + @staticmethod + def get(client: Client, handle: str) -> Package: + return client.post("package/get", GetRequest(handle=handle), expect=Package) + + def update(self, client: Client) -> Package: + return client.post( + "package/update", + PackageUpdateRequest( + id=self.id, description=self.description, profile=self.profile, readme=self.readme + ), + expect=Package, + ) diff --git 
a/steamship/data/package/package_instance.py b/steamship/data/package/package_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..6122e182afa84599f43be1c266843ca4add0d30b --- /dev/null +++ b/steamship/data/package/package_instance.py @@ -0,0 +1,111 @@ +from __future__ import annotations + +from typing import Any, Dict, Optional, Type + +from pydantic import BaseModel, Field + +from steamship.base.client import Client +from steamship.base.model import CamelModel +from steamship.base.request import DeleteRequest, IdentifierRequest, Request +from steamship.data.workspace import Workspace +from steamship.utils.url import Verb + + +class CreatePackageInstanceRequest(Request): + id: str = None + package_id: str = None + package_handle: str = None + package_version_id: str = None + package_version_handle: str = None + handle: str = None + fetch_if_exists: bool = None + config: Dict[str, Any] = None + workspace_id: str = None + + +class PackageInstance(CamelModel): + client: Client = Field(None, exclude=True) + id: str = None + handle: str = None + package_id: str = None + package_handle: Optional[str] = None + user_handle: str = None + package_version_id: str = None + package_version_handle: Optional[str] = None + user_id: str = None + invocation_url: str = None + config: Dict[str, Any] = None + workspace_id: str = None + workspace_handle: str = None + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel: + # TODO (enias): This needs to be solved at the engine side + obj = obj["packageInstance"] if "packageInstance" in obj else obj + return super().parse_obj(obj) + + @staticmethod + def create( + client: Client, + package_id: str = None, + package_handle: str = None, + package_version_id: str = None, + package_version_handle: str = None, + handle: str = None, + fetch_if_exists: bool = None, + config: Dict[str, Any] = None, + ) -> PackageInstance: + req = CreatePackageInstanceRequest( + handle=handle, + 
package_id=package_id, + package_handle=package_handle, + package_version_id=package_version_id, + package_version_handle=package_version_handle, + fetch_if_exists=fetch_if_exists, + config=config, + ) + + return client.post("package/instance/create", payload=req, expect=PackageInstance) + + def delete(self) -> PackageInstance: + req = DeleteRequest(id=self.id) + return self.client.post("package/instance/delete", payload=req, expect=PackageInstance) + + def load_missing_workspace_handle(self): + if ( + self.client is not None + and self.workspace_handle is None + and self.workspace_id is not None + ): + # Get the workspaceHandle + workspace = Workspace.get(self.client, id_=self.workspace_id) + if workspace: + self.workspace_handle = workspace.handle + + @staticmethod + def get(client: Client, handle: str) -> PackageInstance: + return client.post( + "package/instance/get", IdentifierRequest(handle=handle), expect=PackageInstance + ) + + def invoke( + self, path: str, verb: Verb = Verb.POST, timeout_s: Optional[float] = None, **kwargs + ): + self.load_missing_workspace_handle() + if path[0] == "/": + path = path[1:] + + return self.client.call( + verb=verb, + operation=f"/{self.workspace_handle or '_'}/{self.handle or '_'}/{path}", + payload=kwargs, + is_package_call=True, + package_owner=self.user_handle, + package_id=self.package_id, + package_instance_id=self.id, + as_background_task=False, + timeout_s=timeout_s, + ) + + def full_url_for(self, path: str): + return f"{self.invocation_url}{path}" diff --git a/steamship/data/package/package_version.py b/steamship/data/package/package_version.py new file mode 100644 index 0000000000000000000000000000000000000000..aaac275d9e09ccaf09eb68020fb4b45a6789606b --- /dev/null +++ b/steamship/data/package/package_version.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import json +from typing import Any, Dict, Type + +from pydantic import BaseModel, Field + +from steamship.base.client import Client +from 
steamship.base.model import CamelModel +from steamship.base.request import Request + + +class CreatePackageVersionRequest(Request): + package_id: str = None + handle: str = None + type: str = "file" + hosting_handler: str = None + # Note: this is a Dict[str, Any] but should be transmitted to the Engine as a JSON string + config_template: str = None + + +class PackageVersion(CamelModel): + client: Client = Field(None, exclude=True) + id: str = None + package_id: str = None + handle: str = None + config_template: Dict[str, Any] = None + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel: + # TODO (enias): This needs to be solved at the engine side + obj = obj["packageVersion"] if "packageVersion" in obj else obj + return super().parse_obj(obj) + + @staticmethod + def create( + client: Client, + package_id: str = None, + handle: str = None, + filename: str = None, + filebytes: bytes = None, + config_template: Dict[str, Any] = None, + hosting_handler: str = None, + ) -> PackageVersion: + + if filename is None and filebytes is None: + raise Exception("Either filename or filebytes must be provided.") + if filename is not None and filebytes is not None: + raise Exception("Only either filename or filebytes should be provided.") + + if filename is not None: + with open(filename, "rb") as f: + filebytes = f.read() + + req = CreatePackageVersionRequest( + handle=handle, + package_id=package_id, + config_template=json.dumps(config_template or {}), + hosting_handler=hosting_handler, + ) + + task = client.post( + "package/version/create", + payload=req, + file=("package.zip", filebytes, "multipart/form-data"), + expect=PackageVersion, + ) + task.wait() + return task.output diff --git a/steamship/data/plugin/__init__.py b/steamship/data/plugin/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7e456085f95697eee99e0c3202b7495abaa574f2 --- /dev/null +++ b/steamship/data/plugin/__init__.py @@ -0,0 +1,18 @@ +from .hosting import 
HostingCpu, HostingEnvironment, HostingMemory, HostingTimeout, HostingType +from .plugin import Plugin, PluginAdapterType, PluginTargetType, PluginType +from .plugin_instance import PluginInstance +from .plugin_version import PluginVersion + +__all__ = [ + "Plugin", + "PluginVersion", + "PluginInstance", + "HostingMemory", + "HostingTimeout", + "HostingCpu", + "HostingEnvironment", + "HostingType", + "PluginType", + "PluginAdapterType", + "PluginTargetType", +] diff --git a/steamship/data/plugin/__pycache__/__init__.cpython-39.pyc b/steamship/data/plugin/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..25bdf2ea475aa08a0a3da77d9fb759b75c87326e Binary files /dev/null and b/steamship/data/plugin/__pycache__/__init__.cpython-39.pyc differ diff --git a/steamship/data/plugin/__pycache__/hosting.cpython-39.pyc b/steamship/data/plugin/__pycache__/hosting.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a196eced4b92df59dfb2b81f34272ec57a76743f Binary files /dev/null and b/steamship/data/plugin/__pycache__/hosting.cpython-39.pyc differ diff --git a/steamship/data/plugin/__pycache__/index_plugin_instance.cpython-39.pyc b/steamship/data/plugin/__pycache__/index_plugin_instance.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4706a2427caf5dbc70954c726e53b9cef1624354 Binary files /dev/null and b/steamship/data/plugin/__pycache__/index_plugin_instance.cpython-39.pyc differ diff --git a/steamship/data/plugin/__pycache__/plugin.cpython-39.pyc b/steamship/data/plugin/__pycache__/plugin.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b92b0c8895fc6a028fd3cb6775c1493b27ea8383 Binary files /dev/null and b/steamship/data/plugin/__pycache__/plugin.cpython-39.pyc differ diff --git a/steamship/data/plugin/__pycache__/plugin_instance.cpython-39.pyc b/steamship/data/plugin/__pycache__/plugin_instance.cpython-39.pyc new file mode 
"""Symbolic hosting requirements for deployed packages and plugins.

Sizes are declared as "T-shirt" symbols; the engine maps each symbol to a
concrete value depending on the HostingType it is combined with.
"""
from enum import Enum


class HostingType(str, Enum):
    """The type of hosting provider to deploy to."""

    LAMBDA = "lambda"
    ECS = "ecs"


class HostingEnvironment(str, Enum):
    """The software environment required for deployment."""

    PYTHON38 = "python38"
    STEAMSHIP_PYTORCH_CPU = "inferenceCpu"


class HostingMemory(str, Enum):
    """The amount of memory required for deployment.

    This is mapped to a value dependent on the HostingType it is combined with.
    """

    MIN = "min"
    XXS = "xxs"
    XS = "xs"
    SM = "sm"
    MD = "md"
    LG = "lg"
    XL = "xl"
    XXL = "xxl"
    MAX = "max"


class HostingCpu(str, Enum):
    """The amount of CPU required for deployment.

    This is mapped to a value dependent on the HostingType it is combined with.
    """

    MIN = "min"
    XXS = "xxs"
    XS = "xs"
    SM = "sm"
    MD = "md"
    LG = "lg"
    XL = "xl"
    XXL = "xxl"
    MAX = "max"


class HostingTimeout(str, Enum):
    """The request timeout required for deployment.

    This is mapped to a value dependent on the HostingType it is combined with.
    """

    MIN = "min"
    XXS = "xxs"
    XS = "xs"
    SM = "sm"
    MD = "md"
    LG = "lg"
    XL = "xl"
    XXL = "xxl"
    MAX = "max"
class SearchResults(CamelModel):
    """Results of a search operation -- which is always a list of ranked tags.

    This class is intended to eventually replace the QueryResults object currently used
    with the Embedding layer.
    TODO: add in paging support."""

    items: List[SearchResult] = None

    @staticmethod
    def from_query_results(query_results: QueryResults) -> "SearchResults":
        """Project each legacy QueryResult into a SearchResult."""
        items = [SearchResult.from_query_result(qr) for qr in query_results.items or []]
        return SearchResults(items=items)


class EmbeddingIndexPluginInstance(PluginInstance):
    """A persistent, read-optimized index over embeddings.

    This is currently implemented as an object which behaves like a PluginInstance even though
    it isn't from an implementation perspective on the back-end.
    """

    client: Client = Field(None, exclude=True)
    embedder: PluginInstance = Field(None, exclude=True)
    index: EmbeddingIndex = Field(None, exclude=True)

    def delete(self):
        """Delete the EmbeddingIndexPluginInstance.

        For now, we will have this correspond to deleting the `index` but not the `embedder`.
        This is likely a temporary design.
        """
        return self.index.delete()

    def insert(self, tags: Union[Tag, List[Tag]], allow_long_records: bool = False):
        """Insert tags into the embedding index.

        The caller's Tag objects are left unmodified: the internal bookkeeping keys
        are stashed into a *copy* of each tag's value dict.

        Raises:
            SteamshipError: if a tag has no `text`, or its `value` is neither a dict nor None.
        """
        # Make a list if a single tag was provided
        if isinstance(tags, Tag):
            tags = [tags]

        embedded_items = []
        for tag in tags:
            if not tag.text:
                raise SteamshipError(
                    message="Please set the `text` field of your Tag before inserting it into an index."
                )

            metadata = tag.value or {}
            if not isinstance(metadata, dict):
                raise SteamshipError(
                    "Only Tags with a dict or None value can be embedded. "
                    + f"This tag had a value of type: {type(tag.value)}"
                )

            # To make this change Python-only, some fields are stashed in `hit.metadata`.
            # This has the temporary consequence of these keys not being safe. This will
            # be resolved when we spread this refactor to the engine.
            # Copy the dict so the caller's tag is not mutated in place (the
            # original implementation injected these keys into tag.value itself).
            metadata = dict(metadata)
            metadata["_file_id"] = tag.file_id
            metadata["_tag_id"] = tag.id
            metadata["_block_id"] = tag.block_id

            embedded_items.append(
                EmbeddedItem(
                    value=tag.text,
                    external_id=tag.name,
                    external_type=tag.kind,
                    metadata=metadata,
                )
            )

        # We always reindex in this new style; to not do so is to expose details (when
        # embedding occurs) we'd rather not have users exercise control over.
        self.index.insert_many(embedded_items, reindex=True, allow_long_records=allow_long_records)

    def search(self, query: str, k: Optional[int] = None) -> Task[SearchResults]:
        """Search the embedding index.

        This wrapper implementation simply projects the `Hit` data structure into a `Tag`.
        """
        if query is None or len(query.strip()) == 0:
            raise SteamshipError(message="Query field must be non-empty.")

        # Metadata will always be included; this is the equivalent of Tag.value
        wrapped_result = self.index.search(query, k=k, include_metadata=True)

        # For now, we'll have to do this synchronously since we're trying to avoid
        # changing things on the engine.
        wrapped_result.wait()

        # We're going to do a switcheroo on the output type of Task here.
        search_results = SearchResults.from_query_results(wrapped_result.output)
        wrapped_result.output = search_results

        # Return the index's search result, but projected into the data structure of Tags
        return cast(Task[SearchResults], wrapped_result)

    @staticmethod
    def create(
        client: Any,
        plugin_id: str = None,
        plugin_handle: str = None,
        plugin_version_id: str = None,
        plugin_version_handle: str = None,
        handle: str = None,
        fetch_if_exists: bool = True,
        config: Dict[str, Any] = None,
    ) -> "EmbeddingIndexPluginInstance":
        """Create a class that simulates an embedding index re-implemented as a PluginInstance.

        Raises:
            SteamshipError: if `config` is missing the required `embedder` key.
        """
        # Perform a manual config validation check since the configuration isn't actually
        # being sent up to the Engine. In this case, an embedding index has special behavior
        # which is to instantiate/fetch an Embedder that it can use.
        # Guard against config=None (the parameter default) as well as a missing
        # key; the original `"embedder" not in config` raised TypeError on None.
        if not config or "embedder" not in config:
            raise SteamshipError(
                message="Config key missing. Please include a field named `embedder` with type `EmbedderInvocation`."
            )

        # Just for pydantic validation.
        embedder_invocation = EmbedderInvocation.parse_obj(config["embedder"])

        # Create the embedder
        embedder = client.use_plugin(**embedder_invocation.dict())

        # Create the index
        index = EmbeddingIndex.create(
            client=client,
            handle=handle,
            embedder_plugin_instance_handle=embedder.handle,
            fetch_if_exists=fetch_if_exists,
        )

        # Now return the plugin wrapper
        return EmbeddingIndexPluginInstance(
            id=index.id, handle=index.handle, index=index, embedder=embedder
        )
+ embedder_invocation = EmbedderInvocation.parse_obj(config["embedder"]) + + # Create the embedder + embedder = client.use_plugin(**embedder_invocation.dict()) + + # Create the index + index = EmbeddingIndex.create( + client=client, + handle=handle, + embedder_plugin_instance_handle=embedder.handle, + fetch_if_exists=fetch_if_exists, + ) + + # Now return the plugin wrapper + return EmbeddingIndexPluginInstance( + id=index.id, handle=index.handle, index=index, embedder=embedder + ) diff --git a/steamship/data/plugin/plugin.py b/steamship/data/plugin/plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..8776f9f801952687f1e9078d227dbb818c8943a6 --- /dev/null +++ b/steamship/data/plugin/plugin.py @@ -0,0 +1,146 @@ +# Plugin +# +# This file contains the abstractions for managing Steamship plugins. +# To see how to implement a Steamship Plugin, see plugin_service.py in the same folder. +# +# + +from __future__ import annotations + +import json +from enum import Enum +from typing import Any, Dict, List, Optional, Type, Union + +from pydantic import BaseModel, Field + +from steamship.base.client import Client +from steamship.base.model import CamelModel +from steamship.base.request import IdentifierRequest, Request, UpdateRequest +from steamship.base.response import Response +from steamship.data.manifest import Manifest + +from .hosting import HostingType + + +class CreatePluginRequest(Request): + training_platform: Optional[HostingType] = None + id: str = None + type: str = None + transport: str = None + is_public: bool = None + handle: str = None + description: str = None + metadata: str = None + fetch_if_exists: bool = False + + +class PluginUpdateRequest(UpdateRequest): + id: Optional[str] = None + handle: Optional[str] = None + description: Optional[str] = None + profile: Optional[Manifest] = None + readme: Optional[str] = None + + +class ListPluginsRequest(Request): + type: Optional[str] = None + + +class ListPluginsResponse(Response): + 
plugins: List[Plugin] + + +class PluginType(str, Enum): + parser = "parser" + classifier = "classifier" + tagger = "tagger" + embedder = "embedder" + generator = "generator" + + +class PluginAdapterType(str, Enum): + steamship_docker = "steamshipDocker" + steamship_sagemaker = "steamshipSagemaker" + huggingface = "huggingface" + openai = "openai" + + +class PluginTargetType(str, Enum): + FILE = "file" + WORKSPACE = "workspace" + + +class Plugin(CamelModel): + client: Client = Field(None, exclude=True) + id: str = None + type: str = None + transport: str = None + is_public: bool = None + training_platform: Optional[HostingType] = None + handle: str = None + description: str = None + metadata: str = None + profile: Optional[Manifest] = None + readme: Optional[str] = None + user_id: Optional[str] = None + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel: + # TODO (enias): This needs to be solved at the engine side + obj = obj["plugin"] if "plugin" in obj else obj + return super().parse_obj(obj) + + @staticmethod + def create( + client: Client, + description: str, + type_: str, + transport: str, + is_public: bool, + handle: str = None, + training_platform: Optional[HostingType] = None, + metadata: Union[str, Dict, List] = None, + fetch_if_exists: bool = False, + ) -> Plugin: + if isinstance(metadata, dict) or isinstance(metadata, list): + metadata = json.dumps(metadata) + + req = CreatePluginRequest( + training_platform=training_platform, + type=type_, + transport=transport, + is_public=is_public, + handle=handle, + description=description, + metadata=metadata, + fetch_if_exists=fetch_if_exists, + ) + return client.post( + "plugin/create", + req, + expect=Plugin, + ) + + @staticmethod + def list(client: Client, t: str = None) -> ListPluginsResponse: + return client.post( + "plugin/list", + ListPluginsRequest(type=t), + expect=ListPluginsResponse, + ) + + @staticmethod + def get(client: Client, handle: str): + return client.post("plugin/get", 
IdentifierRequest(handle=handle), expect=Plugin) + + def update(self, client: Client) -> Plugin: + return client.post( + "plugin/update", + PluginUpdateRequest( + id=self.id, description=self.description, profile=self.profile, readme=self.readme + ), + expect=Plugin, + ) + + +ListPluginsResponse.update_forward_refs() diff --git a/steamship/data/plugin/plugin_instance.py b/steamship/data/plugin/plugin_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..39d171c1b0e5d966d5f8d7ab12e2e696310d3f18 --- /dev/null +++ b/steamship/data/plugin/plugin_instance.py @@ -0,0 +1,184 @@ +from __future__ import annotations + +from typing import Any, Dict, Optional, Type, Union + +from pydantic import BaseModel, Field + +from steamship.base import Task +from steamship.base.client import Client +from steamship.base.model import CamelModel +from steamship.base.request import DeleteRequest, IdentifierRequest, Request +from steamship.data.block import Block +from steamship.data.file import File +from steamship.data.operations.generator import GenerateRequest, GenerateResponse +from steamship.data.operations.tagger import TagRequest, TagResponse +from steamship.data.plugin import ( + HostingCpu, + HostingEnvironment, + HostingMemory, + HostingTimeout, + HostingType, +) +from steamship.plugin.inputs.export_plugin_input import ExportPluginInput +from steamship.plugin.inputs.training_parameter_plugin_input import TrainingParameterPluginInput +from steamship.plugin.outputs.train_plugin_output import TrainPluginOutput +from steamship.plugin.outputs.training_parameter_plugin_output import TrainingParameterPluginOutput + + +class CreatePluginInstanceRequest(Request): + id: str = None + plugin_id: str = None + plugin_handle: str = None + plugin_version_id: str = None + plugin_version_handle: str = None + handle: str = None + fetch_if_exists: bool = None + config: Dict[str, Any] = None + + +SIGNED_URL_EXPORTER_INSTANCE_HANDLE = "signed-url-exporter-1.0" + + +class 
class PluginInstance(CamelModel):
    """A configured, runnable instance of a Steamship plugin.

    Wraps the engine's `plugin/instance/*` endpoints: tagging, generation,
    training, and lifecycle management.
    """

    client: Client = Field(None, exclude=True)  # Excluded from serialized payloads.
    id: str = None
    handle: str = None
    plugin_id: str = None
    plugin_version_id: str = None
    plugin_handle: Optional[str] = None
    plugin_version_handle: Optional[str] = None
    workspace_id: Optional[str] = None
    user_id: str = None
    config: Dict[str, Any] = None
    hosting_type: Optional[HostingType] = None
    hosting_cpu: Optional[HostingCpu] = None
    hosting_memory: Optional[HostingMemory] = None
    hosting_timeout: Optional[HostingTimeout] = None
    hosting_environment: Optional[HostingEnvironment] = None

    @classmethod
    def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel:
        # TODO (enias): This needs to be solved at the engine side
        # The engine wraps the payload in a "pluginInstance" key; unwrap it.
        obj = obj["pluginInstance"] if "pluginInstance" in obj else obj
        return super().parse_obj(obj)

    @staticmethod
    def create(
        client: Client,
        plugin_id: str = None,
        plugin_handle: str = None,
        plugin_version_id: str = None,
        plugin_version_handle: str = None,
        handle: str = None,
        fetch_if_exists: bool = True,
        config: Dict[str, Any] = None,
    ) -> PluginInstance:
        """Create a plugin instance

        When handle is empty the engine will automatically assign one
        fetch_if_exists controls whether we want to re-use an existing plugin instance or not."""
        req = CreatePluginInstanceRequest(
            handle=handle,
            plugin_id=plugin_id,
            plugin_handle=plugin_handle,
            plugin_version_id=plugin_version_id,
            plugin_version_handle=plugin_version_handle,
            fetch_if_exists=fetch_if_exists,
            config=config,
        )

        return client.post("plugin/instance/create", payload=req, expect=PluginInstance)

    @staticmethod
    def get(client: Client, handle: str) -> PluginInstance:
        """Fetch an existing plugin instance by its handle."""
        return client.post(
            "plugin/instance/get", IdentifierRequest(handle=handle), expect=PluginInstance
        )

    def tag(
        self,
        doc: Union[str, File],
    ) -> Task[
        TagResponse
    ]:  # TODO (enias): Should we remove this helper function in favor of always working with files?
        """Tag a document with this plugin instance.

        A plain string is wrapped in a single-block in-memory File before tagging.
        """
        req = TagRequest(
            type="inline",
            file=File(blocks=[Block(text=doc)]) if isinstance(doc, str) else doc,
            plugin_instance=self.handle,
        )
        return self.client.post(
            "plugin/instance/tag",
            req,
            expect=TagResponse,
        )

    def generate(
        self,
        input_file_id: str = None,
        input_file_start_block_index: int = None,
        input_file_end_block_index: Optional[int] = None,
        text: Optional[str] = None,
        # bytes: Optional[bytes] = None, [Not yet implemented]
        block_query: Optional[str] = None,
        # url: Optional[str] = None, [Not yet implemented]
        append_output_to_file: bool = False,
        output_file_id: Optional[str] = None,
        options: Optional[dict] = None,
    ):
        """Run generation with this plugin instance.

        Input may come from a file range, raw `text`, or a `block_query`;
        output may optionally be appended to a file.
        """
        req = GenerateRequest(
            plugin_instance=self.handle,
            input_file_id=input_file_id,
            input_file_start_block_index=input_file_start_block_index,
            input_file_end_block_index=input_file_end_block_index,
            text=text,
            # bytes=bytes,
            block_query=block_query,
            # url=url,
            append_output_to_file=append_output_to_file,
            output_file_id=output_file_id,
            options=options,
        )
        return self.client.post("plugin/instance/generate", req, expect=GenerateResponse)

    def delete(self) -> PluginInstance:
        """Delete this plugin instance from the engine."""
        req = DeleteRequest(id=self.id)
        return self.client.post("plugin/instance/delete", payload=req, expect=PluginInstance)

    def train(
        self,
        training_request: TrainingParameterPluginInput = None,
        training_epochs: Optional[int] = None,
        export_query: Optional[str] = None,
        testing_holdout_percent: Optional[float] = None,
        test_split_seed: Optional[int] = None,
        training_params: Optional[Dict] = None,
        inference_params: Optional[Dict] = None,
    ) -> Task[TrainPluginOutput]:
        """Train a plugin instance. Please provide either training_request OR the other parameters; passing
        training_request ignores all other parameters, but is kept for backwards compatibility.
        """
        # When no explicit request is given, build one; the export step uses the
        # well-known signed-url exporter instance.
        input_params = training_request or TrainingParameterPluginInput(
            plugin_instance=self.handle,
            training_epochs=training_epochs,
            export_plugin_input=ExportPluginInput(
                plugin_instance=SIGNED_URL_EXPORTER_INSTANCE_HANDLE, type="file", query=export_query
            ),
            testing_holdout_percent=testing_holdout_percent,
            test_split_seed=test_split_seed,
            training_params=training_params,
            inference_params=inference_params,
        )
        return self.client.post(
            "plugin/instance/train",
            payload=input_params,
            expect=TrainPluginOutput,
        )

    def get_training_parameters(
        self, training_request: TrainingParameterPluginInput
    ) -> TrainingParameterPluginOutput:
        """Ask the engine for the resolved training parameters for this instance."""
        return self.client.post(
            "plugin/instance/getTrainingParameters",
            payload=training_request,
            expect=TrainingParameterPluginOutput,
        )
plugin_id: str + + +class ListPluginVersionsResponse(Response): + plugins: List[PluginVersion] + + +class PluginVersion(CamelModel): + client: Client = Field(None, exclude=True) + id: str = None + plugin_id: str = None + handle: str = None + hosting_memory: Optional[HostingMemory] = None + hosting_timeout: Optional[HostingTimeout] = None + hosting_handler: str = None + is_public: bool = None + is_default: bool = None + config_template: Dict[str, Any] = None + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel: + # TODO (enias): This needs to be solved at the engine side + obj = obj["pluginVersion"] if "pluginVersion" in obj else obj + return super().parse_obj(obj) + + @staticmethod + def create( + client: Client, + handle: str, + plugin_id: str = None, + filename: str = None, + filebytes: bytes = None, + hosting_memory: Optional[HostingMemory] = None, + hosting_timeout: Optional[HostingTimeout] = None, + hosting_handler: str = None, + is_public: bool = None, + is_default: bool = None, + config_template: Dict[str, Any] = None, + ) -> Task[PluginVersion]: + + if filename is None and filebytes is None: + raise Exception("Either filename or filebytes must be provided.") + if filename is not None and filebytes is not None: + raise Exception("Only either filename or filebytes should be provided.") + + if filename is not None: + with open(filename, "rb") as f: + filebytes = f.read() + + req = CreatePluginVersionRequest( + handle=handle, + plugin_id=plugin_id, + hosting_memory=hosting_memory, + hosting_timeout=hosting_timeout, + hosting_handler=hosting_handler, + is_public=is_public, + is_default=is_default, + config_template=json.dumps(config_template or {}), + ) + + task = client.post( + "plugin/version/create", + payload=req, + file=("plugin.zip", filebytes, "multipart/form-data"), + expect=PluginVersion, + ) + + task.wait() + return task.output + + @staticmethod + def list( + client: Client, plugin_id: str = None, handle: str = None, public: 
import logging
from typing import Any, Dict, Optional

from steamship.base.client import Client
from steamship.base.error import SteamshipError
from steamship.data.plugin.plugin_instance import CreatePluginInstanceRequest, PluginInstance
from steamship.data.tags.tag_constants import TagKind, TagValueKey


class PromptGenerationPluginInstance(PluginInstance):
    """An instance of a configured prompt completion service such as GPT-3.

    The `generate` method synchronously invokes the prompt against a set of variables that parameterize it.
    The return value is a single string.

    Example Usage:
       llm = Steamship.use('prompt-generation-default', config={ "temperature": 0.9 })
       PROMPT = "Greet {name} as if he were a {relation}."
       greeting = llm.generate(PROMPT, {"name": "Ted", "relation": "old friend"})
    """

    def generate(
        self, prompt: str, variables: Optional[Dict] = None, clean_output: bool = True
    ) -> str:
        """Complete the provided prompt, interpolating any variables.

        :param prompt: A Python format string, e.g. ``"Greet {name}."``.
        :param variables: Values substituted into the prompt; missing keys raise SteamshipError.
        :param clean_output: When True, strip whitespace and any trailing partial sentence.
        :return: The generated completion, or "" if no generation tag was produced.
        """
        # Interpolate the prompt with Python formatting semantics. If no variables provided, supply an empty dict.
        try:
            prompt_text = prompt.format(**(variables or {}))
        except KeyError as e:
            raise SteamshipError(
                message="Some variables in the prompt template were not provided.", error=e
            )

        # This requests generation from the parameterized prompt. Tagging with our prompt generator
        # plugin will result in a new tag that contains the generated output.
        tag_task = self.tag(doc=prompt_text)

        # We `wait()` because generation of text is done asynchronously and may take a few moments
        # (somewhat depending on the complexity of your prompt).
        tag_task.wait()

        # Here, we iterate through the content blocks associated with a file
        # as well as any tags on that content to find the generated text.
        #
        # The Steamship data model provides flexible content organization,
        # storage, and lookup. Read more about the data model via:
        # https://docs.steamship.com/workspaces/data_model/index.html
        try:
            for text_block in tag_task.output.file.blocks:
                for block_tag in text_block.tags:
                    if block_tag.kind == TagKind.GENERATION:
                        generation = block_tag.value[TagValueKey.STRING_VALUE]
                        if clean_output:
                            return self._clean_output(generation)
                        else:
                            return generation
        except Exception as e:
            logging.error(
                "generate() got unexpected response shape back. This suggests an error rather an merely an empty response."
            )
            logging.exception(e)
            raise e
        return ""

    @staticmethod
    def create(
        client: Client,
        plugin_id: str = None,
        plugin_handle: str = None,
        plugin_version_id: str = None,
        plugin_version_handle: str = None,
        handle: str = None,
        fetch_if_exists: bool = True,
        config: Dict[str, Any] = None,
    ) -> "PromptGenerationPluginInstance":
        """Create a plugin instance

        When handle is empty the engine will automatically assign one
        fetch_if_exists controls whether we want to re-use an existing plugin instance or not."""
        req = CreatePluginInstanceRequest(
            handle=handle,
            plugin_id=plugin_id,
            plugin_handle=plugin_handle,
            plugin_version_id=plugin_version_id,
            plugin_version_handle=plugin_version_handle,
            fetch_if_exists=fetch_if_exists,
            config=config,
        )

        return client.post(
            "plugin/instance/create", payload=req, expect=PromptGenerationPluginInstance
        )

    def _clean_output(self, text: str) -> str:
        """Remove any leading/trailing whitespace and partial sentences.

        This assumes that your generated output will include consistent punctuation. You may
        want to alter this method to better fit the format of your generated text.
        """
        # Find the index of the LAST sentence-final punctuation mark.
        # NOTE: previous version computed `len(text) - i`, which is one PAST the
        # punctuation character and therefore kept one character of the trailing
        # partial sentence (e.g. "Hi.x" -> "Hi.x" instead of "Hi.").
        last_punc = -1
        for offset, char in enumerate(reversed(text)):
            if char in '.!?"':
                last_punc = len(text) - 1 - offset  # index of the punctuation itself
                break
        # Truncate everything after the final punctuation mark, if one was found.
        if last_punc != -1:
            result = text[: last_punc + 1]
        else:
            result = text
        return result.strip()
0000000000000000000000000000000000000000..28be42616528b32a86fa81fc6c15e0743dccd7b8 Binary files /dev/null and b/steamship/data/tags/__pycache__/tag.cpython-39.pyc differ diff --git a/steamship/data/tags/__pycache__/tag_constants.cpython-39.pyc b/steamship/data/tags/__pycache__/tag_constants.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b85f8b506bfbe491ba02f5abae0ce93b56787a1 Binary files /dev/null and b/steamship/data/tags/__pycache__/tag_constants.cpython-39.pyc differ diff --git a/steamship/data/tags/tag.py b/steamship/data/tags/tag.py new file mode 100644 index 0000000000000000000000000000000000000000..9d16bb711ee41029d1d71057fd7a39a72eb93d80 --- /dev/null +++ b/steamship/data/tags/tag.py @@ -0,0 +1,340 @@ +from __future__ import annotations + +from enum import Enum +from typing import Any, Dict, List, Optional, Union + +from pydantic import Field + +from steamship import SteamshipError +from steamship.base.client import Client +from steamship.base.model import CamelModel +from steamship.base.request import Request +from steamship.base.response import Response +from steamship.data.tags.tag_constants import GenerationTag, TagKind, TagValueKey + + +class TagQueryRequest(Request): + tag_filter_query: str + + +class Tag(CamelModel): + # Steamship client. + client: Client = Field(None, exclude=True) + + # ID of the tag in the database. + id: str = None + + # ID of the file associated with the tag. + file_id: str = None + + # ID of the block associated with the tag. If not None, `start_idx` and `end_idx` should be set. + block_id: Optional[str] = None + + # The kind of tag. See the ``TagKind`` enum class for suggestions. + kind: Union[TagKind, str] = None # E.g. ner + + # The name of tag. See the ``DocTag``, ``TokenTag``, etc enum classes for suggestions. + name: Optional[str] = None # E.g. person + + # The value payload of the tag. Always a JSON-style object. 
+ value: Optional[Dict[Union[TagValueKey, str], Any]] = None + + # Character index in associated block of the start of the span of text this tag comments upon. Start-inclusive. + start_idx: Optional[int] = None + + # Character index in associated block of the end of the span of text this tag comments upon. End-exclusive. + end_idx: Optional[int] = None + + # The text covered by the tag. + # Note: + # The text will not always be materialized into the tag object + # itself; you may have to fetch it with file.text[tag.start_idx:tag.end_idx] + # Note: + # Changing this field will not result in changes to Steamship's database. + # TODO(ted): Consider refactoring as a read-only property. + # + text: Optional[str] = None + + class DeleteRequest(Request): + id: str = None + file_id: str = None + block_id: str = None + + class ListRequest(Request): + file_id: str = None + block_id: str = None + + class ListResponse(Response): + tags: List[Tag] = None + + @staticmethod + def create( + client: Client, + file_id: str = None, + block_id: str = None, + kind: str = None, + name: str = None, + start_idx: int = None, + end_idx: int = None, + value: Dict[str, Any] = None, + ) -> Tag: + req = Tag( + file_id=file_id, + block_id=block_id, + kind=kind, + name=name, + start_idx=start_idx, + end_idx=end_idx, + value=value, + ) + return client.post("tag/create", req, expect=Tag) + + def delete(self) -> Tag: + return self.client.post( + "tag/delete", + Tag.DeleteRequest(id=self.id, file_id=self.file_id, block_id=self.block_id), + expect=Tag, + ) + + def index(self, plugin_instance: Any = None): + """Index this tag.""" + return plugin_instance.insert(self) + + @staticmethod + def query( + client: Client, + tag_filter_query: str, + ) -> TagQueryResponse: + req = TagQueryRequest(tag_filter_query=tag_filter_query) + res = client.post( + "tag/query", + payload=req, + expect=TagQueryResponse, + ) + return res + + +class TimestampTag(Tag): + def __init__( + self, + start_time_s: float, + 
end_time_s: float, + start_idx: Optional[int] = None, + end_idx: Optional[int] = None, + value: Optional[Dict[str, Any]] = None, + ): + value = value or {} + super().__init__( + kind=TagKind.TIMESTAMP, + start_idx=start_idx, + end_idx=end_idx, + value={ + **value, + TagValueKey.START_TIME_S: start_time_s, + TagValueKey.END_TIME_S: end_time_s, + }, + ) + + +class TokenizationTag(Tag): + class Type(str, Enum): + PARAGRAPH = "paragraph" + SENTENCE = "sentence" + WORD = "word" + CHARACTER = "character" + + def __init__( + self, + type=Type, + start_idx: Optional[int] = None, + end_idx: Optional[int] = None, + value: Optional[Dict[str, Any]] = None, + ): + super().__init__( + kind=TagKind.TOKENIZATION, name=type, start_idx=start_idx, end_idx=end_idx, value=value + ) + + +class SummaryTag(Tag): + def __init__( + self, + summary: str, + start_idx: Optional[int] = None, + end_idx: Optional[int] = None, + value: Optional[Dict[str, Any]] = None, + ): + value = value or {} + super().__init__( + kind=TagKind.SUMMARY, + start_idx=start_idx, + end_idx=end_idx, + value={**value, TagValueKey.VALUE: summary}, + ) + + +class TopicTag(Tag): + def __init__( + self, + topic: str, + start_idx: Optional[int] = None, + end_idx: Optional[int] = None, + value: Optional[Dict[str, Any]] = None, + ): + super().__init__( + kind=TagKind.TOPIC, name=topic, start_idx=start_idx, end_idx=end_idx, value=value + ) + + +class EmotionTag(Tag): + class Emotion(str, Enum): + POSITIVE = "positive" + NEUTRAL = "neutral" + NEGATIVE = "negative" + HAPPINESS = "happiness" + SADNESS = "sadness" + JOY = "joy" + LOVE = "love" + ANGER = "anger" + FEAR = "fear" + SURPRISE = "surprise" + HUMOR = "humor" + CONCERN = "concern" + SERIOUSNESS = "seriousness" + SCORE = "score" + + def __init__( + self, + emotion: Emotion, + start_idx: Optional[int] = None, + end_idx: Optional[int] = None, + value: Optional[Dict[str, Any]] = None, + ): + super().__init__( + kind=TagKind.EMOTION, name=emotion, start_idx=start_idx, 
end_idx=end_idx, value=value + ) + + +class IntentTag(Tag): + class Intent(str, Enum): + SALUTATION = "salutation" + PRAISE = "praise" + COMPLAINT = "complaint" + QUESTION = "question" + REQUEST = "request" + EXPLANATION = "explanation" + SCHEDULING_REQUEST = "scheduling-request" + ARE_YOU_THERE = "are-you-there" + REVISITING_TOPIC = "revisiting-topic" + + def __init__( + self, + intent: Intent, + start_idx: Optional[int] = None, + end_idx: Optional[int] = None, + value: Optional[Dict[str, Any]] = None, + ): + super().__init__( + kind=TagKind.INTENT, name=intent, start_idx=start_idx, end_idx=end_idx, value=value + ) + + +class EntityTag(Tag): + class EntityType(str, Enum): + PERSON = "person" + ORGANIZATION = "organization" + PRODUCT = "product" + LOCATION = "location" + DATE = "date" + TIME = "time" + MONEY = "money" + PERCENT = "percent" + FACILITY = "facility" + GEO_POLITICAL_ENTITY = "geo-political-entity" + + def __init__( + self, + entity_name: str, + entity_type: EntityType, + start_idx: Optional[int] = None, + end_idx: Optional[int] = None, + value: Optional[Dict[str, Any]] = None, + ): + if TagValueKey.ENTITY_NAME in value: + raise SteamshipError( + f"The value of your EntityTag cannot contain the key {TagValueKey.ENTITY_NAME}." 
+ ) + super().__init__( + kind=TagKind.ENTITY, + name=entity_type, + start_idx=start_idx, + end_idx=end_idx, + value={**value, TagValueKey.ENTITY_NAME: entity_name}, + ) + + +class SentimentTag(Tag): + class Sentiment(str, Enum): + POSITIVE = "positive" + NEUTRAL = "neutral" + NEGATIVE = "negative" + SCORE = "score" + + def __init__( + self, + sentiment: Sentiment, + start_idx: Optional[int] = None, + end_idx: Optional[int] = None, + value: Optional[Dict[str, Any]] = None, + ): + super().__init__( + kind=TagKind.ENTITY, name=sentiment, start_idx=start_idx, end_idx=end_idx, value=value + ) + + +class EmbeddingTag(Tag): + def __init__( + self, + embedding: List[Union[float, int]] = None, + start_idx: Optional[int] = None, + end_idx: Optional[int] = None, + value: Optional[Dict[str, Any]] = None, + ): + super().__init__( + kind=TagKind.EMBEDDING, + name=EmbeddingTag, + start_idx=start_idx, + end_idx=end_idx, + value={**value, TagValueKey.VECTOR_VALUE: embedding}, + ) + + +class PromptCompletionTag(Tag): + def __init__( + self, + text: str = None, + start_idx: Optional[int] = None, + end_idx: Optional[int] = None, + value: Optional[Dict[str, Any]] = None, + ): + super().__init__( + kind=TagKind.GENERATION, + name=GenerationTag.PROMPT_COMPLETION, + start_idx=start_idx, + end_idx=end_idx, + value={**value, TagValueKey.STRING_VALUE: text}, + ) + + +class TagQueryResponse(Response): + tags: List[Tag] + + +Tag.ListResponse.update_forward_refs() +TimestampTag.update_forward_refs() +TopicTag.update_forward_refs() +SummaryTag.update_forward_refs() +TokenizationTag.update_forward_refs() +SentimentTag.update_forward_refs() +EntityTag.update_forward_refs() +IntentTag.update_forward_refs() +EmotionTag.update_forward_refs() diff --git a/steamship/data/tags/tag_constants.py b/steamship/data/tags/tag_constants.py new file mode 100644 index 0000000000000000000000000000000000000000..6baf3f62d3bdc060bae163bec99459d1ac122f47 --- /dev/null +++ b/steamship/data/tags/tag_constants.py @@ 
from enum import Enum
from typing import Optional


class TagKind(str, Enum):
    """A set of `kind` constants for Tags.

    These define broad categories of tags. Suggested `name` values for each category are found in
    separate enums. For example: kind=TagKind.DOCUMENT, name=DocTag.H1
    """

    PART_OF_SPEECH = "part-of-speech"
    DEPENDENCY = "dependency"
    SENTIMENT = "sentiment"
    EMOTION = "emotion"
    ENTITY = "entity"
    DOCUMENT = "document"
    TOKEN = "token"  # noqa: S105
    INTENT = "intent"
    EMBEDDING = "embedding"
    GENERATION = "generation"
    PROVENANCE = "provenance"
    TOPIC = "topic"
    TOKENIZATION = "tokenization"
    SUMMARY = "summary"
    # FIX: `KIND` (a typo'd member name) previously appeared BEFORE `SUMMARY`, making it the
    # canonical member and `SUMMARY` the alias. Declaring it second keeps `TagKind.KIND`
    # available for backward compatibility while making `SUMMARY` canonical.
    KIND = "summary"
    TIMESTAMP = "timestamp"
    SEARCH_RESULT = "search-result"
    ROLE = "role"


class DocTag(str, Enum):
    """A set of `name` constants for Tags with a `kind` of `TagKind.doc`; appropriate for HTML and Markdown ideas."""

    DOCUMENT = "document"
    PAGE = "page"  # E.g. in a PDF
    REGION = "region"  # E.g., abstract catchall region in a document
    HEADER = "header"
    H1 = "h1"
    H2 = "h2"
    H3 = "h3"
    H4 = "h4"
    H5 = "h5"
    LINE = "line"
    TITLE = "title"
    SUBTITLE = "subtitle"
    FOOTER = "footer"
    PARAGRAPH = "paragraph"
    ORDERED_LIST = "ordered-list"
    UNORDERED_LIST = "unordered-list"
    LIST_ITEM = "list-item"
    LINK = "link"
    CAPTION = "caption"
    IMAGE = "image"
    BLOCK_QUOTE = "block-quote"
    BLOCK_CODE = "block-code"
    UNKNOWN = "unknown"
    SENTENCE = "sentence"
    TOKEN = "token"  # noqa: S105
    SPAN = "span"
    DIV = "div"
    PRE = "pre"
    STRONG = "strong"
    EMPHASIZED = "emphasized"
    UNDERLINED = "underlined"
    TELETYPE = "teletype"
    ARTICLE = "article"
    MAIN = "main"
    CHAPTER = "chapter"
    TEXT = "text"

    @staticmethod
    def from_html_tag(tagname: Optional[str]) -> Optional["DocTag"]:
        """Map an HTML tag name (case-insensitive, whitespace-tolerant) to a DocTag.

        Returns None for None input or for tags with no DocTag equivalent.
        """
        if tagname is None:
            return None
        # Table lookup replaces the previous 30-branch if/elif chain; same mappings.
        return _HTML_TAG_TO_DOC_TAG.get(tagname.lower().strip())


# HTML tag name -> DocTag. Module-level (not a class attribute) because a plain dict
# inside an Enum body would itself be turned into an enum member.
_HTML_TAG_TO_DOC_TAG = {
    "p": DocTag.PARAGRAPH,
    "h1": DocTag.H1,
    "h2": DocTag.H2,
    "h3": DocTag.H3,
    "h4": DocTag.H4,
    "h5": DocTag.H5,
    "ul": DocTag.UNORDERED_LIST,
    "ol": DocTag.ORDERED_LIST,
    "li": DocTag.LIST_ITEM,
    "a": DocTag.LINK,
    "div": DocTag.DIV,
    "img": DocTag.IMAGE,
    "span": DocTag.SPAN,
    "pre": DocTag.PRE,
    "code": DocTag.BLOCK_CODE,
    "blockquote": DocTag.BLOCK_QUOTE,
    "strong": DocTag.STRONG,
    "b": DocTag.STRONG,
    "emph": DocTag.EMPHASIZED,
    "i": DocTag.EMPHASIZED,
    "u": DocTag.UNDERLINED,
    "tt": DocTag.TELETYPE,
    "article": DocTag.ARTICLE,
    "header": DocTag.HEADER,
    "footer": DocTag.FOOTER,
    "main": DocTag.MAIN,
}


class TokenTag(str, Enum):
    """A set of `name` constants for Tags with a `kind` of `TagKind.token`; appropriate for parsing-level ideas."""

    TEXT_WITH_WHITESPACE = "text-with-whitespace"
    TEXT = "text"
    WHITESPACE = "whitespace"
    HEAD = "head"
    LEFT_EDGE = "left-edge"
    RIGHT_EDGE = "right-edge"
    ENTITY_TYPE = "entity-type"
    ENTITY_IOB = "entity-iob"
    LEMMA = "lemma"
    NORMALIZED = "normalized"
    SHAPE = "shape"
    PREFIX = "prefix"
    SUFFIX = "suffix"
    IS_ALPHA = "is-alpha"
    IS_ASCII = "is-ascii"
    IS_DIGIT = "is-digit"
    IS_TITLE = "is-title"
    IS_PUNCT = "is-punct"
    IS_LEFT_PUNCT = "is-left-punct"
    IS_RIGHT_PUNCT = "is-right-punct"
    IS_SPACE = "is-space"
    IS_BRACKET = "is-bracket"
    IS_QUOTE = "is-quote"
    IS_CURRENCY = "is-currency"
    LIKE_URL = "like-url"
    LIKE_NUM = "like-num"
    LIKE_EMAIL = "like-email"
    IS_OUT_OF_VOCABULARY = "is-out-of-vocabulary"
    IS_STOPWORD = "is-stopword"
    LANGUAGE = "language"


class TagValueKey(str, Enum):
    """A set of key constants for the `value` object within a tag."""

    # Catch-all for confidence, score, ranking
    SCORE = "score"

    # Catch-all for values of different types such as integers, floats, booleans, and strings
    VALUE = "value"

    # An array of floats or integers
    VECTOR_VALUE = "vector-value"

    # A float or integer
    NUMBER_VALUE = "number-value"

    # A bool
    BOOL_VALUE = "bool-value"

    # A string
    STRING_VALUE = "string-value"

    # Whether some annotation is direct ("Susan said 'Hi'")
    DIRECT = "direct"

    # Start time of a region of a document, in some other medium (seconds)
    START_TIME_S = "start-time-s"

    # End time of a region of a document, in some other medium (seconds)
    END_TIME_S = "end-time-s"

    # The normalized name of an entity.
    # NOTE: uses an underscore (not a dash like the other keys); this is a wire constant,
    # so it must stay as-is for compatibility with the Engine.
    ENTITY_NAME = "entity_name"

    # Timestamp. Can be used to provide a time-based sort-ordering for tags.
    TIMESTAMP_VALUE = "timestamp-value"


class GenerationTag(str, Enum):
    """A set of `name` constants for Tags with a `kind` of `TagKind.generation`."""

    # A generated summary of some region of a document
    SUMMARY = "summary"

    # A generated headline for some region of a document
    HEADLINE = "headline"

    # A generated "micro summary" of some region of a document
    GIST = "gist"

    # A generated completion using some region of the document as input
    PROMPT_COMPLETION = "prompt-completion"


class ProvenanceTag(str, Enum):
    """A set of `name` constants for Tags with a `kind` of `TagKind.provenance`."""

    # The speaker of a section of a document
    SPEAKER = "speaker"

    # The URL from which some section of a document was sourced
    URL = "url"

    # The File from which some section of a document was sourced
    FILE = "file"


class RoleTag(str, Enum):
    """A set of `name` constants for Tags with a `kind` of `TagKind.ROLE`."""

    # This block's content was created by the System; likely instructional text on how to respond
    SYSTEM = "system"

    # This block's content was created by an end user
    USER = "user"

    # This block's content was created by the generative AI assistant
    ASSISTANT = "assistant"
class User(CamelModel):
    """A Steamship user account record."""

    # Attached Steamship client; excluded from serialization.
    client: Client = Field(None, exclude=True)
    id: str = None
    handle: str = None

    @classmethod
    def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel:
        # TODO (enias): This needs to be solved at the engine side
        # The Engine may wrap the payload in a top-level "user" envelope; unwrap it first.
        unwrapped = obj["user"] if "user" in obj else obj
        return super().parse_obj(unwrapped)

    @staticmethod
    def current(client: Client) -> User:
        """Fetch the user record associated with the credentials held by `client`."""
        return client.get("account/current", expect=User)
class ListWorkspacesResponse(Response):
    # All workspaces visible to the requesting user.
    workspaces: List[Workspace]


class Workspace(CamelModel):
    """A Steamship workspace: an isolated container for data and plugin instances."""

    # Attached Steamship client; excluded from serialization.
    client: Client = Field(None, exclude=True)
    id: str = None
    handle: str = None

    @classmethod
    def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel:
        # TODO (enias): This needs to be solved at the engine side\
        # The Engine may wrap the payload in a top-level "workspace" envelope; unwrap it first.
        unwrapped = obj["workspace"] if "workspace" in obj else obj
        return super().parse_obj(unwrapped)

    class CreateRequest(SteamshipRequest):
        id: Optional[str] = None
        handle: Optional[str] = None
        fetch_if_exists: Optional[bool] = None
        external_id: Optional[str] = None
        external_type: Optional[str] = None
        metadata: Optional[str] = None

    def delete(self) -> Workspace:
        """Delete this workspace on the server; returns the deleted record."""
        return self.client.post("workspace/delete", IdentifierRequest(id=self.id), expect=Workspace)

    @staticmethod
    def get(
        client: Client, id_: str = None, handle: str = None, fetch_if_exists: bool = None
    ) -> Workspace:
        """Fetch a workspace by id or handle."""
        request = GetRequest(id=id_, handle=handle, fetch_if_exists=fetch_if_exists)
        return client.post("workspace/get", request, expect=Workspace)

    @staticmethod
    def create(
        client: Client,
        handle: Optional[str] = None,
        external_id: Optional[str] = None,
        external_type: Optional[str] = None,
        metadata: Any = None,
        fetch_if_exists: bool = True,
    ) -> Workspace:
        """Create a workspace, or fetch the existing one when `fetch_if_exists` is True."""
        request = Workspace.CreateRequest(
            handle=handle,
            fetch_if_exists=fetch_if_exists,
            external_id=external_id,
            external_type=external_type,
            metadata=metadata,
        )
        return client.post("workspace/create", request, expect=Workspace)

    def create_signed_url(self, request: SignedUrl.Request) -> SignedUrl.Response:
        """Request a pre-signed URL for reading or writing a file in one of this workspace's buckets."""
        logging.info(f"Requesting signed URL: {request}")
        ret = self.client.post(
            "workspace/createSignedUrl", payload=request, expect=SignedUrl.Response
        )
        logging.debug(f"Got signed URL: {ret}")
        return ret

    @staticmethod
    def list(client: Client, t: str = None) -> ListWorkspacesResponse:
        """List workspaces visible to the caller.

        NOTE(review): `ListWorkspacesRequest` declares no `type` field, yet `type=t` is
        passed here — confirm the model/Engine accepts or ignores this parameter.
        """
        return client.post(
            "workspace/list",
            ListWorkspacesRequest(type=t),
            expect=ListWorkspacesResponse,
        )


class SignedUrl:
    """Namespace for the signed-URL request/response models and their enums."""

    class Bucket(str, Enum):
        # The workspace-scoped storage buckets a signed URL may target.
        EXPORTS = "exports"
        IMPORTS = "imports"
        USER_DATA = "userData"
        PLUGIN_DATA = "pluginData"
        APP_DATA = "appData"

    class Operation(str, Enum):
        READ = "Read"
        WRITE = "Write"

    class Request(SteamshipRequest):
        bucket: SignedUrl.Bucket
        filepath: str
        operation: SignedUrl.Operation
        expires_in_minutes: int = None

    class Response(SteamshipResponse):
        bucket: str = None
        filepath: str = None
        operation: str = None
        expires_in_minutes: int = None
        signed_url: str = Field(None, alias="signedUrl")


SignedUrl.Request.update_forward_refs()
ListWorkspacesResponse.update_forward_refs()
0000000000000000000000000000000000000000..592e478de37c9c85d3d08497305583e027e8e898 Binary files /dev/null and b/steamship/invocable/__pycache__/config.cpython-39.pyc differ diff --git a/steamship/invocable/__pycache__/entrypoint.cpython-39.pyc b/steamship/invocable/__pycache__/entrypoint.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5506bd9f81d3a3e862914254f3ab3e4dc14d78c5 Binary files /dev/null and b/steamship/invocable/__pycache__/entrypoint.cpython-39.pyc differ diff --git a/steamship/invocable/__pycache__/invocable.cpython-39.pyc b/steamship/invocable/__pycache__/invocable.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8305b6e2277b37efea5190fe42663fea3654d476 Binary files /dev/null and b/steamship/invocable/__pycache__/invocable.cpython-39.pyc differ diff --git a/steamship/invocable/__pycache__/invocable_request.cpython-39.pyc b/steamship/invocable/__pycache__/invocable_request.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9aa180fe71484173905a3fd45a09cf59fab5cc49 Binary files /dev/null and b/steamship/invocable/__pycache__/invocable_request.cpython-39.pyc differ diff --git a/steamship/invocable/__pycache__/invocable_response.cpython-39.pyc b/steamship/invocable/__pycache__/invocable_response.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fb4d64c33e44833e69649ec94dfdb39d462099f Binary files /dev/null and b/steamship/invocable/__pycache__/invocable_response.cpython-39.pyc differ diff --git a/steamship/invocable/__pycache__/lambda_handler.cpython-39.pyc b/steamship/invocable/__pycache__/lambda_handler.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9996f260e82469396c350a784a90c409e4a7aab1 Binary files /dev/null and b/steamship/invocable/__pycache__/lambda_handler.cpython-39.pyc differ diff --git a/steamship/invocable/__pycache__/package_service.cpython-39.pyc 
import json
from enum import Enum
from pathlib import Path
from typing import Dict

from steamship.base.error import SteamshipError
from steamship.base.model import CamelModel
from steamship.data.manifest import ConfigParameter, ConfigParameterType


class Config(CamelModel):
    """Base class Steamship Package and Plugin configuration objects."""

    def __init__(self, **kwargs):
        # Drop None-valued kwargs so that pydantic field defaults apply instead.
        filtered = {key: value for key, value in kwargs.items() if value is not None}
        super().__init__(**filtered)

    def extend_with_dict(self, d: dict, overwrite: bool = False):
        """Sets the attributes on this object with provided keys and values."""
        for key, val in (d or {}).items():
            # Only touch known attributes; fill unset ones unless overwrite is requested.
            if hasattr(self, key) and (overwrite or getattr(self, key) is None):
                setattr(self, key, val)

    def extend_with_json_file(
        self, path: Path, overwrite: bool = False, fail_on_missing_file: bool = True
    ):
        """Extends this config object's values with a JSON file from disk.

        This is useful for applying late-bound defaults, such as API keys added to a deployment bundle."""
        if not path.exists():
            if fail_on_missing_file:
                raise SteamshipError(
                    message=f"Attempted to extend Config object with {path}, but the file was not found."
                )
            return

        with open(path) as f:
            data = json.load(f)
        if not isinstance(data, dict):
            raise SteamshipError(
                message=f"Attempted to extend Config object with {path}, but the file did not contain a JSON `dict` object."
            )
        self.extend_with_dict(data, overwrite)

    @staticmethod
    def strip_enum(default_value):
        """Return the raw `.value` for Enum members; pass anything else through unchanged."""
        if issubclass(type(default_value), Enum):
            return default_value.value
        return default_value

    @classmethod
    def get_config_parameters(cls) -> Dict[str, ConfigParameter]:
        """Describe each declared config field as a ConfigParameter, keyed by field name."""
        parameters = {}
        for field_name, field in cls.__fields__.items():
            parameters[field_name] = ConfigParameter(
                type=ConfigParameterType.from_python_type(field.type_),
                default=cls.strip_enum(field.default),
                description=field.field_info.description,
            )
        return parameters
+ +If we set the entrypoint to steamship.invocable.lambda_handler.safe_handler, Localstack is happy, but AWS +is not, because it tries to read lambda_handler first, which imports things from steamship.invocable, which imports +things from lambda_handler. + +By adding this file which basically no-ops safe_handler into steamship.invocable.entrypoint.safe_handler, both are +happy. + +""" + +from steamship.invocable import safe_handler + +_ = safe_handler # No op line so that my "unused" import does not get removed. diff --git a/steamship/invocable/invocable.py b/steamship/invocable/invocable.py new file mode 100644 index 0000000000000000000000000000000000000000..757ea1c0a19b4a5c50c238f53e0a467d055c5f92 --- /dev/null +++ b/steamship/invocable/invocable.py @@ -0,0 +1,264 @@ +"""Please see https://docs.steamship.com/ for information about building a Steamship Package""" +import inspect +import logging +import pathlib +import time +from abc import ABC +from collections import defaultdict +from functools import wraps +from http import HTTPStatus +from typing import Any, Dict, Optional, Type, Union + +import toml + +from steamship.base.package_spec import MethodSpec, PackageSpec +from steamship.client.steamship import Steamship +from steamship.invocable import Config +from steamship.invocable.config import ConfigParameter +from steamship.invocable.invocable_request import InvocableRequest, InvocationContext +from steamship.invocable.invocable_response import InvocableResponse +from steamship.utils.url import Verb + + +def make_registering_decorator(decorator): + """ + Returns a copy of foreignDecorator, which is identical in every + way(*), except also appends a .decorator property to the callable it + spits out. + + (*)We can be somewhat "hygienic", but newDecorator still isn't signature-preserving, + i.e. you will not be able to get a runtime list of parameters. 
+ For that, you need hackish libraries...but in this case, the only argument is func, so it's not a big issue + """ + + def new_decorator(func): + # Call to newDecorator(method) + # Exactly like old decorator, but output keeps track of what decorated it + output = decorator( + func + ) # apply foreignDecorator, like call to foreignDecorator(method) would have done + output.decorator = new_decorator # keep track of decorator + # R.original = func # might as well keep track of everything! + return output + + new_decorator.__name__ = decorator.__name__ + new_decorator.__doc__ = decorator.__doc__ + new_decorator.__is_endpoint__ = True + return new_decorator + + +# https://stackoverflow.com/questions/2366713/can-a-decorator-of-an-instance-method-access-the-class +# noinspection PyUnusedLocal +def endpoint(verb: str = None, path: str = None, **kwargs): + """By using ``kwargs`` we can tag the function with Any parameters.""" # noqa: RST210 + + def decorator(function): + # This is used in conjunction with the __init_subclass__ code! + # Otherwise the __name__ won't be correct in maybeDecorated.__name__! + # noinspection PyShadowingNames + @wraps(function) + def wrap(self, *args, **kwargs): + return function(self, *args, **kwargs) + + # Build a dictionary of String->Primitive Types to pass back with endpoint + # This enables the Engine to add support for features like public=True, etc, without the Client changing. + config: Dict[str, Union[str, bool, int, float]] = {} + for key, val in kwargs.items(): + if isinstance(val, (str, bool, int, float)): + config[key] = val + + wrap.__path__ = path + wrap.__verb__ = verb + wrap.__endpoint_config__ = config + + return wrap + + decorator = make_registering_decorator(decorator) + return decorator + + +def get(path: str, **kwargs): + return endpoint(verb=Verb.GET, path=path, **kwargs) + + +def post(path: str, **kwargs): + return endpoint(verb=Verb.POST, path=path, **kwargs) + + +class Invocable(ABC): + """A Steamship microservice. 
+ + This model.py class: + + 1. Provide a pre-authenticated instance of the Steamship client + 2. Provides a Lambda handler that routes to registered functions + 3. Provides useful methods connecting functions to the router. + """ + + _method_mappings = defaultdict(dict) + _package_spec: PackageSpec + config: Config + context: InvocationContext + + def __init__( + self, + client: Steamship = None, + config: Dict[str, Any] = None, + context: InvocationContext = None, + ): + self.context = context + + try: + secret_kwargs = toml.load(".steamship/secrets.toml") + except FileNotFoundError: # Support local secret loading + try: + local_secrets_file = ( + pathlib.Path(inspect.getfile(type(self))).parent / ".steamship" / "secrets.toml" + ) + secret_kwargs = toml.load(str(local_secrets_file)) + except (TypeError, FileNotFoundError): + secret_kwargs = {} + + # The configuration for the Invocable is the union of: + # + # 1) The `secret_kwargs` dict, read in from .steamship/secrets.toml, if it exists, and + # 2) The `config` dict, provided upon instantiation. + # + # When invoked from within Steamship, the `config` dict is frozen, at the instance level, upon instance + # creation. All subsequent method invocations reuse that frozen config. 
+ config = { + **secret_kwargs, + **{k: v for k, v in (config or {}).items() if v != ""}, + } + + # Finally, we set the config object to an instance of the class returned by `self.config_cls` + if config: + self.config = self.config_cls()(**config) + else: + self.config = self.config_cls()() + + self.client = client + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + + start_time = time.time() + cls._package_spec = PackageSpec(name=cls.__name__, doc=cls.__doc__, methods=[]) + cls._method_mappings = defaultdict(dict) + base_fn_list = [ + may_be_decorated + for base_cls in cls.__bases__ + for may_be_decorated in base_cls.__dict__.values() + ] + for attribute in base_fn_list + list(cls.__dict__.values()): + decorator = getattr(attribute, "decorator", None) + if decorator: + if getattr(decorator, "__is_endpoint__", False): + path = getattr(attribute, "__path__", None) + verb = getattr(attribute, "__verb__", None) + config = getattr(attribute, "__endpoint_config__", {}) + method_spec = cls._register_mapping( + name=attribute.__name__, verb=verb, path=path, config=config + ) + cls._package_spec.methods.append(method_spec) + + # Add the HTTP GET /__dir__ method which returns a serialization of the PackageSpec. + # Wired up to both GET and POST for convenience (since POST is the default from the Python client, but + # GET is the default if using from a browser). + cls._register_mapping(name="__steamship_dir__", verb=Verb.GET, path="/__dir__") + cls._register_mapping(name="__steamship_dir__", verb=Verb.POST, path="/__dir__") + end_time = time.time() + logging.info(f"Registered package functions in {end_time - start_time} seconds.") + + def __steamship_dir__(self) -> dict: + """Return this Invocable's PackageSpec for remote inspection -- e.g. documentation or OpenAPI generation.""" + return self._package_spec.dict() + + @classmethod + def config_cls(cls) -> Type[Config]: + """Returns the configuration object for the Invocable. 
+ + By default, Steamship packages and plugins will not take any configuration. Steamship packages and plugins may + declare a configuration object which extends from Config, if needed, as follows: + + class MyPackageOrPlugin: + class MyConfig(Config): + ... + + @classmethod + def config_cls(cls): + return MyPackageOrPlugin.MyConfig + """ # noqa: RST301 + return Config + + @classmethod + def _register_mapping( + cls, + name: str, + verb: Optional[Verb] = None, + path: str = "", + config: Dict[str, Union[int, float, bool, str]] = None, + ) -> MethodSpec: + """Registering a mapping permits the method to be invoked via HTTP.""" + method_spec = MethodSpec(cls, name, path=path, verb=verb, config=config) + # It's important to use method_spec.path below since that's the CLEANED path. + cls._method_mappings[verb][method_spec.path] = name + logging.info(f"[{cls.__name__}] {verb} {path} => {name}") + return method_spec + + def __call__(self, request: InvocableRequest, context: Any = None) -> InvocableResponse: + """Invokes a method call if it is registered.""" + if not hasattr(self.__class__, "_method_mappings"): + logging.error("__call__: No mappings available on invocable.") + return InvocableResponse.error( + code=HTTPStatus.NOT_FOUND, message="No mappings available for invocable." + ) + + if request.invocation is None: + logging.error("__call__: No invocation on request.") + return InvocableResponse.error( + code=HTTPStatus.NOT_FOUND, message="No invocation was found." 
+ ) + + verb = Verb(request.invocation.http_verb.strip().upper()) + path = request.invocation.invocation_path + + path = MethodSpec.clean_path(path) + + logging.info(f"[{verb}] {path}") + + method_mappings = self.__class__._method_mappings + + if verb not in method_mappings: + logging.error(f"__call__: Verb '{verb}' not found in method_mappings.") + return InvocableResponse.error( + code=HTTPStatus.NOT_FOUND, + message=f"No methods for verb {verb} available.", + ) + + if path not in method_mappings[verb]: + logging.error(f"__call__: Path '{path}' not found in method_mappings[{verb}].") + return InvocableResponse.error( + code=HTTPStatus.NOT_FOUND, + message=f"No handler for {verb} {path} available.", + ) + + method = method_mappings[verb][path] + if not (hasattr(self, method) and callable(getattr(self, method))): + logging.error( + f"__call__: Method not found or not callable for '{path}' in method_mappings[{verb}]." + ) + return InvocableResponse.error( + code=HTTPStatus.INTERNAL_SERVER_ERROR, + message=f"Handler for {verb} {path} not callable.", + ) + + arguments = request.invocation.arguments + if arguments is None: + return getattr(self, method)() + else: + return getattr(self, method)(**arguments) + + @classmethod + def get_config_parameters(cls) -> Dict[str, ConfigParameter]: + return cls.config_cls().get_config_parameters() diff --git a/steamship/invocable/invocable_request.py b/steamship/invocable/invocable_request.py new file mode 100644 index 0000000000000000000000000000000000000000..5564defc6522ee315a21aba3a60f4c0a3a619631 --- /dev/null +++ b/steamship/invocable/invocable_request.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from typing import Any, Dict + +from steamship.base import Configuration +from steamship.base.model import CamelModel + + +class Invocation(CamelModel): + http_verb: str = None + invocation_path: str = None # e.g. 
/hello/there + arguments: Dict[str, Any] = None + config: Dict[str, Any] = None + + +class LoggingConfig(CamelModel): + logging_host: str = None + logging_port: str = None + + +class InvocationContext(CamelModel): + tenant_id: str = None + user_id: str = None + workspace_id: str = None + invocable_handle: str = None + invocable_version_handle: str = None + invocable_instance_handle: str = None + invocable_type: str = None + invocable_owner_id: str = None + + +class InvocableRequest(CamelModel): + """A request as the Steamship Hosting Framework receives it from the Engine. + + This class is different from the other `Request` class: + * `steamship.base.request` represents a request from the Steamship Client + * this class represents a request from the Steamship Engine to a Steamship-hosted App/Plugin + + It contains both a package/plugin invocation and also the client configuration in which that invocation + is intended to execute. + """ + + client_config: Configuration = None + invocation: Invocation = None + logging_config: LoggingConfig = None + invocation_context: InvocationContext = None diff --git a/steamship/invocable/invocable_response.py b/steamship/invocable/invocable_response.py new file mode 100644 index 0000000000000000000000000000000000000000..fad7d90c82bd15c51372267c44f88c8349c9d612 --- /dev/null +++ b/steamship/invocable/invocable_response.py @@ -0,0 +1,231 @@ +from __future__ import annotations + +import io +import json +import logging +from typing import Any, Dict, Generic, Optional, TypeVar, Union + +from pydantic import BaseModel +from pydantic.generics import GenericModel + +from steamship.base import MimeTypes, SteamshipError, Task, TaskState +from steamship.base.client import Client +from steamship.base.error import DEFAULT_ERROR_MESSAGE +from steamship.base.mime_types import ContentEncodings +from steamship.base.model import CamelModel +from steamship.utils.binary_utils import flexi_create + + +class Http(CamelModel): + status: int = None + # 
If true, we're signaling to the Steamship Proxy that the `data` field of the SteamshipResponse object + # has been wrapped in base64. In this situation, we can return the bytes within directly to the Proxy + # caller without interpreting it. + base64_wrapped: bool = None + headers: Dict[str, str] = None + + +T = TypeVar("T") + + +class InvocableResponse(GenericModel, Generic[T]): + """Mirrors the Response object in the Steamship server.""" + + data: T = None # Data for successful or synchronous requests. + status: Task = None # Reporting for errors and async status + http: Http = None # Additional HTTP information for Steamship Proxy (headers, etc) + + def __init__( + self, + status: Task = None, + error: SteamshipError = None, + http: Http = None, + data: Any = None, + string: str = None, + json: Any = None, + _bytes: Union[bytes, io.BytesIO] = None, + mime_type=None, + ): + super().__init__() + # Note: + # This function has to be very defensively coded since Any errors thrown here will not be returned + # to the end-user via our proxy (as this is the constructor for the response itself!) + if http is not None: + self.http = http + else: + self.http = Http(status=200, headers={}) + + try: + self.set_data(data=data, string=string, json=json, _bytes=_bytes, mime_type=mime_type) + except Exception as ex: + logging.error("Exception within Response.__init__.", exc_info=ex) + if error is not None: + if error.message: + error.message = f"{error.message}. Also found error - unable to serialize data to response. {ex}" + else: + error.message = f"Unable to serialize data to response. {ex}" + else: + error = SteamshipError(message=f"Unable to serialize data to response. 
{ex}") + logging.error(error, exc_info=error) + + # Handle the task provided + if status is None: + self.status = Task() + elif isinstance(status, Task): + self.status = status + else: + self.status = Task() + self.status.state = TaskState.failed + self.status.status_message = ( + f"Status field of response should be of type Task. " + f"Instead was of type {type(status)} and had value {status}." + ) + + if error: + self.status.state = TaskState.failed + self.status.status_message = error.message + self.status.status_suggestion = error.suggestion + self.status.status_code = error.code + logging.error( + "steamship.invocable.response - Response created with error.", exc_info=error + ) + else: + if self.status.state is None: + self.status.state = TaskState.succeeded + + def set_data( + self, + data: Any = None, + string: str = None, + json: Any = None, + _bytes: Union[bytes, io.BytesIO] = None, + mime_type=None, + ): + data, mime_type, encoding = flexi_create( + data=data, string=string, json=json, _bytes=_bytes, mime_type=mime_type + ) + + self.data = data + + self.http.headers = self.http.headers or {} + self.http.headers["Content-Type"] = mime_type or MimeTypes.BINARY + + if encoding == ContentEncodings.BASE64: + self.http.base64_wrapped = True + + @staticmethod + def error( + code: int, + message: Optional[str] = None, + error: Optional[SteamshipError] = None, + exception: Optional[Exception] = None, + prefix: Optional[str] = None, + ) -> InvocableResponse[T]: + """Merges a number of error channels into one unified Response object. + + Aggregates all possible messages into a single " | "-delimeted error message. 
+ + If the final resulting error message is non-null, prefixes with the provided `prefix` + """ + # Use or create the return error + error = error or SteamshipError() + + messages = [] + if error.message != DEFAULT_ERROR_MESSAGE: + messages.append(error.message) + + # Set or append the additional message + if message is not None and message not in messages: + messages.append(message) + + # Set or append the exception + if exception is not None: + exception_str = f"{exception}" + if exception_str not in messages: + messages.append(exception_str) + + messages = [m.strip() for m in messages if m is not None and len(m.strip())] + if len(messages) > 0: + error.message = " | ".join(messages) + + # Finally, add the prefix if requested. + if prefix and error.message: + error.message = f"{prefix}{error.message}" + + return InvocableResponse(error=error, http=Http(status=code)) + + @staticmethod + def from_obj(obj: Any) -> InvocableResponse: # noqa: C901 + if obj is None: + return InvocableResponse.error(500, "Handler provided no response.") + + if isinstance(obj, InvocableResponse): + return obj + elif isinstance(obj, SteamshipError): + return InvocableResponse.error(500, error=obj) + elif isinstance(obj, Exception): + return InvocableResponse.error(500, error=SteamshipError(error=obj)) + elif isinstance(obj, io.BytesIO): + return InvocableResponse(_bytes=obj) + elif isinstance(obj, dict): + return InvocableResponse(json=obj) + elif isinstance(obj, list): + return InvocableResponse(json=obj) + elif isinstance(obj, str): + return InvocableResponse(string=obj) + elif isinstance(obj, (float, int, bool)): + return InvocableResponse(json=obj) + elif isinstance(obj, CamelModel): + return InvocableResponse(json=obj.dict(by_alias=True)) + elif isinstance(obj, BaseModel): + return InvocableResponse(json=obj.dict()) + + return InvocableResponse.error( + 500, message=f"Handler provided unknown response type: {type(obj)}" + ) + + def post_update(self, client: Client): + """Pushes this 
response object to the corresponding Task on the Steamship Engine. + + Typically apps and plugins return their results to the Engine synchronously via HTTP. + But sometimes that's not practice -- for example: + + - Microsoft's OCR endpoint returns a Job Token that can be exchanged for updates, and eventually a result. + - Google's AutoML can take 20-30 minutes to train. + - Fine-tuning BERT on ECS can take an arbitrarily long amount of time. + + In these cases, it can be useful for the package/plugin to occasionally post updates to the Engine outside + of the Engine's initial synchronous request-response conversation. + """ + if self.status is None or self.status.task_id is None: + raise SteamshipError( + message="An App/Plugin response can only be pushed to the Steamship Engine if " + + "it is associated with a Task. Please set the `status.task_id` field." + ) + if client is None: + raise SteamshipError( + message="Unable to push Response to Steamship: Associated client is None" + ) + + # Create a task object + task = Task(client=client, task_id=self.status.task_id) + update_fields = set() + + if self.status.state is not None: + task.state = self.status.state + update_fields.add("state") + + if self.status.status_message is not None: + task.status_message = self.status.status_message + update_fields.add("status_message") + + if self.status.status_suggestion is not None: + task.status_suggestion = self.status.status_suggestion + update_fields.add("status_suggestion") + + if self.data is not None: + # This object itself should always be the output of the Training Task object. 
+ task.output = json.dumps(self.data) + update_fields.add("output") + + task.post_update(fields=update_fields) diff --git a/steamship/invocable/lambda_handler.py b/steamship/invocable/lambda_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..86f4d5e48bf49cbe90f9ac0992f7fa28cdc50e1f --- /dev/null +++ b/steamship/invocable/lambda_handler.py @@ -0,0 +1,318 @@ +import importlib +import inspect +import json +import logging +import sys +import traceback +import uuid +from http import HTTPStatus +from os import environ +from typing import Callable, Dict, Type + +from fluent import asynchandler as fluenthandler +from fluent.handler import FluentRecordFormatter + +from steamship import Configuration +from steamship.base import SteamshipError +from steamship.client import Steamship +from steamship.data.workspace import SignedUrl +from steamship.invocable import Invocable, InvocableRequest, InvocableResponse, InvocationContext +from steamship.utils.signed_urls import upload_to_signed_url + + +def encode_exception(obj): + """When logging an exception ex: logging.exception(some_error), the exception must be turned into a string + so that it is accepted by elasticsearch""" + if isinstance(obj, SteamshipError): + return json.dumps(obj.to_dict()) + if isinstance(obj, Exception): + return f"exception_class: {type(obj).__name__}, args: {obj.args}" + return obj + + +def internal_handler( # noqa: C901 + invocable_cls_func: Callable[[], Type[Invocable]], + event: Dict, + client: Steamship, + invocation_context: InvocationContext, +) -> InvocableResponse: + + try: + request = InvocableRequest.parse_obj(event) + except SteamshipError as se: + logging.exception(se) + return InvocableResponse.from_obj(se) + except Exception as ex: + logging.exception(ex) + return InvocableResponse.error( + code=HTTPStatus.INTERNAL_SERVER_ERROR, + message="Plugin/App handler was unable to parse inbound request.", + exception=ex, + ) + + if request and request.invocation: + 
error_prefix = ( + f"[ERROR - {request.invocation.http_verb} {request.invocation.invocation_path}] " + ) + else: + error_prefix = "[ERROR - ?VERB ?PATH] " + + if request.invocation.invocation_path == "/__dir__": + # Return the DIR result without (1) Constructing invocable_cls or (2) Parsing its config (in the constructor) + try: + cls = invocable_cls_func() + return InvocableResponse(json=cls.__steamship_dir__(cls)) + except SteamshipError as se: + logging.exception(se) + return InvocableResponse.from_obj(se) + except Exception as ex: + logging.exception(ex) + return InvocableResponse.error( + code=HTTPStatus.INTERNAL_SERVER_ERROR, + prefix=error_prefix, + message="Unable to initialize package/plugin.", + exception=ex, + ) + + try: + invocable = invocable_cls_func()( + client=client, config=request.invocation.config, context=invocation_context + ) + except SteamshipError as se: + logging.exception(se) + return InvocableResponse.from_obj(se) + except Exception as ex: + logging.exception(ex) + return InvocableResponse.error( + code=HTTPStatus.INTERNAL_SERVER_ERROR, + prefix=error_prefix, + message="Unable to initialize package/plugin.", + exception=ex, + ) + + if not invocable: + return InvocableResponse.error( + code=HTTPStatus.INTERNAL_SERVER_ERROR, + prefix=error_prefix, + message="Unable to construct package/plugin for invocation.", + ) + + try: + response = invocable(request) + return InvocableResponse.from_obj(response) + except SteamshipError as se: + logging.exception(se) + se.message = f"{error_prefix}{se.message}" + return InvocableResponse.from_obj(se) + except Exception as ex: + logging.exception(ex) + return InvocableResponse.error( + code=HTTPStatus.INTERNAL_SERVER_ERROR, + prefix=error_prefix, + exception=ex, + ) + + +def handler(internal_handler, event: Dict, _: Dict = None) -> dict: # noqa: C901 + logging_config = event.get("loggingConfig") + + if logging_config is None: + return InvocableResponse.error( + code=HTTPStatus.INTERNAL_SERVER_ERROR, + 
message="Plugin/App handler did not receive a remote logging config.", + ).dict(by_alias=True) + + logging_host = logging_config.get("loggingHost") + logging_port = logging_config.get("loggingPort") + + logging.basicConfig(level=logging.INFO) + logging_handler = None + + invocation_context_dict = event.get("invocationContext") + if invocation_context_dict is None: + return InvocableResponse.error( + code=HTTPStatus.INTERNAL_SERVER_ERROR, + message="Plugin/App handler did not receive an invocation context.", + ).dict(by_alias=True) + + invocation_context = InvocationContext.parse_obj(invocation_context_dict) + # These log statements intentionally go to the logging handler pre-remote attachment, to debug logging configuration issues + logging.info(f"Logging host: {logging_host} Logging port: {logging_port}") + logging.info(f"Invocation context: {invocation_context}") + + if ( + logging_host != "none" + ): # Key off the string none, not 'is None', to avoid config errors where remote host isn't passed + # Configure remote logging + if logging_host is None: + return InvocableResponse.error( + code=HTTPStatus.INTERNAL_SERVER_ERROR, + message="Plugin/App handler did receive a remote logging config, but it did not include a loggingHost.", + ).dict(by_alias=True) + + if logging_port is None: + return InvocableResponse.error( + code=HTTPStatus.INTERNAL_SERVER_ERROR, + message="Plugin/App handler did receive a remote logging config, but it did not include a loggingPort.", + ).dict(by_alias=True) + + custom_format = { + "level": "%(levelname)s", + "host": "%(hostname)s", + "where": "%(module)s.%(filename)s.%(funcName)s:%(lineno)s", + "type": "%(levelname)s", + "stack_trace": "%(exc_text)s", + "component": "package-plugin-lambda", + "userId": invocation_context.user_id, + "workspaceId": invocation_context.workspace_id, + "tenantId": invocation_context.tenant_id, + "invocableHandle": invocation_context.invocable_handle, + "invocableVersionHandle": 
invocation_context.invocable_version_handle, + "invocableInstanceHandle": invocation_context.invocable_instance_handle, + "invocableType": invocation_context.invocable_type, + "invocableOwnerId": invocation_context.invocable_owner_id, + "path": event.get("invocation", {}).get("invocationPath"), + } + + # At the point in the code, the root log level seems to default to WARNING unless set to INFO, even with + # the BasicConfig setting to INFO above. + logging.root.setLevel(logging.INFO) + + logging_handler = fluenthandler.FluentHandler( + "steamship.deployed_lambda", + host=logging_host, + port=logging_port, + nanosecond_precision=True, + msgpack_kwargs={"default": encode_exception}, + ) + + # Without explicit instruction, the fluent handler defaults to UNSET. We want to make sure it is INFO. + logging_handler.setLevel(logging.INFO) + + formatter = FluentRecordFormatter(custom_format) + logging_handler.setFormatter(formatter) + # The below should make it so calls to logging.info etc are also routed to the remote logger + logging.root.addHandler(logging_handler) + + try: + # Config will accept `workspace_id` as passed from the Steamship Engine, whereas the `Steamship` + # class itself is limited to accepting `workspace` (`config.workspace_handle`) since that is the manner + # of interaction ideal for developers. 
+ config = Configuration(**event.get("clientConfig", {})) + client = Steamship(config=config, trust_workspace_config=True) + except SteamshipError as se: + logging.exception(se) + return InvocableResponse.from_obj(se).dict(by_alias=True) + except Exception as ex: + logging.exception(ex) + return InvocableResponse.error( + code=HTTPStatus.INTERNAL_SERVER_ERROR, + message="Plugin/App handler was unable to create Steamship client.", + exception=ex, + ).dict(by_alias=True) + logging.info(f"Localstack hostname: {environ.get('LOCALSTACK_HOSTNAME')}.") + response = internal_handler(event, client, invocation_context) + + result = response.dict(by_alias=True, exclude={"client"}) + # When created with data > 4MB, data is uploaded to a bucket. + # This is a very ugly way to get the deep size of this object + data = json.dumps(result.get("data", None)).encode("UTF-8") + data_size = sys.getsizeof(data) + logging.info(f"Response data size {data_size}") + if data_size > 4e6 and invocation_context.invocable_type == "plugin": + logging.info("Response data size >4MB, must upload to bucket") + + filepath = str(uuid.uuid4()) + signed_url = ( + client.get_workspace() + .create_signed_url( + SignedUrl.Request( + bucket=SignedUrl.Bucket.PLUGIN_DATA, + filepath=filepath, + operation=SignedUrl.Operation.WRITE, + ) + ) + .signed_url + ) + + logging.info(f"Got signed url for writing: {signed_url}") + + upload_to_signed_url(signed_url, data) + + # Now remove raw data and replace with bucket + del result["data"] + result["dataBucket"] = SignedUrl.Bucket.PLUGIN_DATA.value + result["dataFilepath"] = filepath + + if logging_handler is not None: + logging_handler.close() + + return result + + +def create_handler(invocable_cls: Type[Invocable]): + """Deprecated wrapper function for a Steamship invocable within an AWS Lambda function. Called by code within a + plugin or package. + """ + logging.warning( + "Creating deprecated (unsafe imports) create_handler. This is no longer necessary. 
Please remove handler = create_handler(...) from your package or plugin." + ) + + def deprecated_handler(event, context=None): + logging.error( + "Calling deprecated (unsafe imports) create_handler. This indicates use of newer SDK against an older platform version." + ) + + return deprecated_handler + + +def safely_find_invocable_class() -> Type[Invocable]: + """ + Safely find the invocable class within invocable code. + """ + try: + module = importlib.import_module("api") + return get_class_from_module(module) + except Exception as e: + logging.exception(e) + raise SteamshipError( + message=f"There was an error loading the main file (it must be named api.py):\n{traceback.format_exc()}", + error=e, + ) + + +def get_class_from_module(module) -> Type[Invocable]: + invocable_classes = [] + for element in [getattr(module, x) for x in dir(module)]: + if inspect.isclass(element): + # Using names and not issubclass(element, Invocable) because latter was returning false? + superclass_names = [c.__name__ for c in inspect.getmro(element)] + if "Invocable" in superclass_names and element.__module__ == "api": + invocable_classes.append(element) + if len(invocable_classes) == 0: + raise SteamshipError( + message="Could not find package or plugin class in api.py. Define your package or plugin by subclassing from PluginService or PackageService." + ) + if len(invocable_classes) > 1: + raise SteamshipError( + message=f"Found too many invocable classes {invocable_classes} in api.py. Only one is supported." 
+ ) + invocable_class = invocable_classes[0] + logging.info(f"Safely loaded main class: {invocable_class.__name__}") + return invocable_class + + +def create_safe_handler(known_invocable_for_testing: Type[Invocable] = None): + if known_invocable_for_testing is not None: + invocable_getter = lambda: known_invocable_for_testing # noqa: E731 + else: + invocable_getter = safely_find_invocable_class + bound_internal_handler = lambda event, client, context: internal_handler( # noqa: E731 + invocable_getter, event, client, context + ) + return lambda event, context=None: handler(bound_internal_handler, event, context) + + +# safe_handler is the new handler entrypoint, allowing the import section of user-provided code to run in a +# context where we can trap errors. +safe_handler = create_safe_handler() diff --git a/steamship/invocable/package_service.py b/steamship/invocable/package_service.py new file mode 100644 index 0000000000000000000000000000000000000000..454713d6682a5166020233c5b53812e306ca3cb7 --- /dev/null +++ b/steamship/invocable/package_service.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +import logging +from typing import Any, Dict, List + +from steamship import SteamshipError, Task +from steamship.invocable import Invocable + +# Note! +# ===== +# +# This the files in this package are for Package Implementors. +# If you are using the Steamship Client, you probably are looking for either steamship.client or steamship.data +# +from steamship.utils.url import Verb + + +class PackageService(Invocable): + """The Abstract Base Class of a Steamship Package. + + Packages may implement whatever methods they like. To expose these methods as invocable HTTP routes, + annotate the method with @get or @post and the route name. 
+ + Package *implementations* are effectively stateless, though they will have stateful + + """ + + def invoke_later( + self, + method: str, + verb: Verb = Verb.POST, + wait_on_tasks: List[Task] = None, + arguments: Dict[str, Any] = None, + ) -> Task[Any]: + """Schedule a method for future invocation. + + Parameters + ---------- + method: str + The method to invoke, as registered with Steamship in the @get or @post decorator. + verb: Verb + The HTTP Verb to use. Default is POST. + wait_on_tasks: List[Task] + A list of Task objects (or task IDs) that should be waited upon before invocation. + arguments: Dict[str, Any] + The keyword arguments of the invoked method + + Returns + ------- + Task[Any] + A Task representing the future work + """ + + if self.context is None: + raise SteamshipError( + message="Unable to call invoke_later because the InvocationContext was None" + ) + if self.context.invocable_instance_handle is None: + raise SteamshipError( + message="Unable to call invoke_later because the invocable_instance_handle on InvocationContext was None" + ) + + payload = { + "instanceHandle": self.context.invocable_instance_handle, + "payload": { + "httpVerb": verb.value, + "invocationPath": method, + "arguments": arguments or {}, + }, + } + operation = "package/instance/invoke" + + logging.info( + f"Scheduling {verb} {method} for future invocation on me ({self.context.invocable_handle})" + ) + + resp = self.client.post( + operation, + payload, + expect=Task[Task], # This operation should return a task + as_background_task=True, # This operation should always be asynchronous + wait_on_tasks=wait_on_tasks, # This operation might await other tasks first + ) + return resp diff --git a/steamship/invocable/paramater_types.py b/steamship/invocable/paramater_types.py new file mode 100644 index 0000000000000000000000000000000000000000..546951f9a039baf5fa35b6a8b81aaa0a569f9c98 --- /dev/null +++ b/steamship/invocable/paramater_types.py @@ -0,0 +1,10 @@ +class longstr(str): # 
noqa: N801 + """Long string functions mostly as a type annotation for the web.""" + + pass + + +class fileurl(str): # noqa: N801 + """Type alias that, if used in a package method argument, will cause a file upload widget to appear.""" + + pass diff --git a/steamship/invocable/plugin_service.py b/steamship/invocable/plugin_service.py new file mode 100644 index 0000000000000000000000000000000000000000..3f5a67a377cf358cec51a1fb769807522163db85 --- /dev/null +++ b/steamship/invocable/plugin_service.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +import logging +from abc import ABC, abstractmethod +from typing import Generic, Type, TypeVar, Union + +# Note! +# ===== +# +# This the files in this package are for Plugin Implementors. +# If you are using the Steamship Client, you probably are looking for either steamship.client or steamship.data +# +from steamship.invocable import Invocable, InvocableResponse +from steamship.plugin.inputs.train_plugin_input import TrainPluginInput +from steamship.plugin.inputs.training_parameter_plugin_input import TrainingParameterPluginInput +from steamship.plugin.outputs.train_plugin_output import TrainPluginOutput +from steamship.plugin.outputs.training_parameter_plugin_output import TrainingParameterPluginOutput +from steamship.plugin.request import PluginRequest +from steamship.plugin.trainable_model import TrainableModel + +IN = TypeVar("IN") +OUT = TypeVar("OUT") + + +class PluginService(Invocable, Generic[IN, OUT], ABC): + """The Abstract Base Class of a Steamship Plugin. + + All Steamship Plugins implement the operation: + + - run(PluginRequest[T]) -> Response[U] + + Many plugins are effectively stateless. This run operation defines their entire capability. 
+ Examples of such stateless plugins are: + - File Import Plugin + - Export Plugin + + Other plugins have state but in a very controlled way: + - they can be trained, + - this trainable process produces a "model", + - that model acts as the state on which the `run` method is conditioned + + This model is stored in the Steamship Workspace that owns the Plugin Instance, and access to it is provided by the + hosting environment that runs the model. + - TODO(ted) Document this process. + + These stateful plugins are called "Trainable Plugins," and they must implement the following additional methods: + + - get_training_parameters(PluginRequest[TrainingParameterInput]) -> Response[TrainingParameterOutput] + - train(PluginRequest[TrainPluginInput]) -> Response[TrainPluginOutput] + + """ + + @abstractmethod + def run(self, request: PluginRequest[IN]) -> Union[OUT, InvocableResponse[OUT]]: + """Runs the core operation implemented by this plugin: import, export, blockify, tag, etc. + + This is the method that a Steamship Plugin implements to perform its main work. + """ + pass + + +class TrainablePluginService(PluginService, Generic[IN, OUT], ABC): + @abstractmethod + def model_cls(self) -> Type[TrainableModel]: + """Returns the constructor of the TrainableModel this TrainablePluginService uses. + + This is required so the `run` method below can load the model and provide it to the subclass implementor. 
+ """ + pass + + def run(self, request: PluginRequest[IN]) -> Union[OUT, InvocableResponse[OUT]]: + """Loads the trainable model before passing the request to the `run_with_model` handler on the subclass.""" + logging.info("TrainablePluginService:run() - Loading model") + model = self.model_cls().load_remote( + client=self.client, # This field comes from being a subclass of App + plugin_instance_id=request.context.plugin_instance_id, + checkpoint_handle=None, # Will use default + use_cache=True, + plugin_instance_config=self.config, + ) + logging.info("TrainablePluginService:run() - Loaded model; invoking run_with_model") + return self.run_with_model(request, model) + + @abstractmethod + def run_with_model( + self, request: PluginRequest[IN], model: TrainableModel + ) -> Union[OUT, InvocableResponse[OUT]]: + """Rather than implementing run(request), a TrainablePluginService implements run_with_model(request, model)""" + pass + + @abstractmethod + def get_training_parameters( + self, request: PluginRequest[TrainingParameterPluginInput] + ) -> InvocableResponse[TrainingParameterPluginOutput]: + """Produces the trainable parameters for this plugin. + + This method is run by the Steamship Engine prior to training to fetch hyperparameters. + + - The user themselves can provide hyperparameters on the TrainingParameterPluginInput object. + - This method then transforms those into the TrainingParameterPluginOutput object, altering the user's values + if desired. 
+ - The Engine then takes those TrainingParameterPluginOutput and presents them on the TrainPluginInput + + """ + pass + + @abstractmethod + def train( + self, request: PluginRequest[TrainPluginInput], model: TrainableModel + ) -> InvocableResponse[TrainPluginOutput]: + """Train the model.""" + pass + + @abstractmethod + def train_status( + self, request: PluginRequest[TrainPluginInput], model: TrainableModel + ) -> InvocableResponse[TrainPluginOutput]: + """Train the model.""" + pass diff --git a/steamship/plugin/__init__.py b/steamship/plugin/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/steamship/plugin/__pycache__/__init__.cpython-39.pyc b/steamship/plugin/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8cb6e7e3843ddf7beeb87af7351d2d758f2cc7c Binary files /dev/null and b/steamship/plugin/__pycache__/__init__.cpython-39.pyc differ diff --git a/steamship/plugin/__pycache__/embedder.cpython-39.pyc b/steamship/plugin/__pycache__/embedder.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e5b23d2cbe582e0ff6bf8f5a186a1e62dec697f Binary files /dev/null and b/steamship/plugin/__pycache__/embedder.cpython-39.pyc differ diff --git a/steamship/plugin/__pycache__/file_importer.cpython-39.pyc b/steamship/plugin/__pycache__/file_importer.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a30736434753da8fbcae07cad2e651b543f2c2b Binary files /dev/null and b/steamship/plugin/__pycache__/file_importer.cpython-39.pyc differ diff --git a/steamship/plugin/__pycache__/generator.cpython-39.pyc b/steamship/plugin/__pycache__/generator.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40b0256e01843c57a0a9087c387986cf4986d122 Binary files /dev/null and b/steamship/plugin/__pycache__/generator.cpython-39.pyc differ diff --git 
a/steamship/plugin/__pycache__/request.cpython-39.pyc b/steamship/plugin/__pycache__/request.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4bb10b4e489697060615acd0803c39cf112f3883 Binary files /dev/null and b/steamship/plugin/__pycache__/request.cpython-39.pyc differ diff --git a/steamship/plugin/__pycache__/tagger.cpython-39.pyc b/steamship/plugin/__pycache__/tagger.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..69e32171e651e3225a0c719fbaeb17945161b5ca Binary files /dev/null and b/steamship/plugin/__pycache__/tagger.cpython-39.pyc differ diff --git a/steamship/plugin/__pycache__/trainable_model.cpython-39.pyc b/steamship/plugin/__pycache__/trainable_model.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..491f78f05054f60ba2cd7cceca2edade283e8430 Binary files /dev/null and b/steamship/plugin/__pycache__/trainable_model.cpython-39.pyc differ diff --git a/steamship/plugin/blockifier/__init__.py b/steamship/plugin/blockifier/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..519363be3ab9f5f11f95081b941cc5fc1ab291da --- /dev/null +++ b/steamship/plugin/blockifier/__init__.py @@ -0,0 +1,4 @@ +from .blockifier import Blockifier +from .transcriber import Transcriber + +__all__ = ["Blockifier", "Transcriber"] diff --git a/steamship/plugin/blockifier/__pycache__/__init__.cpython-39.pyc b/steamship/plugin/blockifier/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29e56655d222b52d1168be6653a4b0fe78946ffd Binary files /dev/null and b/steamship/plugin/blockifier/__pycache__/__init__.cpython-39.pyc differ diff --git a/steamship/plugin/blockifier/__pycache__/blockifier.cpython-39.pyc b/steamship/plugin/blockifier/__pycache__/blockifier.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3153d15e3c7f5a465bba640f61763be7852d5699 Binary files /dev/null and 
b/steamship/plugin/blockifier/__pycache__/blockifier.cpython-39.pyc differ diff --git a/steamship/plugin/blockifier/__pycache__/transcriber.cpython-39.pyc b/steamship/plugin/blockifier/__pycache__/transcriber.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c34de5b565d4ef92af54df5983449bd0fcf330f3 Binary files /dev/null and b/steamship/plugin/blockifier/__pycache__/transcriber.cpython-39.pyc differ diff --git a/steamship/plugin/blockifier/blockifier.py b/steamship/plugin/blockifier/blockifier.py new file mode 100644 index 0000000000000000000000000000000000000000..d2c1ce804012abe32550380651a9ac98daf14b39 --- /dev/null +++ b/steamship/plugin/blockifier/blockifier.py @@ -0,0 +1,28 @@ +from abc import ABC, abstractmethod + +from steamship.invocable import InvocableResponse, post +from steamship.invocable.plugin_service import PluginRequest, PluginService +from steamship.plugin.inputs.raw_data_plugin_input import RawDataPluginInput +from steamship.plugin.outputs.block_and_tag_plugin_output import BlockAndTagPluginOutput + +# Note! +# ===== +# +# This is the PLUGIN IMPLEMENTOR's View of a Blockifier. +# +# If you are using the Steamship Client, you probably want steamship.client.operations.converter instead +# of this file. 
+# + + +class Blockifier(PluginService[RawDataPluginInput, BlockAndTagPluginOutput], ABC): + @abstractmethod + def run( + self, request: PluginRequest[RawDataPluginInput] + ) -> InvocableResponse[BlockAndTagPluginOutput]: + raise NotImplementedError() + + @post("blockify") + def run_endpoint(self, **kwargs) -> InvocableResponse[BlockAndTagPluginOutput]: + """Exposes the Corpus Importer's `run` operation to the Steamship Engine via the expected HTTP path POST /import""" + return self.run(PluginRequest[RawDataPluginInput].parse_obj(kwargs)) diff --git a/steamship/plugin/blockifier/transcriber.py b/steamship/plugin/blockifier/transcriber.py new file mode 100644 index 0000000000000000000000000000000000000000..f7f796d5dbcb8f5b79e0e60b6bd9d4e00b938019 --- /dev/null +++ b/steamship/plugin/blockifier/transcriber.py @@ -0,0 +1,61 @@ +from abc import abstractmethod +from typing import List, Optional + +from steamship import Block, File, SteamshipError, Tag, Task, TaskState +from steamship.invocable import InvocableResponse +from steamship.invocable.plugin_service import PluginRequest +from steamship.plugin.blockifier.blockifier import Blockifier +from steamship.plugin.inputs.raw_data_plugin_input import RawDataPluginInput +from steamship.plugin.outputs.block_and_tag_plugin_output import BlockAndTagPluginOutput + +TRANSCRIPT_ID = "transcript_id" + + +class Transcriber(Blockifier): + @abstractmethod + def start_transcription(self, audio_file: PluginRequest[RawDataPluginInput]) -> str: + """Start a transcription job and return an id to identify the transcription.""" + raise NotImplementedError() + + @abstractmethod + def get_transcript(self, transcript_id: str) -> (Optional[str], Optional[List[Tag]]): + """Method to retrieve the transcript and optional Tags. 
If the transcription is not ready, return None""" + raise NotImplementedError() + + def _get_transcript(self, transcript_id: str) -> InvocableResponse: + """Retrieve the transcript using the transcript_id.""" + transcript, tags = self.get_transcript(transcript_id) + if transcript is None and tags is None: + return InvocableResponse( + status=Task( + state=TaskState.running, + remote_status_message="Transcription is ongoing.", + remote_status_input={"transcript_id": transcript_id}, + ) + ) + else: + return InvocableResponse( + data=BlockAndTagPluginOutput( + file=File( + blocks=[ + Block( + text=transcript, + tags=tags, + ) + ] + ) + ) + ) + + def run( + self, request: PluginRequest[RawDataPluginInput] + ) -> InvocableResponse[BlockAndTagPluginOutput]: + if request.is_status_check: + if TRANSCRIPT_ID not in request.status.remote_status_input: + raise SteamshipError(message="Status check requests need to provide a valid job id") + transcript_id = request.status.remote_status_input[TRANSCRIPT_ID] + return self._get_transcript(transcript_id) + + else: + transcript_id = self.start_transcription(audio_file=request.data.data) + return self._get_transcript(transcript_id) diff --git a/steamship/plugin/embedder.py b/steamship/plugin/embedder.py new file mode 100644 index 0000000000000000000000000000000000000000..403f13e5320269273e9be3cdacba743b4cf6b34a --- /dev/null +++ b/steamship/plugin/embedder.py @@ -0,0 +1,27 @@ +from abc import ABC, abstractmethod + +from steamship.invocable import InvocableResponse, post +from steamship.invocable.plugin_service import PluginRequest, PluginService +from steamship.plugin.inputs.block_and_tag_plugin_input import BlockAndTagPluginInput +from steamship.plugin.outputs.embedded_items_plugin_output import EmbeddedItemsPluginOutput + + +# Note! +# ===== +# +# This is the PLUGIN IMPLEMENTOR's View of an Embedder. +# +# If you are using the Steamship Client, you probably want steamship.client.operations.embedder instead +# of this file. 
+# +class Embedder(PluginService[BlockAndTagPluginInput, EmbeddedItemsPluginOutput], ABC): + @abstractmethod + def run( + self, request: PluginRequest[BlockAndTagPluginInput] + ) -> InvocableResponse[EmbeddedItemsPluginOutput]: + raise NotImplementedError() + + @post("tag") + def run_endpoint(self, **kwargs) -> InvocableResponse[EmbeddedItemsPluginOutput]: + """Exposes the Embedder's `run` operation to the Steamship Engine via the expected HTTP path POST /tag""" + return self.run(PluginRequest[BlockAndTagPluginInput](**kwargs)) diff --git a/steamship/plugin/file_importer.py b/steamship/plugin/file_importer.py new file mode 100644 index 0000000000000000000000000000000000000000..2949170947c299b459b3303c782dfcb6e15800e7 --- /dev/null +++ b/steamship/plugin/file_importer.py @@ -0,0 +1,27 @@ +from abc import ABC, abstractmethod + +from steamship.invocable import InvocableResponse, post +from steamship.invocable.plugin_service import PluginRequest, PluginService +from steamship.plugin.inputs.file_import_plugin_input import FileImportPluginInput +from steamship.plugin.outputs.raw_data_plugin_output import RawDataPluginOutput + + +# Note! +# ===== +# +# This is the PLUGIN IMPLEMENTOR's View of a File Importer. +# +# If you are using the Steamship Client, you probably want steamship.client.operations.file_importer instead +# of this file. 
+# +class FileImporter(PluginService[FileImportPluginInput, RawDataPluginOutput], ABC): + @abstractmethod + def run( + self, request: PluginRequest[FileImportPluginInput] + ) -> InvocableResponse[RawDataPluginOutput]: + raise NotImplementedError() + + @post("import") + def run_endpoint(self, **kwargs) -> InvocableResponse[RawDataPluginOutput]: + """Exposes the File Importer's `run` operation to the Steamship Engine via the expected HTTP path POST /import""" + return self.run(PluginRequest[FileImportPluginInput](**kwargs)) diff --git a/steamship/plugin/generator.py b/steamship/plugin/generator.py new file mode 100644 index 0000000000000000000000000000000000000000..c1c71f3ab1ea95806f7d92ce48c6cedecb7d3965 --- /dev/null +++ b/steamship/plugin/generator.py @@ -0,0 +1,72 @@ +import logging +from abc import ABC, abstractmethod + +from steamship.invocable import InvocableResponse, post +from steamship.invocable.plugin_service import PluginRequest, PluginService, TrainablePluginService +from steamship.plugin.inputs.raw_block_and_tag_plugin_input import RawBlockAndTagPluginInput +from steamship.plugin.inputs.train_plugin_input import TrainPluginInput +from steamship.plugin.inputs.training_parameter_plugin_input import TrainingParameterPluginInput +from steamship.plugin.outputs.raw_block_and_tag_plugin_output import RawBlockAndTagPluginOutput +from steamship.plugin.outputs.train_plugin_output import TrainPluginOutput +from steamship.plugin.outputs.training_parameter_plugin_output import TrainingParameterPluginOutput +from steamship.plugin.trainable_model import TrainableModel + +# Note! +# ===== +# +# This is the PLUGIN IMPLEMENTOR's View of a Generator. +# +# If you are using the Steamship Client, you probably want steamship.client.operations.generator instead +# of this file. 
+# + + +class Generator(PluginService[RawBlockAndTagPluginInput, RawBlockAndTagPluginOutput], ABC): + @abstractmethod + def run( + self, request: PluginRequest[RawBlockAndTagPluginInput] + ) -> InvocableResponse[RawBlockAndTagPluginOutput]: + raise NotImplementedError() + + @post("generate") + def run_endpoint(self, **kwargs) -> InvocableResponse[RawBlockAndTagPluginOutput]: + """Exposes the Tagger's `run` operation to the Steamship Engine via the expected HTTP path POST /tag""" + return self.run(PluginRequest[RawBlockAndTagPluginInput].parse_obj(kwargs)) + + +class TrainableGenerator( + TrainablePluginService[RawBlockAndTagPluginInput, RawBlockAndTagPluginOutput], ABC +): + @abstractmethod + def run_with_model( + self, request: PluginRequest[RawBlockAndTagPluginInput], model: TrainableModel + ) -> InvocableResponse[RawBlockAndTagPluginOutput]: + raise NotImplementedError() + + # noinspection PyUnusedLocal + @post("generate") + def run_endpoint(self, **kwargs) -> InvocableResponse[RawBlockAndTagPluginOutput]: + """Exposes the Tagger's `run` operation to the Steamship Engine via the expected HTTP path POST /generate""" + return self.run(PluginRequest[RawBlockAndTagPluginInput].parse_obj(kwargs)) + + # noinspection PyUnusedLocal + @post("getTrainingParameters") + def get_training_parameters_endpoint( + self, **kwargs + ) -> InvocableResponse[TrainingParameterPluginOutput]: + """Exposes the Service's `get_training_parameters` operation to the Steamship Engine via the expected HTTP path POST /getTrainingParameters""" + return self.get_training_parameters(PluginRequest[TrainingParameterPluginInput](**kwargs)) + + # noinspection PyUnusedLocal + @post("train") + def train_endpoint(self, **kwargs) -> InvocableResponse[TrainPluginOutput]: + """Exposes the Service's `train` operation to the Steamship Engine via the expected HTTP path POST /train""" + logging.info(f"Tagger:train_endpoint called. 
Calling train {kwargs}") + arg = PluginRequest[TrainPluginInput].parse_obj(kwargs) + model = self.model_cls()() + model.receive_config(config=self.config) + + if arg.is_status_check: + return self.train_status(arg, model) + else: + return self.train(arg, model) diff --git a/steamship/plugin/inputs/__init__.py b/steamship/plugin/inputs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/steamship/plugin/inputs/__pycache__/__init__.cpython-39.pyc b/steamship/plugin/inputs/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c82f13d4ad124cc7b27d35f431aba9f340be98b4 Binary files /dev/null and b/steamship/plugin/inputs/__pycache__/__init__.cpython-39.pyc differ diff --git a/steamship/plugin/inputs/__pycache__/block_and_tag_plugin_input.cpython-39.pyc b/steamship/plugin/inputs/__pycache__/block_and_tag_plugin_input.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..57c911c5353e1fdd5d779702ce80ff192af3caae Binary files /dev/null and b/steamship/plugin/inputs/__pycache__/block_and_tag_plugin_input.cpython-39.pyc differ diff --git a/steamship/plugin/inputs/__pycache__/export_plugin_input.cpython-39.pyc b/steamship/plugin/inputs/__pycache__/export_plugin_input.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5415525efaaa73ddab69dfc0067dc29de12fd8ea Binary files /dev/null and b/steamship/plugin/inputs/__pycache__/export_plugin_input.cpython-39.pyc differ diff --git a/steamship/plugin/inputs/__pycache__/file_import_plugin_input.cpython-39.pyc b/steamship/plugin/inputs/__pycache__/file_import_plugin_input.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70500039b1330abf19bfb2aa6865247752eda33f Binary files /dev/null and b/steamship/plugin/inputs/__pycache__/file_import_plugin_input.cpython-39.pyc differ diff --git 
a/steamship/plugin/inputs/__pycache__/raw_block_and_tag_plugin_input.cpython-39.pyc b/steamship/plugin/inputs/__pycache__/raw_block_and_tag_plugin_input.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5682d9319786d848067aeecc746f908a5a1ddfa Binary files /dev/null and b/steamship/plugin/inputs/__pycache__/raw_block_and_tag_plugin_input.cpython-39.pyc differ diff --git a/steamship/plugin/inputs/__pycache__/raw_data_plugin_input.cpython-39.pyc b/steamship/plugin/inputs/__pycache__/raw_data_plugin_input.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a161c95b8e0db820d872cb9b1936106534fd69a Binary files /dev/null and b/steamship/plugin/inputs/__pycache__/raw_data_plugin_input.cpython-39.pyc differ diff --git a/steamship/plugin/inputs/__pycache__/train_plugin_input.cpython-39.pyc b/steamship/plugin/inputs/__pycache__/train_plugin_input.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..301eb2da51a0cd6a0ad7e9503b0bcaec8569c766 Binary files /dev/null and b/steamship/plugin/inputs/__pycache__/train_plugin_input.cpython-39.pyc differ diff --git a/steamship/plugin/inputs/__pycache__/training_parameter_plugin_input.cpython-39.pyc b/steamship/plugin/inputs/__pycache__/training_parameter_plugin_input.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e04c4fc468ce65031b9e82e5b778732be948ebe Binary files /dev/null and b/steamship/plugin/inputs/__pycache__/training_parameter_plugin_input.cpython-39.pyc differ diff --git a/steamship/plugin/inputs/block_and_tag_plugin_input.py b/steamship/plugin/inputs/block_and_tag_plugin_input.py new file mode 100644 index 0000000000000000000000000000000000000000..2443c95abf591ac8ee19482a4d286757fe2f3880 --- /dev/null +++ b/steamship/plugin/inputs/block_and_tag_plugin_input.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from steamship import File +from steamship.base import SteamshipError +from 
steamship.base.model import CamelModel +from steamship.utils.signed_urls import url_to_json + + +class BlockAndTagPluginInput(CamelModel): + file: File = None + + def __init__(self, **kwargs): + if url := kwargs.get("url"): + # If `url` was provided, we assume that some or all of the new object's parameterization exists + # at that location, encoded as JSON. We fetch it, parse as JSON, and fold into the kwarg dict. + result = url_to_json(url) + if not isinstance(result, dict): + raise SteamshipError( + message=f"BlockAndTagPluginInput received a URL that resolved to {type(result)}. Needed a `dict`" + ) + kwargs.update(result) + + super().__init__(**kwargs) diff --git a/steamship/plugin/inputs/export_plugin_input.py b/steamship/plugin/inputs/export_plugin_input.py new file mode 100644 index 0000000000000000000000000000000000000000..01843097de187fa66000eadd1b31d1189e1c8ca0 --- /dev/null +++ b/steamship/plugin/inputs/export_plugin_input.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from steamship.base.model import CamelModel + + +class ExportPluginInput(CamelModel): + plugin_instance: str = None + id: str = None + handle: str = None + type: str = None + filename: str = None + query: str = None diff --git a/steamship/plugin/inputs/file_import_plugin_input.py b/steamship/plugin/inputs/file_import_plugin_input.py new file mode 100644 index 0000000000000000000000000000000000000000..f4bd881ad7f5d87751123463432554b7d170f6f0 --- /dev/null +++ b/steamship/plugin/inputs/file_import_plugin_input.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from steamship.base.model import CamelModel + + +class FileImportPluginInput(CamelModel): + value: str = None + data: str = None + url: str = None + plugin_instance: str = None + mime_type: str = None diff --git a/steamship/plugin/inputs/raw_block_and_tag_plugin_input.py b/steamship/plugin/inputs/raw_block_and_tag_plugin_input.py new file mode 100644 index 
0000000000000000000000000000000000000000..03291880a5f10145cf4ecac827cede42d0d35fed --- /dev/null +++ b/steamship/plugin/inputs/raw_block_and_tag_plugin_input.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from typing import List, Optional + +from steamship import Block +from steamship.base import SteamshipError +from steamship.base.model import CamelModel +from steamship.utils.signed_urls import url_to_json + + +class RawBlockAndTagPluginInput(CamelModel): + blocks: List[Block] + options: Optional[dict] + + def __init__(self, **kwargs): + if url := kwargs.get("url"): + # If `url` was provided, we assume that some or all of the new object's parameterization exists + # at that location, encoded as JSON. We fetch it, parse as JSON, and fold into the kwarg dict. + result = url_to_json(url) + if not isinstance(result, dict): + raise SteamshipError( + message=f"BlockAndTagPluginInput received a URL that resolved to {type(result)}. Needed a `dict`" + ) + kwargs.update(result) + + super().__init__(**kwargs) diff --git a/steamship/plugin/inputs/raw_data_plugin_input.py b/steamship/plugin/inputs/raw_data_plugin_input.py new file mode 100644 index 0000000000000000000000000000000000000000..f9af9b2834bc6b05396400772d6160f430df0a38 --- /dev/null +++ b/steamship/plugin/inputs/raw_data_plugin_input.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import base64 +from typing import Any + +from steamship.base.mime_types import TEXT_MIME_TYPES, MimeTypes +from steamship.base.model import CamelModel +from steamship.utils.signed_urls import url_to_bytes + + +def is_base64(sb): + # noinspection PyBroadException + try: + if isinstance(sb, str): + # If there's Any unicode here, an exception will be thrown and the function will return false + sb_bytes = bytes(sb, "ascii") + elif isinstance(sb, bytes): + sb_bytes = sb + else: + raise ValueError("Argument must be string or bytes") + return base64.b64encode(base64.b64decode(sb_bytes)) == sb_bytes + except Exception: + 
return False + + +class RawDataPluginInput(CamelModel): + """Input for a plugin that accepts raw data, plus a mime type. + + A plugin author need only ever concern themselves with two fields: + - `data` - Raw bytes + ` `default_mime_type` - The best guess as to `data`'s MIME Type unless otherwise known to be different. + + In practice, however, the lifecycle of this object involves a bit more under the hood: + + - **Potentially Base64 Decoding Data**. When decoding from a dict, the `data` field is assumed to be Base64 encoded. + This is to support JSON as a transport encoding over the wire. The constructor automatically performs the + decoding, and the Steamship Engine automatically performs the encoding, so the Plugin Author can mostly ignore + this fact. + + - **Potentially late-fetching the `data` from a `url`**. Some files are too large to comfortably send as Base64 + within JSON. The Steamship Engine sometimes chooses to send an empty `data` field paired with a non-empty + `url` field. When this happens, the constructor proactively, synchronously fetches the contents of that `url` + and assigns it to the `data` field, throwing a SteamshipError if the fetch fails. Again, this is done + automatically so the Plugin Author can mostly ignore this fact. 
+ """ + + plugin_instance: str = None + data: Any = None + default_mime_type: MimeTypes = None + + def __init__(self, **kwargs): + data = kwargs.get("data") + url = kwargs.get("url") + + if data is not None and is_base64(data): + data_bytes = base64.b64decode(data) + if kwargs.get("defaultMimeType") in TEXT_MIME_TYPES: + kwargs["data"] = data_bytes.decode("utf-8") + else: + kwargs["data"] = data_bytes + elif url is not None: + kwargs["data"] = url_to_bytes(url) # Resolve the URL into the data field + kwargs.pop( + "url" + ) # Remove the URL field to preserve a simple interface for the consumer + + super().__init__(**kwargs) diff --git a/steamship/plugin/inputs/train_plugin_input.py b/steamship/plugin/inputs/train_plugin_input.py new file mode 100644 index 0000000000000000000000000000000000000000..3a0e6d27ed2e58a6e1177e4184a30f78114bf20a --- /dev/null +++ b/steamship/plugin/inputs/train_plugin_input.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from typing import Optional + +from pydantic import Field + +from steamship.base.model import CamelModel + + +class TrainPluginInput(CamelModel): + """ + This is the object passed as input to a trainable operation, stored as the `input` field of a `train` task. + """ + + plugin_instance: str + + # How may epochs of trainable to perform, if relevant and supported + training_epochs: Optional[int] = None + + # How much data to hold out for testing & reporting, if relevant and supported. + testing_holdout_percent: Optional[float] = None + + # An optional seed for the train-test split + test_split_seed: Optional[int] = None + + # Arbitrary key-valued data to provide to the particular `modelName` trainer. + training_params: Optional[dict] = None + + # Arbitrary key-valued data to provide to the inference runner in the TrainPluginOutput object. 
+ # The trainable process will have the opportunity to amend this before writing it to the output + inference_params: Optional[dict] = None + + # A pre-signed URL at which the trainable data can be found + training_data_url: Optional[str] = Field(None, alias="trainingDataUrl") diff --git a/steamship/plugin/inputs/training_parameter_plugin_input.py b/steamship/plugin/inputs/training_parameter_plugin_input.py new file mode 100644 index 0000000000000000000000000000000000000000..a11dad10315358f2d95cf77e12d6644ca6b6bd64 --- /dev/null +++ b/steamship/plugin/inputs/training_parameter_plugin_input.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import Dict, Optional + +from steamship.base.model import CamelModel +from steamship.plugin.inputs.export_plugin_input import ExportPluginInput + + +class TrainingParameterPluginInput(CamelModel): + # The plugin instance handle that should perform the training. + plugin_instance: Optional[str] = None + # An export request to produce the training data file, if training data is required. 
+ export_plugin_input: Optional[ExportPluginInput] = None + + # How many epochs to train (if supported by the supplied `pluginInstance`) + training_epochs: Optional[int] = None + + # How much of the data to hold out for testing (if supported by the supplied `pluginInstance`) + testing_holdout_percent: Optional[float] = None + + # Random seed for performing the train/test split (if supported by the supplied `pluginInstance`) + test_split_seed: Optional[int] = None + + # Custom training-time parameters, specific to the pluginInstance + training_params: Optional[Dict] = None + + # Custom inference-time parameters, specific to the pluginInstance + inference_params: Optional[Dict] = None diff --git a/steamship/plugin/outputs/__init__.py b/steamship/plugin/outputs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/steamship/plugin/outputs/__pycache__/__init__.cpython-39.pyc b/steamship/plugin/outputs/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3523b9a987545cc20e13f85d11534bc29109917c Binary files /dev/null and b/steamship/plugin/outputs/__pycache__/__init__.cpython-39.pyc differ diff --git a/steamship/plugin/outputs/__pycache__/block_and_tag_plugin_output.cpython-39.pyc b/steamship/plugin/outputs/__pycache__/block_and_tag_plugin_output.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c352be6689772cccb02b7ffec45ae8f6e873626a Binary files /dev/null and b/steamship/plugin/outputs/__pycache__/block_and_tag_plugin_output.cpython-39.pyc differ diff --git a/steamship/plugin/outputs/__pycache__/embedded_items_plugin_output.cpython-39.pyc b/steamship/plugin/outputs/__pycache__/embedded_items_plugin_output.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ff1f12e6639d90eaf1f0b6f9e0fbd07436034a1 Binary files /dev/null and 
b/steamship/plugin/outputs/__pycache__/embedded_items_plugin_output.cpython-39.pyc differ diff --git a/steamship/plugin/outputs/__pycache__/model_checkpoint.cpython-39.pyc b/steamship/plugin/outputs/__pycache__/model_checkpoint.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b53ffddc872dc7e0e8b7984e8dbd6fd86087325b Binary files /dev/null and b/steamship/plugin/outputs/__pycache__/model_checkpoint.cpython-39.pyc differ diff --git a/steamship/plugin/outputs/__pycache__/raw_block_and_tag_plugin_output.cpython-39.pyc b/steamship/plugin/outputs/__pycache__/raw_block_and_tag_plugin_output.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..554ab37572afb57104663bec58b440f02a961ea9 Binary files /dev/null and b/steamship/plugin/outputs/__pycache__/raw_block_and_tag_plugin_output.cpython-39.pyc differ diff --git a/steamship/plugin/outputs/__pycache__/raw_data_plugin_output.cpython-39.pyc b/steamship/plugin/outputs/__pycache__/raw_data_plugin_output.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..399039a73979bbd556d0d55762f4aba307299b5c Binary files /dev/null and b/steamship/plugin/outputs/__pycache__/raw_data_plugin_output.cpython-39.pyc differ diff --git a/steamship/plugin/outputs/__pycache__/train_plugin_output.cpython-39.pyc b/steamship/plugin/outputs/__pycache__/train_plugin_output.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..76d306a12baf07ea56d0d00011562651793623ad Binary files /dev/null and b/steamship/plugin/outputs/__pycache__/train_plugin_output.cpython-39.pyc differ diff --git a/steamship/plugin/outputs/__pycache__/training_parameter_plugin_output.cpython-39.pyc b/steamship/plugin/outputs/__pycache__/training_parameter_plugin_output.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1b53560070d7515eb1157b52c7b1336076d1fe8 Binary files /dev/null and 
import logging
import tempfile
from pathlib import Path
from typing import ClassVar, Optional

from steamship import Steamship, SteamshipError
from steamship.base.client import Client
from steamship.base.model import CamelModel
from steamship.data.workspace import SignedUrl, Workspace
from steamship.utils.signed_urls import download_from_signed_url, upload_to_signed_url
from steamship.utils.zip_archives import unzip_folder, zip_folder


class ModelCheckpoint(CamelModel):
    """Represents the saved state of a trained PluginInstance.

    On disk, a checkpoint lives in the folder `{parent_directory}/{handle}/` and is
    zipped to `{parent_directory}/{handle}.zip` for transport. In Steamship it is
    stored in the workspace's plugin-data bucket under
    `{plugin_instance_id}/{handle}.zip`.
    """

    # The default model checkpoint handle unless one is provided.
    DEFAULT_HANDLE: ClassVar[str] = "default"

    client: Client
    workspace: Optional[Workspace] = None
    parent_directory: Optional[Path] = None  # Local folder containing the checkpoint, e.g. /tmp
    handle: str = None  # The handle of this ModelCheckpoint, e.g. "default" or "epoch-10".
    # Fix: originally declared twice (once required, once Optional); the second
    # declaration won, so a single Optional declaration preserves behavior.
    plugin_instance_id: str = None  # The PluginInstance whose trained state this checkpoint holds.

    def __init__(
        self,
        client: Steamship,
        parent_directory: Optional[Path] = None,
        handle: str = DEFAULT_HANDLE,
        plugin_instance_id: str = None,
    ):
        super().__init__(
            client=client,
            parent_directory=parent_directory,
            plugin_instance_id=plugin_instance_id,
            handle=handle or ModelCheckpoint.DEFAULT_HANDLE,
        )

        if self.plugin_instance_id is None:
            raise SteamshipError("Null plugin_instance_id provided ModelCheckpoint")

        self.workspace = client.get_workspace()

        if parent_directory is None:
            # TODO(ted): We may want to not use a tempdir so that we can cache it.
            self.parent_directory = Path(tempfile.mkdtemp())

        # Create the folder path on disk.
        logging.info(f"Making sure Checkpoint path exists: {self.folder_path_on_disk()}")
        self.folder_path_on_disk().mkdir(parents=True, exist_ok=True)

    def folder_path_on_disk(self) -> Path:
        """Return the path to this checkpoint on the local disk.

        On disk, the model checkpoint is the folder:
        `{parent_directory}/{checkpoint_handle}/`
        """
        return self.parent_directory / Path(self.handle)

    def archive_path_on_disk(self) -> Path:
        """Return the path to the checkpoint archive on disk.

        On disk, the model checkpoint archive is the file:
        `{parent_directory}/{checkpoint_handle}.zip`
        """
        return self.parent_directory / Path(f"{self.handle}.zip")

    def archive_path_in_steamship(self, as_handle: str = None) -> str:
        """Return the path to the checkpoint archive on Steamship.

        On Steamship, the checkpoint is archived in the Workspace's PluginInstance bucket as:
        `{plugin_instance_bucket}/{plugin_instance_id}/{checkpoint_handle}.zip`

        Only `{plugin_instance_id}/{checkpoint_handle}.zip` is returned here, since the
        bucket is specified separately in the required Steamship API calls.
        """
        return f"{self.plugin_instance_id}/{as_handle or self.handle}.zip"

    def download_model_bundle(self) -> Path:
        """Download the model from Steamship and unzip it into `parent_directory`.

        Returns the local folder the archive was unzipped into.
        Raises SteamshipError if the Engine returns an empty signed URL.
        """
        download_resp = self.workspace.create_signed_url(
            SignedUrl.Request(
                bucket=SignedUrl.Bucket.PLUGIN_DATA,
                filepath=self.archive_path_in_steamship(),
                operation=SignedUrl.Operation.READ,
            )
        )
        if not download_resp or not download_resp.signed_url:
            # Fix: the original error message had an unterminated quote around the handle.
            raise SteamshipError(
                message=f"Received empty Signed URL for model download of '{self.handle}'."
            )
        # Fix: the original repeated this check/download/unzip sequence twice,
        # downloading and unzipping the archive a second time for no effect.
        download_from_signed_url(download_resp.signed_url, to_file=self.archive_path_on_disk())
        unzip_folder(self.archive_path_on_disk(), into_folder=self.folder_path_on_disk())
        return self.folder_path_on_disk()

    def _upload_model_zip(self, as_handle: str = None):
        """Assumes a pre-zipped model; uploads the archive under the requested handle.

        This is an internal function. Please use upload_model_bundle as a caller.
        """
        logging.info(f"ModelCheckpoint:_upload_model_zip - handle={as_handle}")
        signed_url_resp = self.workspace.create_signed_url(
            SignedUrl.Request(
                bucket=SignedUrl.Bucket.PLUGIN_DATA,
                filepath=self.archive_path_in_steamship(as_handle=as_handle),
                operation=SignedUrl.Operation.WRITE,
            )
        )

        if not signed_url_resp:
            raise SteamshipError(
                message="Empty result on Signed URL request while uploading model checkpoint"
            )
        if not signed_url_resp.signed_url:
            raise SteamshipError(
                message="Empty signedUrl on Signed URL request while uploading model checkpoint"
            )

        upload_to_signed_url(signed_url_resp.signed_url, filepath=self.archive_path_on_disk())

    def upload_model_bundle(self, set_as_default: bool = True):
        """Zips and uploads the model checkpoint folder to Steamship."""
        logging.info("ModelCheckpoint:upload_model_bundle")
        zip_folder(self.folder_path_on_disk(), into_file=self.archive_path_on_disk())
        self._upload_model_zip()

        if set_as_default:
            # For simplicity, we'll assume the checkpoint named `default` is the one to be
            # loaded unless otherwise specified. This means that we need to double-upload
            # some checkpoints:
            # - Once under the actual checkpoint name (e.g. `epoch-10`)
            # - Again under the name: default
            self._upload_model_zip(as_handle=ModelCheckpoint.DEFAULT_HANDLE)
+ + As a few examples, you can return: + - Raw text: RawDataPluginOutput(string=raw_text, MimeTypes.TXT) + - Markdown text: RawDataPluginOutput(string=markdown_text, MimeTypes.MKD) + - A PNG image: RawDataPluginOutput(bytes=png_bytes, MimeTypes.PNG) + - A JSON-serializable Dataclass: RawDataPluginOutput(json=dataclass, MimeTypes.JSON) + - Steamship Blocks: RawDataPluginOutput(json=file, MimeTypes.STEAMSHIP_BLOCK_JSON) + - Data uploaded to a pre-signed URL: RawDataPluginOutput(url=presigned_url, MimeTypes.TXT) + + The `data` field of this object will ALWAYS be Base64 encoded by the constructor. This ensures that the object + is always trivially JSON-serializable over the wire, no matter what it contains. + + The `mimeType` field of this object should always be filled in if known. The Steamship Engine makes use of it + to proactively select defaults for handling the data returned. + """ + + data: Optional[str] = None # Note: This is **always** Base64 encoded. + mime_type: Optional[str] = None + + def __init__( + self, + base64string: str = None, + string: str = None, + _bytes: Union[bytes, io.BytesIO] = None, + json: Any = None, + mime_type: str = None, + **kwargs, + ): + super().__init__() + + if base64string is not None: + self.data = base64string + self.mime_type = mime_type or MimeTypes.BINARY + else: + # Base64-encode the data field. 
+ self.data, self.mime_type, encoding = flexi_create( + base64string=base64string, + string=string, + json=json, + _bytes=_bytes, + mime_type=mime_type, + force_base64=True, + ) + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel: + obj["base64string"] = obj.get("data") + return super().parse_obj(obj) diff --git a/steamship/plugin/outputs/train_plugin_output.py b/steamship/plugin/outputs/train_plugin_output.py new file mode 100644 index 0000000000000000000000000000000000000000..e9964e774786fd476d72c5a25bec282525cb70c6 --- /dev/null +++ b/steamship/plugin/outputs/train_plugin_output.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from steamship.base.model import CamelModel + + +class TrainPluginOutput(CamelModel): + """ + This is the object produced by a completed trainable operation, stored as the `output` field of a `train` task. + """ + + # The PluginInstance ID being trained + plugin_instance_id: str = None + + # This should always represent the most recent snapshot of the model in Steamship + # It is the output of ModelCheckpoint.archive_path_in_steamship + archive_path: str = None + + # Arbitrary key-valued data to provide to the `run` method when this plugin is Run. + inference_params: dict = None + + # Arbitrary key-valued data to provide information about training status or training results. + training_progress: dict = None # For tracking the progress (e.g. 3 / 40 epochs completed) + training_results: dict = None # For tracking accuracy (e.g. 
f1=0.8) diff --git a/steamship/plugin/outputs/training_parameter_plugin_output.py b/steamship/plugin/outputs/training_parameter_plugin_output.py new file mode 100644 index 0000000000000000000000000000000000000000..206ba13325740d6374ce9f8a5c4dc744db0b1dfb --- /dev/null +++ b/steamship/plugin/outputs/training_parameter_plugin_output.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import Any, Dict, Optional, Type + +from pydantic import BaseModel + +from steamship.base.model import CamelModel +from steamship.plugin.inputs.export_plugin_input import ExportPluginInput +from steamship.plugin.inputs.training_parameter_plugin_input import TrainingParameterPluginInput + + +class TrainingParameterPluginOutput(CamelModel): + machine_type: Optional[str] = None + training_epochs: int = None + testing_holdout_percent: float = None + test_split_seed: int = None + training_params: Dict[str, Any] = None + inference_params: Dict[str, Any] = None + export_request: ExportPluginInput = None + + @staticmethod + def from_input(input: TrainingParameterPluginInput) -> TrainingParameterPluginOutput: + return TrainingParameterPluginOutput( + export_request=input.export_plugin_input, + training_epochs=input.training_epochs, + testing_holdout_percent=input.testing_holdout_percent, + test_split_seed=input.test_split_seed, + training_params=input.training_params, + inference_params=input.inference_params, + ) + + @classmethod + def parse_obj(cls: Type[BaseModel], obj: Any) -> BaseModel: + # TODO (enias): This needs to be solved at the engine side + obj["export_request"] = obj.get("exportPluginInput") + return super().parse_obj(obj) diff --git a/steamship/plugin/request.py b/steamship/plugin/request.py new file mode 100644 index 0000000000000000000000000000000000000000..5ba3aca2873434dc1b49b31e4359960fb96c87b8 --- /dev/null +++ b/steamship/plugin/request.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from typing import Generic, Optional, TypeVar + +from 
pydantic.generics import GenericModel + +# Note! +# ===== +# +# This the files in this package are for Plugin Implementors. +# If you are using the Steamship Client, you probably are looking for either steamship.client or steamship.data +# +from steamship.base import Task +from steamship.base.model import CamelModel, to_camel + +T = TypeVar("T") +U = TypeVar("U") + + +class PluginRequestContext(CamelModel): + """Contains the context in which""" + + plugin_id: str = None + plugin_handle: str = None + plugin_version_id: str = None + plugin_version_handle: str = None + plugin_instance_id: str = None + plugin_instance_handle: str = None + + +class PluginRequest(GenericModel, Generic[T]): + # The primary payload of the request. E.g. RawDataPluginInput, BlockAndTagPluginInput + data: Optional[T] = None + + # The context in which this request is occurring + context: Optional[PluginRequestContext] = None + + # The status of the request as perceived by the requester. + status: Optional[Task] = None + + # Whether this plugin request is a status check against ongoing work. 
If True, status must be not None + is_status_check: bool = False + + class Config: + alias_generator = to_camel + allow_population_by_field_name = True diff --git a/steamship/plugin/tagger.py b/steamship/plugin/tagger.py new file mode 100644 index 0000000000000000000000000000000000000000..5dc3fe09687de326fd7422cf99780476af780622 --- /dev/null +++ b/steamship/plugin/tagger.py @@ -0,0 +1,70 @@ +import logging +from abc import ABC, abstractmethod + +from steamship.invocable import InvocableResponse, post +from steamship.invocable.plugin_service import PluginRequest, PluginService, TrainablePluginService +from steamship.plugin.inputs.block_and_tag_plugin_input import BlockAndTagPluginInput +from steamship.plugin.inputs.train_plugin_input import TrainPluginInput +from steamship.plugin.inputs.training_parameter_plugin_input import TrainingParameterPluginInput +from steamship.plugin.outputs.block_and_tag_plugin_output import BlockAndTagPluginOutput +from steamship.plugin.outputs.train_plugin_output import TrainPluginOutput +from steamship.plugin.outputs.training_parameter_plugin_output import TrainingParameterPluginOutput +from steamship.plugin.trainable_model import TrainableModel + +# Note! +# ===== +# +# This is the PLUGIN IMPLEMENTOR's View of a Tagger. +# +# If you are using the Steamship Client, you probably want steamship.client.operations.tagger instead +# of this file. 
class Tagger(PluginService[BlockAndTagPluginInput, BlockAndTagPluginOutput], ABC):
    """Plugin-implementor base class for a stateless (non-trainable) tagger."""

    @abstractmethod
    def run(
        self, request: PluginRequest[BlockAndTagPluginInput]
    ) -> InvocableResponse[BlockAndTagPluginOutput]:
        """Tag the content in `request.data`; implemented by the plugin author."""
        raise NotImplementedError()

    @post("tag")
    def run_endpoint(self, **kwargs) -> InvocableResponse[BlockAndTagPluginOutput]:
        """Exposes the Tagger's `run` operation to the Steamship Engine via the expected HTTP path POST /tag"""
        return self.run(PluginRequest[BlockAndTagPluginInput].parse_obj(kwargs))


class TrainableTagger(TrainablePluginService[BlockAndTagPluginInput, BlockAndTagPluginOutput], ABC):
    """Plugin-implementor base class for a trainable tagger."""

    @abstractmethod
    def run_with_model(
        self, request: PluginRequest[BlockAndTagPluginInput], model: TrainableModel
    ) -> InvocableResponse[BlockAndTagPluginOutput]:
        """Tag the content in `request.data` using `model`; implemented by the plugin author."""
        raise NotImplementedError()

    # noinspection PyUnusedLocal
    @post("tag")
    def run_endpoint(self, **kwargs) -> InvocableResponse[BlockAndTagPluginOutput]:
        """Exposes the Tagger's `run` operation to the Steamship Engine via the expected HTTP path POST /tag"""
        return self.run(PluginRequest[BlockAndTagPluginInput].parse_obj(kwargs))

    # noinspection PyUnusedLocal
    @post("getTrainingParameters")
    def get_training_parameters_endpoint(
        self, **kwargs
    ) -> InvocableResponse[TrainingParameterPluginOutput]:
        """Exposes the Service's `get_training_parameters` operation to the Steamship Engine via the expected HTTP path POST /getTrainingParameters"""
        # Fix: parse via parse_obj, matching every sibling endpoint, so the request
        # envelope (including camelCase aliases from the Engine) is handled consistently.
        return self.get_training_parameters(
            PluginRequest[TrainingParameterPluginInput].parse_obj(kwargs)
        )

    # noinspection PyUnusedLocal
    @post("train")
    def train_endpoint(self, **kwargs) -> InvocableResponse[TrainPluginOutput]:
        """Exposes the Service's `train` operation to the Steamship Engine via the expected HTTP path POST /train"""
        logging.info(f"Tagger:train_endpoint called. Calling train {kwargs}")
        arg = PluginRequest[TrainPluginInput].parse_obj(kwargs)
        # model_cls() returns the model type; the second call instantiates it.
        model = self.model_cls()()
        model.receive_config(config=self.config)

        if arg.is_status_check:
            return self.train_status(arg, model)
        else:
            return self.train(arg, model)
+ + When saving a model, the caller provides `handle`, such as "V1" or "epoch_23". This allows that particular checkpoint + to be re-loaded. By default, every save operation also saves the model to the "default" checkpoint, overwriting it + if it already existed. When a user loads a model without specifying a checkpoint, the "default" checkpoint will be used. + + # Data Scope + + A TrainableModel's data is saved & loaded with respect to + + 1) The user's active Workspace, and + 2) The provided Plugin Instance within that workspace. + + The active workspace is read from the Steamship client context, and the `plugin_instance_id` is supplied as a + method argument on the `save_remote` and `load_remote` methods. + + This organization enables a user to have arbitrarily many trained model instances of the same type colocated within + a Workspace. + + # Training + + A training job is fully parameterized by the `TrainPluginInput` object. + + # Result Reporting + + A training job's results are reported via the `TrainPluginOutput` object. These results include a reference to the + `save_remote` output, but they do not include the model parameters themselves. For example, after training, one + could write: + + >>> archive_path_in_steamship = model.save_remote(..) + >>> output = TrainPluginOutput(archive_path_in_steamship=archive_path_in_steamship, + ... + ) + + That output is the ultimate return object of the training process, but the Plugin that owns this model need not + wait for synchronous completion to update the Steamship Engine with intermediate results. It can use the + `Response.post_update` to proactively stream results back to the server. + + # Third-party / External Models + + This model class is a convenient wrapper for models running on third party systems (e.g. Google's AutoML). In such + a case: + + - The `train` method would begin the job on the 3rd party system. 
+ - The `save_to_folder` method would write the Job ID and any other useful data to the checkpoint path + - The `load_from_folder` method would read this Job ID from disk and obtain an authenticated client with the + third party system. + - Any `run` method the implementer created would ferry back results fetched from the third-party system. + - Any status reporting in TrainPluginOutput would ferry back status fetched from the third-party system. + + """ + + config: ConfigType = None + + def receive_config(self, config: ConfigType): + """Stores config from plugin instance, so it is accessible by model on load or train.""" + self.config = config + + @abstractmethod + def save_to_folder(self, checkpoint_path: Path): + """Saves 100% of the state of this model to the provided path.""" + raise NotImplementedError() + + @abstractmethod + def load_from_folder(self, checkpoint_path: Path): + """Load 100% of the state of this model to the provided path.""" + raise NotImplementedError() + + @abstractmethod + def train(self, input: PluginRequest[TrainPluginInput]) -> InvocableResponse[TrainPluginOutput]: + """Train or fine-tune the model, parameterized by the information in the TrainPluginInput object.""" + raise NotImplementedError() + + @abstractmethod + def train_status( + self, input: PluginRequest[TrainPluginInput] + ) -> InvocableResponse[TrainPluginOutput]: + """Check on the status of an in-process training job, if it is running externally asynchronously.""" + raise NotImplementedError() + + @classmethod + def load_from_local_checkpoint(cls, checkpoint: ModelCheckpoint, config: ConfigType): + model = cls() + model.receive_config(config=config) + model.load_from_folder(checkpoint.folder_path_on_disk()) + return model + + @classmethod + def load_remote( + cls, + client: Client, + plugin_instance_id: str, + checkpoint_handle: Optional[str] = None, + use_cache: bool = True, + model_parent_directory: Path = None, + plugin_instance_config: ConfigType = None, + ): + if 
checkpoint_handle is None: + # For some reason doing this defaulting in the signature wasn't working. + checkpoint_handle = ModelCheckpoint.DEFAULT_HANDLE + + model_key = f"{plugin_instance_id}/{checkpoint_handle}" + logging.info(f"TrainableModel:load_remote - Model Key: {model_key}") + + global MODEL_CACHE + + if use_cache: + if model_key in MODEL_CACHE: + logging.info(f"TrainableModel:load_remote - Returning cached: {model_key}") + return MODEL_CACHE[model_key] + + checkpoint = ModelCheckpoint( + client=client, + parent_directory=model_parent_directory, + handle=checkpoint_handle, + plugin_instance_id=plugin_instance_id, + ) + + # If we haven't loaded the model, we need to download and start the model + logging.info(f"TrainableModel:load_remote - Downloading: {model_key}") + checkpoint.download_model_bundle() + logging.info(f"TrainableModel:load_remote - Loading: {model_key}") + model = cls.load_from_local_checkpoint(checkpoint, plugin_instance_config) + logging.info(f"TrainableModel:load_remote - Loaded: {model_key}") + + if use_cache: + MODEL_CACHE[model_key] = model + + return model + + def save_remote( + self, + client: Client, + plugin_instance_id: str, + checkpoint_handle: Optional[str] = None, + model_parent_directory: Path = None, + set_as_default: bool = True, + ) -> str: + if checkpoint_handle is None: + # For some reason doing this defaulting in the signature wasn't working. 
+ checkpoint_handle = ModelCheckpoint.DEFAULT_HANDLE + + checkpoint = ModelCheckpoint( + client=client, + parent_directory=model_parent_directory, + handle=checkpoint_handle, + plugin_instance_id=plugin_instance_id, + ) + self.save_to_folder(checkpoint.folder_path_on_disk()) + checkpoint.upload_model_bundle(set_as_default=set_as_default) + return checkpoint.archive_path_in_steamship() diff --git a/steamship/utils/__init__.py b/steamship/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fff9cf696eeb5d7af026657a2999375ea863fd17 --- /dev/null +++ b/steamship/utils/__init__.py @@ -0,0 +1 @@ +"""Collection of utility functions.""" diff --git a/steamship/utils/__pycache__/__init__.cpython-39.pyc b/steamship/utils/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f13e9e7edcd94560aaf8efe616d910445eb21db0 Binary files /dev/null and b/steamship/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/steamship/utils/__pycache__/binary_utils.cpython-39.pyc b/steamship/utils/__pycache__/binary_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d1943162a3de43aa471e5d5d140a134f4bec060 Binary files /dev/null and b/steamship/utils/__pycache__/binary_utils.cpython-39.pyc differ diff --git a/steamship/utils/__pycache__/huggingface_helper.cpython-39.pyc b/steamship/utils/__pycache__/huggingface_helper.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c77d5b52b969de667bc4079577b1eca36f3f611 Binary files /dev/null and b/steamship/utils/__pycache__/huggingface_helper.cpython-39.pyc differ diff --git a/steamship/utils/__pycache__/kv_store.cpython-39.pyc b/steamship/utils/__pycache__/kv_store.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ffbfa9cc8b9e01b7451118920f5acde19c9fe32 Binary files /dev/null and b/steamship/utils/__pycache__/kv_store.cpython-39.pyc differ diff --git 
a/steamship/utils/__pycache__/metadata.cpython-39.pyc b/steamship/utils/__pycache__/metadata.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d4eef4d28873b2f40904cbebb47db0205f5e1dfc Binary files /dev/null and b/steamship/utils/__pycache__/metadata.cpython-39.pyc differ diff --git a/steamship/utils/__pycache__/signed_urls.cpython-39.pyc b/steamship/utils/__pycache__/signed_urls.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04de4da71fe80ee8142a7626562844e6314cce40 Binary files /dev/null and b/steamship/utils/__pycache__/signed_urls.cpython-39.pyc differ diff --git a/steamship/utils/__pycache__/url.cpython-39.pyc b/steamship/utils/__pycache__/url.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de6e9c7bbfe61e54be19d1f43c2c9ae33542017e Binary files /dev/null and b/steamship/utils/__pycache__/url.cpython-39.pyc differ diff --git a/steamship/utils/__pycache__/utils.cpython-39.pyc b/steamship/utils/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9106f9ad110ee393bacb7afa387c58e04b639077 Binary files /dev/null and b/steamship/utils/__pycache__/utils.cpython-39.pyc differ diff --git a/steamship/utils/__pycache__/zip_archives.cpython-39.pyc b/steamship/utils/__pycache__/zip_archives.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf568b8e7f8f3d139db3763a408415d92c72f1b0 Binary files /dev/null and b/steamship/utils/__pycache__/zip_archives.cpython-39.pyc differ diff --git a/steamship/utils/binary_utils.py b/steamship/utils/binary_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d215aaaa7d6c1f483e8952e2ac0a821607cf4837 --- /dev/null +++ b/steamship/utils/binary_utils.py @@ -0,0 +1,113 @@ +import base64 +import io +import json as jsonlib +import logging +from typing import Any, Tuple, Union + +from pydantic import BaseModel + +from steamship.base import 
def guess_mime(obj: Any, provided_mime: str = None) -> str:
    """Return `provided_mime` if given; otherwise guess TXT for scalars, BINARY otherwise."""
    if provided_mime is not None:
        return provided_mime
    if isinstance(obj, (str, int, float, bool)):
        return MimeTypes.TXT
    return MimeTypes.BINARY


def to_b64(obj: Any) -> str:
    """Base64-encode `obj`, coercing non-bytes values to UTF-8 text first."""
    # Fix: the original assigned `ret_bytes = obj` and then re-assigned it in
    # every branch; the branches now assign exactly once.
    if isinstance(obj, bytes):
        raw = obj
    elif isinstance(obj, str):
        raw = obj.encode("utf-8")
    else:
        raw = str(obj).encode("utf-8")
    return base64.b64encode(raw).decode("utf-8")


def flexi_create(  # noqa: C901
    base64string: str = None,
    data: Any = None,
    string: str = None,
    json: Any = None,
    _bytes: Union[bytes, io.BytesIO] = None,
    mime_type=None,
    force_base64=False,
) -> Tuple[Any, Union[None, str], Union[None, str]]:  # TODO (Enias): Review
    """Homogenize the convenient constructor input forms into one triple.

    Accepts exactly one of `base64string`, `data`, `string`, `json`, or `_bytes`
    and returns `(data, mime_type, content_encoding)`. When `force_base64` is
    True, the returned data is always Base64-encoded.

    Raises SteamshipError if encoding fails.
    """
    try:
        # Pre-encoded input passes straight through.
        if base64string is not None:
            return base64string, mime_type or MimeTypes.BINARY, ContentEncodings.BASE64

        ret_data = None  # the body of the result
        ret_mime = None  # for the Content-Type field
        ret_encoding = None  # for the Content-Encoding field
        is_b64 = False

        if data is not None:
            ret_data, ret_mime = data, mime_type or guess_mime(data, mime_type)

        elif string is not None:
            ret_data, ret_mime = string, mime_type or MimeTypes.TXT

        elif json is not None:
            ret_mime = mime_type or MimeTypes.JSON
            # Serialize pydantic models to plain dicts (camelCase keys for CamelModel).
            if isinstance(json, CamelModel):
                ret_data = json.dict(by_alias=True)
            elif isinstance(json, BaseModel):
                ret_data = json.dict()
            else:
                ret_data = json

        elif _bytes is not None:
            if isinstance(_bytes, io.BytesIO):
                _bytes = _bytes.getvalue()  # Turn it into regular bytes
            ret_data, ret_mime = (
                base64.b64encode(_bytes).decode("utf-8"),
                mime_type or ret_mime or MimeTypes.BINARY,
            )
            is_b64 = True
            ret_encoding = ContentEncodings.BASE64

        if ret_data is not None:
            # Fix: replaced `force_base64 is False` / `is_b64 is True` identity
            # comparisons with idiomatic truth tests; branch outcomes unchanged.
            if not force_base64:
                return ret_data, ret_mime, ret_encoding
            if is_b64:
                return ret_data, ret_mime, ContentEncodings.BASE64
            if json is not None or (data is not None and ret_mime == MimeTypes.JSON):
                # If it was JSON, we need to dump the object first!
                # Otherwise it will end up getting turned to the Python's object
                # representation format, which will result in invalid JSON.
                ret_data = jsonlib.dumps(ret_data)
            return (
                to_b64(ret_data),
                ret_mime or MimeTypes.BINARY,
                ContentEncodings.BASE64,
            )

        return None, None, None
    except Exception as ex:
        logging.error("Exception thrown trying to encode data", exc_info=ex)
        raise SteamshipError(
            message="There was an exception thrown while trying to encode your package/plugin data.",
            error=ex,
        )
+ """ + tries = 0 + while tries <= max_error_retries: + async with session.post(api_url, headers=headers, json=json_input) as response: + if response.status == HTTPStatus.OK and response.content_type == "application/json": + ok_response = await response.json() + logging.info(ok_response) + return ok_response + else: + nok_response = await response.text() + if "is currently loading" not in nok_response: + logging.info( + f'Received text response "{nok_response}" for input text "{text}" [attempt {tries}/{max_error_retries}]' + ) + tries += 1 + else: + await asyncio.sleep(1) + if ok_response is None: + raise SteamshipError( + message="Unable to query Hugging Face model", + internal_message=f"HF returned error: {nok_response} after {tries} attempts", + ) + return ok_response + + +async def _model_calls( + texts: List[str], + api_url: str, + headers, + timeout_seconds: int, + additional_params: dict = None, + use_gpu: bool = False, +) -> List[list]: + async with aiohttp.ClientSession(timeout=ClientTimeout(total=timeout_seconds)) as session: + tasks = [] + for text in texts: + tasks.append( + asyncio.ensure_future( + _model_call( + session, + text, + api_url, + headers=headers, + additional_params=additional_params, + use_gpu=use_gpu, + ) + ) + ) + + return await asyncio.gather(*tasks) + + +def get_huggingface_results( + blocks: List[Block], + hf_model_path: str, + hf_bearer_token: str, + additional_params: dict = None, + timeout_seconds: int = 30, + use_gpu: bool = False, +) -> List[list]: + api_url = f"https://api-inference.huggingface.co/models/{hf_model_path}" + headers = {"Authorization": f"Bearer {hf_bearer_token}"} + start_time = time.perf_counter() + results = asyncio.run( + _model_calls( + [block.text for block in blocks], + api_url, + headers, + timeout_seconds=timeout_seconds, + additional_params=additional_params, + use_gpu=use_gpu, + ) + ) + total_time = time.perf_counter() - start_time + logging.info( + f"Completed {len(blocks)} blocks in {total_time} 
KV_STORE_MARKER = "__init__"


class KeyValueStore:
    """A simple key-value store implemented atop Steamship Files and Tags.

    Each store is backed by a single File, found via a marker Tag whose kind
    is the store's ``store_identifier``. Entries are Tags on that File:

    * kind  = the store identifier
    * name  = the entry's key
    * value = a dict holding the entry's value (wrap scalars in a dict)

    WARNING: this is a clever hack atop Steamship's tag system to provide
    mutable key-value storage, kept in steamship.utils because it has proven
    useful. If you rely on it heavily, let us know at hello@steamship.com so
    we can prioritize a proper key-value API.
    """

    client: Steamship
    store_identifier: str

    def __init__(self, client: Steamship, store_identifier: str = "KeyValueStore"):
        """Create a new KeyValueStore handle.

        Args:
            client (Steamship): The Steamship client.
            store_identifier (str): Identifies this store instance; multiple
                independent stores may coexist in one workspace.
        """
        self.client = client
        self.store_identifier = f"kv-store-{store_identifier}"

    def _get_file(self, or_create: bool = False) -> Optional[File]:
        """Find the backing File, optionally creating it when missing."""
        matches = File.query(self.client, f'filetag and kind "{self.store_identifier}"').files
        if matches:
            return matches[0]
        if not or_create:
            return None
        return File.create(
            self.client,
            blocks=[Block(text="")],
            tags=[Tag(kind=self.store_identifier, name=KV_STORE_MARKER)],
        )

    def get(self, key: str) -> Optional[Dict]:
        """Get the value represented by `key`."""
        file = self._get_file()
        if file is None:
            return None
        return next(
            (
                tag.value
                for tag in file.tags
                if tag.kind == self.store_identifier and tag.name == key
            ),
            None,
        )

    def delete(self, key: str) -> bool:
        """Delete the entry represented by `key`; return whether anything was deleted."""
        file = self._get_file()
        if file is None:
            return False
        matching = [
            tag for tag in file.tags if tag.kind == self.store_identifier and tag.name == key
        ]
        for tag in matching:
            tag.delete()
        return bool(matching)

    def set(self, key: str, value: Dict[str, Any]):
        """Set the entry (key, value)."""
        # Remove any existing entry first so we never hold duplicate tags.
        self.delete(key)
        file = self._get_file(or_create=True)
        req = Tag(file_id=file.id, kind=self.store_identifier, name=key, value=value)
        return self.client.post("tag/create", req, expect=Tag)
Metadata = Union[int, float, bool, str, List, Dict]


def str_to_metadata(s: str) -> Optional[Metadata]:
    """Decode a JSON string into a metadata value; ``None`` passes through."""
    return None if s is None else json.loads(s)


def metadata_to_str(m: Metadata) -> Optional[str]:
    """Encode a metadata value as a JSON string; ``None`` passes through."""
    return None if m is None else json.dumps(m)


def hash_dict(d: Dict) -> str:
    """Returns the MD5 hash of a dictionary.

    Keys are sorted before serialization so that logically-equal dictionaries
    always produce the same digest.
    """
    digest = hashlib.md5()  # noqa: S303
    digest.update(json.dumps(d, sort_keys=True).encode())
    return digest.hexdigest()
+ """ + bytes = url_to_bytes(url) + json_string = bytes.decode("utf8") + return json.loads(json_string) + + +def url_to_bytes(url: str) -> bytes: + """ + Downloads the Signed URL and returns the contents as bytes. + + This is a helper function to consolidate Steamship Client URL fetching to ensure a single point of handling for: + * Error messages + * Any required manipulations for URL signed URLs + * Any required manipulations for localstack-based environments + + Note that the base API Client does not use this method on purpose: in the event of error code, it inspects the + contents of the response for a SteamshipError. + """ + url = apply_localstack_url_fix(url) + logging.info(f"Downloading: {url}.") + + resp = requests.get(url) + if resp.status_code != 200: + # TODO: At least Localstack send to reply with HTTP 200 even if the file isn't found! + # The full response contains: + # + # NoSuchKey + # + # So we **could** check the response text even in the event of 200 but that seems wrong.. + if "NoSuchKey" in resp.text: + raise SteamshipError( + message=f"The file at signed URL {url} did not exist. HTTP {resp.status_code}. Content: {resp.text}" + ) + else: + raise SteamshipError( + message=f"There was an error downloading from the signed url: {url}. HTTP {resp.status_code}. Content: {resp.text}" + ) + return resp.content + + +def download_from_signed_url(url: str, to_file: Path = None) -> Path: + """ + Downloads the Signed URL to the filename `desired_filename` in a temporary directory on disk. 
+ """ + content = url_to_bytes(url) + + if not to_file.parent.exists(): + to_file.parent.mkdir(parents=True, exist_ok=True) + + with open(to_file, "wb") as f: + logging.debug(f"Got contents of: {url}") + f.write(content) + logging.debug(f"Wrote contents of: {url} to {to_file}") + return Path(to_file) + + +def upload_to_signed_url(url: str, _bytes: Optional[bytes] = None, filepath: Optional[Path] = None): + """ + Uploads either the bytes or filepath contents to the provided Signed URL. + """ + + url = apply_localstack_url_fix(url) + if _bytes is not None: + logging.info(f"Uploading provided bytes to: {url}") + elif filepath is not None: + logging.info(f"Uploading file at {filepath} to: {url}") + with open(filepath, "rb") as f: + _bytes = f.read() + else: + raise SteamshipError( + message="Unable to upload data to signed URL -- neither a filepath nor bytes were provided.", + suggestion="Please provide either the `bytes` or the `filepath` argument", + ) + + http_response = requests.put( + url, data=_bytes, headers={"Content-Type": "application/octet-stream"} + ) + + # S3 returns 204 upon success; we include 200 here for safety. + if http_response.status_code not in [200, 204]: + logging.error(f"File upload error. file={filepath}. url= {url}") + logging.error(f"Status Code: {http_response.status_code}") + logging.error(f"Response Text: {http_response.text}") + raise SteamshipError( + message=f"Unable to upload data to signed URL. Status code: {http_response.status_code}. 
class Verb(str, Enum):
    """HTTP verbs used by the client."""

    GET = "GET"
    POST = "POST"


def is_local(base: str) -> bool:
    """Check if we are running the client locally (dev/test endpoints)."""
    return any(
        local_base in base
        for local_base in ("localhost", "127.0.0.1", "0:0:0:0", "host.docker.internal", "/test:")
    )


def apply_localstack_url_fix(url: Optional[str]) -> Optional[str]:
    """Rewrite local hostnames in `url` to LOCALSTACK_HOSTNAME, when it is set.

    No-op when `url` is falsy, LOCALSTACK_HOSTNAME is unset, or the hostname
    is plain "localhost".
    """
    logging.debug(f"URL {url}")
    localstack_hostname = environ.get("LOCALSTACK_HOSTNAME")
    if url and localstack_hostname is not None and localstack_hostname != "localhost":
        for host in ["127.0.0.1", "host.docker.internal", "localstack"]:
            # Log BEFORE replacing — the original logged after the replacement,
            # so the message showed the already-rewritten URL.
            logging.info(f"Replacing domain {host} in {url} with {localstack_hostname}")
            url = url.replace(host, localstack_hostname)
    return url


def safe_get(d: Dict, key: str, default: Any = None) -> Optional[Any]:
    """Safely get a value from a dictionary using a specific key.

    NOTE: falsy stored values (0, "", [], False) also fall back to `default`
    because of the trailing `or default` — preserved for compatibility.
    """
    return d.get(key, default) or default


def format_uri(uri: Optional[str]) -> Optional[str]:
    """Ensure a non-None URI ends with a trailing slash; `None` passes through."""
    if uri is not None and not uri.endswith("/"):
        uri += "/"
    return uri
typing import Optional + + +def zip_folder(folder: Path, into_file: Optional[Path]) -> Path: + """Zips a folder on disk to a co-located zip-file of the same name. + + The resulting zip file does not contain the enclosing folder name provided. + It contains only the children of that folder as its root elements. + """ + logging.info(f"Zipping: {folder}") + shutil.make_archive(str(folder).rstrip("/"), "zip", folder) + zip_path_str = str(folder).rstrip("/") + ".zip" + logging.info(f"Zipped: {zip_path_str}") + + if into_file is None: + return Path(zip_path_str) + + # Move the archive to the desired destination + + # Ensure the path to the desired extraction folder exists + if not into_file.parent.exists(): + into_file.parent.mkdir(parents=True, exist_ok=True) + + shutil.move(zip_path_str, into_file) + return into_file + + +def unzip_folder(zip_file: Path, into_folder: Optional[Path]) -> Path: + """Unzips a folder on disk, returning the path to the new folder resulting.""" + logging.info(f"Unzipping: {zip_file}") + if into_folder is None: + into_folder = zip_file.with_suffix("") # Strips the '.zip' suffix + + # Ensure the path to the desired extraction folder exists + if not into_folder.parent.exists(): + into_folder.parent.mkdir(parents=True, exist_ok=True) + + shutil.unpack_archive(zip_file, into_folder, "zip") + logging.info(f"Unzipped: {into_folder}") + return Path(into_folder)