mpt-7b-8k-instruct / exceptions.py

LLM-foundry update March 26, 2024 23:50:31

2cc518e verified 8 months ago

7.17 kB

	"""Custom exceptions for the LLMFoundry."""
	from collections.abc import Mapping
	from typing import Any, Dict, List

	class MissingHuggingFaceURLSplitError(ValueError):
	"""Error thrown when there's no split used in HF dataset config."""

	def __init__(self) -> None:
	message = 'When using a HuggingFace dataset from a URL, you must set the ' + '`split` key in the dataset config.'
	super().__init__(message)

	class NotEnoughDatasetSamplesError(ValueError):
	"""Error thrown when there is not enough data to train a model."""

	def __init__(self, dataset_name: str, split: str, dataloader_batch_size: int, world_size: int, full_dataset_size: int, minimum_dataset_size: int) -> None:
	self.dataset_name = dataset_name
	self.split = split
	self.dataloader_batch_size = dataloader_batch_size
	self.world_size = world_size
	self.full_dataset_size = full_dataset_size
	self.minimum_dataset_size = minimum_dataset_size
	message = f'Your dataset (name={dataset_name}, split={split}) ' + f'has {full_dataset_size} samples, but your minimum batch size ' + f'is {minimum_dataset_size} because you are running on {world_size} gpus and ' + f'your per device batch size is {dataloader_batch_size}. Please increase the number ' + f'of samples in your dataset to at least {minimum_dataset_size}.'
	super().__init__(message)

	class UnknownExampleTypeError(KeyError):
	"""Error thrown when an unknown example type is used in a task."""

	def __init__(self, example: Mapping) -> None:
	message = f'Unknown example type example={example!r}'
	super().__init__(message)

	class TooManyKeysInExampleError(ValueError):
	"""Error thrown when a data sample has too many keys."""

	def __init__(self, desired_keys: set[str], keys: set[str]) -> None:
	message = f'Data sample has {len(keys)} keys in `allowed_keys`: {desired_keys} Please specify exactly one. Provided keys: {keys}'
	super().__init__(message)

	class NotEnoughChatDataError(ValueError):
	"""Error thrown when there is not enough chat data to train a model."""

	def __init__(self) -> None:
	message = 'Chat example must have at least two messages'
	super().__init__(message)

	class ConsecutiveRepeatedChatRolesError(ValueError):
	"""Error thrown when there are consecutive repeated chat roles."""

	def __init__(self, repeated_role: str) -> None:
	self.repeated_role = repeated_role
	message = f'Conversation roles must alternate but found {repeated_role} repeated consecutively.'
	super().__init__(message)

	class InvalidLastChatMessageRoleError(ValueError):
	"""Error thrown when the last message role in a chat example is invalid."""

	def __init__(self, last_role: str, expected_roles: set[str]) -> None:
	message = f'Invalid last message role: {last_role}. Expected one of: {expected_roles}'
	super().__init__(message)

	class IncorrectMessageKeyQuantityError(ValueError):
	"""Error thrown when a message has an incorrect number of keys."""

	def __init__(self, keys: List[str]) -> None:
	self.keys = keys
	message = f'Expected 2 keys in message, but found {len(keys)}'
	super().__init__(message)

	class InvalidRoleError(ValueError):
	"""Error thrown when a role is invalid."""

	def __init__(self, role: str, valid_roles: set[str]) -> None:
	self.role = role
	self.valid_roles = valid_roles
	message = f'Expected role to be one of {valid_roles} but found: {role}'
	super().__init__(message)

	class InvalidContentTypeError(TypeError):
	"""Error thrown when the content type is invalid."""

	def __init__(self, content_type: type) -> None:
	self.content_type = content_type
	message = f'Expected content to be a string, but found {content_type}'
	super().__init__(message)

	class InvalidPromptTypeError(TypeError):
	"""Error thrown when the prompt type is invalid."""

	def __init__(self, prompt_type: type) -> None:
	self.prompt_type = prompt_type
	message = f'Expected prompt to be a string, but found {prompt_type}'
	super().__init__(message)

	class InvalidResponseTypeError(TypeError):
	"""Error thrown when the response type is invalid."""

	def __init__(self, response_type: type) -> None:
	self.response_type = response_type
	message = f'Expected response to be a string, but found {response_type}'
	super().__init__(message)

	class InvalidPromptResponseKeysError(ValueError):
	"""Error thrown when missing expected prompt and response keys."""

	def __init__(self, mapping: Dict[str, str], example: Dict[str, Any]):
	self.example = example
	message = f'Expected mapping={mapping!r} to have keys "prompt" and "response".'
	super().__init__(message)

	class InvalidFileExtensionError(FileNotFoundError):
	"""Error thrown when a file extension is not a safe extension."""

	def __init__(self, dataset_name: str, valid_extensions: List[str]) -> None:
	self.dataset_name = dataset_name
	self.valid_extensions = valid_extensions
	message = f'safe_load is set to True. No data files with safe extensions {valid_extensions} ' + f'found for dataset at local path {dataset_name}.'
	super().__init__(message)

	class UnableToProcessPromptResponseError(ValueError):
	"""Error thrown when a prompt and response cannot be processed."""

	def __init__(self, input: Dict) -> None:
	message = f'Unable to extract prompt/response from {input}'
	super().__init__(message)

	class ClusterDoesNotExistError(ValueError):
	"""Error thrown when the cluster does not exist."""

	def __init__(self, cluster_id: str) -> None:
	self.cluster_id = cluster_id
	message = f'Cluster with id {cluster_id} does not exist. Check cluster id and try again!'
	super().__init__(message)

	class FailedToCreateSQLConnectionError(RuntimeError):
	"""Error thrown when client can't sql connect to Databricks."""

	def __init__(self) -> None:
	message = 'Failed to create sql connection to db workspace. To use sql connect, you need to provide http_path and cluster_id!'
	super().__init__(message)

	class FailedToConnectToDatabricksError(RuntimeError):
	"""Error thrown when the client fails to connect to Databricks."""

	def __init__(self) -> None:
	message = 'Failed to create databricks connection. Check hostname and access token!'
	super().__init__(message)

	class InputFolderMissingDataError(ValueError):
	"""Error thrown when the input folder is missing data."""

	def __init__(self, input_folder: str) -> None:
	self.input_folder = input_folder
	message = f'No text files were found at {input_folder}.'
	super().__init__(message)

	class OutputFolderNotEmptyError(FileExistsError):
	"""Error thrown when the output folder is not empty."""

	def __init__(self, output_folder: str) -> None:
	self.output_folder = output_folder
	message = f'{output_folder} is not empty. Please remove or empty it and retry.'
	super().__init__(message)