### INIT VARIABLES ###
import threading
import requests
from typing import Callable, List, Optional, Dict, Union, Any
from litellm.caching import Cache
from litellm._logging import set_verbose
from litellm.proxy._types import KeyManagementSystem
import httpx
input_callback: List[Union[str, Callable]] = []
success_callback: List[Union[str, Callable]] = []
failure_callback: List[Union[str, Callable]] = []
callbacks: List[Callable] = []
_async_input_callback: List[
    Callable
] = []  # internal variable - async custom callbacks are routed here.
_async_success_callback: List[
    Union[str, Callable]
] = []  # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[
    Callable
] = []  # internal variable - async custom callbacks are routed here.
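# Example (a minimal sketch): success_callback accepts known integration names
# (strings) or custom functions. A custom callback receives the call kwargs, the
# response object, and the start/end times, e.g.
#
#   def log_success(kwargs, completion_response, start_time, end_time):
#       print(f"{kwargs.get('model')} took {end_time - start_time}")
#
#   litellm.success_callback = [log_success]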
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
email: Optional[
    str
] = None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
token: Optional[
    str
] = None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
telemetry = True
max_tokens = 256  # OpenAI Defaults
drop_params = False
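# Example (sketch): when drop_params is enabled, OpenAI params that the target
# provider does not support are dropped instead of raising an error.
#
#   litellm.drop_params = True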
retry = True
api_key: Optional[str] = None
openai_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
replicate_key: Optional[str] = None
cohere_key: Optional[str] = None
maritalk_key: Optional[str] = None
ai21_key: Optional[str] = None
openrouter_key: Optional[str] = None
huggingface_key: Optional[str] = None
vertex_project: Optional[str] = None
vertex_location: Optional[str] = None
togetherai_api_key: Optional[str] = None
cloudflare_api_key: Optional[str] = None
baseten_key: Optional[str] = None
aleph_alpha_key: Optional[str] = None
nlp_cloud_key: Optional[str] = None
use_client: bool = False
logging: bool = True
caching: bool = False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
caching_with_models: bool = False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
cache: Optional[
    Cache
] = None  # cache object <- use this - https://docs.litellm.ai/docs/caching
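# Example (a minimal sketch): enable caching so identical completion() calls
# return cached responses; Cache() defaults to an in-memory backend, see the
# docs link above for redis and other backends.
#
#   litellm.cache = Cache()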
model_alias_map: Dict[str, str] = {}
model_group_alias_map: Dict[str, str] = {}
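# Example (sketch): map a custom alias to a real model name, so callers can pass
# the alias to completion(); "my-gpt4" here is a hypothetical alias.
#
#   litellm.model_alias_map = {"my-gpt4": "gpt-4"}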
max_budget: float = 0.0  # set the max budget across all providers
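# Example (sketch): cap total spend (in USD) across providers; once the running
# cost (_current_cost below) exceeds this, calls raise BudgetExceededError.
#
#   litellm.max_budget = 10.0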
_openai_completion_params = [
    "functions",
    "function_call",
    "temperature",
    "top_p",
    "n",
    "stream",
    "stop",
    "max_tokens",
    "presence_penalty",
    "frequency_penalty",
    "logit_bias",
    "user",
    "request_timeout",
    "api_base",
    "api_version",
    "api_key",
    "deployment_id",
    "organization",
    "base_url",
    "default_headers",
    "timeout",
    "response_format",
    "seed",
    "tools",
    "tool_choice",
    "max_retries",
]
_litellm_completion_params = [
    "metadata",
    "acompletion",
    "caching",
    "mock_response",
    "api_key",
    "api_version",
    "api_base",
    "force_timeout",
    "logger_fn",
    "verbose",
    "custom_llm_provider",
    "litellm_logging_obj",
    "litellm_call_id",
    "use_client",
    "id",
    "fallbacks",
    "azure",
    "headers",
    "model_list",
    "num_retries",
    "context_window_fallback_dict",
    "roles",
    "final_prompt_value",
    "bos_token",
    "eos_token",
    "request_timeout",
    "complete_response",
    "self",
    "client",
    "rpm",
    "tpm",
    "input_cost_per_token",
    "output_cost_per_token",
    "hf_model_name",
    "model_info",
    "proxy_server_request",
    "preset_cache_key",
]
_current_cost = 0  # private variable, used if max budget is set
error_logs: Dict = {}
add_function_to_prompt: bool = False  # if function calling is not supported by the api, append function call details to the system prompt
client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None
model_fallbacks: Optional[List] = None  # Deprecated in favor of 'litellm.fallbacks'
model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
suppress_debug_info = False
dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None
#### RELIABILITY ####
request_timeout: Optional[float] = 6000
num_retries: Optional[int] = None  # per model endpoint
fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
allowed_fails: int = 0
num_retries_per_request: Optional[
    int
] = None  # for the request overall (incl. fallbacks + model retries)
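# Example (sketch): a module-level retry default plus per-call fallbacks; the
# model names here are illustrative.
#
#   litellm.num_retries = 2
#   completion(model="gpt-3.5-turbo", messages=msgs,
#              fallbacks=["gpt-3.5-turbo-16k", "gpt-4"])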
####### SECRET MANAGERS #####################
secret_manager_client: Optional[
    Any
] = None  # the instantiated key management client - e.g. azure kv, infisical, etc.
_google_kms_resource_name: Optional[str] = None
_key_management_system: Optional[KeyManagementSystem] = None
#############################################
def get_model_cost_map(url: str):
    try:
        with requests.get(
            url, timeout=5
        ) as response:  # set a 5 second timeout for the get request
            response.raise_for_status()  # raise an exception if the request is unsuccessful
            return response.json()
    except Exception:
        # on any failure (network, bad status, invalid json), fall back to the
        # backup copy bundled with the package
        import importlib.resources
        import json

        with importlib.resources.open_text(
            "litellm", "model_prices_and_context_window_backup.json"
        ) as f:
            return json.load(f)
model_cost = get_model_cost_map(url=model_cost_map_url)
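# Example (sketch): model_cost maps model names to pricing / context-window
# metadata from the JSON above, e.g.
#
#   model_cost["gpt-3.5-turbo"]["input_cost_per_token"]
#   model_cost["gpt-3.5-turbo"]["litellm_provider"]  # used to build the lists below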
custom_prompt_dict: Dict[str, dict] = {}
####### THREAD-SPECIFIC DATA ###################
class MyLocal(threading.local):
    def __init__(self):
        self.user = "Hello World"


_thread_context = MyLocal()


def identify(event_details):
    # store the user in thread-local data
    if "user" in event_details:
        _thread_context.user = event_details["user"]
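# Example (sketch): tag calls made from the current thread with a user id
# ("user-1234" is hypothetical).
#
#   litellm.identify({"user": "user-1234"})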
####### ADDITIONAL PARAMS ###################
# configurable params if you use proxy models like Helicone, map spend to org id, etc.
api_base = None
headers = None
api_version = None
organization = None
config_path = None
####### COMPLETION MODELS ###################
open_ai_chat_completion_models: List = []
open_ai_text_completion_models: List = []
cohere_models: List = []
anthropic_models: List = []
openrouter_models: List = []
vertex_language_models: List = []
vertex_vision_models: List = []
vertex_chat_models: List = []
vertex_code_chat_models: List = []
vertex_text_models: List = []
vertex_code_text_models: List = []
ai21_models: List = []
nlp_cloud_models: List = []
aleph_alpha_models: List = []
bedrock_models: List = []
deepinfra_models: List = []
perplexity_models: List = []
for key, value in model_cost.items():
    provider = value.get("litellm_provider")
    if provider == "openai":
        open_ai_chat_completion_models.append(key)
    elif provider == "text-completion-openai":
        open_ai_text_completion_models.append(key)
    elif provider == "cohere":
        cohere_models.append(key)
    elif provider == "anthropic":
        anthropic_models.append(key)
    elif provider == "openrouter":
        openrouter_models.append(key)
    elif provider == "vertex_ai-text-models":
        vertex_text_models.append(key)
    elif provider == "vertex_ai-code-text-models":
        vertex_code_text_models.append(key)
    elif provider == "vertex_ai-language-models":
        vertex_language_models.append(key)
    elif provider == "vertex_ai-vision-models":
        vertex_vision_models.append(key)
    elif provider == "vertex_ai-chat-models":
        vertex_chat_models.append(key)
    elif provider == "vertex_ai-code-chat-models":
        vertex_code_chat_models.append(key)
    elif provider == "ai21":
        ai21_models.append(key)
    elif provider == "nlp_cloud":
        nlp_cloud_models.append(key)
    elif provider == "aleph_alpha":
        aleph_alpha_models.append(key)
    elif provider == "bedrock":
        bedrock_models.append(key)
    elif provider == "deepinfra":
        deepinfra_models.append(key)
    elif provider == "perplexity":
        perplexity_models.append(key)
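# Example (sketch): the per-provider lists built above allow simple introspection,
# e.g. checking whether a model name came from the cost map as an OpenAI model:
#
#   "gpt-3.5-turbo" in litellm.open_ai_chat_completion_models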
# known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
openai_compatible_endpoints: List = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
    "api.mistral.ai/v1",
]
# this is maintained for Exception Mapping
openai_compatible_providers: List = [
    "anyscale",
    "mistral",
    "deepinfra",
    "perplexity",
    "xinference",
]
# well supported replicate llms
replicate_models: List = [
    # llama replicate supported LLMs
    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
    # Vicuna
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
    # Flan T-5
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
    # Others
    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
]
huggingface_models: List = [
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-70b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b",
    "meta-llama/Llama-2-7b-chat",
    "meta-llama/Llama-2-13b",
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
]  # these have been tested extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers
together_ai_models: List = [
    # llama llms - chat
    "togethercomputer/llama-2-70b-chat",
    # llama llms - language / instruct
    "togethercomputer/llama-2-70b",
    "togethercomputer/LLaMA-2-7B-32K",
    "togethercomputer/Llama-2-7B-32K-Instruct",
    "togethercomputer/llama-2-7b",
    # falcon llms
    "togethercomputer/falcon-40b-instruct",
    "togethercomputer/falcon-7b-instruct",
    # alpaca
    "togethercomputer/alpaca-7b",
    # chat llms
    "HuggingFaceH4/starchat-alpha",
    # code llms
    "togethercomputer/CodeLlama-34b",
    "togethercomputer/CodeLlama-34b-Instruct",
    "togethercomputer/CodeLlama-34b-Python",
    "defog/sqlcoder",
    "NumbersStation/nsql-llama-2-7B",
    "WizardLM/WizardCoder-15B-V1.0",
    "WizardLM/WizardCoder-Python-34B-V1.0",
    # language llms
    "NousResearch/Nous-Hermes-Llama2-13b",
    "Austism/chronos-hermes-13b",
    "upstage/SOLAR-0-70b-16bit",
    "WizardLM/WizardLM-70B-V1.0",
]  # supports all together ai models, just pass in the model id e.g. completion(model="together_computer/replit_code_3b",...)
baseten_models: List = [
    "qvv0xeq",  # FALCON 7B
    "q841o8w",  # WizardLM
    "31dxrj3",  # Mosaic ML
]
# used for Cost Tracking & Token counting
# https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/
# Azure returns gpt-35-turbo in their responses; we map it to the azure/ prefixed names below for token counting
azure_llms = {
    "gpt-35-turbo": "azure/gpt-35-turbo",
    "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k",
    "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct",
}
azure_embedding_models = {
    "ada": "azure/ada",
}
petals_models = [
    "petals-team/StableBeluga2",
]
ollama_models = ["llama2"]
maritalk_models = ["maritalk"]
model_list = (
    open_ai_chat_completion_models
    + open_ai_text_completion_models
    + cohere_models
    + anthropic_models
    + replicate_models
    + openrouter_models
    + huggingface_models
    + vertex_chat_models
    + vertex_text_models
    + ai21_models
    + together_ai_models
    + baseten_models
    + aleph_alpha_models
    + nlp_cloud_models
    + ollama_models
    + bedrock_models
    + deepinfra_models
    + perplexity_models
    + maritalk_models
)
provider_list: List = [
    "openai",
    "custom_openai",
    "text-completion-openai",
    "cohere",
    "anthropic",
    "replicate",
    "huggingface",
    "together_ai",
    "openrouter",
    "vertex_ai",
    "palm",
    "gemini",
    "ai21",
    "baseten",
    "azure",
    "sagemaker",
    "bedrock",
    "vllm",
    "nlp_cloud",
    "petals",
    "oobabooga",
    "ollama",
    "ollama_chat",
    "deepinfra",
    "perplexity",
    "anyscale",
    "mistral",
    "maritalk",
    "voyage",
    "cloudflare",
    "xinference",
    "custom",  # custom apis
]
models_by_provider: dict = {
    "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
    "cohere": cohere_models,
    "anthropic": anthropic_models,
    "replicate": replicate_models,
    "huggingface": huggingface_models,
    "together_ai": together_ai_models,
    "baseten": baseten_models,
    "openrouter": openrouter_models,
    "vertex_ai": vertex_chat_models + vertex_text_models,
    "ai21": ai21_models,
    "bedrock": bedrock_models,
    "petals": petals_models,
    "ollama": ollama_models,
    "deepinfra": deepinfra_models,
    "perplexity": perplexity_models,
    "maritalk": maritalk_models,
}
# mapping for those models which have larger equivalents
longer_context_model_fallback_dict: dict = {
    # openai chat completion models
    "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
    "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
    "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
    "gpt-4": "gpt-4-32k",
    "gpt-4-0314": "gpt-4-32k-0314",
    "gpt-4-0613": "gpt-4-32k-0613",
    # anthropic
    "claude-instant-1": "claude-2",
    "claude-instant-1.2": "claude-2",
    # vertexai
    "chat-bison": "chat-bison-32k",
    "chat-bison@001": "chat-bison-32k",
    "codechat-bison": "codechat-bison-32k",
    "codechat-bison@001": "codechat-bison-32k",
    # openrouter
    "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
    "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
}
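# Example (sketch): this dict can be passed per call via the
# 'context_window_fallback_dict' param (see _litellm_completion_params above), so
# a context-window error retries on the larger equivalent.
#
#   completion(model="gpt-3.5-turbo", messages=msgs,
#              context_window_fallback_dict=litellm.longer_context_model_fallback_dict)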
####### EMBEDDING MODELS ###################
open_ai_embedding_models: List = ["text-embedding-ada-002"]
cohere_embedding_models: List = [
    "embed-english-v3.0",
    "embed-english-light-v3.0",
    "embed-multilingual-v3.0",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-multilingual-v2.0",
]
bedrock_embedding_models: List = [
    "amazon.titan-embed-text-v1",
    "cohere.embed-english-v3",
    "cohere.embed-multilingual-v3",
]
all_embedding_models = (
    open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models
)
####### IMAGE GENERATION MODELS ###################
openai_image_generation_models = ["dall-e-2", "dall-e-3"]
from .timeout import timeout
from .utils import (
    client,
    exception_type,
    get_optional_params,
    modify_integration,
    token_counter,
    cost_per_token,
    completion_cost,
    get_litellm_params,
    Logging,
    acreate,
    get_model_list,
    get_max_tokens,
    get_model_info,
    register_prompt_template,
    validate_environment,
    check_valid_key,
    get_llm_provider,
    register_model,
    encode,
    decode,
    _calculate_retry_after,
    _should_retry,
    get_secret,
)
from .llms.huggingface_restapi import HuggingfaceConfig
from .llms.anthropic import AnthropicConfig
from .llms.replicate import ReplicateConfig
from .llms.cohere import CohereConfig
from .llms.ai21 import AI21Config
from .llms.together_ai import TogetherAIConfig
from .llms.cloudflare import CloudflareConfig
from .llms.palm import PalmConfig
from .llms.gemini import GeminiConfig
from .llms.nlp_cloud import NLPCloudConfig
from .llms.aleph_alpha import AlephAlphaConfig
from .llms.petals import PetalsConfig
from .llms.vertex_ai import VertexAIConfig
from .llms.sagemaker import SagemakerConfig
from .llms.ollama import OllamaConfig
from .llms.maritalk import MaritTalkConfig
from .llms.bedrock import (
    AmazonTitanConfig,
    AmazonAI21Config,
    AmazonAnthropicConfig,
    AmazonCohereConfig,
    AmazonLlamaConfig,
)
from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig
from .llms.azure import AzureOpenAIConfig, AzureOpenAIError
from .main import *  # type: ignore
from .integrations import *
from .exceptions import (
    AuthenticationError,
    InvalidRequestError,
    BadRequestError,
    NotFoundError,
    RateLimitError,
    ServiceUnavailableError,
    OpenAIError,
    ContextWindowExceededError,
    ContentPolicyViolationError,
    BudgetExceededError,
    APIError,
    Timeout,
    APIConnectionError,
    APIResponseValidationError,
    UnprocessableEntityError,
)
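# Example (sketch): provider errors are mapped to these OpenAI-style exception
# types, so a single handler covers any backend.
#
#   try:
#       litellm.completion(model="claude-2", messages=msgs)
#   except litellm.RateLimitError:
#       ...  # back off and retry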
from .budget_manager import BudgetManager
from .proxy.proxy_cli import run_server
from .router import Router