### INIT VARIABLES ###
import threading
import requests
from typing import Callable, List, Optional, Dict, Union, Any
from litellm.caching import Cache
from litellm._logging import set_verbose
from litellm.proxy._types import KeyManagementSystem
import httpx
input_callback: List[Union[str, Callable]] = []
success_callback: List[Union[str, Callable]] = []
failure_callback: List[Union[str, Callable]] = []
callbacks: List[Callable] = []
_async_input_callback: List[
    Callable
] = []  # internal variable - async custom callbacks are routed here.
_async_success_callback: List[
    Union[str, Callable]
] = []  # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[
    Callable
] = []  # internal variable - async custom callbacks are routed here.
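# Example (a minimal sketch): success_callback accepts known integration names
# (strings) or custom functions. A custom callback receives the call kwargs, the
# response object, and the start/end times, e.g.
#
#   def log_success(kwargs, completion_response, start_time, end_time):
#       print(f"{kwargs.get('model')} took {end_time - start_time}")
#
#   litellm.success_callback = [log_success]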
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
email: Optional[
    str
] = None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
token: Optional[
    str
] = None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
telemetry = True
max_tokens = 256  # OpenAI Defaults
drop_params = False
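# Example (sketch): when drop_params is enabled, OpenAI params that the target
# provider does not support are dropped instead of raising an error.
#
#   litellm.drop_params = True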
retry = True
api_key: Optional[str] = None
openai_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
replicate_key: Optional[str] = None
cohere_key: Optional[str] = None
maritalk_key: Optional[str] = None
ai21_key: Optional[str] = None
openrouter_key: Optional[str] = None
huggingface_key: Optional[str] = None
vertex_project: Optional[str] = None
vertex_location: Optional[str] = None
togetherai_api_key: Optional[str] = None
cloudflare_api_key: Optional[str] = None
baseten_key: Optional[str] = None
aleph_alpha_key: Optional[str] = None
nlp_cloud_key: Optional[str] = None
use_client: bool = False
logging: bool = True
caching: bool = False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
caching_with_models: bool = False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
cache: Optional[
    Cache
] = None  # cache object <- use this - https://docs.litellm.ai/docs/caching
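# Example (a minimal sketch): enable caching so identical completion() calls
# return cached responses; Cache() defaults to an in-memory backend, see the
# docs link above for redis and other backends.
#
#   litellm.cache = Cache()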
model_alias_map: Dict[str, str] = {}
model_group_alias_map: Dict[str, str] = {}
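# Example (sketch): map a custom alias to a real model name, so callers can pass
# the alias to completion(); "my-gpt4" here is a hypothetical alias.
#
#   litellm.model_alias_map = {"my-gpt4": "gpt-4"}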
max_budget: float = 0.0  # set the max budget across all providers
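# Example (sketch): cap total spend (in USD) across providers; once the running
# cost (_current_cost below) exceeds this, calls raise BudgetExceededError.
#
#   litellm.max_budget = 10.0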
_openai_completion_params = [
    "functions",
    "function_call",
    "temperature",
    "top_p",
    "n",
    "stream",
    "stop",
    "max_tokens",
    "presence_penalty",
    "frequency_penalty",
    "logit_bias",
    "user",
    "request_timeout",
    "api_base",
    "api_version",
    "api_key",
    "deployment_id",
    "organization",
    "base_url",
    "default_headers",
    "timeout",
    "response_format",
    "seed",
    "tools",
    "tool_choice",
    "max_retries",
]
_litellm_completion_params = [
    "metadata",
    "acompletion",
    "caching",
    "mock_response",
    "api_key",
    "api_version",
    "api_base",
    "force_timeout",
    "logger_fn",
    "verbose",
    "custom_llm_provider",
    "litellm_logging_obj",
    "litellm_call_id",
    "use_client",
    "id",
    "fallbacks",
    "azure",
    "headers",
    "model_list",
    "num_retries",
    "context_window_fallback_dict",
    "roles",
    "final_prompt_value",
    "bos_token",
    "eos_token",
    "request_timeout",
    "complete_response",
    "self",
    "client",
    "rpm",
    "tpm",
    "input_cost_per_token",
    "output_cost_per_token",
    "hf_model_name",
    "model_info",
    "proxy_server_request",
    "preset_cache_key",
]
_current_cost = 0  # private variable, used if max budget is set
error_logs: Dict = {}
add_function_to_prompt: bool = False  # if function calling is not supported by the api, append function call details to the system prompt
client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None
model_fallbacks: Optional[List] = None  # Deprecated in favor of 'litellm.fallbacks'
model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
suppress_debug_info = False
dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None
#### RELIABILITY ####
request_timeout: Optional[float] = 6000
num_retries: Optional[int] = None  # per model endpoint
fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
allowed_fails: int = 0
num_retries_per_request: Optional[
    int
] = None  # for the request overall (incl. fallbacks + model retries)
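# Example (sketch): a module-level retry default plus per-call fallbacks; the
# model names here are illustrative.
#
#   litellm.num_retries = 2
#   completion(model="gpt-3.5-turbo", messages=msgs,
#              fallbacks=["gpt-3.5-turbo-16k", "gpt-4"])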
####### SECRET MANAGERS #####################
secret_manager_client: Optional[
    Any
] = None  # the instantiated key management client - e.g. azure kv, infisical, etc.
_google_kms_resource_name: Optional[str] = None
_key_management_system: Optional[KeyManagementSystem] = None
#############################################
def get_model_cost_map(url: str):
    try:
        with requests.get(
            url, timeout=5
        ) as response:  # set a 5 second timeout for the get request
            response.raise_for_status()  # raise an exception if the request is unsuccessful
            return response.json()
    except Exception:
        # on any failure (network, bad status, invalid json), fall back to the
        # backup copy bundled with the package
        import importlib.resources
        import json

        with importlib.resources.open_text(
            "litellm", "model_prices_and_context_window_backup.json"
        ) as f:
            return json.load(f)
model_cost = get_model_cost_map(url=model_cost_map_url)
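# Example (sketch): model_cost maps model names to pricing / context-window
# metadata from the JSON above, e.g.
#
#   model_cost["gpt-3.5-turbo"]["input_cost_per_token"]
#   model_cost["gpt-3.5-turbo"]["litellm_provider"]  # used to build the lists below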
custom_prompt_dict: Dict[str, dict] = {}
####### THREAD-SPECIFIC DATA ###################
class MyLocal(threading.local):
    def __init__(self):
        self.user = "Hello World"


_thread_context = MyLocal()


def identify(event_details):
    # store the user in thread-local data
    if "user" in event_details:
        _thread_context.user = event_details["user"]
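# Example (sketch): tag calls made from the current thread with a user id
# ("user-1234" is hypothetical).
#
#   litellm.identify({"user": "user-1234"})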
####### ADDITIONAL PARAMS ###################
# configurable params if you use proxy models like Helicone, map spend to org id, etc.
api_base = None
headers = None
api_version = None
organization = None
config_path = None
####### COMPLETION MODELS ###################
open_ai_chat_completion_models: List = []
open_ai_text_completion_models: List = []
cohere_models: List = []
anthropic_models: List = []
openrouter_models: List = []
vertex_language_models: List = []
vertex_vision_models: List = []
vertex_chat_models: List = []
vertex_code_chat_models: List = []
vertex_text_models: List = []
vertex_code_text_models: List = []
ai21_models: List = []
nlp_cloud_models: List = []
aleph_alpha_models: List = []
bedrock_models: List = []
deepinfra_models: List = []
perplexity_models: List = []
for key, value in model_cost.items():
    provider = value.get("litellm_provider")
    if provider == "openai":
        open_ai_chat_completion_models.append(key)
    elif provider == "text-completion-openai":
        open_ai_text_completion_models.append(key)
    elif provider == "cohere":
        cohere_models.append(key)
    elif provider == "anthropic":
        anthropic_models.append(key)
    elif provider == "openrouter":
        openrouter_models.append(key)
    elif provider == "vertex_ai-text-models":
        vertex_text_models.append(key)
    elif provider == "vertex_ai-code-text-models":
        vertex_code_text_models.append(key)
    elif provider == "vertex_ai-language-models":
        vertex_language_models.append(key)
    elif provider == "vertex_ai-vision-models":
        vertex_vision_models.append(key)
    elif provider == "vertex_ai-chat-models":
        vertex_chat_models.append(key)
    elif provider == "vertex_ai-code-chat-models":
        vertex_code_chat_models.append(key)
    elif provider == "ai21":
        ai21_models.append(key)
    elif provider == "nlp_cloud":
        nlp_cloud_models.append(key)
    elif provider == "aleph_alpha":
        aleph_alpha_models.append(key)
    elif provider == "bedrock":
        bedrock_models.append(key)
    elif provider == "deepinfra":
        deepinfra_models.append(key)
    elif provider == "perplexity":
        perplexity_models.append(key)
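# Example (sketch): the per-provider lists built above allow simple introspection,
# e.g. checking whether a model name came from the cost map as an OpenAI model:
#
#   "gpt-3.5-turbo" in litellm.open_ai_chat_completion_models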
# known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
openai_compatible_endpoints: List = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
    "api.mistral.ai/v1",
]
# this is maintained for Exception Mapping
openai_compatible_providers: List = [
    "anyscale",
    "mistral",
    "deepinfra",
    "perplexity",
    "xinference",
]
# well supported replicate llms
replicate_models: List = [
    # llama replicate supported LLMs
    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
    # Vicuna
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
    # Flan T-5
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
    # Others
    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
]
huggingface_models: List = [
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-70b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b",
    "meta-llama/Llama-2-7b-chat",
    "meta-llama/Llama-2-13b",
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
]  # these have been tested extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers
together_ai_models: List = [
    # llama llms - chat
    "togethercomputer/llama-2-70b-chat",
    # llama llms - language / instruct
    "togethercomputer/llama-2-70b",
    "togethercomputer/LLaMA-2-7B-32K",
    "togethercomputer/Llama-2-7B-32K-Instruct",
    "togethercomputer/llama-2-7b",
    # falcon llms
    "togethercomputer/falcon-40b-instruct",
    "togethercomputer/falcon-7b-instruct",
    # alpaca
    "togethercomputer/alpaca-7b",
    # chat llms
    "HuggingFaceH4/starchat-alpha",
    # code llms
    "togethercomputer/CodeLlama-34b",
    "togethercomputer/CodeLlama-34b-Instruct",
    "togethercomputer/CodeLlama-34b-Python",
    "defog/sqlcoder",
    "NumbersStation/nsql-llama-2-7B",
    "WizardLM/WizardCoder-15B-V1.0",
    "WizardLM/WizardCoder-Python-34B-V1.0",
    # language llms
    "NousResearch/Nous-Hermes-Llama2-13b",
    "Austism/chronos-hermes-13b",
    "upstage/SOLAR-0-70b-16bit",
    "WizardLM/WizardLM-70B-V1.0",
]  # supports all together ai models, just pass in the model id e.g. completion(model="together_computer/replit_code_3b",...)
baseten_models: List = [
    "qvv0xeq",  # FALCON 7B
    "q841o8w",  # WizardLM
    "31dxrj3",  # Mosaic ML
]
# used for Cost Tracking & Token counting
# https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/
# Azure returns gpt-35-turbo in their responses; we map it to the azure/ prefixed names below for token counting
azure_llms = {
    "gpt-35-turbo": "azure/gpt-35-turbo",
    "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k",
    "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct",
}
azure_embedding_models = {
    "ada": "azure/ada",
}
petals_models = [
    "petals-team/StableBeluga2",
]
ollama_models = ["llama2"]
maritalk_models = ["maritalk"]
model_list = (
    open_ai_chat_completion_models
    + open_ai_text_completion_models
    + cohere_models
    + anthropic_models
    + replicate_models
    + openrouter_models
    + huggingface_models
    + vertex_chat_models
    + vertex_text_models
    + ai21_models
    + together_ai_models
    + baseten_models
    + aleph_alpha_models
    + nlp_cloud_models
    + ollama_models
    + bedrock_models
    + deepinfra_models
    + perplexity_models
    + maritalk_models
)
provider_list: List = [
    "openai",
    "custom_openai",
    "text-completion-openai",
    "cohere",
    "anthropic",
    "replicate",
    "huggingface",
    "together_ai",
    "openrouter",
    "vertex_ai",
    "palm",
    "gemini",
    "ai21",
    "baseten",
    "azure",
    "sagemaker",
    "bedrock",
    "vllm",
    "nlp_cloud",
    "petals",
    "oobabooga",
    "ollama",
    "ollama_chat",
    "deepinfra",
    "perplexity",
    "anyscale",
    "mistral",
    "maritalk",
    "voyage",
    "cloudflare",
    "xinference",
    "custom",  # custom apis
]
models_by_provider: dict = {
    "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
    "cohere": cohere_models,
    "anthropic": anthropic_models,
    "replicate": replicate_models,
    "huggingface": huggingface_models,
    "together_ai": together_ai_models,
    "baseten": baseten_models,
    "openrouter": openrouter_models,
    "vertex_ai": vertex_chat_models + vertex_text_models,
    "ai21": ai21_models,
    "bedrock": bedrock_models,
    "petals": petals_models,
    "ollama": ollama_models,
    "deepinfra": deepinfra_models,
    "perplexity": perplexity_models,
    "maritalk": maritalk_models,
}
# mapping for those models which have larger equivalents
longer_context_model_fallback_dict: dict = {
    # openai chat completion models
    "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
    "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
    "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
    "gpt-4": "gpt-4-32k",
    "gpt-4-0314": "gpt-4-32k-0314",
    "gpt-4-0613": "gpt-4-32k-0613",
    # anthropic
    "claude-instant-1": "claude-2",
    "claude-instant-1.2": "claude-2",
    # vertexai
    "chat-bison": "chat-bison-32k",
    "chat-bison@001": "chat-bison-32k",
    "codechat-bison": "codechat-bison-32k",
    "codechat-bison@001": "codechat-bison-32k",
    # openrouter
    "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
    "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
}
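# Example (sketch): this dict can be passed per call via the
# 'context_window_fallback_dict' param (see _litellm_completion_params above), so
# a context-window error retries on the larger equivalent.
#
#   completion(model="gpt-3.5-turbo", messages=msgs,
#              context_window_fallback_dict=litellm.longer_context_model_fallback_dict)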
####### EMBEDDING MODELS ###################
open_ai_embedding_models: List = ["text-embedding-ada-002"]
cohere_embedding_models: List = [
    "embed-english-v3.0",
    "embed-english-light-v3.0",
    "embed-multilingual-v3.0",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-multilingual-v2.0",
]
bedrock_embedding_models: List = [
    "amazon.titan-embed-text-v1",
    "cohere.embed-english-v3",
    "cohere.embed-multilingual-v3",
]
all_embedding_models = (
    open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models
)
####### IMAGE GENERATION MODELS ###################
openai_image_generation_models = ["dall-e-2", "dall-e-3"]
from .timeout import timeout
from .utils import (
    client,
    exception_type,
    get_optional_params,
    modify_integration,
    token_counter,
    cost_per_token,
    completion_cost,
    get_litellm_params,
    Logging,
    acreate,
    get_model_list,
    get_max_tokens,
    get_model_info,
    register_prompt_template,
    validate_environment,
    check_valid_key,
    get_llm_provider,
    register_model,
    encode,
    decode,
    _calculate_retry_after,
    _should_retry,
    get_secret,
)
from .llms.huggingface_restapi import HuggingfaceConfig
from .llms.anthropic import AnthropicConfig
from .llms.replicate import ReplicateConfig
from .llms.cohere import CohereConfig
from .llms.ai21 import AI21Config
from .llms.together_ai import TogetherAIConfig
from .llms.cloudflare import CloudflareConfig
from .llms.palm import PalmConfig
from .llms.gemini import GeminiConfig
from .llms.nlp_cloud import NLPCloudConfig
from .llms.aleph_alpha import AlephAlphaConfig
from .llms.petals import PetalsConfig
from .llms.vertex_ai import VertexAIConfig
from .llms.sagemaker import SagemakerConfig
from .llms.ollama import OllamaConfig
from .llms.maritalk import MaritTalkConfig
from .llms.bedrock import (
    AmazonTitanConfig,
    AmazonAI21Config,
    AmazonAnthropicConfig,
    AmazonCohereConfig,
    AmazonLlamaConfig,
)
from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig
from .llms.azure import AzureOpenAIConfig, AzureOpenAIError
from .main import *  # type: ignore
from .integrations import *
from .exceptions import (
    AuthenticationError,
    InvalidRequestError,
    BadRequestError,
    NotFoundError,
    RateLimitError,
    ServiceUnavailableError,
    OpenAIError,
    ContextWindowExceededError,
    ContentPolicyViolationError,
    BudgetExceededError,
    APIError,
    Timeout,
    APIConnectionError,
    APIResponseValidationError,
    UnprocessableEntityError,
)
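# Example (sketch): provider errors are mapped to these OpenAI-style exception
# types, so a single handler covers any backend.
#
#   try:
#       litellm.completion(model="claude-2", messages=msgs)
#   except litellm.RateLimitError:
#       ...  # back off and retry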
from .budget_manager import BudgetManager
from .proxy.proxy_cli import run_server
from .router import Router