from litellm.proxy.proxy_server import app, save_worker_config
import uvicorn
import random
import subprocess
import json
import os

# Default proxy settings
host = "0.0.0.0"
port = 8000
api_base = None
api_version = "2023-07-01-preview"
model = None
alias = None
add_key = None
headers = None
save = False
debug = False
detailed_debug = False
temperature = 0.0
max_tokens = 1000
request_timeout = 10
drop_params = True
add_function_to_prompt = True
config = None
max_budget = 100
telemetry = False
test = False
local = False
num_workers = 1
test_async = False
num_requests = 1
use_queue = False
health = False
version = False


def run_ollama_serve():
    """Start `ollama serve` as a background process, silencing its output."""
    try:
        command = ["ollama", "serve"]
        with open(os.devnull, "w") as devnull:
            process = subprocess.Popen(command, stdout=devnull, stderr=devnull)
    except Exception as e:
        print(
            f"""
            LiteLLM Warning: proxy started with `ollama` model\n`ollama serve` failed with Exception: {e}. \nEnsure you run `ollama serve`
            """
        )


def is_port_in_use(port):
    """Return True if something is already listening on `port` locally."""
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(("localhost", port)) == 0


if model and "ollama" in model and api_base is None:
    # Ollama models need a local `ollama serve` process when no api_base is given
    run_ollama_serve()
else:
    if headers:
        headers = json.loads(headers)
    # Persist the worker configuration so the proxy app picks it up on startup
    save_worker_config(
        model=model,
        alias=alias,
        api_base=api_base,
        api_version=api_version,
        debug=debug,
        detailed_debug=detailed_debug,
        temperature=temperature,
        max_tokens=max_tokens,
        request_timeout=request_timeout,
        max_budget=max_budget,
        telemetry=telemetry,
        drop_params=drop_params,
        add_function_to_prompt=add_function_to_prompt,
        headers=headers,
        save=save,
        config=config,
        use_queue=use_queue,
    )
    # Fall back to a randomly chosen port if the default 8000 is already taken
    if port == 8000 and is_port_in_use(port):
        port = random.randint(1024, 49152)

if __name__ == "__main__":
    uvicorn.run(app, host=host, port=port)
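
# Usage sketch (assumption: the script is saved as `proxy_launcher.py`; the filename
# is illustrative, not from the source). With the defaults above, `model` is None,
# so the else-branch runs: the worker config is saved, port 8000 is checked, and
# uvicorn serves the LiteLLM proxy app on 0.0.0.0:8000, falling back to a randomly
# chosen port if 8000 is already in use:
#
#   $ python proxy_launcher.py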