# litellm/main.py
from proxy_server import app, save_worker_config
import uvicorn
import random
import subprocess, json
import os
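# The module-level settings below are hardcoded stand-ins for the flags the
# `litellm` proxy CLI normally accepts (e.g. --model, --port, --debug, --config),
# so the proxy can be started here without command-line arguments.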
host = "0.0.0.0"
port = 8000
api_base = None
api_version = "2023-07-01-preview"
model = None
alias = None
add_key = None
headers = None
save = False
debug = False
detailed_debug = False
temperature = 0.0
max_tokens = 1000
request_timeout = 10
drop_params = True
add_function_to_prompt = True
config = None
max_budget = 100
telemetry = False
test = False
local = False
num_workers = 1
test_async = False
num_requests = 1
use_queue = False
health = False
version = False
def run_ollama_serve():
    # Start a local Ollama server in the background, discarding its output.
    try:
        command = ["ollama", "serve"]
        with open(os.devnull, "w") as devnull:
            subprocess.Popen(command, stdout=devnull, stderr=devnull)
    except Exception as e:
        print(
            f"""
            LiteLLM Warning: proxy started with an `ollama` model, but `ollama serve` failed with exception: {e}.
            Ensure `ollama serve` is running separately.
            """
        )
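# Note: `ollama serve` is launched as a background process; by default it listens
# on http://localhost:11434, which is where LiteLLM looks for `ollama/...` models
# when no api_base is supplied.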
def is_port_in_use(port):
    import socket

    # connect_ex returns 0 if something is already listening on the port.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(("localhost", port)) == 0
if model and "ollama" in model and api_base is None:
    # An `ollama/...` model with no explicit api_base: spin up the local Ollama server.
    run_ollama_serve()
else:
    if headers:
        headers = json.loads(headers)
    # Persist the settings above so the proxy worker picks them up.
    save_worker_config(
        model=model,
        alias=alias,
        api_base=api_base,
        api_version=api_version,
        debug=debug,
        detailed_debug=detailed_debug,
        temperature=temperature,
        max_tokens=max_tokens,
        request_timeout=request_timeout,
        max_budget=max_budget,
        telemetry=telemetry,
        drop_params=drop_params,
        add_function_to_prompt=add_function_to_prompt,
        headers=headers,
        save=save,
        config=config,
        use_queue=use_queue,
    )
# If the default port is already taken, fall back to a random high port.
if port == 8000 and is_port_in_use(port):
    port = random.randint(1024, 49152)

if __name__ == "__main__":
    uvicorn.run(app, host=host, port=port)
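# Once running, the proxy speaks the OpenAI-compatible API. A minimal sketch of a
# client call (the model name is illustrative and depends on what is configured above;
# adjust the port if it was reassigned at startup):
#
#   curl http://localhost:8000/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]}'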