litellmlope / main.py
ka1kuk's picture
Update main.py
6cb06ab verified
raw
history blame
2.03 kB
from litellm.proxy.proxy_server import app, save_worker_config
import uvicorn
import random
import subprocess, json
import os
# --- Proxy launch configuration -------------------------------------------
# These mirror litellm's CLI flags, hard-coded for this deployment.
host = "0.0.0.0"  # bind on all interfaces (container-friendly)
port = 8000  # preferred port; swapped for a random one below if already in use
api_base = None  # upstream provider base URL; None = provider default
api_version = "2023-07-01-preview"  # Azure-style API version string
model = None  # NOTE(review): stays None here, so the ollama branch below never runs
alias = None  # optional display alias for the model
add_key = None  # NOTE(review): defined but never read in this file
headers = None  # optional extra headers, JSON-encoded string if set
save = False  # persist config via save_worker_config
debug = False  # basic debug logging
detailed_debug = False  # verbose debug logging
temperature = 0.0  # default sampling temperature forwarded to the worker
max_tokens = 1000  # default completion token cap
request_timeout = 10  # per-request timeout, seconds
drop_params = True  # silently drop provider-unsupported params
add_function_to_prompt = True  # inline function specs into the prompt when needed
config = None  # path to a litellm config file, if any
max_budget = 100  # spend ceiling passed to the worker
telemetry = False  # opt out of usage telemetry
test = False  # NOTE(review): test/local/test_async/num_* flags are defined
local = False  # but never read in this file — presumably kept to mirror the
num_workers = 1  # upstream CLI's full flag set
test_async = False
num_requests = 1
use_queue = False  # enable the request queue in the worker config
health = False
version = False
def run_ollama_serve():
    """Best-effort launch of a background `ollama serve` daemon.

    The child's stdout/stderr are discarded. Any failure (typically the
    `ollama` binary being absent from PATH) is reported as a printed
    warning rather than raised, so the proxy can still start and the user
    can run `ollama serve` manually.
    """
    try:
        # subprocess.DEVNULL replaces the manual open(os.devnull) handle,
        # which was opened in the parent and never needed there at all.
        # The returned Popen object is intentionally not kept: the daemon
        # is fire-and-forget.
        subprocess.Popen(
            ["ollama", "serve"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception as e:
        # Fixed message: original read "Exception{e}" with no separator.
        print(
            f"""
            LiteLLM Warning: proxy started with `ollama` model\n`ollama serve` failed with Exception: {e}. \nEnsure you run `ollama serve`
            """
        )
def is_port_in_use(port):
    """Return True if something is accepting TCP connections on localhost:port."""
    import socket

    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex returns 0 on a successful connection, an errno otherwise.
        return probe.connect_ex(("localhost", port)) == 0
    finally:
        probe.close()
# --- Import-time startup sequence -----------------------------------------
# NOTE: everything below runs at module import, not only under __main__ —
# presumably so a uvicorn/gunicorn worker importing this module still gets
# the worker config saved; confirm against litellm.proxy.proxy_server.
# With model = None above, the else branch is always taken.
if model and "ollama" in model and api_base is None:
    # An ollama model with no explicit api_base: try to start the local daemon.
    run_ollama_serve()
else:
    if headers:
        # headers arrives as a JSON-encoded string; decode to a dict.
        headers = json.loads(headers)
    # Persist the worker settings for the proxy app; exact consumption
    # semantics live in litellm.proxy.proxy_server (not visible here).
    save_worker_config(
        model=model,
        alias=alias,
        api_base=api_base,
        api_version=api_version,
        debug=debug,
        detailed_debug=detailed_debug,
        temperature=temperature,
        max_tokens=max_tokens,
        request_timeout=request_timeout,
        max_budget=max_budget,
        telemetry=telemetry,
        drop_params=drop_params,
        add_function_to_prompt=add_function_to_prompt,
        headers=headers,
        save=save,
        config=config,
        use_queue=use_queue,
    )
# Only the default port is probed; an explicitly chosen port would be kept
# even if occupied. Fall back to a random unprivileged port on collision.
if port == 8000 and is_port_in_use(port):
    port = random.randint(1024, 49152)
if __name__ == "__main__":
    # Serve the litellm proxy FastAPI app directly when run as a script.
    uvicorn.run(app, host=host, port=port)