limcheekin's picture
feat: added chat_format
55fe9af
raw
history blame contribute delete
776 Bytes
from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
from fastapi.middleware.gzip import GZipMiddleware
import os
# Build the OpenAI-compatible llama-cpp-python FastAPI application.
# All fields below are llama_cpp.server Settings passed straight through.
app = create_app(
    Settings(
        n_threads=2,  # inference threads; set to number of cpu cores
        model="model/gguf-model.bin",  # path to the GGUF model file
        embedding=True,  # enable embedding support in the server
        chat_format="zephyr"  # chat prompt template; must match the model's fine-tune
    )
)
# Gzip-compress responses larger than 1000 bytes to reduce bandwidth.
app.add_middleware(GZipMiddleware, minimum_size=1000)
# Read index.html once at import time and keep it in memory so each
# request avoids disk I/O.
# NOTE(review): this raises FileNotFoundError at startup if index.html is
# missing — presumably intentional fail-fast; confirm.
# Explicit UTF-8 avoids depending on the platform's default encoding.
with open("index.html", "r", encoding="utf-8") as f:
    content = f.read()


@app.get("/", response_class=HTMLResponse)
async def read_items():
    """Serve the cached index.html as the landing page."""
    return content
if __name__ == "__main__":
    import uvicorn

    # Use .get with defaults instead of os.environ[...] so an unset
    # HOST/PORT does not crash startup with a bare KeyError.
    # 0.0.0.0 binds all interfaces (container-friendly); override via env.
    uvicorn.run(
        app,
        host=os.environ.get("HOST", "0.0.0.0"),
        port=int(os.environ.get("PORT", "8000")),
    )