Spaces:

ggml-org
/

gguf-my-repo

Running on A10G

App Files Files Community

136

gguf-my-repo / app.py

reach-vb HF staff

Update app.py (#6)

75b770e verified 8 months ago

raw

history blame

2.89 kB

	import shutil
	import subprocess

	import gradio as gr

	from huggingface_hub import create_repo, HfApi
	from huggingface_hub import snapshot_download
	from huggingface_hub import whoami

	api = HfApi()

	def process_model(model_id, q_method, hf_token):

	MODEL_NAME = model_id.split('/')[-1]
	fp16 = f"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.bin"

	username = whoami(hf_token)["name"]

	snapshot_download(repo_id=model_id, local_dir = f"{MODEL_NAME}", local_dir_use_symlinks=False)
	print("Model downloaded successully!")

	fp16_conversion = f"python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}"
	subprocess.run(fp16_conversion, shell=True)
	print("Model converted to fp16 successully!")

	qtype = f"{MODEL_NAME}/{MODEL_NAME.lower()}.{q_method.upper()}.gguf"
	quantise_ggml = f"./llama.cpp/quantize {fp16} {qtype} {q_method}"
	subprocess.run(quantise_ggml, shell=True)
	print("Quantised successfully!")

	# Create empty repo
	repo_url = create_repo(
	repo_id = f"{username}/{MODEL_NAME}-{q_method}-GGUF",
	repo_type="model",
	exist_ok=True,
	token=hf_token
	)
	print("Empty repo created successfully!")

	# Upload gguf files
	# api.upload_folder(
	# folder_path=MODEL_NAME,
	# repo_id=f"{username}/{MODEL_NAME}-{q_method}-GGUF",
	# allow_patterns=["*.gguf"],
	# token=hf_token
	# )
	api.upload_file(
	path_or_fileobj=qtype,
	repo_id=f"{username}/{MODEL_NAME}-{q_method}-GGUF",
	repo_type="model",
	)
	print("Uploaded successfully!")

	shutil.rmtree(MODEL_NAME)
	print("Folder cleaned up successfully!")

	return (
	f'Find your repo <a href=\'{repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
	"llama.png",
	)

	# Create Gradio interface
	iface = gr.Interface(
	fn=process_model,
	inputs=[
	gr.Textbox(
	lines=1,
	label="Hub Model ID",
	info="Model repo ID"
	),
	gr.Dropdown(
	["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
	label="Quantization Method",
	info="GGML quantisation type"
	),
	gr.Textbox(
	lines=1,
	label="HF Write Token",
	info="https://hf.co/settings/token"
	)
	],
	outputs=[
	gr.Markdown(label="output"),
	gr.Image(show_label=False),
	],
	title="Create your own GGUF Quants!",
	description="Create GGUF quants from any Hugging Face repository! You need to specify a write token obtained in https://hf.co/settings/tokens.",
	article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",

	)

	# Launch the interface
	iface.launch(debug=True)