# Select the GPU and set the engine build parameters.
export CUDA_VISIBLE_DEVICES=1
INFERENCE_PRECISION=float16   # compute precision for both engines
MAX_BEAM_WIDTH=4              # maximum beam width baked into the decoder engine
MAX_BATCH_SIZE=8              # maximum batch size for both engines
checkpoint_dir=tllm_checkpoint        # converted TensorRT-LLM checkpoint (input)
output_dir=distill_whisper_large_v3   # built engines (output)
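
# Assumption: ${checkpoint_dir} already contains encoder/ and decoder/
# subdirectories produced by the TensorRT-LLM whisper example's checkpoint
# conversion step; the exact conversion command and its flags depend on the
# TensorRT-LLM version (see the example's convert_checkpoint.py --help).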
# Build the encoder engine (commented out here; uncomment to rebuild).
# trtllm-build --checkpoint_dir ${checkpoint_dir}/encoder \
#              --output_dir ${output_dir}/encoder \
#              --paged_kv_cache disable \
#              --moe_plugin disable \
#              --enable_xqa disable \
#              --max_batch_size ${MAX_BATCH_SIZE} \
#              --gemm_plugin disable \
#              --bert_attention_plugin ${INFERENCE_PRECISION} \
#              --remove_input_padding disable
# Build the decoder engine (commented out here; uncomment to rebuild).
# max_encoder_input_len=1500 matches Whisper's encoder output length
# (30 s of audio -> 3000 mel frames, downsampled by 2 in the encoder).
# trtllm-build --checkpoint_dir ${checkpoint_dir}/decoder \
#              --output_dir ${output_dir}/decoder \
#              --paged_kv_cache disable \
#              --moe_plugin disable \
#              --enable_xqa disable \
#              --max_beam_width ${MAX_BEAM_WIDTH} \
#              --max_batch_size ${MAX_BATCH_SIZE} \
#              --max_seq_len 100 \
#              --max_input_len 14 \
#              --max_encoder_input_len 1500 \
#              --gemm_plugin ${INFERENCE_PRECISION} \
#              --bert_attention_plugin ${INFERENCE_PRECISION} \
#              --gpt_attention_plugin ${INFERENCE_PRECISION} \
#              --remove_input_padding disable
# Run the built engines on the LibriSpeech dummy split.
python3 run.py --engine_dir ${output_dir} --dataset hf-internal-testing/librispeech_asr_dummy --name librispeech_dummy_${output_dir}
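# run.py here refers to the evaluation script shipped with the TensorRT-LLM
# whisper example; it decodes the dataset and reports word error rate (WER)
# for the run, with results labeled by the --name argument.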