# Source: yuekai's Hugging Face repo — "Upload folder using huggingface_hub" (commit f605912, verified)
# Pin the process to GPU 1.
export CUDA_VISIBLE_DEVICES=1

# Engine build/runtime parameters; referenced by the (commented) trtllm-build
# commands below, so they must match whatever the prebuilt engines used.
readonly INFERENCE_PRECISION=float16
readonly MAX_BEAM_WIDTH=4
readonly MAX_BATCH_SIZE=8

# TensorRT-LLM checkpoint input directory and built-engine output directory.
readonly checkpoint_dir=tllm_checkpoint
readonly output_dir=distill_whisper_large_v3
# One-time engine build steps, kept for reference. They are commented out
# because the engines are assumed to already exist under ${output_dir};
# uncomment to rebuild the encoder/decoder TensorRT-LLM engines.
# trtllm-build --checkpoint_dir ${checkpoint_dir}/encoder \
# --output_dir ${output_dir}/encoder \
# --paged_kv_cache disable \
# --moe_plugin disable \
# --enable_xqa disable \
# --max_batch_size ${MAX_BATCH_SIZE} \
# --gemm_plugin disable \
# --bert_attention_plugin ${INFERENCE_PRECISION} \
# --remove_input_padding disable
# trtllm-build --checkpoint_dir ${checkpoint_dir}/decoder \
# --output_dir ${output_dir}/decoder \
# --paged_kv_cache disable \
# --moe_plugin disable \
# --enable_xqa disable \
# --max_beam_width ${MAX_BEAM_WIDTH} \
# --max_batch_size ${MAX_BATCH_SIZE} \
# --max_seq_len 100 \
# --max_input_len 14 \
# --max_encoder_input_len 1500 \
# --gemm_plugin ${INFERENCE_PRECISION} \
# --bert_attention_plugin ${INFERENCE_PRECISION} \
# --gpt_attention_plugin ${INFERENCE_PRECISION} \
# --remove_input_padding disable
# Run decoding on the dummy LibriSpeech split using the engines in
# ${output_dir}; quoted expansions and one flag per line for readability.
python3 run.py \
  --engine_dir "${output_dir}" \
  --dataset hf-internal-testing/librispeech_asr_dummy \
  --name "librispeech_dummy_${output_dir}"