gradio_together_tgi / entrypoint.sh.template
#!/bin/bash
# Expose the Hugging Face token under both names that downstream tools read.
export HUGGING_FACE_HUB_TOKEN="$HUGGING_FACE_HUB_TOKEN"
export HF_TOKEN="$HUGGING_FACE_HUB_TOKEN"
if [[ "$QUANTIZATION" == "false" ]]; then
text-generation-launcher --model-id $MODEL_NAME \
--num-shard 1 --port 8080 --trust-remote-code \
--max-concurrent-requests $MAX_CONCURRENT_REQUESTS \
--max-input-length $MAX_INPUT_LENGTH \
--max-total-tokens $MAX_TOTAL_TOKENS \
&
else
text-generation-launcher --model-id $MODEL_NAME \
--num-shard 1 --port 8080 --trust-remote-code \
--max-concurrent-requests $MAX_CONCURRENT_REQUESTS \
--max-input-length $MAX_INPUT_LENGTH \
--max-total-tokens $MAX_TOTAL_TOKENS \
--quantize $QUANTIZATION \
&
fi
# Block until text-generation-inference reports healthy
# (retry the health check up to 60 times, 10 seconds apart).
curl --retry 60 --retry-delay 10 --retry-connrefused http://127.0.0.1:8080/health
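# A minimal alternative sketch (not part of the original flow), assuming the
# same /health endpoint on port 8080, for curl builds lacking --retry-connrefused:
#   until curl --silent --fail http://127.0.0.1:8080/health; do sleep 10; done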
# Start the Gradio app in the background.
python app/main.py --port "$GRADIO_PORT" &
# Wait for any background process (TGI or Gradio) to exit.
wait -n
# Exit with the status of the process that exited first.
exit $?
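# ---------------------------------------------------------------------------
# Usage sketch (an assumption, not confirmed by this repo): since this file is
# an entrypoint *template*, the placeholders are presumably rendered before the
# container starts, e.g. with envsubst, and the result made executable:
#
#   envsubst < entrypoint.sh.template > entrypoint.sh
#   chmod +x entrypoint.sh
#
# The script expects these environment variables to be set:
# HUGGING_FACE_HUB_TOKEN, QUANTIZATION, MODEL_NAME, MAX_CONCURRENT_REQUESTS,
# MAX_INPUT_LENGTH, MAX_TOTAL_TOKENS, and GRADIO_PORT.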