gradio_together_tgi / entrypoint.sh.template
#!/bin/bash
# Expose the Hugging Face token under both names that downstream tools read.
export HUGGING_FACE_HUB_TOKEN="$HUGGING_FACE_HUB_TOKEN"
export HF_TOKEN="$HUGGING_FACE_HUB_TOKEN"
if [[ "$QUANTIZATION" == "false" ]]; then
text-generation-launcher --model-id $MODEL_NAME \
--num-shard 1 --port 8080 --trust-remote-code \
--max-concurrent-requests $MAX_CONCURRENT_REQUESTS \
--max-input-length $MAX_INPUT_LENGTH \
--max-total-tokens $MAX_TOTAL_TOKENS \
&
else
text-generation-launcher --model-id $MODEL_NAME \
--num-shard 1 --port 8080 --trust-remote-code \
--max-concurrent-requests $MAX_CONCURRENT_REQUESTS \
--max-input-length $MAX_INPUT_LENGTH \
--max-total-tokens $MAX_TOTAL_TOKENS \
--quantize $QUANTIZATION \
&
fi
# Block until text-generation-inference reports healthy
# (retry the health check up to 60 times, 10 seconds apart).
curl --retry 60 --retry-delay 10 --retry-connrefused http://127.0.0.1:8080/health
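# A minimal alternative sketch (not part of the original flow), assuming the
# same /health endpoint on port 8080, for curl builds lacking --retry-connrefused:
#   until curl --silent --fail http://127.0.0.1:8080/health; do sleep 10; done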
# Start the Gradio app in the background.
python app/main.py --port "$GRADIO_PORT" &
# Wait for any background process (TGI or Gradio) to exit.
wait -n
# Exit with the status of the process that exited first.
exit $?
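# ---------------------------------------------------------------------------
# Usage sketch (an assumption, not confirmed by this repo): since this file is
# an entrypoint *template*, the placeholders are presumably rendered before the
# container starts, e.g. with envsubst, and the result made executable:
#
#   envsubst < entrypoint.sh.template > entrypoint.sh
#   chmod +x entrypoint.sh
#
# The script expects these environment variables to be set:
# HUGGING_FACE_HUB_TOKEN, QUANTIZATION, MODEL_NAME, MAX_CONCURRENT_REQUESTS,
# MAX_INPUT_LENGTH, MAX_TOTAL_TOKENS, and GRADIO_PORT.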