#!/usr/bin/env bash
set -euxo pipefail
# List of all supported libraries (update this list when adding new libraries)
# This also defines the order in which they are installed by --library "all"
ALL_LIBRARIES=(
"trtllm"
"te"
"mcore"
"vllm"
"extra"
)
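# Usage (derived from the argument handling below):
#   bash <this script>                                          # dev mode (default): pip install --editable ".[all]"
#   bash <this script> --library <lib>[,<lib>,...]|all --mode <build|install>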
export INSTALL_OPTION=${1:-dev}
export HEAVY_DEPS=${HEAVY_DEPS:-false}
export INSTALL_DIR=${INSTALL_DIR:-"/opt"}
export CURR=$(pwd)
export WHEELS_DIR=${WHEELS_DIR:-"$INSTALL_DIR/wheels"}
export PIP=pip
export TRTLLM_REPO=${TRTLLM_REPO:-$(jq -r '."vcs-dependencies"."trt-llm".repo' "$CURR/requirements/manifest.json")}
export TRTLLM_TAG=${TRTLLM_TAG:-$(jq -r '."vcs-dependencies"."trt-llm".ref' "$CURR/requirements/manifest.json")}
export TRTLLM_DIR="$INSTALL_DIR/TensorRT-LLM"
export TE_REPO=${TE_REPO:-$(jq -r '."vcs-dependencies"."transformer_engine".repo' "$CURR/requirements/manifest.json")}
export TE_TAG=${TE_TAG:-$(jq -r '."vcs-dependencies"."transformer_engine".ref' "$CURR/requirements/manifest.json")}
export NVIDIA_PYTORCH_VERSION=${NVIDIA_PYTORCH_VERSION:-""}
export CONDA_PREFIX=${CONDA_PREFIX:-""}
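# All of the variables above can be overridden from the environment; repo URLs and
# refs default to the pins in requirements/manifest.json. NVIDIA_PYTORCH_VERSION is
# expected to be set inside NVIDIA PyTorch containers and gates the container-only
# build steps below.
# trt: fetches TensorRT-LLM sources (including git-lfs assets) and, in install mode
# inside the NVIDIA PyTorch container, installs the TensorRT/CUDA dependencies via
# TRT-LLM's docker helper scripts.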
trt() {
local mode="$1"
local WHEELS_DIR=$WHEELS_DIR/trt/
mkdir -p $WHEELS_DIR
# Skip TRT installation on macOS ARM
if [[ "$(uname)" == "Darwin" ]] && [[ "$(uname -m)" == "arm64" ]]; then
echo "Skipping TRT installation on macOS ARM"
return
fi
if [ "$(id -u)" -ne 0 ]; then
if ! command -v sudo &>/dev/null; then
echo "Not running as root and sudo is not available, skipping TRT installation"
return
fi
fi
if [ "$(id -u)" -eq 0 ]; then
# Already root, run directly
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
apt-get install -y git-lfs
git lfs install
apt-get clean
else
# Need to gain sudo
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
sudo apt-get install -y git-lfs
git lfs install
sudo apt-get clean
fi
if [ ! -d "$TRTLLM_DIR/.git" ]; then
rm -rf "$TRTLLM_DIR"
cd $(dirname "$TRTLLM_DIR")
git clone ${TRTLLM_REPO}
fi
pushd $TRTLLM_DIR
git checkout -f $TRTLLM_TAG
git submodule update --init --recursive
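# Drop torch entries from TRT-LLM's requirements so the preinstalled PyTorch is kept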
sed -i "/torch/d" requirements.txt
git lfs pull
patch -p1 < $CURR/external/patches/trt_llm.patch
popd
if [[ "$mode" == "install" ]]; then
if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
cd $TRTLLM_DIR
set +u
bash docker/common/install_base.sh
bash docker/common/install_cmake.sh
bash docker/common/install_ccache.sh
. docker/common/install_tensorrt.sh \
--TRT_VER="10.10.0.31" \
--CUDA_VER="12.9" \
--CUDNN_VER="9.9.0.52-1" \
--NCCL_VER="2.26.5-1+cuda12.9" \
--CUBLAS_VER="12.9.0.13-1" \
--NVRTC_VER="12.9.41-1"
set -u
fi
fi
}
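# trtllm: clones TensorRT-LLM at the pinned ref and builds and/or installs the
# tensorrt_llm wheel (the wheel is only built inside the NVIDIA PyTorch container).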
trtllm() {
local mode="$1"
local WHEELS_DIR=$WHEELS_DIR/trtllm/
mkdir -p $WHEELS_DIR
# Skip TRT-LLM installation on macOS ARM
if [[ "$(uname)" == "Darwin" ]] && [[ "$(uname -m)" == "arm64" ]]; then
echo "Skipping TRT-LLM installation on macOS ARM"
return
fi
if [ "$(id -u)" -ne 0 ]; then
if ! command -v sudo &>/dev/null; then
echo "Not running as root and sudo is not available, skipping TRT installation"
return
fi
fi
if [ "$(id -u)" -eq 0 ]; then
# Already root, run directly
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
apt-get install -y git-lfs
git lfs install
apt-get clean
else
# Need to gain sudo
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
sudo apt-get install -y git-lfs
git lfs install
sudo apt-get clean
fi
if [ ! -d "$TRTLLM_DIR/.git" ]; then
rm -rf "$TRTLLM_DIR"
cd $(dirname "$TRTLLM_DIR")
git clone ${TRTLLM_REPO}
fi
pushd $TRTLLM_DIR
git checkout -f $TRTLLM_TAG
git submodule update --init --recursive
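# Drop torch entries from TRT-LLM's requirements so the preinstalled PyTorch is kept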
sed -i "/torch/d" requirements.txt
git lfs pull
patch -p1 < $CURR/external/patches/trt_llm.patch
popd
build() {
if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
# CONDA_PREFIX causes an error in trt-llm's build script
unset CONDA_PREFIX
cd $TRTLLM_DIR
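# Build the tensorrt_llm wheel (C++11 ABI) and place it in $WHEELS_DIR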
TORCH_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" python3 ./scripts/build_wheel.py --job_count $(nproc) --clean --trt_root /usr/local/tensorrt --dist_dir $WHEELS_DIR --python_bindings --benchmarks
fi
}
if [[ "$mode" == "build" ]]; then
build
else
if [ -d "$WHEELS_DIR" ] && [ -z "$(ls -A "$WHEELS_DIR")" ]; then
build
fi
pip install --no-cache-dir $WHEELS_DIR/tensorrt_llm*.whl --extra-index-url https://pypi.nvidia.com || true
fi
}
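# te: clones TransformerEngine at the pinned ref, builds a wheel inside the NVIDIA
# PyTorch container, installs it, and patches Triton's autotuner for lazy init.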
te() {
local mode="$1"
local WHEELS_DIR=$WHEELS_DIR/te/
mkdir -p $WHEELS_DIR
TE_DIR="$INSTALL_DIR/TransformerEngine"
if [ ! -d "$TE_DIR/.git" ]; then
rm -rf "$TE_DIR" &&
cd $(dirname "$TE_DIR")
git clone ${TE_REPO}
fi
pushd $TE_DIR
git checkout -f $TE_TAG
popd
build() {
if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
cd $TE_DIR
git submodule init
git submodule update
pip install nvidia-mathdx==25.1.1
pip wheel --wheel-dir $WHEELS_DIR/ --no-build-isolation $TE_DIR
fi
}
if [[ "$mode" == "build" ]]; then
build
else
if [ -d "$WHEELS_DIR" ] && [ -z "$(ls -A "$WHEELS_DIR")" ]; then
build
fi
pip install --no-cache-dir $WHEELS_DIR/transformer_engine*.whl && patch -p1 --force $(python -c "import triton; print(triton.__path__[0])")/runtime/autotuner.py $CURR/external/patches/triton-lang_triton_6570_lazy_init.patch || true
fi
}
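# mcore: builds/installs causal-conv1d, mamba, and Megatron-LM from pinned sources;
# Megatron-LM is additionally installed in editable mode.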
mcore() {
local mode="$1"
local WHEELS_DIR=$WHEELS_DIR/mcore/
mkdir -p $WHEELS_DIR
export CAUSAL_CONV1D_FORCE_BUILD=TRUE
export CAUSAL_CONV_TAG=v1.5.3
CAUSAL_CONV1D_DIR="$INSTALL_DIR/causal-conv1d"
if [ ! -d "$CAUSAL_CONV1D_DIR/.git" ]; then
rm -rf "$CAUSAL_CONV1D_DIR"
mkdir -p $(dirname "$CAUSAL_CONV1D_DIR")
cd $(dirname "$CAUSAL_CONV1D_DIR")
git clone https://github.com/Dao-AILab/$(basename $CAUSAL_CONV1D_DIR).git
fi
pushd $CAUSAL_CONV1D_DIR
git checkout -f $CAUSAL_CONV_TAG
popd
export MAMBA_FORCE_BUILD=TRUE
export MAMBA_TAG=6b32be06d026e170b3fdaf3ae6282c5a6ff57b06
MAMBA_DIR="$INSTALL_DIR/mamba"
if [ ! -d "$MAMBA_DIR/.git" ]; then
rm -rf "$MAMBA_DIR"
cd $(dirname "$MAMBA_DIR")
git clone https://github.com/state-spaces/$(basename $MAMBA_DIR).git
fi
pushd $MAMBA_DIR
git checkout -f $MAMBA_TAG
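# Strip triton from mamba's build metadata (the container ships pytorch-triton)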
perl -ni -e 'print unless /triton/' setup.py
perl -ni -e 'print unless /triton/' pyproject.toml
popd
MLM_REPO=${MLM_REPO:-$(jq -r '."vcs-dependencies"."megatron-lm".repo' "$CURR/requirements/manifest.json")}
MLM_TAG=${MLM_TAG:-$(jq -r '."vcs-dependencies"."megatron-lm".ref' "$CURR/requirements/manifest.json")}
MLM_DIR="$INSTALL_DIR/Megatron-LM"
if [ ! -d "$MLM_DIR/.git" ]; then
rm -rf "$MLM_DIR"
mkdir -p $(dirname "$MLM_DIR")
cd $(dirname "$MLM_DIR")
git clone ${MLM_REPO}
fi
pushd $MLM_DIR
git checkout -f $MLM_TAG
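# Drop the triton pin and nvidia-resiliency-ext (installed separately in extra()) from Megatron-LM's requirements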
perl -ni -e 'print unless /triton==3.1.0/' requirements/pytorch_24.10/requirements.txt
perl -ni -e 'print unless /nvidia-resiliency-ext/' requirements/pytorch_24.10/requirements.txt
popd
build() {
if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
pip wheel --no-deps --no-cache-dir --no-build-isolation --wheel-dir $WHEELS_DIR $MAMBA_DIR
pip wheel --no-deps --no-cache-dir --no-build-isolation --wheel-dir $WHEELS_DIR $CAUSAL_CONV1D_DIR
fi
pip wheel --no-deps --wheel-dir $WHEELS_DIR $MLM_DIR
}
if [[ "$mode" == "build" ]]; then
build
else
if [ -d "$WHEELS_DIR" ] && [ -z "$(ls -A "$WHEELS_DIR")" ]; then
build
fi
pip install --no-cache-dir $WHEELS_DIR/*.whl "nvidia-pytriton ; platform_machine == 'x86_64'" || true
pip install --no-cache-dir -e $MLM_DIR
fi
}
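# vllm: builds/installs the vLLM requirement set into a dedicated virtualenv at
# $INSTALL_DIR/venv rather than into the main environment.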
vllm() {
local mode="$1"
local WHEELS_DIR=$WHEELS_DIR/vllm/
mkdir -p $WHEELS_DIR
VLLM_DIR="$INSTALL_DIR/vllm"
build() {
if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
${PIP} install --no-cache-dir virtualenv
virtualenv $INSTALL_DIR/venv
$INSTALL_DIR/venv/bin/pip install --no-cache-dir setuptools coverage
$INSTALL_DIR/venv/bin/pip wheel --no-cache-dir --no-build-isolation \
--wheel-dir $WHEELS_DIR/ \
-r $CURR/requirements/requirements_vllm.txt
fi
}
if [[ "$mode" == "build" ]]; then
build
else
if [ -d "$WHEELS_DIR" ] && [ -z "$(ls -A "$WHEELS_DIR")" ]; then
build
fi
${PIP} install --no-cache-dir virtualenv
virtualenv $INSTALL_DIR/venv
$INSTALL_DIR/venv/bin/pip install --no-cache-dir coverage
$INSTALL_DIR/venv/bin/pip install --no-cache-dir --no-build-isolation $WHEELS_DIR/*.whl || true
fi
}
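# extra: installs additional pinned dependencies (llama-index, nemo_run,
# nvidia-modelopt, liger-kernel, cut-cross-entropy, and, inside the NVIDIA
# PyTorch container, nvidia-resiliency-ext).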
extra() {
local mode="$1"
DEPS=(
"llama-index==0.10.43" # incompatible with nvidia-pytriton
"nemo_run"
"nvidia-modelopt==0.37.0" # We want a specific version of nvidia-modelopt
)
if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
DEPS+=(
"git+https://github.com/NVIDIA/nvidia-resiliency-ext.git@b6eb61dbf9fe272b1a943b1b0d9efdde99df0737 ; platform_machine == 'x86_64'" # Compiling NvRX requires CUDA
)
fi
if [[ "$mode" == "install" ]]; then
pip install --force-reinstall --no-deps --no-cache-dir "${DEPS[@]}"
pip install --no-cache-dir "${DEPS[@]}"
# needs no-deps to avoid installing triton on top of pytorch-triton.
pip install --no-deps --no-cache-dir "liger-kernel==0.5.8; (platform_machine == 'x86_64' and platform_system != 'Darwin')"
pip install --no-deps "cut-cross-entropy @ git+https://github.com/apple/ml-cross-entropy.git@87a86aba72cfd2f0d8abecaf81c13c4528ea07d8; (platform_machine == 'x86_64' and platform_system != 'Darwin')"
fi
}
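# --------------------------
# Main flow
# --------------------------
# Clean up legacy packages, upgrade build tooling, then run the requested install.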
echo 'Uninstalling stuff'
# Some of these packages are uninstalled for legacy purposes
${PIP} uninstall -y nemo_toolkit sacrebleu nemo_asr nemo_nlp nemo_tts
echo 'Upgrading tools'
${PIP} install -U --no-cache-dir "setuptools==76.0.0" pybind11 wheel ${PIP}
if [ "${NVIDIA_PYTORCH_VERSION}" != "" ]; then
echo "Installing NeMo in NVIDIA PyTorch container: ${NVIDIA_PYTORCH_VERSION}"
echo "Will not install numba"
else
if [ "${CONDA_PREFIX}" != "" ]; then
echo 'Installing numba'
conda install -y -c conda-forge numba
else
pip install --no-cache-dir --no-deps torch cython
fi
fi
echo 'Installing nemo dependencies'
cd $CURR
if [[ "$INSTALL_OPTION" == "dev" ]]; then
echo "Running in dev mode"
${PIP} install --editable ".[all]"
else
# --------------------------
# Argument Parsing & Validation
# --------------------------
# Parse command-line arguments
LIBRARY_ARG=""
MODE=""
while [[ $# -gt 0 ]]; do
case "$1" in
--library)
LIBRARY_ARG="$2"
shift 2
;;
--mode)
MODE="$2"
shift 2
;;
*)
echo "Unknown option: $1"
exit 1
;;
esac
done
# Validate required arguments
if [[ -z "$LIBRARY_ARG" ]]; then
echo "Error: --library argument is required"
exit 1
fi
if [[ -z "$MODE" ]]; then
echo "Error: --mode argument is required"
exit 1
fi
# Validate mode
if [[ "$MODE" != "build" && "$MODE" != "install" ]]; then
echo "Error: Invalid mode. Must be 'build' or 'install'"
exit 1
fi
# Process library argument
declare -a LIBRARIES
if [[ "$LIBRARY_ARG" == "all" ]]; then
LIBRARIES=("${ALL_LIBRARIES[@]}")
else
IFS=',' read -ra TEMP_ARRAY <<<"$LIBRARY_ARG"
for lib in "${TEMP_ARRAY[@]}"; do
trimmed_lib=$(echo "$lib" | xargs)
if [[ -n "$trimmed_lib" ]]; then
LIBRARIES+=("$trimmed_lib")
fi
done
fi
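# e.g. --library "te, mcore" yields LIBRARIES=(te mcore)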
# Validate libraries array
if [[ ${#LIBRARIES[@]} -eq 0 ]]; then
echo "Error: No valid libraries specified"
exit 1
fi
# Validate each library is supported
for lib in "${LIBRARIES[@]}"; do
# "trt" is a valid option but not in ALL_LIBRARIES
# It does not get installed at the same time as the rest
if [[ "$lib" == "trt" ]]; then
continue
fi
if [[ ! " ${ALL_LIBRARIES[@]} " =~ " ${lib} " ]]; then
echo "Error: Unsupported library '$lib'"
exit 1
fi
done
# --------------------------
# Execution Logic
# --------------------------
# Run operations for each library
for library in "${LIBRARIES[@]}"; do
echo "Processing $library ($MODE)..."
"$library" "$MODE"
# Check if function succeeded
if [[ $? -ne 0 ]]; then
echo "Error: Operation failed for $library"
exit 1
fi
done
echo "All operations completed successfully"
exit 0
fi
echo 'All done!'