# syntax=docker/dockerfile:1

# Multi-stage build for WhisperX images.
# Stages visible here: prepare_base_<arch> -> base -> build -> no_model -> load_whisper.
# Global ARGs declared before the first FROM are only visible to FROM lines;
# each stage re-declares the ones it needs.
ARG WHISPER_MODEL=base
ARG LANG=en
ARG UID=1001
ARG VERSION=EDGE
ARG RELEASE=0

# These ARGs are for caching stage builds in CI
# Leave them as is when building locally
ARG LOAD_WHISPER_STAGE=load_whisper
ARG NO_MODEL_STAGE=no_model

# When downloading diarization model with auth token, it seems that it is not respecting the TORCH_HOME env variable.
# So it is necessary to ensure that the CACHE_HOME is set to the exact same path as the default path.
# https://github.com/jim60105/docker-whisperX/issues/27
ARG CACHE_HOME=/.cache
ARG CONFIG_HOME=/.config
ARG TORCH_HOME=${CACHE_HOME}/torch
ARG HF_HOME=${CACHE_HOME}/huggingface

########################################
# Base stage for amd64
########################################
FROM docker.io/library/python:3.11-slim AS prepare_base_amd64

# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
ARG TARGETVARIANT

WORKDIR /tmp

ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Missing dependencies for nvidia
# The apt package cache and lists live in per-architecture cache mounts, so they never land in an image layer.
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
    --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
    # Add non-free and non-free-firmware components to apt sources to install libnppicc12
    sed -i 's/^Components: main$/Components: main non-free non-free-firmware/' /etc/apt/sources.list.d/debian.sources && \
    apt-get update && apt-get install -y --no-install-recommends \
    libnppicc12

########################################
# Base stage for arm64
########################################
FROM docker.io/library/python:3.11-slim AS prepare_base_arm64

# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
ARG TARGETVARIANT

WORKDIR /tmp

# Missing dependencies for arm64 (needed for build-time and run-time)
# https://github.com/jim60105/docker-whisperX/issues/14
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
    --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
    apt-get update && apt-get install -y --no-install-recommends \
    libgomp1 libsndfile1

# Select the base stage by target architecture
FROM prepare_base_$TARGETARCH$TARGETVARIANT AS base

########################################
# Build stage
########################################
FROM base AS build

# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
ARG TARGETVARIANT

ENV TARGETARCH=${TARGETARCH}

WORKDIR /app

# Install uv
# NOTE(review): uv is pulled from the floating `latest` tag; consider pinning a version for reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
ENV UV_PROJECT_ENVIRONMENT=/venv
ENV VIRTUAL_ENV=/venv
ENV UV_LINK_MODE=copy
ENV UV_PYTHON_DOWNLOADS=0
ENV UV_PYTHON=3.11

# Install curl
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
    --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
    apt-get update && apt-get install -y --no-install-recommends \
    curl

# Get Dumb Init
# Map Docker TARGETARCH to dumb-init naming convention
# TARGETARCH: amd64 -> x86_64, arm64 -> aarch64
# The executable bit is not set here; it is applied by `COPY --chmod=775` in the no_model stage.
# NOTE(review): the binary is downloaded without checksum verification.
RUN case "${TARGETARCH}" in \
        amd64) DUMBINIT_ARCH="x86_64" ;; \
        arm64) DUMBINIT_ARCH="aarch64" ;; \
        *) echo "unsupported architecture: ${TARGETARCH}" && exit 1 ;; \
    esac && \
    curl -fsSL "https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_${DUMBINIT_ARCH}" \
    -o /usr/local/bin/dumb-init

# Install whisperX dependencies
# Only the manifest and lock file are bind-mounted, so this layer stays cached until they change.
RUN --mount=type=cache,id=uv-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/uv \
    --mount=type=bind,source=whisperX/pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=whisperX/uv.lock,target=uv.lock \
    uv sync --frozen --no-dev --no-install-project --no-editable

# Install whisperX project
RUN --mount=type=cache,id=uv-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/uv \
    --mount=source=whisperX,target=.,rw \
    uv sync --frozen --no-dev --no-editable

########################################
# Final stage for no_model
########################################
FROM base AS no_model

# We don't need them anymore
RUN pip3.11 uninstall -y pip wheel && \
    rm -rf /root/.cache/pip

# Create user
ARG UID
RUN groupadd -g $UID whisperx && \
    useradd -l -u $UID -g $UID -m -s /bin/sh -N whisperx

ARG CACHE_HOME
ARG CONFIG_HOME
ARG TORCH_HOME
ARG HF_HOME
ENV XDG_CACHE_HOME=${CACHE_HOME}
ENV TORCH_HOME=${TORCH_HOME}
ENV HF_HOME=${HF_HOME}

# Directories are owned by $UID with group 0 and mode 775 (group-writable).
RUN install -d -m 775 -o $UID -g 0 /licenses && \
    install -d -m 775 -o $UID -g 0 /root && \
    install -d -m 775 -o $UID -g 0 ${CACHE_HOME} && \
    install -d -m 775 -o $UID -g 0 ${CONFIG_HOME} && \
    install -d -m 775 -o $UID -g 0 /nltk_data

# Copy dumb-init
COPY --link --chown=$UID:0 --chmod=775 --from=build /usr/local/bin/dumb-init /usr/local/bin/dumb-init

# Copy licenses (OpenShift Policy)
COPY --link --chown=$UID:0 --chmod=775 LICENSE /licenses/LICENSE
COPY --link --chown=$UID:0 --chmod=775 whisperX/LICENSE /licenses/whisperX.LICENSE

# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
ARG TARGETVARIANT

# ffmpeg (install via package manager to provide shared libraries for torchcodec)
# https://github.com/jim60105/docker-whisperX/issues/98
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
    --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
    apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg

# Copy dependencies and code (and support arbitrary uid for OpenShift best practice)
# https://docs.openshift.com/container-platform/4.14/openshift_images/create-images.html#use-uid_create-images
COPY --link --chown=$UID:0 --chmod=775 --from=build /venv /venv

ENV PATH="/venv/bin${PATH:+:${PATH}}"
ENV PYTHONPATH="/venv/lib/python3.11/site-packages"
# No literal ':' before the optional suffix: a trailing or doubled ':' creates an empty entry,
# which the dynamic loader treats as the current working directory (here the mounted /app volume).
ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:/lib/aarch64-linux-gnu:/venv/lib/python3.11/site-packages/nvidia/cudnn/lib:/venv/lib/python3.11/site-packages/torch/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# Test whisperX
RUN python3 -c 'import whisperx;' && \
    whisperx -h

WORKDIR /app

VOLUME [ "/app" ]

USER $UID

STOPSIGNAL SIGINT

# dumb-init runs as PID 1 and launches whisperx through a shell wrapper.
# With `sh -c`, the first container argument becomes $0 and is not part of "$@",
# so callers presumably pass a placeholder (e.g. `--`) first; confirm against the usage docs.
ENTRYPOINT [ "dumb-init", "--", "/bin/sh", "-c", "whisperx \"$@\"" ]

ARG VERSION
ARG RELEASE
LABEL name="jim60105/docker-whisperX" \
    # Authors for WhisperX
    vendor="Bain, Max and Huh, Jaesung and Han, Tengda and Zisserman, Andrew" \
    # Maintainer for this docker image
    maintainer="jim60105" \
    # Dockerfile source repository
    url="https://github.com/jim60105/docker-whisperX" \
    version=${VERSION} \
    # This should be a number, incremented with each change
    release=${RELEASE} \
    io.k8s.display-name="WhisperX" \
    summary="WhisperX: Time-Accurate Speech Transcription of Long-Form Audio" \
    description="This is the docker image for WhisperX: Automatic Speech Recognition with Word-Level Timestamps (and Speaker Diarization) from the community. For more information about this tool, please visit the following website: https://github.com/m-bain/whisperX."

########################################
# load_whisper stage
# This stage will be tagged for caching in CI.
########################################
FROM ${NO_MODEL_STAGE} AS load_whisper

ARG CONFIG_HOME
ARG XDG_CONFIG_HOME=${CONFIG_HOME}
ARG HOME="/root"

# Preload Silero vad model
RUN python3 <