E2E Windows, accuracy, huggingface, inference, float16 #131
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# End-to-end (E2E) test run on a Windows runner, started manually.
name: E2E on Windows
# Title of an individual run in the Actions UI; taken from the `run_name` input.
run-name: ${{ inputs.run_name }}
# Manual trigger only: all parameters of a run come from the dispatch inputs.
on:
  workflow_dispatch:
    inputs:
      # Forwarded to inductor_xpu_test.sh and used to pick the models subset directory.
      test_mode:
        description: accuracy or performance
        type: string
        default: accuracy
      # "all" expands to huggingface, timm_models and torchbench in the test step.
      suite:
        description: Test suite
        type: choice
        options:
          - all
          - huggingface
          - timm_models
          - torchbench
        default: all
      # "all" expands to inference, inference-with-freezing and training.
      mode:
        description: Inference, inference-with-freezing, or training
        type: choice
        options:
          - all
          - inference
          - inference-with-freezing
          - training
        default: all
      # "all" expands to every data type listed below.
      dtype:
        description: Data type
        type: choice
        options:
          - all
          - amp_bf16
          - amp_fp16
          - bfloat16
          - float16
          - float32
        default: all
      # The subset file is looked up by test_mode (not mode) and suite.
      models:
        description: Run all models or a subset from .github/models/{test_mode}/{suite}.txt
        type: choice
        options:
          - all
          - subset
        default: subset
      # When true, check_inductor_report.py is run after the subset loop.
      check_all_subset_models:
        description: In "subset" mode, keep going after errors
        type: boolean
        default: false
      # A non-empty value takes precedence over the `models` input.
      only_one_model:
        description: Run only this one model
        type: string
        default: ""
      # Second label (besides "windows") used to select the runner.
      runner_label:
        description: Runner label
        type: string
        default: "b580"
      # Debug logging is enabled only when this is exactly "1".
      TORCH_COMPILE_DEBUG:
        description: TORCH_COMPILE_DEBUG
        type: string
        default: ""
      run_name:
        description: Custom run name
        type: string
        default: "E2E on Windows"
# The workflow token gets read-only access to all scopes.
permissions: read-all

# Environment shared by every step of every job.
env:
  PYTHONIOENCODING: utf-8
  # Per-run copy of the workspace at the drive root; filled by the "Copy workspace" step
  # and removed again by "Clean up workspace".
  NEW_WORKSPACE: C:\gh${{ github.run_id }}
  # Environment values are strings; quoted so no YAML tool re-types them.
  TRITON_DISABLE_LINE_INFO: "1"
  PYTHON_VERSION: "3.10"
  # Repository checked out by the "Clone pytorch benchmark" step.
  BENCHMARK_REPO: pytorch/benchmark
jobs:
  # Single job: builds PyTorch and Triton, installs the benchmark dependencies,
  # runs the selected suite/mode/dtype combinations and uploads the logs.
  tests:
    name: Tests
    runs-on:
      - windows
      - ${{ inputs.runner_label }}
    timeout-minutes: 1440  # 24h
    strategy:
      fail-fast: false
    steps:
      - name: Print inputs
        shell: bash
        run: |
          cat <<EOF
          ${{ toJSON(github.event.inputs) }}
          EOF

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          # Single source of truth: the same value is written to e2e.env below.
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Clean up old workspaces
        shell: bash
        run: |
          rm -rf /c/gh*

      # Copy workspace to a temporary location with a shorter name.
      - name: Copy workspace
        run: |
          Copy-Item -Path ${{ github.workspace }} -Destination ${{ env.NEW_WORKSPACE }} -Recurse

      - name: Create venv
        run: |
          python -m venv .venv

      - name: Install PyTorch (source)
        run: |
          .venv\Scripts\activate.ps1
          Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
          # Required to build on Windows
          $env:CMAKE_SHARED_LINKER_FLAGS = "/FORCE:MULTIPLE"
          $env:CMAKE_MODULE_LINKER_FLAGS = "/FORCE:MULTIPLE"
          $env:CMAKE_EXE_LINKER_FLAGS = "/FORCE:MULTIPLE"
          $env:TORCH_XPU_ARCH_LIST = "bmg,dg2,arl-h,mtl-h"
          bash -c "PYTORCH_PROJ=/c/pytorch ./scripts/install-pytorch.sh --source --check-wheel"

      # Prints the installed version and exports it as PYTORCH_VERSION for later steps.
      - name: PyTorch version
        run: |
          .venv\Scripts\activate.ps1
          Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
          python -c 'import torch; print(torch.__version__)' | Tee-Object -Variable PYTORCH_VERSION
          echo "PYTORCH_VERSION=$PYTORCH_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append

      - name: Clean up Triton cache
        shell: bash
        run: |
          rm -rf ~/.triton/cache

      # We need ninja >= 1.12.0 to support long names on Windows. At the moment there is no required
      # version in pypi, so instead of installing ninja with pip we use a preinstalled 1.12.1 on the
      # runner.
      - name: Setup Triton
        run: |
          .venv\Scripts\activate.ps1
          Invoke-BatchFile "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
          cd ${{ env.NEW_WORKSPACE }}
          pip install -U wheel pybind11 cython cmake
          pip install -v '.[build,tests,tutorials]'

      - name: Triton version
        run: |
          .venv\Scripts\activate.ps1
          Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
          python -c 'import triton; print(triton.__version__)'

      # Reads the commit pins from the PyTorch checkout and exports them via GITHUB_ENV.
      - name: Identify pinned versions
        shell: bash
        run: |
          cd /c/pytorch
          echo "BENCHMARK_COMMIT_ID=$(<.ci/docker/ci_commit_pins/torchbench.txt)" | tee -a "$GITHUB_ENV"
          echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" | tee -a "$GITHUB_ENV"
          echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" | tee -a "$GITHUB_ENV"
          echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" | tee -a "$GITHUB_ENV"

      - name: Install python test dependencies
        run: |
          .venv\Scripts\activate.ps1
          pip install pyyaml pandas scipy 'numpy==1.26.4' psutil pyre_extensions torchrec

      - name: Install transformers package
        if: inputs.suite == 'all' || inputs.suite == 'huggingface'
        run: |
          # Install into the same venv the tests run in, like every other pip step.
          .venv\Scripts\activate.ps1
          # The PyTorch sources live in C:\pytorch (PYTORCH_PROJ=/c/pytorch in the install step),
          # the same checkout "Identify pinned versions" reads its pins from.
          cd C:\pytorch
          pip install -r .ci/docker/ci_commit_pins/huggingface-requirements.txt

      - name: Install torchvision package
        if: inputs.suite == 'all' || inputs.suite == 'timm_models' || inputs.suite == 'torchbench'
        env:
          DISTUTILS_USE_SDK: '1'
        uses: ./.github/actions/install-dependency
        with:
          package: torchvision
          repository: pytorch/vision
          ref: ${{ env.TORCHVISION_COMMIT_ID }}
          extra-cache-key: ${{ env.PYTORCH_VERSION }}
          workspace: /c/gh${{ github.run_id }}

      - name: Install torchaudio package
        if: inputs.suite == 'all' || inputs.suite == 'torchbench'
        uses: ./.github/actions/install-dependency
        with:
          package: torchaudio
          repository: pytorch/audio
          ref: ${{ env.TORCHAUDIO_COMMIT_ID }}
          extra-cache-key: ${{ env.PYTORCH_VERSION }}
          workspace: /c/gh${{ github.run_id }}

      - name: Install timm package
        if: inputs.suite == 'all' || inputs.suite == 'timm_models' || inputs.suite == 'torchbench'
        uses: ./.github/actions/install-dependency
        with:
          package: timm
          repository: huggingface/pytorch-image-models
          ref: ${{ env.TIMM_COMMIT_ID }}
          extra-cache-key: ${{ env.PYTORCH_VERSION }}
          workspace: /c/gh${{ github.run_id }}

      - name: Clone pytorch benchmark
        if: inputs.suite == 'all' || inputs.suite == 'torchbench'
        uses: actions/checkout@v4
        with:
          repository: ${{ env.BENCHMARK_REPO }}
          ref: ${{ env.BENCHMARK_COMMIT_ID }}
          submodules: recursive
          path: benchmark

      - name: Install pytorch benchmark
        if: inputs.suite == 'all' || inputs.suite == 'torchbench'
        run: |
          .venv\Scripts\activate.ps1
          Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
          bash -c '
          cd benchmark
          if [[ "${{ inputs.only_one_model }}" ]]; then
            python install.py "${{ inputs.only_one_model }}"
          else
            # install all models
            python install.py
          fi
          pip install -e .
          '

      # Runs inductor_xpu_test.sh once per (suite, mode, dtype) combination; within a
      # combination the model selection follows only_one_model / models (see below).
      - name: Run e2e tests
        env:
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
        run: |
          .venv\Scripts\activate.ps1
          Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
          bash -c '
          cd /c/pytorch
          export WORKSPACE="/c/gh${{ github.run_id }}"
          if [[ "${{ inputs.TORCH_COMPILE_DEBUG }}" = "1" ]] ; then
            export TORCH_COMPILE_DEBUG="1"
            # torch will save debug logs to $TORCH_COMPILE_DEBUG_DIR/torch_compile_debug
            export TORCH_COMPILE_DEBUG_DIR=$WORKSPACE
          fi
          if [[ "${{ inputs.suite }}" = "all" ]]; then
            suites=("huggingface" "timm_models" "torchbench")
          else
            suites=("${{ inputs.suite }}")
          fi
          if [[ "${{ inputs.mode }}" = "all" ]]; then
            modes=("inference" "inference-with-freezing" "training")
          else
            modes=("${{ inputs.mode }}")
          fi
          if [[ "${{ inputs.dtype }}" = "all" ]]; then
            dtypes=("amp_bf16" "amp_fp16" "bfloat16" "float16" "float32")
          else
            dtypes=("${{ inputs.dtype }}")
          fi
          # if "only_one_model" is set, then test this model
          # if "models" == "subset", then test the models from .github/models/{accuracy,performance}/{suite}.txt
          # otherwise test all models.
          for suite in ${suites[@]}; do
            for mode in ${modes[@]}; do
              for dtype in ${dtypes[@]}; do
                if [[ "${{ inputs.only_one_model }}" ]]; then
                  bash -e $WORKSPACE/scripts/inductor_xpu_test.sh $suite $dtype $mode ${{ inputs.test_mode }} xpu 0 static 1 0 ${{ inputs.only_one_model }}
                elif [[ "${{ inputs.models }}" == "subset" ]]; then
                  models_subset_file="$WORKSPACE/.github/models/${{ inputs.test_mode }}/$suite.txt"
                  while read model; do
                    bash -e $WORKSPACE/scripts/inductor_xpu_test.sh $suite $dtype $mode ${{ inputs.test_mode }} xpu 0 static 1 0 $model
                  done < $models_subset_file
                  if [[ "${{ inputs.check_all_subset_models }}" == true ]]; then
                    python $WORKSPACE/scripts/check_inductor_report.py --models-file="$models_subset_file" \
                      --suite=$suite \
                      --dtype=$dtype \
                      --mode=$mode \
                      --test_mode=${{ inputs.test_mode }} \
                      --device=xpu \
                      --inductor-log-dir="$WORKSPACE/inductor_log"
                  fi
                else
                  bash -e $WORKSPACE/scripts/inductor_xpu_test.sh $suite $dtype $mode ${{ inputs.test_mode }} xpu 0 static 1 0
                fi
              done
            done
          done
          '

      - name: Identify GPU
        run: |
          # Initializing oneAPI to enable sycl-ls, which is used in capture-hw-details.sh on Windows.
          Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
          bash -c './scripts/capture-hw-details.sh | tee -a $GITHUB_ENV'

      # Writes a KEY=VALUE summary of the run next to the test logs so it is uploaded with them.
      - name: Report environment details
        shell: bash
        run: |
          mkdir -p /c/gh${{ github.run_id }}/inductor_log
          cat <<EOF | tee /c/gh${{ github.run_id }}/inductor_log/e2e.env
          TIMESTAMP=$(date '+%Y%m%d%H%M%S')
          JOB_NAME=${{ join(inputs.*, '-') }}
          GITHUB_RUN_ID=$GITHUB_RUN_ID
          GITHUB_RUN_NUMBER=$GITHUB_RUN_NUMBER
          GITHUB_RUN_ATTEMPT=$GITHUB_RUN_ATTEMPT
          E2E_MODE=${{ inputs.mode }}
          E2E_TEST_MODE=${{ inputs.test_mode }}
          E2E_SUITE=${{ inputs.suite }}
          E2E_DTYPE=${{ inputs.dtype }}
          PYTHON_VERSION=$PYTHON_VERSION
          PYTORCH_REPO=pytorch/pytorch
          PYTORCH_COMMIT_ID=$(<.github/pins/pytorch.txt)
          PYTORCH_VERSION=$PYTORCH_VERSION
          BENCHMARK_REPO=$BENCHMARK_REPO
          BENCHMARK_COMMIT_ID=$BENCHMARK_COMMIT_ID
          TRITON_REPO=$GITHUB_REPOSITORY
          TRITON_COMMIT_ID=$GITHUB_SHA
          TORCHVISION_COMMIT_ID=$TORCHVISION_COMMIT_ID
          TORCHAUDIO_COMMIT_ID=$TORCHAUDIO_COMMIT_ID
          TIMM_COMMIT_ID=$TIMM_COMMIT_ID
          LIBIGC1_VERSION=$LIBIGC1_VERSION
          LEVEL_ZERO_VERSION=$LEVEL_ZERO_VERSION
          GPU_DEVICE=$GPU_DEVICE
          AGAMA_VERSION=$AGAMA_VERSION
          OS=${{ runner.os }}
          EOF

      - name: Upload test logs
        uses: actions/upload-artifact@v4
        with:
          name: logs
          path: ${{ env.NEW_WORKSPACE }}\inductor_log
          include-hidden-files: true

      # Both clean-up steps run even when an earlier step failed.
      - name: Clean up workspace
        if: ${{ always() }}
        run: |
          Remove-Item -LiteralPath ${{ env.NEW_WORKSPACE }} -Force -Recurse -ErrorAction Ignore

      - name: Clean up temporary files
        if: ${{ always() }}
        shell: bash
        run: |
          rm -rf /tmp/triton-* /tmp/tmp*