E2E Windows, accuracy, huggingface, inference, float16 #131

Workflow file for this run

name: E2E on Windows
run-name: ${{ inputs.run_name }}
on:
  workflow_dispatch:
    inputs:
      test_mode:
        description: accuracy or performance
        type: string
        default: accuracy
      suite:
        description: Test suite
        type: choice
        options:
          - all
          - huggingface
          - timm_models
          - torchbench
        default: all
      mode:
        description: Inference, inference-with-freezing, or training
        type: choice
        options:
          - all
          - inference
          - inference-with-freezing
          - training
        default: all
      dtype:
        description: Data type
        type: choice
        options:
          - all
          - amp_bf16
          - amp_fp16
          - bfloat16
          - float16
          - float32
        default: all
      models:
        description: Run all models or a subset from .github/models/{test_mode}/{suite}.txt
        type: choice
        options:
          - all
          - subset
        default: subset
      check_all_subset_models:
        description: In "subset" mode, keep going after errors
        type: boolean
        default: false
      only_one_model:
        description: Run only this one model
        type: string
        default: ""
      runner_label:
        description: Runner label
        type: string
        default: "b580"
      TORCH_COMPILE_DEBUG:
        description: TORCH_COMPILE_DEBUG
        type: string
        default: ""
      run_name:
        description: Custom run name
        type: string
        default: "E2E on Windows"
permissions: read-all
env:
  PYTHONIOENCODING: utf-8
  NEW_WORKSPACE: C:\gh${{ github.run_id }}
  TRITON_DISABLE_LINE_INFO: 1
  PYTHON_VERSION: "3.10"
  BENCHMARK_REPO: pytorch/benchmark
jobs:
  tests:
    name: Tests
    runs-on:
      - windows
      - ${{ inputs.runner_label }}
    timeout-minutes: 1440 # 24h
    strategy:
      fail-fast: false
    steps:
      - name: Print inputs
        shell: bash
        run: |
          cat <<EOF
          ${{ toJSON(github.event.inputs) }}
          EOF
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Clean up old workspaces
        shell: bash
        run: |
          rm -rf /c/gh*
      # Copy workspace to a temporary location with a shorter name.
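      # (Windows caps paths at 260 characters by default and PyTorch/Triton build
      # trees can get deep, so the short C:\gh<run_id> path is presumably meant to
      # keep file names under that limit.)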
      - name: Copy workspace
        run: |
          Copy-Item -Path ${{ github.workspace }} -Destination ${{ env.NEW_WORKSPACE }} -Recurse
      - name: Create venv
        run: |
          python -m venv .venv
      - name: Install PyTorch (source)
        run: |
          .venv\Scripts\activate.ps1
          Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
          # Required to build on Windows
          $env:CMAKE_SHARED_LINKER_FLAGS = "/FORCE:MULTIPLE"
          $env:CMAKE_MODULE_LINKER_FLAGS = "/FORCE:MULTIPLE"
          $env:CMAKE_EXE_LINKER_FLAGS = "/FORCE:MULTIPLE"
          $env:TORCH_XPU_ARCH_LIST = "bmg,dg2,arl-h,mtl-h"
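          # TORCH_XPU_ARCH_LIST limits the XPU device binaries to specific Intel GPU
          # architectures (bmg = Battlemage, dg2 = Alchemist/DG2, arl-h = Arrow Lake-H,
          # mtl-h = Meteor Lake-H), presumably to keep build time and size down.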
bash -c "PYTORCH_PROJ=/c/pytorch ./scripts/install-pytorch.sh --source --check-wheel"
- name: PyTorch version
run: |
.venv\Scripts\activate.ps1
Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
python -c 'import torch; print(torch.__version__)' | Tee-Object -Variable PYTORCH_VERSION
echo "PYTORCH_VERSION=$PYTORCH_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
      - name: Clean up Triton cache
        shell: bash
        run: |
          rm -rf ~/.triton/cache
      # We need ninja >= 1.12.0 to support long names on Windows. That version is not
      # yet available on PyPI, so instead of installing ninja with pip we use the
      # 1.12.1 preinstalled on the runner.
      - name: Setup Triton
        run: |
          .venv\Scripts\activate.ps1
          Invoke-BatchFile "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
          cd ${{ env.NEW_WORKSPACE }}
          pip install -U wheel pybind11 cython cmake
          pip install -v '.[build,tests,tutorials]'
      - name: Triton version
        run: |
          .venv\Scripts\activate.ps1
          Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
          python -c 'import triton; print(triton.__version__)'
      - name: Identify pinned versions
        shell: bash
        run: |
          cd /c/pytorch
          echo "BENCHMARK_COMMIT_ID=$(<.ci/docker/ci_commit_pins/torchbench.txt)" | tee -a "$GITHUB_ENV"
          echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" | tee -a "$GITHUB_ENV"
          echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" | tee -a "$GITHUB_ENV"
          echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" | tee -a "$GITHUB_ENV"
      - name: Install python test dependencies
        run: |
          .venv\Scripts\activate.ps1
          pip install pyyaml pandas scipy 'numpy==1.26.4' psutil pyre_extensions torchrec
      - name: Install transformers package
        if: inputs.suite == 'all' || inputs.suite == 'huggingface'
        run: |
          cd pytorch
          pip install -r .ci/docker/ci_commit_pins/huggingface-requirements.txt
      - name: Install torchvision package
        if: inputs.suite == 'all' || inputs.suite == 'timm_models' || inputs.suite == 'torchbench'
        env:
          DISTUTILS_USE_SDK: '1'
        uses: ./.github/actions/install-dependency
        with:
          package: torchvision
          repository: pytorch/vision
          ref: ${{ env.TORCHVISION_COMMIT_ID }}
          extra-cache-key: ${{ env.PYTORCH_VERSION }}
          workspace: /c/gh${{ github.run_id }}
      - name: Install torchaudio package
        if: inputs.suite == 'all' || inputs.suite == 'torchbench'
        uses: ./.github/actions/install-dependency
        with:
          package: torchaudio
          repository: pytorch/audio
          ref: ${{ env.TORCHAUDIO_COMMIT_ID }}
          extra-cache-key: ${{ env.PYTORCH_VERSION }}
          workspace: /c/gh${{ github.run_id }}
      - name: Install timm package
        if: inputs.suite == 'all' || inputs.suite == 'timm_models' || inputs.suite == 'torchbench'
        uses: ./.github/actions/install-dependency
        with:
          package: timm
          repository: huggingface/pytorch-image-models
          ref: ${{ env.TIMM_COMMIT_ID }}
          extra-cache-key: ${{ env.PYTORCH_VERSION }}
          workspace: /c/gh${{ github.run_id }}
      - name: Clone pytorch benchmark
        if: inputs.suite == 'all' || inputs.suite == 'torchbench'
        uses: actions/checkout@v4
        with:
          repository: ${{ env.BENCHMARK_REPO }}
          ref: ${{ env.BENCHMARK_COMMIT_ID }}
          submodules: recursive
          path: benchmark
      - name: Install pytorch benchmark
        if: inputs.suite == 'all' || inputs.suite == 'torchbench'
        run: |
          .venv\Scripts\activate.ps1
          Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
          bash -c '
            cd benchmark
            if [[ "${{ inputs.only_one_model }}" ]]; then
              python install.py "${{ inputs.only_one_model }}"
            else
              # install all models
              python install.py
            fi
            pip install -e .
          '
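      # Expand "all" into the full suite x mode x dtype cross-product and run
      # inductor_xpu_test.sh once per combination.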
      - name: Run e2e tests
        env:
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
        run: |
          .venv\Scripts\activate.ps1
          Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
          bash -c '
            cd /c/pytorch
            export WORKSPACE="/c/gh${{ github.run_id }}"
            if [[ "${{ inputs.TORCH_COMPILE_DEBUG }}" = "1" ]]; then
              export TORCH_COMPILE_DEBUG="1"
              # torch will save debug logs to $TORCH_COMPILE_DEBUG_DIR/torch_compile_debug
              export TORCH_COMPILE_DEBUG_DIR=$WORKSPACE
            fi
            if [[ "${{ inputs.suite }}" = "all" ]]; then
              suites=("huggingface" "timm_models" "torchbench")
            else
              suites=("${{ inputs.suite }}")
            fi
            if [[ "${{ inputs.mode }}" = "all" ]]; then
              modes=("inference" "inference-with-freezing" "training")
            else
              modes=("${{ inputs.mode }}")
            fi
            if [[ "${{ inputs.dtype }}" = "all" ]]; then
              dtypes=("amp_bf16" "amp_fp16" "bfloat16" "float16" "float32")
            else
              dtypes=("${{ inputs.dtype }}")
            fi
            # if "only_one_model" is set, then test this model
            # if "models" == "subset", then test the models from .github/models/{accuracy,performance}/{suite}.txt
            # otherwise test all models.
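            # The positional arguments to inductor_xpu_test.sh below appear to be:
            # suite, dtype, mode, test_mode, device, card id, shape mode, shard count,
            # shard id, and an optional model name (inferred from the calls here, not
            # from the script itself).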
            for suite in "${suites[@]}"; do
              for mode in "${modes[@]}"; do
                for dtype in "${dtypes[@]}"; do
                  if [[ "${{ inputs.only_one_model }}" ]]; then
                    bash -e $WORKSPACE/scripts/inductor_xpu_test.sh $suite $dtype $mode ${{ inputs.test_mode }} xpu 0 static 1 0 ${{ inputs.only_one_model }}
                  elif [[ "${{ inputs.models }}" == "subset" ]]; then
                    models_subset_file="$WORKSPACE/.github/models/${{ inputs.test_mode }}/$suite.txt"
                    while read -r model; do
                      bash -e $WORKSPACE/scripts/inductor_xpu_test.sh $suite $dtype $mode ${{ inputs.test_mode }} xpu 0 static 1 0 $model
                    done < "$models_subset_file"
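                    # check_inductor_report.py appears to scan the inductor logs for
                    # models that failed or were skipped, so errors still surface even
                    # though the loop above keeps going past individual failures.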
if [[ "${{ inputs.check_all_subset_models }}" == true ]]; then
python $WORKSPACE/scripts/check_inductor_report.py --models-file="$models_subset_file" \
--suite=$suite \
--dtype=$dtype \
--mode=$mode \
--test_mode=${{ inputs.test_mode }} \
--device=xpu \
--inductor-log-dir="$WORKSPACE/inductor_log"
fi
else
bash -e $WORKSPACE/scripts/inductor_xpu_test.sh $suite $dtype $mode ${{ inputs.test_mode }} xpu 0 static 1 0
fi
done
done
done
'
      - name: Identify GPU
        run: |
          # Initializing oneAPI to enable sycl-ls, which is used in capture-hw-details.sh on Windows.
          Invoke-BatchFile "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
          bash -c './scripts/capture-hw-details.sh | tee -a $GITHUB_ENV'
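      # capture-hw-details.sh is expected to export GPU_DEVICE, LIBIGC1_VERSION,
      # LEVEL_ZERO_VERSION and AGAMA_VERSION, which the report below picks up.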
      - name: Report environment details
        shell: bash
        run: |
          mkdir -p /c/gh${{ github.run_id }}/inductor_log
          cat <<EOF | tee /c/gh${{ github.run_id }}/inductor_log/e2e.env
          TIMESTAMP=$(date '+%Y%m%d%H%M%S')
          JOB_NAME=${{ join(inputs.*, '-') }}
          GITHUB_RUN_ID=$GITHUB_RUN_ID
          GITHUB_RUN_NUMBER=$GITHUB_RUN_NUMBER
          GITHUB_RUN_ATTEMPT=$GITHUB_RUN_ATTEMPT
          E2E_MODE=${{ inputs.mode }}
          E2E_TEST_MODE=${{ inputs.test_mode }}
          E2E_SUITE=${{ inputs.suite }}
          E2E_DTYPE=${{ inputs.dtype }}
          PYTHON_VERSION=$PYTHON_VERSION
          PYTORCH_REPO=pytorch/pytorch
          PYTORCH_COMMIT_ID=$(<.github/pins/pytorch.txt)
          PYTORCH_VERSION=$PYTORCH_VERSION
          BENCHMARK_REPO=$BENCHMARK_REPO
          BENCHMARK_COMMIT_ID=$BENCHMARK_COMMIT_ID
          TRITON_REPO=$GITHUB_REPOSITORY
          TRITON_COMMIT_ID=$GITHUB_SHA
          TORCHVISION_COMMIT_ID=$TORCHVISION_COMMIT_ID
          TORCHAUDIO_COMMIT_ID=$TORCHAUDIO_COMMIT_ID
          TIMM_COMMIT_ID=$TIMM_COMMIT_ID
          LIBIGC1_VERSION=$LIBIGC1_VERSION
          LEVEL_ZERO_VERSION=$LEVEL_ZERO_VERSION
          GPU_DEVICE=$GPU_DEVICE
          AGAMA_VERSION=$AGAMA_VERSION
          OS=${{ runner.os }}
          EOF
      - name: Upload test logs
        uses: actions/upload-artifact@v4
        with:
          name: logs
          path: ${{ env.NEW_WORKSPACE }}\inductor_log
          include-hidden-files: true
      - name: Clean up workspace
        if: ${{ always() }}
        run: |
          Remove-Item -LiteralPath ${{ env.NEW_WORKSPACE }} -Force -Recurse -ErrorAction Ignore
      - name: Clean up temporary files
        if: ${{ always() }}
        shell: bash
        run: |
          rm -rf /tmp/triton-* /tmp/tmp*