Guobing-Chen · Guobing-Chen · Nov 15, 2022 · Nov 15, 2022 · Nov 16, 2022 · Nov 16, 2022
diff --git a/.bazelrc b/.bazelrc
@@ -1,4 +1,4 @@
-build --cxxopt=--std=c++14
+build --cxxopt=--std=c++17
 build --copt=-I.
 # Bazel does not support including its cc_library targets as system
 # headers. We work around this for generated code

diff --git a/.circleci/README.md b/.circleci/README.md
diff --git a/.circleci/docker/build.sh b/.circleci/docker/build.sh
@@ -81,7 +81,7 @@ fi
 
 TRAVIS_DL_URL_PREFIX="https://s3.amazonaws.com/travis-python-archives/binaries/ubuntu/14.04/x86_64"
 _UCX_COMMIT=31e74cac7bee0ef66bef2af72e7d86d9c282e5ab
-_UCC_COMMIT=12944da33f911daf505d9bbc51411233d0ed85e1
+_UCC_COMMIT=1c7a7127186e7836f73aafbd7697bbc274a77eee
 
 # It's annoying to rename jobs every time you want to rewrite a
 # configuration, so we hardcode everything here rather than do it

diff --git a/.circleci/docker/common/install_cudnn.sh b/.circleci/docker/common/install_cudnn.sh
@@ -6,9 +6,9 @@ if [[ ${CUDNN_VERSION} == 8 ]]; then
     CUDNN_NAME="cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive"
     if [[ ${CUDA_VERSION:0:4} == "11.7" ]]; then
         CUDNN_NAME="cudnn-linux-x86_64-8.5.0.96_cuda11-archive"
-        curl -OLs https://ossci-linux.s3.amazonaws.com/${CUDNN_NAME}.tar.xz
+        curl --retry 3 -OLs https://ossci-linux.s3.amazonaws.com/${CUDNN_NAME}.tar.xz
     else
-        curl -OLs  https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/${CUDNN_NAME}.tar.xz
+        curl --retry 3 -OLs  https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/${CUDNN_NAME}.tar.xz
     fi
 
     tar xf ${CUDNN_NAME}.tar.xz

diff --git a/.circleci/docker/common/install_docs_reqs.sh b/.circleci/docker/common/install_docs_reqs.sh
@@ -7,10 +7,10 @@ if [ -n "$KATEX" ]; then
   # Ignore error if gpg-agent doesn't exist (for Ubuntu 16.04)
   apt-get install -y gpg-agent || :
 
-  curl -sL https://deb.nodesource.com/setup_12.x | sudo -E bash -
+  curl --retry 3 -sL https://deb.nodesource.com/setup_12.x | sudo -E bash -
   sudo apt-get install -y nodejs
 
-  curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
+  curl --retry 3 -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
   echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
 
   apt-get update

diff --git a/.circleci/docker/common/install_protobuf.sh b/.circleci/docker/common/install_protobuf.sh
@@ -12,7 +12,7 @@ install_protobuf_317() {
   #   g++: error: ./../lib64/crti.o: No such file or directory
   ln -s /usr/lib64 "$pb_dir/lib64"
 
-  curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz"
+  curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3
   tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
   # -j6 to balance memory usage and speed.
   # naked `-j` seems to use too much memory.

diff --git a/.circleci/docker/requirements-ci.txt b/.circleci/docker/requirements-ci.txt
@@ -159,8 +159,13 @@ pytest-shard
 #Pinned versions:
 #test that import:
 
+pytest-flakefinder==1.1.0
+#Description: plugin for rerunning tests a fixed number of times in pytest
+#Pinned versions: 1.1.0
+#test that import:
+
 pytest-rerunfailures
-#Description: plugin for rerunning tests in pytest
+#Description: plugin for rerunning failure tests in pytest
 #Pinned versions:
 #test that import:
 

diff --git a/.circleci/scripts/binary_install_miniconda.sh b/.circleci/scripts/binary_install_miniconda.sh
@@ -31,9 +31,9 @@ fi
 
 conda_sh="$workdir/install_miniconda.sh"
 if [[ "$(uname)" == Darwin ]]; then
-  curl --retry 3 -o "$conda_sh" https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
+  curl --retry 3 --retry-all-errors -o "$conda_sh" https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
 else
-  curl --retry 3 -o "$conda_sh" https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+  curl --retry 3 --retry-all-errors -o "$conda_sh" https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
 fi
 chmod +x "$conda_sh"
 "$conda_sh" -b -p "$MINICONDA_ROOT"

diff --git a/.circleci/scripts/binary_ios_build.sh b/.circleci/scripts/binary_ios_build.sh
@@ -8,7 +8,7 @@ PROJ_ROOT=/Users/distiller/project
 export TCLLIBPATH="/usr/local/lib"
 
 # Install conda
-curl --retry 3 -o ~/conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
+curl --retry 3 --retry-all-errors -o ~/conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
 chmod +x ~/conda.sh
 /bin/bash ~/conda.sh -b -p ~/anaconda
 export PATH="~/anaconda/bin:${PATH}"

diff --git a/.circleci/scripts/binary_ios_upload.sh b/.circleci/scripts/binary_ios_upload.sh
@@ -47,7 +47,7 @@ echo "${IOS_NIGHTLY_BUILD_VERSION}" > version.txt
 zip -r ${ZIPFILE} install src version.txt LICENSE
 # upload to aws
 # Install conda then 'conda install' awscli
-curl --retry 3 -o ~/conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
+curl --retry 3 --retry-all-errors -o ~/conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
 chmod +x ~/conda.sh
 /bin/bash ~/conda.sh -b -p ~/anaconda
 export PATH="~/anaconda/bin:${PATH}"

diff --git a/.circleci/scripts/driver_update.bat b/.circleci/scripts/driver_update.bat
@@ -1,5 +1,5 @@
 set "DRIVER_DOWNLOAD_LINK=https://s3.amazonaws.com/ossci-windows/452.39-data-center-tesla-desktop-win10-64bit-international.exe"
-curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output 452.39-data-center-tesla-desktop-win10-64bit-international.exe
+curl --retry 3 --retry-all-errors -kL %DRIVER_DOWNLOAD_LINK% --output 452.39-data-center-tesla-desktop-win10-64bit-international.exe
 if errorlevel 1 exit /b 1
 
 start /wait 452.39-data-center-tesla-desktop-win10-64bit-international.exe -s -noreboot

diff --git a/.circleci/scripts/setup_ci_environment.sh b/.circleci/scripts/setup_ci_environment.sh
@@ -40,8 +40,8 @@ if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then
   # Taken directly from https://github.com/NVIDIA/nvidia-docker
   # Add the package repositories
   distribution=$(. /etc/os-release;echo "$ID$VERSION_ID")
-  curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
-  curl -s -L "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
+  curl -s -L --retry 3 --retry-all-errors https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
+  curl -s -L --retry 3 --retry-all-errors "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
 
   retry sudo apt-get update -qq
   # Necessary to get the `--gpus` flag to function within docker

diff --git a/.circleci/scripts/setup_linux_system_environment.sh b/.circleci/scripts/setup_linux_system_environment.sh
@@ -2,7 +2,7 @@
 set -eux -o pipefail
 
 # Set up CircleCI GPG keys for apt, if needed
-curl --retry 3 -s -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add -
+curl --retry 3 --retry-all-errors -s -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add -
 
 # Stop background apt updates.  Hypothetically, the kill should not
 # be necessary, because stop is supposed to send a kill signal to

diff --git a/.circleci/scripts/vs_install.ps1 b/.circleci/scripts/vs_install.ps1
@@ -29,7 +29,7 @@ if (Test-Path "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswher
 }
 
 echo "Downloading VS installer from S3."
-curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
+curl.exe --retry 3 --retry-all-errors -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
 if ($LASTEXITCODE -ne 0) {
     echo "Download of the VS 2019 Version ${env:VS_VERSION} installer failed"
     exit 1

diff --git a/.circleci/scripts/vs_install_cmath.ps1 b/.circleci/scripts/vs_install_cmath.ps1
@@ -1,5 +1,5 @@
 $CMATH_DOWNLOAD_LINK = "https://raw.githubusercontent.com/microsoft/STL/12c684bba78f9b032050526abdebf14f58ca26a3/stl/inc/cmath"
 $VC14_28_INSTALL_PATH="C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.28.29910\include"
 
-curl.exe --retry 3 -kL $CMATH_DOWNLOAD_LINK --output "$home\cmath"
+curl.exe --retry 3 --retry-all-errors -kL $CMATH_DOWNLOAD_LINK --output "$home\cmath"
 Move-Item -Path "$home\cmath" -Destination "$VC14_28_INSTALL_PATH" -Force
diff --git a/.circleci/scripts/windows_cudnn_install.sh b/.circleci/scripts/windows_cudnn_install.sh
@@ -36,7 +36,7 @@ else
     tmp_dir=$(mktemp -d)
     (
         pushd "${tmp_dir}"
-        curl --retry 3 -o "${cudnn_installer_name}" "$cudnn_installer_link"
+        curl --retry 3 --retry-all-errors -o "${cudnn_installer_name}" "$cudnn_installer_link"
         7z x "${cudnn_installer_name}" -ocudnn
         # Use '${var:?}/*' to avoid potentially expanding to '/*'
         # Remove all of the directories before attempting to copy files

diff --git a/.github/actions/setup-rocm/action.yml b/.github/actions/setup-rocm/action.yml
@@ -36,7 +36,12 @@ runs:
       run: |
         ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx')
         if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then
-            echo "Failed to detect GPUs on the runner"
+            if [[ $ngpu -eq 0 ]]; then
+              echo "Error: Failed to detect any GPUs on the runner"
+            else
+              echo "Error: Detected $ngpu GPUs on the runner, when only 2 or 4 were expected"
+            fi
+            echo "Please file an issue on pytorch/pytorch reporting the faulty runner. Include a link to the runner logs so the runner can be identified"
             exit 1
         fi
 

diff --git a/.github/actions/upload-test-artifacts/action.yml b/.github/actions/upload-test-artifacts/action.yml
@@ -34,7 +34,7 @@ runs:
       run: |
         # Remove any previous test reports if they exist
         rm -f test-reports-*.zip
-        zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
+        zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml' -i '*.csv'
 
     - name: Zip usage log for upload
       if: runner.os != 'Windows' && !inputs.use-gha
@@ -67,7 +67,7 @@ runs:
         FILE_SUFFIX: ${{ inputs.file-suffix }}
       run: |
         # -ir => recursive include all files in pattern
-        7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
+        7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml' -ir'!test\*.csv'
 
     - name: Zip usage log for upload
       if: runner.os == 'Windows' && !inputs.use-gha
@@ -127,8 +127,11 @@ runs:
         # Add the run attempt, see [Artifact run attempt]
         name: test-reports-runattempt${{ github.run_attempt }}-${{ inputs.file-suffix }}.zip
         retention-days: 14
-        if-no-files-found: error
-        path: test/**/*.xml
+        # Don't want to fail the workflow here because not all workflows have csv files
+        if-no-files-found: ignore
+        path: |
+          test/**/*.xml
+          test/**/*.csv
 
     - name: Store Usage Logs on Github
       uses: actions/upload-artifact@v3

diff --git a/.github/ci_commit_pins/text.txt b/.github/ci_commit_pins/text.txt
@@ -0,0 +1 @@
+5b78d074bd303eb230d30567646fcf0358ee2dd4
diff --git a/.github/ci_commit_pins/vision.txt b/.github/ci_commit_pins/vision.txt
@@ -1 +1 @@
-bf58902b2fd881c760cd2eeacfae2d7c468ebf1f
+4a310f26049371959617921d0eb9b001f4d262c6
diff --git a/.github/ci_commit_pins/xla.txt b/.github/ci_commit_pins/xla.txt
@@ -1 +1 @@
-7889d2d3be16675943d84e4a4133ed7c245a623f
+50855d7babfa7970cba18528c659989b91c83824
diff --git a/.github/requirements/README.md b/.github/requirements/README.md
@@ -17,6 +17,8 @@ The list of support files are as follows:
     test jobs to setup the conda environment
   * conda-env-macOS-X64. This is use by MacOS (x86-64) build and test
     jobs to setup the conda environment
+  * conda-env-Linux-X64. This is used by Linux buck build and test jobs
+    to setup the conda environment
 * Pip:
   * pip-requirements-macOS.txt. This is used by MacOS build and test jobs to
     setup the pip environment
diff --git a/.github/requirements/conda-env-Linux-X64 b/.github/requirements/conda-env-Linux-X64
@@ -0,0 +1,10 @@
+cffi=1.15.1
+cmake=3.22.1
+mkl=2022.1.0
+mkl-include=2022.1.0
+ninja=1.10.2
+numpy=1.23.3
+pyyaml=6.0
+requests=2.28.1
+setuptools=65.5.0
+typing_extensions=4.3.0
diff --git a/.github/requirements/pip-requirements-macOS.txt b/.github/requirements/pip-requirements-macOS.txt
@@ -14,6 +14,7 @@ pygments==2.12.0
 pytest==7.2.0
 pytest-xdist==3.0.2
 pytest-rerunfailures==10.2
+pytest-flakefinder==1.1.0
 pytest-shard==0.1.2
 scipy==1.9.0
 sympy==1.11.1

diff --git a/.github/scripts/README.md b/.github/scripts/README.md
@@ -3,7 +3,7 @@
 > NOTE: This README contains information for the `.github` directory but cannot be located there because it will overwrite the
 repo README.
 
-This directory contains workflows and scripts to support our CI infrastructure that runs on Github Actions.
+This directory contains workflows and scripts to support our CI infrastructure that runs on GitHub Actions.
 
 ## Workflows
 

diff --git a/.github/scripts/filter_test_configs.py b/.github/scripts/filter_test_configs.py
@@ -24,7 +24,9 @@
     "functorch",
     "inductor",
     "inductor_distributed",
+    "inductor_huggingface",
     "inductor_timm",
+    "inductor_torchbench",
     "jit_legacy",
     "multigpu",
     "nogpu_AVX512",
@@ -34,6 +36,13 @@
     "xla",
 }}
 
+# Supported modes when running periodically
+SUPPORTED_PERIODICAL_MODES = {
+    "mem_leak_check",
+    "rerun_disabled_tests",
+}
+
+
 def parse_args() -> Any:
     from argparse import ArgumentParser
     parser = ArgumentParser("Filter all test configurations and keep only requested ones")
@@ -109,6 +118,23 @@ def filter(test_matrix: Dict[str, List[Any]], labels: Set[str]) -> Dict[str, Lis
         return filtered_test_matrix
 
 
+def set_periodic_modes(test_matrix: Dict[str, List[Any]]) -> Dict[str, List[Any]]:
+    """
+    Apply all periodic modes when running under a schedule
+    """
+    scheduled_test_matrix: Dict[str, List[Any]] = {
+        "include": [],
+    }
+
+    for config in test_matrix.get("include", []):
+        for mode in SUPPORTED_PERIODICAL_MODES:
+            cfg = config.copy()
+            cfg[mode] = mode
+            scheduled_test_matrix["include"].append(cfg)
+
+    return scheduled_test_matrix
+
+
 def set_output(name: str, val: Any) -> None:
     if os.getenv("GITHUB_OUTPUT"):
         with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
@@ -163,8 +189,7 @@ def main() -> None:
         filtered_test_matrix = test_matrix
 
     if args.event_name == "schedule":
-        for config in filtered_test_matrix.get("include", []):
-            config["mem_leak_check"] = "mem_leak_check"
+        filtered_test_matrix = set_periodic_modes(filtered_test_matrix)
 
     # Set the filtered test matrix as the output
     set_output("test-matrix", json.dumps(filtered_test_matrix))

diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py
@@ -219,9 +219,9 @@ def generate_wheels_matrix(os: str,
                         "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                         "package_type": package_type,
                         "pytorch_extra_install_requirements":
-                        "nvidia-cuda-runtime-cu11;"
-                        "nvidia-cudnn-cu11==8.5.0.96;"
-                        "nvidia-cublas-cu11==11.10.3.66",
+                        "nvidia-cuda-runtime-cu11; platform_system == 'Linux' | "
+                        "nvidia-cudnn-cu11==8.5.0.96; platform_system == 'Linux' | "
+                        "nvidia-cublas-cu11==11.10.3.66; platform_system == 'Linux'",
                         "build_name":
                         f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-with-pypi-cudnn"
                         .replace(

diff --git a/.github/scripts/generate_ci_workflows.py b/.github/scripts/generate_ci_workflows.py
@@ -154,7 +154,7 @@ class OperatingSystem:
         package_type="libtorch",
         abi_version=generate_binary_build_matrix.PRE_CXX11_ABI,
         build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
-            OperatingSystem.LINUX, generate_binary_build_matrix.CXX11_ABI,
+            OperatingSystem.LINUX, generate_binary_build_matrix.PRE_CXX11_ABI,
             arches=["cpu"],
             libtorch_variants=["shared-with-deps"],
         ),
@@ -277,7 +277,7 @@ class OperatingSystem:
     BinaryBuildWorkflow(
         os=OperatingSystem.MACOS_ARM64,
         package_type="wheel",
-        build_configs=generate_binary_build_matrix.generate_wheels_matrix(OperatingSystem.MACOS),
+        build_configs=generate_binary_build_matrix.generate_wheels_matrix(OperatingSystem.MACOS_ARM64),
         cross_compile_arm64=True,
         ciflow_config=CIFlowConfig(
             labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_WHEEL},
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		bf58902b2fd881c760cd2eeacfae2d7c468ebf1f
		4a310f26049371959617921d0eb9b001f4d262c6
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		7889d2d3be16675943d84e4a4133ed7c245a623f
		50855d7babfa7970cba18528c659989b91c83824