From 6fad282453c158a4afcacebd6cbc91ce2f650011 Mon Sep 17 00:00:00 2001 From: Zhiqiang Ma Date: Tue, 16 Jan 2024 15:28:01 -0800 Subject: [PATCH 1/2] Check sub-group size support in vaddsync.cpp and add default input values for stream-triad-modified-*.cpp and spec-const3.cpp so they don't require user interaction when no arguments are specified --- .../GPU-Opt-Guide/exec-model/vaddsync.cpp | 33 +++++++++++++++---- .../GPU-Opt-Guide/jitting/spec-const3.cpp | 11 +++++-- .../stream-triad-modified-constant.cpp | 5 ++- .../stream-triad-modified-runtime-var.cpp | 5 ++- .../stream-triad-modified-spec-const.cpp | 5 ++- 5 files changed, 41 insertions(+), 18 deletions(-) diff --git a/Publications/GPU-Opt-Guide/exec-model/vaddsync.cpp b/Publications/GPU-Opt-Guide/exec-model/vaddsync.cpp index 012b13b8fe..efb5e270f3 100644 --- a/Publications/GPU-Opt-Guide/exec-model/vaddsync.cpp +++ b/Publications/GPU-Opt-Guide/exec-model/vaddsync.cpp @@ -114,11 +114,32 @@ int main() { std::cout << "Running on device: " << q.get_device().get_info() << "\n"; + + auto sgsizes = q.get_device().get_info(); + + constexpr int sgsize = 16; + bool supported = false; + std::cout << "Sub-group sizes supported:"; + for (auto sz : sgsizes) { + std::cout << " " << sz; + if (sz == sgsize) { + supported = true; + } + } + std::cout << std::endl; + + if (!supported) { + std::cout << "Sub-group size " << sgsize << " is not supported. 
Please change sgsize to one of the supported sizes" + << std::endl; + return 0; + } + + std::cout << "Using sub-group size " << sgsize << std::endl; std::cout << "Vector size: " << a.size() << "\n"; // check results Initialize(sum); - VectorAdd3<6, 320, 8>(q, a, b, sum, 1); + VectorAdd3<6, 320, sgsize>(q, a, b, sum, 1); for (int i = 0; i < mysize; i++) if (sum[i] != 2 * i) { @@ -126,7 +147,7 @@ int main() { } Initialize(sum); - VectorAdd4<6, 320, 8>(q, a, b, sum, 1); + VectorAdd4<6, 320, sgsize>(q, a, b, sum, 1); for (int i = 0; i < mysize; i++) if (sum[i] != 2 * i) { std::cout << "add4 Did not match\n"; @@ -134,16 +155,16 @@ int main() { // group1 Initialize(sum); - VectorAdd3<8, 320, 8>(q, a, b, sum, 10000); + VectorAdd3<8, 320, sgsize>(q, a, b, sum, 10000); Initialize(sum); - VectorAdd4<8, 320, 8>(q, a, b, sum, 10000); + VectorAdd4<8, 320, sgsize>(q, a, b, sum, 10000); // end group1 // group2 Initialize(sum); - VectorAdd3<24, 224, 8>(q, a, b, sum, 10000); + VectorAdd3<24, 224, sgsize>(q, a, b, sum, 10000); Initialize(sum); - VectorAdd4<24, 224, 8>(q, a, b, sum, 10000); + VectorAdd4<24, 224, sgsize>(q, a, b, sum, 10000); // end group2 return 0; } diff --git a/Publications/GPU-Opt-Guide/jitting/spec-const3.cpp b/Publications/GPU-Opt-Guide/jitting/spec-const3.cpp index 7b0c463c11..dddb286ce2 100644 --- a/Publications/GPU-Opt-Guide/jitting/spec-const3.cpp +++ b/Publications/GPU-Opt-Guide/jitting/spec-const3.cpp @@ -11,7 +11,7 @@ class SpecializedKernel; // Identify the specialization constant. constexpr sycl::specialization_id nx_sc; -int main() { +int main(int argc, char *argv[]) { sycl::queue queue; std::cout << "Running on " @@ -23,8 +23,13 @@ int main() { // Application execution stops here asking for input from user int Nx; - std::cout << "Enter input number ..." 
<< std::endl; - std::cin >> Nx; + if (argc > 1) { + Nx = std::stoi(argv[1]); + } else { + Nx = 1024; + } + + std::cout << "Nx = " << Nx << std::endl; queue.submit([&](sycl::handler &h) { sycl::accessor acc(buf, h, sycl::write_only, sycl::no_init); diff --git a/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-constant.cpp b/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-constant.cpp index 40634dc488..5e19c9edd9 100644 --- a/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-constant.cpp +++ b/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-constant.cpp @@ -105,9 +105,8 @@ int main(int argc, char *argv[]) { array_size = std::stoi(argv[1]); inner_loop_size = std::stoi(argv[2]); } else { - std::cout - << "Run as ./ \n"; - return 1; + array_size = 134217728; + inner_loop_size = 10; } std::cout << "Running with stream size of " << array_size << " elements (" << (array_size * sizeof(double)) / (double)1024 / 1024 << "MB)\n"; diff --git a/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-runtime-var.cpp b/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-runtime-var.cpp index 7e834b336c..dd775c7922 100644 --- a/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-runtime-var.cpp +++ b/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-runtime-var.cpp @@ -105,9 +105,8 @@ int main(int argc, char *argv[]) { array_size = std::stoi(argv[1]); inner_loop_size = std::stoi(argv[2]); } else { - std::cout - << "Run as ./ \n"; - return 1; + array_size = 134217728; + inner_loop_size = 10; } std::cout << "Running with stream size of " << array_size << " elements (" << (array_size * sizeof(double)) / (double)1024 / 1024 << "MB)\n"; diff --git a/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-spec-const.cpp b/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-spec-const.cpp index 457cc4d5ce..96190fc34e 100644 --- a/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-spec-const.cpp +++ 
b/Publications/GPU-Opt-Guide/jitting/stream-triad-modified-spec-const.cpp @@ -110,9 +110,8 @@ int main(int argc, char *argv[]) { array_size = std::stoi(argv[1]); inner_loop_size = std::stoi(argv[2]); } else { - std::cout - << "Run as ./ \n"; - return 1; + array_size = 134217728; + inner_loop_size = 10; } std::cout << "Running with stream size of " << array_size << " elements (" << (array_size * sizeof(double)) / (double)1024 / 1024 << "MB)\n"; From 8ba12d29c00cf9e87ed43f806e3b9d7cb81ef6dc Mon Sep 17 00:00:00 2001 From: Zhiqiang Ma Date: Mon, 22 Jan 2024 14:52:23 -0800 Subject: [PATCH 2/2] Fix ONSAM-1799 --- .../explicit-scaling/14_explicit_subsubdevice/ccs.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Publications/GPU-Opt-Guide/explicit-scaling/14_explicit_subsubdevice/ccs.cpp b/Publications/GPU-Opt-Guide/explicit-scaling/14_explicit_subsubdevice/ccs.cpp index f703e2bdf6..d3912f7114 100644 --- a/Publications/GPU-Opt-Guide/explicit-scaling/14_explicit_subsubdevice/ccs.cpp +++ b/Publications/GPU-Opt-Guide/explicit-scaling/14_explicit_subsubdevice/ccs.cpp @@ -15,6 +15,7 @@ int main() { size_t num_of_ccs; if (part_prop.empty()) { num_of_tiles = 1; + subdevices->push_back(d); } else { for (int i = 0; i < part_prop.size(); i++) { if (part_prop[i] == @@ -48,12 +49,11 @@ int main() { } else { for (int i = 0; i < part_prop1.size(); i++) { if (part_prop1[i] == - sycl::info::partition_property::partition_by_affinity_domain) { + sycl::info::partition_property::ext_intel_partition_by_cslice) { auto ccses = (*subdevices)[j] .create_sub_devices( - sycl::info::partition_affinity_domain::numa); + ext_intel_partition_by_cslice>(); num_of_ccs = ccses.size(); for (int k = 0; k < num_of_ccs; k++) CCS->push_back(ccses[k]);