Skip to content
97 changes: 97 additions & 0 deletions sycl/test/host-interop-task/host-task-dependency2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out

// RUNx: %CPU_RUN_PLACEHOLDER %t.out
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
// RUNx: %ACC_RUN_PLACEHOLDER %t.out

// RUNx: %CPU_RUN_PLACEHOLDER %t.out 10
// RUNx: %GPU_RUN_PLACEHOLDER %t.out 10
// RUNx: %ACC_RUN_PLACEHOLDER %t.out 10

#include <CL/sycl.hpp>
#include <iostream>

using namespace cl::sycl;
using namespace cl::sycl::access;

static constexpr size_t BUFFER_SIZE = 1024;

static auto EH = [](exception_list EL) {
for (const std::exception_ptr &E : EL) {
throw E;
}
};

// Host-task depending on another host-task via handler::depends_on() only
// should not hang
void test(size_t Count) {
queue Q(EH);

static constexpr size_t BufferSize = 10 * 1024;

buffer<int, 1> B0{range<1>{BufferSize}};
buffer<int, 1> B1{range<1>{BufferSize}};
buffer<int, 1> B2{range<1>{BufferSize}};
buffer<int, 1> B3{range<1>{BufferSize}};
buffer<int, 1> B4{range<1>{BufferSize}};
buffer<int, 1> B5{range<1>{BufferSize}};

for (size_t Idx = 1; Idx <= Count; ++Idx) {
// This host task should be submitted without hesitation
event E1 = Q.submit([&](handler &CGH) {
std::cout << "Submit 1" << std::endl;

auto Acc0 = B0.get_access<mode::read_write, target::host_buffer>(CGH);
auto Acc1 = B1.get_access<mode::read_write, target::host_buffer>(CGH);
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);

CGH.codeplay_host_task([=] {
Acc0[0] = 1 * Idx;
Acc1[0] = 2 * Idx;
Acc2[0] = 3 * Idx;
});
});

// This host task is going to depend on blocked empty node of the first
// host-task (via buffer #2). Still this one should be enqueued.
event E2 = Q.submit([&](handler &CGH) {
std::cout << "Submit 2" << std::endl;

auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);
auto Acc3 = B3.get_access<mode::read_write, target::host_buffer>(CGH);

CGH.codeplay_host_task([=] {
Acc2[1] = 1 * Idx;
Acc3[1] = 2 * Idx;
});
});

// This host-task only depends on the second host-task via
// handler::depends_on(). This one should not hang and should be eexecuted
// after host-task #2.
event E3 = Q.submit([&](handler &CGH) {
CGH.depends_on(E2);

std::cout << "Submit 3" << std::endl;

auto Acc4 = B4.get_access<mode::read_write, target::host_buffer>(CGH);
auto Acc5 = B5.get_access<mode::read_write, target::host_buffer>(CGH);

CGH.codeplay_host_task([=] {
Acc4[2] = 1 * Idx;
Acc5[2] = 2 * Idx;
});
});
}

Q.wait_and_throw();
}

int main(int Argc, const char *Argv[]) {
size_t Count = 1;
if (Argc > 1)
Count = std::stoi(Argv[1]);

test(Count);
return 0;
}
126 changes: 126 additions & 0 deletions sycl/test/host-interop-task/host-task-dependency3.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out

// RUNx: %CPU_RUN_PLACEHOLDER %t.out
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
// RUNx: %ACC_RUN_PLACEHOLDER %t.out

// RUNx: %CPU_RUN_PLACEHOLDER %t.out 10
// RUNx: %GPU_RUN_PLACEHOLDER %t.out 10
// RUNx: %ACC_RUN_PLACEHOLDER %t.out 10

#include <CL/sycl.hpp>
#include <chrono>
#include <iostream>
#include <thread>

using namespace cl::sycl;
using namespace cl::sycl::access;

static constexpr size_t BUFFER_SIZE = 1024;

static auto EH = [](exception_list EL) {
for (const std::exception_ptr &E : EL) {
throw E;
}
};

// Host-task depending on another host-task via handler::depends_on() only
// should not hang. A bit more complicated case with kernels depending on
// host-task being involved.
void test(size_t Count) {
queue Q(EH);

static constexpr size_t BufferSize = 10 * 1024;

buffer<int, 1> B0{range<1>{BufferSize}};
buffer<int, 1> B1{range<1>{BufferSize}};
buffer<int, 1> B2{range<1>{BufferSize}};
buffer<int, 1> B3{range<1>{BufferSize}};
buffer<int, 1> B4{range<1>{BufferSize}};
buffer<int, 1> B5{range<1>{BufferSize}};

using namespace std::chrono_literals;
constexpr auto SleepFor = 1s;

for (size_t Idx = 1; Idx <= Count; ++Idx) {
// This host task should be submitted without hesitation
Q.submit([&](handler &CGH) {
std::cout << "Submit HT-1" << std::endl;

auto Acc0 = B0.get_access<mode::read_write, target::host_buffer>(CGH);

CGH.codeplay_host_task([=] {
std::this_thread::sleep_for(SleepFor);
Acc0[0] = 1 * Idx;
});
});

Q.submit([&](handler &CGH) {
std::cout << "Submit Kernel-1" << std::endl;

auto Acc0 = B0.get_access<mode::read_write>(CGH);

CGH.single_task<class Test5_Kernel1>([=] { Acc0[1] = 1 * Idx; });
});

Q.submit([&](handler &CGH) {
std::cout << "Submit Kernel-2" << std::endl;

auto Acc1 = B1.get_access<mode::read_write>(CGH);

CGH.single_task<class Test5_Kernel2>([=] { Acc1[2] = 1 * Idx; });
});

Q.submit([&](handler &CGH) {
std::cout << "Submit HT-2" << std::endl;

auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);

CGH.codeplay_host_task([=] {
std::this_thread::sleep_for(SleepFor);
Acc2[3] = 1 * Idx;
});
});

// This host task is going to depend on blocked empty node of the second
// host-task (via buffer #0). Still this one should be enqueued.
event EHT3 = Q.submit([&](handler &CGH) {
std::cout << "Submit HT-3" << std::endl;

auto Acc0 = B0.get_access<mode::read_write, target::host_buffer>(CGH);
auto Acc1 = B1.get_access<mode::read_write, target::host_buffer>(CGH);
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);

CGH.codeplay_host_task([=] {
std::this_thread::sleep_for(SleepFor);
Acc0[4] = 1 * Idx;
Acc1[4] = 2 * Idx;
Acc2[4] = 3 * Idx;
});
});

// This host-task only depends on the third host-task via
// handler::depends_on(). This one should not hang and should be executed
// after host-task #3.
Q.submit([&](handler &CGH) {
std::cout << "Submit HT-4" << std::endl;

CGH.depends_on(EHT3);

auto Acc5 = B5.get_access<mode::read_write, target::host_buffer>(CGH);

CGH.codeplay_host_task([=] { Acc5[5] = 1 * Idx; });
});
}

Q.wait_and_throw();
}

int main(int Argc, const char *Argv[]) {
size_t Count = 1;
if (Argc > 1)
Count = std::stoi(Argv[1]);

test(Count);
return 0;
}
30 changes: 30 additions & 0 deletions sycl/test/host-interop-task/host-task-dependency4.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out

// RUNx: %CPU_RUN_PLACEHOLDER %t.out
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
// RUNx: %ACC_RUN_PLACEHOLDER %t.out

#include <CL/sycl.hpp>

cl::sycl::event submit(cl::sycl::queue &Q, cl::sycl::buffer<int> &B) {
return Q.submit([&](cl::sycl::handler &CGH) {
auto A = B.template get_access<cl::sycl::access::mode::read_write>(CGH);
CGH.codeplay_host_task([=]() { (void)A; });
});
}

int main() {
cl::sycl::queue Q;
int Status = 0;
cl::sycl::buffer<int> A{&Status, 1};
cl::sycl::vector_class<cl::sycl::event> Events;

Events.push_back(submit(Q, A));
Events.push_back(submit(Q, A));
Q.submit([&](sycl::handler &CGH) {
CGH.depends_on(Events);
CGH.codeplay_host_task([&] { printf("all done\n"); });
}).wait_and_throw();

return 0;
}
72 changes: 1 addition & 71 deletions sycl/test/host-interop-task/host-task.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@
// RUN: %GPU_RUN_PLACEHOLDER %t.out 3
// RUN: %ACC_RUN_PLACEHOLDER %t.out 3

// RUNx: %CPU_RUN_PLACEHOLDER %t.out 4
// RUNx: %GPU_RUN_PLACEHOLDER %t.out 4
// RUNx: %ACC_RUN_PLACEHOLDER %t.out 4

#include <CL/sycl.hpp>
#include <chrono>
#include <iostream>
Expand Down Expand Up @@ -103,7 +99,6 @@ void test3() {

std::vector<event> Deps;

using namespace std::chrono_literals;
static constexpr size_t Count = 10;

auto Start = std::chrono::steady_clock::now();
Expand Down Expand Up @@ -146,74 +141,12 @@ void test3() {
Q.wait_and_throw();
auto End = std::chrono::steady_clock::now();

using namespace std::chrono_literals;
constexpr auto Threshold = 2s;

assert(End - Start < Threshold && "Host tasks were waiting for too long");
}

// Host-task depending on another host-task via handler::depends_on() only
// should not hang
void test4() {
queue Q(EH);

static constexpr size_t BufferSize = 10 * 1024;

buffer<int, 1> B0{range<1>{BufferSize}};
buffer<int, 1> B1{range<1>{BufferSize}};
buffer<int, 1> B2{range<1>{BufferSize}};
buffer<int, 1> B3{range<1>{BufferSize}};
buffer<int, 1> B4{range<1>{BufferSize}};
buffer<int, 1> B5{range<1>{BufferSize}};

// This host task should be submitted without hesitation
event E1 = Q.submit([&](handler &CGH) {
std::cout << "Submit 1" << std::endl;

auto Acc0 = B0.get_access<mode::read_write, target::host_buffer>(CGH);
auto Acc1 = B1.get_access<mode::read_write, target::host_buffer>(CGH);
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);

CGH.codeplay_host_task([=] {
Acc0[0] = 1;
Acc1[0] = 2;
Acc2[0] = 3;
});
});

// This host task is going to depend on blocked empty node of the first
// host-task (via buffer #2). Still this one should be enqueued.
event E2 = Q.submit([&](handler &CGH) {
std::cout << "Submit 2" << std::endl;

auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);
auto Acc3 = B3.get_access<mode::read_write, target::host_buffer>(CGH);

CGH.codeplay_host_task([=] {
Acc2[1] = 1;
Acc3[1] = 2;
});
});

// This host-task only depends on the second host-task via
// handler::depends_on(). This one should not hang and should be enqueued
// after host-task #2.
event E3 = Q.submit([&](handler &CGH) {
CGH.depends_on(E2);

std::cout << "Submit 3" << std::endl;

auto Acc4 = B4.get_access<mode::read_write, target::host_buffer>(CGH);
auto Acc5 = B5.get_access<mode::read_write, target::host_buffer>(CGH);

CGH.codeplay_host_task([=] {
Acc4[2] = 1;
Acc5[2] = 2;
});
});

Q.wait_and_throw();
}

int main(int Argc, const char *Argv[]) {
if (Argc < 2)
return 1;
Expand All @@ -230,9 +163,6 @@ int main(int Argc, const char *Argv[]) {
case 3:
test3();
break;
case 4:
test4();
break;
default:
return 1;
}
Expand Down