Skip to content

Commit 9a53f20

Browse files
performance(debugger): optimize ATT event handling
- check threads with attention state before reading SR_IDENT - memory reads are costly; for threads that are already in the stopped state there is no need to read SR_IDENT and check the thread state again - single-stepping one thread is substantially faster, by a factor of 6 Related-To: NEO-8183 Signed-off-by: Mateusz Hoppe <[email protected]>
1 parent 204e5fd commit 9a53f20

File tree

8 files changed

+203
-4
lines changed

8 files changed

+203
-4
lines changed

level_zero/tools/source/debug/debug_session_imp.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1393,4 +1393,21 @@ ze_result_t DebugSessionImp::waitForCmdReady(EuThread::ThreadId threadId, uint16
13931393
return ZE_RESULT_SUCCESS;
13941394
}
13951395

1396+
// Appends to notStoppedThreads every thread ID from threadsWithAtt whose EuThread
// does not report isStopped(). The output vector is only appended to, never cleared.
//
// threadsWithAtt    - thread IDs to check.
// notStoppedThreads - output; receives the IDs that are not marked as stopped,
//                     in the same order as they appear in threadsWithAtt.
//
// NOTE(review): allThreads[threadId] is dereferenced without checking that an entry
// exists — presumably every ID passed in is a valid thread of this device; confirm
// against callers.
void DebugSessionImp::getNotStoppedThreads(const std::vector<EuThread::ThreadId> &threadsWithAtt, std::vector<EuThread::ThreadId> &notStoppedThreads) {
1397+
for (const auto &threadId : threadsWithAtt) {
1398+
1399+
bool wasStopped = false;
1400+
1401+
// With tile sessions enabled, the thread state is looked up in the tile session
// selected by threadId.tileIndex; otherwise in this session's own allThreads.
if (tileSessionsEnabled) {
1402+
wasStopped = tileSessions[threadId.tileIndex].first->allThreads[threadId]->isStopped();
1403+
} else {
1404+
wasStopped = allThreads[threadId]->isStopped();
1405+
}
1406+
1407+
if (!wasStopped) {
1408+
notStoppedThreads.push_back(threadId);
1409+
}
1410+
}
1411+
}
1412+
13961413
} // namespace L0

level_zero/tools/source/debug/debug_session_imp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ struct DebugSessionImp : DebugSession {
5858
tileAttachEnabled = false;
5959
}
6060
}
61+
// Appends to notStoppedThreads the IDs from threadsWithAtt whose threads are not
// currently marked as stopped; notStoppedThreads is not cleared beforehand.
void getNotStoppedThreads(const std::vector<EuThread::ThreadId> &threadsWithAtt, std::vector<EuThread::ThreadId> &notStoppedThreads);
6162

6263
virtual void attachTile() = 0;
6364
virtual void detachTile() = 0;

level_zero/tools/source/debug/linux/prelim/debug_session.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,8 +1325,14 @@ void DebugSessionLinux::handleAttentionEvent(prelim_drm_i915_debug_event_eu_atte
13251325
}
13261326

13271327
if (gpuVa != 0 && stateSaveAreaSize != 0) {
1328-
stateSaveArea = std::make_unique<char[]>(stateSaveAreaSize);
1329-
stateSaveReadResult = readGpuMemory(vmHandle, stateSaveArea.get(), stateSaveAreaSize, gpuVa);
1328+
1329+
std::vector<EuThread::ThreadId> newThreads;
1330+
getNotStoppedThreads(threadsWithAttention, newThreads);
1331+
1332+
if (newThreads.size() > 0) {
1333+
stateSaveArea = std::make_unique<char[]>(stateSaveAreaSize);
1334+
stateSaveReadResult = readGpuMemory(vmHandle, stateSaveArea.get(), stateSaveAreaSize, gpuVa);
1335+
}
13301336
} else {
13311337
PRINT_DEBUGGER_ERROR_LOG("Context state save area bind info invalid\n", "");
13321338
DEBUG_BREAK_IF(true);

level_zero/tools/source/debug/windows/debug_session.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,8 +272,12 @@ ze_result_t DebugSessionWindows::handleEuAttentionBitsEvent(DBGUMD_READ_EVENT_EU
272272
std::unique_lock<std::mutex> lock(threadStateMutex);
273273

274274
if (gpuVa != 0 && stateSaveAreaSize != 0) {
275-
stateSaveArea = std::make_unique<char[]>(stateSaveAreaSize);
276-
stateSaveReadResult = readGpuMemory(memoryHandle, stateSaveArea.get(), stateSaveAreaSize, gpuVa);
275+
std::vector<EuThread::ThreadId> newThreads;
276+
getNotStoppedThreads(threadsWithAttention, newThreads);
277+
if (newThreads.size() > 0) {
278+
stateSaveArea = std::make_unique<char[]>(stateSaveAreaSize);
279+
stateSaveReadResult = readGpuMemory(memoryHandle, stateSaveArea.get(), stateSaveAreaSize, gpuVa);
280+
}
277281
} else {
278282
PRINT_DEBUGGER_ERROR_LOG("Context state save area bind info invalid\n", "");
279283
DEBUG_BREAK_IF(true);

level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1756,6 +1756,33 @@ TEST_F(DebugSessionTest, givenTssMagicCorruptedWhenStateSaveAreIsReadThenHeaderI
17561756
EXPECT_TRUE(session->stateSaveAreaHeader.empty());
17571757
}
17581758

1759+
// Verifies getNotStoppedThreads on a single-device mock session: of two thread IDs
// passed in, one is stopped beforehand, and only the other one must be returned.
TEST(DebugSessionTest, givenStoppedThreadWhenGettingNotStoppedThreadsThenOnlyRunningOrUnavailableThreadsAreReturned) {
1760+
zet_debug_config_t config = {};
1761+
config.pid = 0x1234;
1762+
auto hwInfo = *NEO::defaultHwInfo.get();
1763+
1764+
// NOTE(review): raw pointer from a factory call — presumably ownership passes to
// deviceImp below; confirm.
NEO::MockDevice *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));
1765+
Mock<L0::DeviceImp> deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
1766+
1767+
auto sessionMock = std::make_unique<MockDebugSession>(config, &deviceImp);
1768+
1769+
// `thread` is explicitly stopped; `thread1` is left untouched and is checked to
// report not-stopped before the call under test.
EuThread::ThreadId thread(0, 0, 0, 0, 0);
1770+
sessionMock->allThreads[thread]->stopThread(1u);
1771+
EuThread::ThreadId thread1(0, 0, 0, 0, 1);
1772+
EXPECT_FALSE(sessionMock->allThreads[thread1]->isStopped());
1773+
1774+
std::vector<EuThread::ThreadId> threadsWithAtt;
1775+
std::vector<EuThread::ThreadId> newStops;
1776+
1777+
threadsWithAtt.push_back(thread);
1778+
threadsWithAtt.push_back(thread1);
1779+
1780+
sessionMock->getNotStoppedThreads(threadsWithAtt, newStops);
1781+
1782+
// Only the thread that was not stopped is expected in the output.
ASSERT_EQ(1u, newStops.size());
1783+
EXPECT_EQ(thread1, newStops[0]);
1784+
}
1785+
17591786
using MultiTileDebugSessionTest = Test<MultipleDevicesWithCustomHwInfo>;
17601787

17611788
TEST_F(MultiTileDebugSessionTest, givenThreadsFromMultipleTilesWhenResumeCalledThenThreadsResumedInAllTiles) {
@@ -3423,6 +3450,35 @@ TEST_F(MultiTileDebugSessionTest, givenAttachedRootDeviceWhenAttachingToTiletDev
34233450
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
34243451
}
34253452

3453+
// Verifies getNotStoppedThreads when thread state lives in a tile session: one of two
// threads is stopped on tileSessions[0], and only the other one must be returned by
// the call made on the root session.
TEST_F(MultiTileDebugSessionTest, givenTileSessionAndStoppedThreadWhenGettingNotStoppedThreadsThenOnlyRunningOrUnavailableThreadsReturned) {
3454+
zet_debug_config_t config = {};
3455+
config.pid = 0x1234;
3456+
3457+
L0::Device *device = driverHandle->devices[0];
3458+
auto neoDevice = device->getNEODevice();
3459+
auto deviceImp = static_cast<DeviceImp *>(device);
3460+
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.reset(new OsInterfaceWithDebugAttach);
3461+
3462+
// NOTE(review): allocated with raw new — presumably setDebugSession() takes ownership;
// confirm. initialize() is presumably what creates the tile sessions used below.
auto sessionMock = new MockDebugSession(config, device, false);
3463+
sessionMock->initialize();
3464+
deviceImp->setDebugSession(sessionMock);
3465+
3466+
// Thread state is set and checked on the tile session for tile 0, not on the root session.
EuThread::ThreadId thread(0, 0, 0, 0, 0);
3467+
static_cast<MockDebugSession *>(sessionMock->tileSessions[0].first)->allThreads[thread]->stopThread(1u);
3468+
EuThread::ThreadId thread1(0, 0, 0, 0, 1);
3469+
EXPECT_FALSE(static_cast<MockDebugSession *>(sessionMock->tileSessions[0].first)->allThreads[thread1]->isStopped());
3470+
3471+
std::vector<EuThread::ThreadId> threadsWithAtt;
3472+
std::vector<EuThread::ThreadId> newStops;
3473+
3474+
threadsWithAtt.push_back(thread);
3475+
threadsWithAtt.push_back(thread1);
3476+
3477+
sessionMock->getNotStoppedThreads(threadsWithAtt, newStops);
3478+
ASSERT_EQ(1u, newStops.size());
3479+
EXPECT_EQ(thread1, newStops[0]);
3480+
}
3481+
34263482
struct AffinityMaskForSingleSubDevice : MultipleDevicesWithCustomHwInfo {
34273483
void setUp() {
34283484
DebugManager.flags.ZE_AFFINITY_MASK.set("0.1");

level_zero/tools/test/unit_tests/sources/debug/linux/test_debug_api_linux.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6527,6 +6527,70 @@ TEST_F(DebugApiLinuxAttentionTest, GivenNoStateSaveAreaOrInvalidSizeWhenHandling
65276527
EXPECT_EQ(0u, sessionMock->readSystemRoutineIdentFromMemoryCallCount);
65286528
}
65296529

6530+
// Verifies that an EU attention event whose bitmask covers only threads already marked
// as stopped does not trigger a memory read (pread) or a system-routine-ident read,
// and adds no newly stopped threads.
TEST_F(DebugApiLinuxAttentionTest, GivenAlreadyStoppedThreadsWhenHandlingAttEventThenStateSaveAreaIsNotRead) {
6531+
zet_debug_config_t config = {};
6532+
config.pid = 0x1234;
6533+
6534+
auto sessionMock = std::make_unique<MockDebugSessionLinux>(config, device, 10);
6535+
ASSERT_NE(nullptr, sessionMock);
6536+
sessionMock->clientHandle = MockDebugSessionLinux::mockClientHandle;
6537+
auto handler = new MockIoctlHandler;
6538+
sessionMock->ioctlHandler.reset(handler);
6539+
SIP::version version = {2, 0, 0};
6540+
initStateSaveArea(sessionMock->stateSaveAreaHeader, version, device);
6541+
handler->setPreadMemory(sessionMock->stateSaveAreaHeader.data(), sessionMock->stateSaveAreaHeader.size(), 0x1000);
6542+
6543+
uint64_t ctxHandle = 2;
6544+
uint64_t vmHandle = 7;
6545+
uint64_t lrcHandle = 8;
6546+
6547+
// Register lrc -> context -> vm mappings and a state save area bind info for the vm
// (address 0x1000, sized like the header).
sessionMock->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->contextsCreated[ctxHandle].vm = vmHandle;
6548+
sessionMock->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->lrcToContextHandle[lrcHandle] = ctxHandle;
6549+
DebugSessionLinux::BindInfo cssaInfo = {0x1000, sessionMock->stateSaveAreaHeader.size()};
6550+
sessionMock->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->vmToContextStateSaveAreaBindInfo[vmHandle] = cssaInfo;
6551+
6552+
// Event buffer: the attention struct followed by up to 128 bytes of bitmask.
uint8_t data[sizeof(prelim_drm_i915_debug_event_eu_attention) + 128];
6553+
auto &hwInfo = neoDevice->getHardwareInfo();
6554+
auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
6555+
std::unique_ptr<uint8_t[]> bitmask;
6556+
size_t bitmaskSize = 0;
6557+
6558+
std::vector<EuThread::ThreadId> threads{
6559+
{0, 0, 0, 0, 0}, {0, 0, 0, 0, 2}};
6560+
6561+
// bitmask returned in ATT event - 2 threads
6562+
l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, bitmaskSize);
6563+
auto threadsWithAtt = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), bitmaskSize);
6564+
6565+
// Mark every thread decoded from the bitmask as stopped before the event is handled.
for (const auto &thread : threadsWithAtt) {
6566+
sessionMock->stoppedThreads[thread.packed] = 1;
6567+
sessionMock->allThreads[thread]->stopThread(vmHandle);
6568+
}
6569+
6570+
prelim_drm_i915_debug_event_eu_attention attention = {};
6571+
attention.base.type = PRELIM_DRM_I915_DEBUG_EVENT_EU_ATTENTION;
6572+
attention.base.flags = PRELIM_DRM_I915_DEBUG_EVENT_STATE_CHANGE;
6573+
attention.base.size = sizeof(prelim_drm_i915_debug_event_eu_attention) + std::min(uint32_t(128), static_cast<uint32_t>(bitmaskSize));
6574+
attention.base.seqno = 2;
6575+
attention.client_handle = MockDebugSessionLinux::mockClientHandle;
6576+
attention.lrc_handle = lrcHandle;
6577+
attention.flags = 0;
6578+
attention.ci.engine_class = 0;
6579+
attention.ci.engine_instance = 0;
6580+
attention.bitmask_size = std::min(uint32_t(128), static_cast<uint32_t>(bitmaskSize));
6581+
6582+
memcpy(data, &attention, sizeof(prelim_drm_i915_debug_event_eu_attention));
6583+
memcpy(ptrOffset(data, offsetof(prelim_drm_i915_debug_event_eu_attention, bitmask)), bitmask.get(), std::min(size_t(128), bitmaskSize));
6584+
6585+
sessionMock->handleEvent(reinterpret_cast<prelim_drm_i915_debug_event *>(data));
6586+
6587+
EXPECT_EQ(0u, sessionMock->addThreadToNewlyStoppedFromRaisedAttentionCallCount);
6588+
EXPECT_EQ(0u, sessionMock->newlyStoppedThreads.size());
6589+
6590+
// No memory read and no system-routine-ident read are expected for already stopped threads.
EXPECT_FALSE(handler->preadCalled);
6591+
EXPECT_EQ(0u, sessionMock->readSystemRoutineIdentFromMemoryCallCount);
6592+
}
6593+
65306594
using DebugApiLinuxAsyncThreadTest = Test<DebugApiLinuxFixture>;
65316595

65326596
TEST_F(DebugApiLinuxAsyncThreadTest, GivenPollReturnsErrorAndEinvalWhenReadingInternalEventsAsyncThenDetachEventIsGenerated) {

level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ struct MockDebugSession : public L0::DebugSessionImp {
208208
auto subDevice = connectedDevice->getNEODevice()->getSubDevice(i)->getSpecializedDevice<Device>();
209209
tileSessions[i] = std::pair<DebugSessionImp *, bool>{new MockDebugSession(config, subDevice), false};
210210
}
211+
tileSessionsEnabled = true;
211212
}
212213

213214
return ZE_RESULT_SUCCESS;

level_zero/tools/test/unit_tests/sources/debug/windows/test_debug_api_windows.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,56 @@ TEST_F(DebugApiWindowsAttentionTest, GivenEuAttentionEventForThreadsWhenHandling
230230
EXPECT_EQ(expectedThreads, sessionMock->newlyStoppedThreads.size());
231231
}
232232

233+
// Verifies that an EU attention-bit event whose bitmask covers only threads already
// marked as stopped is handled successfully without a DBGUMD_ACTION_READ_GFX_MEMORY
// escape and without a system-routine-ident read.
TEST_F(DebugApiWindowsAttentionTest, GivenAlreadyStoppedThreadsWhenHandlingAttEventThenStateSaveAreaIsNotRead) {
234+
zet_debug_config_t config = {};
235+
config.pid = 0x1234;
236+
237+
std::unique_ptr<uint8_t[]> bitmask;
238+
size_t bitmaskSize = 0;
239+
auto &hwInfo = neoDevice->getHardwareInfo();
240+
auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
241+
242+
std::vector<EuThread::ThreadId> threads{
243+
{0, 0, 0, 0, 0},
244+
{0, 0, 0, 0, 1},
245+
};
246+
247+
auto sessionMock = std::make_unique<MockDebugSessionWindows>(config, device);
248+
// NOTE(review): stoppedThreads entries are set again in the loop over threadsWithAtt
// further down; this loop looks redundant if both lists hold the same IDs — confirm.
for (auto thread : threads) {
249+
sessionMock->stoppedThreads[thread.packed] = 1;
250+
}
251+
sessionMock->allContexts.insert(0x12345);
252+
253+
SIP::version version = {2, 0, 0};
254+
initStateSaveArea(sessionMock->stateSaveAreaHeader, version, device);
255+
// Mark the state save area as captured and point its VA/size at the header buffer,
// which is also installed as the mock's read source.
sessionMock->stateSaveAreaCaptured = true;
256+
sessionMock->stateSaveAreaVA.store(reinterpret_cast<uint64_t>(sessionMock->stateSaveAreaHeader.data()));
257+
sessionMock->stateSaveAreaSize.store(sessionMock->stateSaveAreaHeader.size());
258+
259+
mockWddm->srcReadBuffer = sessionMock->stateSaveAreaHeader.data();
260+
mockWddm->srcReadBufferBaseAddress = sessionMock->stateSaveAreaVA.load();
261+
262+
l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, bitmaskSize);
263+
auto threadsWithAtt = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), bitmaskSize);
264+
265+
// Mark every thread decoded from the bitmask as stopped before the event is handled.
for (const auto &thread : threadsWithAtt) {
266+
sessionMock->stoppedThreads[thread.packed] = 1;
267+
sessionMock->allThreads[thread]->stopThread(0);
268+
}
269+
270+
// Queue a single attention-bit-set event carrying the bitmask.
mockWddm->numEvents = 1;
271+
mockWddm->eventQueue[0].readEventType = DBGUMD_READ_EVENT_EU_ATTN_BIT_SET;
272+
copyBitmaskToEventParams(&mockWddm->eventQueue[0].eventParamsBuffer.eventParamsBuffer, bitmask, bitmaskSize);
273+
sessionMock->wddm = mockWddm;
274+
sessionMock->debugHandle = MockDebugSessionWindows::mockDebugHandle;
275+
276+
auto result = sessionMock->readAndHandleEvent(100);
277+
278+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
279+
EXPECT_EQ(0u, mockWddm->dbgUmdEscapeActionCalled[DBGUMD_ACTION_READ_GFX_MEMORY]);
280+
EXPECT_EQ(0u, sessionMock->readSystemRoutineIdentFromMemoryCallCount);
281+
}
282+
233283
TEST_F(DebugApiWindowsAttentionTest, GivenNoContextWhenHandlingAttentionEventThenErrorIsReturned) {
234284
zet_debug_config_t config = {};
235285
config.pid = 0x1234;

0 commit comments

Comments
 (0)