Skip to content

Commit 254972a

Browse files
Enable thread::hardware_concurrency() to handle more than 64 processors (#5459)
Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent 16f26c8 commit 254972a

File tree

2 files changed

+52
-3
lines changed

2 files changed

+52
-3
lines changed

stl/msbuild/stl_base/msvcp.settings.targets

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7171
<TargetLib Include="$(CrtLibPath)\msvcrt$(BuildSuffix)$(ClrLibSuffix).lib"/>
7272
<TargetLib Include="$(CrtLibPath)\vcruntime$(BuildSuffix)$(ClrLibSuffix).lib"/>
7373
<TargetLib Include="$(UniversalCRTLib)"/>
74+
<TargetLib Condition="'$(BuildArchitecture)' == 'arm64' or '$(BuildArchitecture)' == 'arm64ec'" Include="$(SdkLibPath)\softintrin.lib"/>
7475
</ItemGroup>
7576

7677
<!-- Copy the output dll and pdb to various destinations -->

stl/src/cthread.cpp

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
// Copyright (c) Microsoft Corporation.
22
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
33

4+
#include <bit>
45
#include <cstdint>
56
#include <cstdio>
67
#include <cstdlib>
78
#include <cstring>
9+
#include <memory>
810
#include <process.h>
911
#include <xthreads.h>
1012

@@ -103,9 +105,55 @@ _CRTIMP2_PURE _Thrd_id_t __cdecl _Thrd_id() noexcept { // return unique id for c
103105
}
104106

105107
// Counts the logical processors reported by GetLogicalProcessorInformationEx(RelationProcessorPackage, ...).
//
// Reading this diff: the lines under gutter numbers ending in '-' (106-108) are the previous
// implementation, which returned SYSTEM_INFO::dwNumberOfProcessors from GetNativeSystemInfo();
// the lines under gutter numbers ending in '+' are its replacement.
//
// Returns: the sum, over every returned package structure and every processor group listed in it,
// of the number of set bits in that group's affinity mask. Returns 0 if the API fails for a reason
// other than ERROR_INSUFFICIENT_BUFFER, or if the larger retry buffer cannot be allocated.
_CRTIMP2_PURE unsigned int __cdecl _Thrd_hardware_concurrency() noexcept { // return number of processors
106-
SYSTEM_INFO info;
107-
GetNativeSystemInfo(&info);
108-
return info.dwNumberOfProcessors;
108+
// Most devices have only one processor group and thus have the same buffer_size.
109+
#ifdef _WIN64
110+
constexpr int stack_buffer_size = 48; // 16 bytes per group
111+
#else // ^^^ 64-bit / 32-bit vvv
112+
constexpr int stack_buffer_size = 44; // 12 bytes per group
113+
#endif // ^^^ 32-bit ^^^
114+
115+
// Aligned like the structure type because the buffer is reinterpret_cast to that type below.
alignas(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) unsigned char stack_buffer[stack_buffer_size];
116+
// The first attempt uses the stack buffer; buffer_ptr is redirected to a heap buffer only on retry.
unsigned char* buffer_ptr = stack_buffer;
117+
// In/out byte count: passed by address to the API, then used as the number of bytes left to parse
// (on success) or as the size of the retry allocation (on ERROR_INSUFFICIENT_BUFFER).
DWORD buffer_size = stack_buffer_size;
118+
// Owns the heap buffer, if one is needed, so it is released on every return path.
_STD unique_ptr<unsigned char[]> new_buffer;
119+
120+
// https://learn.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getlogicalprocessorinformationex
121+
// The buffer "receives a sequence of variable-sized SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX structures".
122+
// Loop until the query succeeds or fails unrecoverably; every iteration ends in a return or a retry
// with a freshly allocated buffer.
for (;;) {
123+
if (GetLogicalProcessorInformationEx(RelationProcessorPackage,
124+
reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer_ptr), &buffer_size)) {
125+
unsigned int logical_processors = 0;
126+
127+
// Walk the returned structures, consuming buffer_size bytes in steps of each structure's own Size.
while (buffer_size > 0) {
128+
// Each structure in the buffer describes a processor package (aka socket)...
129+
const auto structure_ptr = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer_ptr);
130+
const auto structure_size = structure_ptr->Size;
131+
132+
// ... which contains one or more processor groups.
133+
for (WORD i = 0; i != structure_ptr->Processor.GroupCount; ++i) {
134+
// Add the number of bits set in this group's affinity mask.
logical_processors += _STD popcount(structure_ptr->Processor.GroupMask[i].Mask);
135+
}
136+
137+
// Step forward to the next structure in the buffer.
138+
// (Modifying buffer_ptr/buffer_size here is fine: the function returns right after this loop.)
buffer_ptr += structure_size;
139+
buffer_size -= structure_size;
140+
}
141+
142+
return logical_processors;
143+
}
144+
145+
// Any failure other than "buffer too small" is reported to the caller as 0.
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
146+
return 0; // API failure
147+
}
148+
149+
// Retry with a heap buffer of buffer_size bytes. This relies on the API having stored the required
// length in buffer_size (see the documentation linked above). The nothrow form of new yields a
// null pointer on failure, which is checked below, rather than throwing from this noexcept function.
new_buffer.reset(::new (_STD nothrow) unsigned char[buffer_size]);
150+
151+
if (!new_buffer) {
152+
return 0; // allocation failure
153+
}
154+
155+
buffer_ptr = new_buffer.get();
156+
}
109157
}
110158

111159
// TRANSITION, ABI: _Thrd_create() is preserved for binary compatibility

0 commit comments

Comments
 (0)