Intel(R) Threading Building Blocks Doxygen Documentation version 4.2.3
tbb_misc_ex.cpp
1/*
2 Copyright (c) 2005-2020 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
17// Source file for miscellaneous entities that are infrequently referenced by
18// an executing program, and whose implementation requires dynamic linking.
19
20#include "tbb_misc.h"
21
22#if !defined(__TBB_HardwareConcurrency)
23
24#include "dynamic_link.h"
25#include <stdio.h>
26#include <limits.h>
27
28#if _WIN32||_WIN64
29#include "tbb/machine/windows_api.h"
30#if __TBB_WIN8UI_SUPPORT
31#include <thread>
32#endif
33#else
34#include <unistd.h>
35#if __linux__
36#include <sys/sysinfo.h>
37#include <string.h>
38#include <sched.h>
39#include <errno.h>
40#elif __sun
41#include <sys/sysinfo.h>
42#elif __FreeBSD__
43#include <errno.h>
44#include <string.h>
45#include <sys/param.h> // Required by <sys/cpuset.h>
46#include <sys/cpuset.h>
47#endif
48#endif
49
50namespace tbb {
51namespace internal {
52
53#if __TBB_USE_OS_AFFINITY_SYSCALL
54
55#if __linux__
56// Handlers for interoperation with libiomp
57static int (*libiomp_try_restoring_original_mask)();
58// Table for mapping to libiomp entry points
59static const dynamic_link_descriptor iompLinkTable[] = {
60 DLD_NOWEAK( kmp_set_thread_affinity_mask_initial, libiomp_try_restoring_original_mask )
61};
62#endif
63
64static void set_thread_affinity_mask( size_t maskSize, const basic_mask_t* threadMask ) {
65#if __linux__
66 if( sched_setaffinity( 0, maskSize, threadMask ) )
67#else /* FreeBSD */
68 if( cpuset_setaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
69#endif
70 runtime_warning( "setaffinity syscall failed" );
71}
72
73static void get_thread_affinity_mask( size_t maskSize, basic_mask_t* threadMask ) {
74#if __linux__
75 if( sched_getaffinity( 0, maskSize, threadMask ) )
76#else /* FreeBSD */
77 if( cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
78#endif
79 runtime_warning( "getaffinity syscall failed" );
80}
81
82static basic_mask_t* process_mask;
83static int num_masks;
84
85void destroy_process_mask() {
86 if( process_mask ) {
87 delete [] process_mask;
88 }
89}
90
91#define curMaskSize sizeof(basic_mask_t) * num_masks
92affinity_helper::~affinity_helper() {
93 if( threadMask ) {
94 if( is_changed ) {
95 set_thread_affinity_mask( curMaskSize, threadMask );
96 }
97 delete [] threadMask;
98 }
99}
100void affinity_helper::protect_affinity_mask( bool restore_process_mask ) {
101 if( threadMask == NULL && num_masks ) { // TODO: assert num_masks validity?
102 threadMask = new basic_mask_t [num_masks];
103 memset( threadMask, 0, curMaskSize );
104 get_thread_affinity_mask( curMaskSize, threadMask );
105 if( restore_process_mask ) {
106 __TBB_ASSERT( process_mask, "A process mask is requested but not yet stored" );
107 is_changed = memcmp( process_mask, threadMask, curMaskSize );
108 if( is_changed )
109 set_thread_affinity_mask( curMaskSize, process_mask );
110 } else {
111 // Assume that the mask will be changed by the caller.
112 is_changed = 1;
113 }
114 }
115}
116void affinity_helper::dismiss() {
117 if( threadMask ) {
118 delete [] threadMask;
119 threadMask = NULL;
120 }
121 is_changed = 0;
122}
123#undef curMaskSize
124
125static atomic<do_once_state> hardware_concurrency_info;
126
127static int theNumProcs;
128
129static void initialize_hardware_concurrency_info () {
130 int err;
131 int availableProcs = 0;
132 int numMasks = 1;
133#if __linux__
134#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
135 int maxProcs = INT_MAX; // To check the entire mask.
136 int pid = 0; // Get the mask of the calling thread.
137#else
138 int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
139 int pid = getpid();
140#endif
141#else /* FreeBSD >= 7.1 */
142 int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
143#endif
144 basic_mask_t* processMask;
145 const size_t BasicMaskSize = sizeof(basic_mask_t);
146 for (;;) {
147 const int curMaskSize = BasicMaskSize * numMasks;
148 processMask = new basic_mask_t[numMasks];
149 memset( processMask, 0, curMaskSize );
150#if __linux__
151 err = sched_getaffinity( pid, curMaskSize, processMask );
152 if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 )
153 break;
154#else /* FreeBSD >= 7.1 */
155 // CPU_LEVEL_WHICH - anonymous (current) mask, CPU_LEVEL_CPUSET - assigned mask
156#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
157 err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, curMaskSize, processMask );
158#else
159 err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask );
160#endif
161 if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 )
162 break;
163#endif /* FreeBSD >= 7.1 */
164 delete[] processMask;
165 numMasks <<= 1;
166 }
167 if ( !err ) {
168 // We have found the mask size and captured the process affinity mask into processMask.
169 num_masks = numMasks; // do here because it's needed for affinity_helper to work
170#if __linux__
171 // For better coexistence with libiomp which might have changed the mask already,
172 // check for its presence and ask it to restore the mask.
173 dynamic_link_handle libhandle;
174 if ( dynamic_link( "libiomp5.so", iompLinkTable, 1, &libhandle, DYNAMIC_LINK_GLOBAL ) ) {
175 // We have found the symbol provided by libiomp5 for restoring original thread affinity.
176 affinity_helper affhelp;
177 affhelp.protect_affinity_mask( /*restore_process_mask=*/false );
178 if ( libiomp_try_restoring_original_mask()==0 ) {
179 // Now we have the right mask to capture, restored by libiomp.
180 const int curMaskSize = BasicMaskSize * numMasks;
181 memset( processMask, 0, curMaskSize );
182 get_thread_affinity_mask( curMaskSize, processMask );
183 } else
184 affhelp.dismiss(); // thread mask has not changed
185 dynamic_unlink( libhandle );
186 // Destructor of affinity_helper restores the thread mask (unless dismissed).
187 }
188#endif
189 for ( int m = 0; availableProcs < maxProcs && m < numMasks; ++m ) {
190 for ( size_t i = 0; (availableProcs < maxProcs) && (i < BasicMaskSize * CHAR_BIT); ++i ) {
191 if ( CPU_ISSET( i, processMask + m ) )
192 ++availableProcs;
193 }
194 }
195 process_mask = processMask;
196 }
197 else {
198 // Failed to get the process affinity mask; assume the whole machine can be used.
199 availableProcs = (maxProcs == INT_MAX) ? sysconf(_SC_NPROCESSORS_ONLN) : maxProcs;
200 delete[] processMask;
201 }
202 theNumProcs = availableProcs > 0 ? availableProcs : 1; // Fail safety strap
203 __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), NULL );
204}
205
206int AvailableHwConcurrency() {
207 atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
208 return theNumProcs;
209}
210
211/* End of __TBB_USE_OS_AFFINITY_SYSCALL implementation */
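For reference, the effect of the mask-based counting above can be reproduced outside the library. The following is a minimal Linux-only sketch, not part of TBB (it assumes glibc's sched_getaffinity and CPU_COUNT): it compares the number of online processors with the number permitted by the calling process's affinity mask, which is the quantity this code path reports, so the second value shrinks when the process is launched under, e.g., taskset.

// Minimal Linux-only sketch (not TBB code): the process affinity mask, not the
// machine size, bounds the concurrency reported on this code path.
#define _GNU_SOURCE 1   // needed for sched_getaffinity/CPU_COUNT with glibc
#include <sched.h>
#include <unistd.h>
#include <stdio.h>

int main() {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    if (sched_getaffinity(0, sizeof(mask), &mask) == 0) {
        long online = sysconf(_SC_NPROCESSORS_ONLN);
        // Under e.g. "taskset -c 0-3" the second number drops to 4 while the
        // first one still reports every online processor.
        printf("online: %ld, allowed by affinity mask: %d\n", online, CPU_COUNT(&mask));
    }
    return 0;
}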
212#elif __ANDROID__
213
214// Work-around for Android: read the number of available CPUs from /sys/devices/system/cpu/present, since the system calls are unreliable there.
215// Format of "present" file is: ([<int>-<int>|<int>],)+
216int AvailableHwConcurrency() {
217 FILE *fp = fopen("/sys/devices/system/cpu/present", "r");
218 if (fp == NULL) return 1;
219 int num_args, lower, upper, num_cpus=0;
220 while ((num_args = fscanf(fp, "%u-%u", &lower, &upper)) != EOF) {
221 switch(num_args) {
222 case 2: num_cpus += upper - lower + 1; break;
223 case 1: num_cpus += 1; break;
224 }
225 fscanf(fp, ",");
226 }
227 return (num_cpus > 0) ? num_cpus : 1;
228}
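To make the format comment above concrete, here is a self-contained illustration (example strings assumed; not TBB code) that applies the same counting rule as the loop above: a lone index contributes one CPU, and a range lower-upper contributes upper - lower + 1.

// Illustration only (not TBB code): count CPUs in a "present"-style list such as
// "0-3" or "0,2-5", using the same rule as the loop above.
#include <cstdio>

static int count_cpus(const char* s) {
    int num_cpus = 0;
    while (*s) {
        int lower = 0, upper = 0, consumed = 0;
        if (std::sscanf(s, "%d-%d%n", &lower, &upper, &consumed) == 2)
            num_cpus += upper - lower + 1;          // a range such as "0-3"
        else if (std::sscanf(s, "%d%n", &lower, &consumed) == 1)
            num_cpus += 1;                          // a single index such as "5"
        else
            break;                                  // malformed input: stop counting
        s += consumed;
        if (*s == ',') ++s;                         // skip the separator between entries
    }
    return num_cpus;
}

int main() {
    std::printf("%d %d\n", count_cpus("0-3"), count_cpus("0,2-5"));  // prints "4 5"
}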
229
230#elif defined(_SC_NPROCESSORS_ONLN)
231
232int AvailableHwConcurrency() {
233 int n = sysconf(_SC_NPROCESSORS_ONLN);
234 return (n > 0) ? n : 1;
235}
236
237#elif _WIN32||_WIN64
238
239static atomic<do_once_state> hardware_concurrency_info;
240
241static const WORD TBB_ALL_PROCESSOR_GROUPS = 0xffff;
242
243// Statically allocate an array for processor group information.
244// Windows 7 supports a maximum of 4 groups, but let's look ahead a little.
245static const WORD MaxProcessorGroups = 64;
246
247struct ProcessorGroupInfo {
248 DWORD_PTR mask;
249 int numProcs;
250 int numProcsRunningTotal;
251
252 //! Total number of processor groups in the system
253 static int NumGroups;
254
255 //! Index of the group with a slot reserved for the first master thread
256
262 static int HoleIndex;
263};
264
265int ProcessorGroupInfo::NumGroups = 1;
266int ProcessorGroupInfo::HoleIndex = 0;
267
268ProcessorGroupInfo theProcessorGroups[MaxProcessorGroups];
269
270struct TBB_GROUP_AFFINITY {
271 DWORD_PTR Mask;
272 WORD Group;
273 WORD Reserved[3];
274};
275
276static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = NULL;
277static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = NULL;
278static BOOL (WINAPI *TBB_SetThreadGroupAffinity)( HANDLE hThread,
279 const TBB_GROUP_AFFINITY* newAff, TBB_GROUP_AFFINITY *prevAff );
280static BOOL (WINAPI *TBB_GetThreadGroupAffinity)( HANDLE hThread, TBB_GROUP_AFFINITY* );
281
282static const dynamic_link_descriptor ProcessorGroupsApiLinkTable[] = {
283 DLD(GetActiveProcessorCount, TBB_GetActiveProcessorCount)
284 , DLD(GetActiveProcessorGroupCount, TBB_GetActiveProcessorGroupCount)
285 , DLD(SetThreadGroupAffinity, TBB_SetThreadGroupAffinity)
286 , DLD(GetThreadGroupAffinity, TBB_GetThreadGroupAffinity)
287};
288
289static void initialize_hardware_concurrency_info () {
290#if __TBB_WIN8UI_SUPPORT
291 // For these applications, processor group info is unavailable.
292 // Set up the number of processors for a single processor group.
293 theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = std::thread::hardware_concurrency();
294#else /* __TBB_WIN8UI_SUPPORT */
295 dynamic_link( "Kernel32.dll", ProcessorGroupsApiLinkTable,
296 sizeof(ProcessorGroupsApiLinkTable)/sizeof(dynamic_link_descriptor) );
297 SYSTEM_INFO si;
298 GetNativeSystemInfo(&si);
299 DWORD_PTR pam, sam, m = 1;
300 GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam );
301 int nproc = 0;
302 for ( size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) {
303 if ( pam & m )
304 ++nproc;
305 }
306 __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, NULL );
307 // By default, set up the number of processors for a single processor group.
308 theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc;
309 // Set up processor groups when the process does not restrict the affinity mask and more than one processor group is present.
310 if ( nproc == (int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) {
311 // The process does not have restricting affinity mask and multiple processor groups are possible
312 ProcessorGroupInfo::NumGroups = (int)TBB_GetActiveProcessorGroupCount();
313 __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL );
314 // Fail safety bootstrap. Release versions will limit available concurrency
315 // level, while debug ones will assert.
316 if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups )
317 ProcessorGroupInfo::NumGroups = MaxProcessorGroups;
318 if ( ProcessorGroupInfo::NumGroups > 1 ) {
319 TBB_GROUP_AFFINITY ga;
320 if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) )
321 ProcessorGroupInfo::HoleIndex = ga.Group;
322 int nprocs = 0;
323 for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) {
324 ProcessorGroupInfo &pgi = theProcessorGroups[i];
325 pgi.numProcs = (int)TBB_GetActiveProcessorCount(i);
326 __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, NULL );
327 pgi.mask = pgi.numProcs == sizeof(DWORD_PTR) * CHAR_BIT ? ~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1;
328 pgi.numProcsRunningTotal = nprocs += pgi.numProcs;
329 }
330 __TBB_ASSERT( nprocs == (int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL );
331 }
332 }
333#endif /* __TBB_WIN8UI_SUPPORT */
334
335 PrintExtraVersionInfo("Processor groups", "%d", ProcessorGroupInfo::NumGroups);
336 if (ProcessorGroupInfo::NumGroups>1)
337 for (int i=0; i<ProcessorGroupInfo::NumGroups; ++i)
338 PrintExtraVersionInfo( "----- Group", "%d: size %d", i, theProcessorGroups[i].numProcs);
339}
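As a side note, the quantities that the dynamically linked entry points above provide can also be queried directly on Windows 7 and later. A Windows-only sketch, not TBB code, using the documented kernel32 APIs:

// Windows-only sketch (not TBB code): enumerate processor groups with the same
// kernel32 APIs that the dynamic-link table above binds at run time.
#include <windows.h>
#include <cstdio>

int main() {
    WORD groups = GetActiveProcessorGroupCount();                // Windows 7+
    for (WORD g = 0; g < groups; ++g)
        std::printf("group %u: %lu logical processors\n",
                    (unsigned)g, (unsigned long)GetActiveProcessorCount(g));
    std::printf("all groups: %lu\n",
                (unsigned long)GetActiveProcessorCount(ALL_PROCESSOR_GROUPS));
    return 0;
}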
340
341int NumberOfProcessorGroups() {
342 __TBB_ASSERT( hardware_concurrency_info == initialization_complete, "NumberOfProcessorGroups is used before AvailableHwConcurrency" );
343 return ProcessorGroupInfo::NumGroups;
344}
345
346// Offset for the slot reserved for the first master thread
347#define HoleAdjusted(procIdx, grpIdx) (procIdx + (holeIdx <= grpIdx))
348
349int FindProcessorGroupIndex ( int procIdx ) {
350 // In case of oversubscription, spread extra workers in a round-robin manner.
351 int holeIdx;
352 const int numProcs = theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
353 if ( procIdx >= numProcs - 1 ) {
354 holeIdx = INT_MAX;
355 procIdx = (procIdx - numProcs + 1) % numProcs;
356 }
357 else
358 holeIdx = ProcessorGroupInfo::HoleIndex;
359 __TBB_ASSERT( hardware_concurrency_info == initialization_complete, "FindProcessorGroupIndex is used before AvailableHwConcurrency" );
360 // Approximate the likely group index assuming all groups are of the same size
361 int i = procIdx / theProcessorGroups[0].numProcs;
362 // Make sure the approximation is a valid group index
363 if (i >= ProcessorGroupInfo::NumGroups) i = ProcessorGroupInfo::NumGroups-1;
364 // Now adjust the approximation up or down
365 if ( theProcessorGroups[i].numProcsRunningTotal > HoleAdjusted(procIdx, i) ) {
366 while ( theProcessorGroups[i].numProcsRunningTotal - theProcessorGroups[i].numProcs > HoleAdjusted(procIdx, i) ) {
367 __TBB_ASSERT( i > 0, NULL );
368 --i;
369 }
370 }
371 else {
372 do {
373 ++i;
374 } while ( theProcessorGroups[i].numProcsRunningTotal <= HoleAdjusted(procIdx, i) );
375 }
376 __TBB_ASSERT( i < ProcessorGroupInfo::NumGroups, NULL );
377 return i;
378}
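To illustrate the mapping with assumed numbers (two groups of 4 logical processors and the first master thread in group 1; this configuration is not taken from the source), workers 0-3 land in group 0, workers 4-6 take the three remaining slots beside the master in group 1, and workers 7 and beyond wrap around all slots in round-robin order. A standalone sketch that models the same arithmetic:

// Simplified standalone model of FindProcessorGroupIndex (illustration only; assumed
// configuration: 2 groups of 4 logical processors, first master thread in group 1).
#include <climits>
#include <cstdio>

int main() {
    const int groupSize[2]    = {4, 4};
    const int runningTotal[2] = {4, 8};
    const int totalProcs      = 8;
    const int holeGroup       = 1;      // group hosting the first master thread

    for (int procIdx = 0; procIdx < 10; ++procIdx) {
        int p = procIdx, holeIdx = holeGroup;
        if (p >= totalProcs - 1) {      // oversubscription: wrap around, ignore the hole
            holeIdx = INT_MAX;
            p = (p - totalProcs + 1) % totalProcs;
        }
        int i = p / groupSize[0];       // first guess, assuming equally sized groups
        if (i > 1) i = 1;
        auto adjusted = [&](int grp) { return p + (holeIdx <= grp); };
        if (runningTotal[i] > adjusted(i)) {
            while (runningTotal[i] - groupSize[i] > adjusted(i)) --i;
        } else {
            do { ++i; } while (runningTotal[i] <= adjusted(i));
        }
        std::printf("worker %d -> group %d\n", procIdx, i);
    }
    return 0;
}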
379
380void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ) {
381 __TBB_ASSERT( hardware_concurrency_info == initialization_complete, "MoveThreadIntoProcessorGroup is used before AvailableHwConcurrency" );
382 if ( !TBB_SetThreadGroupAffinity )
383 return;
384 TBB_GROUP_AFFINITY ga = { theProcessorGroups[groupIndex].mask, (WORD)groupIndex, {0,0,0} };
385 TBB_SetThreadGroupAffinity( hThread, &ga, NULL );
386}
387
388int AvailableHwConcurrency() {
389 atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
390 return theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
391}
392
393/* End of _WIN32||_WIN64 implementation */
394#else
395 #error AvailableHwConcurrency is not implemented for this OS
396#endif
397
398} // namespace internal
399} // namespace tbb
400
401#endif /* !__TBB_HardwareConcurrency */
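Finally, a hedged usage sketch of how the value computed in this file typically surfaces through the public TBB 2020 API (a standard TBB installation is assumed; the internal call chain from the public entry point down to AvailableHwConcurrency() is not shown here):

// Usage sketch (not part of this file): the detected hardware concurrency is what
// the scheduler uses by default when no explicit thread count is requested.
#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <cstdio>

int main() {
    // Reflects the platform-specific detection implemented in tbb_misc_ex.cpp,
    // including affinity-mask and processor-group handling.
    int n = tbb::task_scheduler_init::default_num_threads();
    std::printf("TBB will use %d threads by default\n", n);

    tbb::parallel_for(0, n, [](int) { /* work */ });
    return 0;
}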