From bad18bccb60c874410edd3f61624696d3abc3cbc Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Tue, 12 Mar 2019 08:35:10 +0100 Subject: [PATCH] Increase thread stack for OS X (#2035) On OS X threads other than the main thread are created with a reduced stack size of 512KB by default, this is dangerously low for deep searches, so adjust it to TH_STACK_SIZE. The implementation calls pthread_create() with proper stack size parameter. Verified for no regression at STC enabling the patch on all platforms where pthread is supported. LLR: 2.95 (-2.94,2.94) [-3.00,1.00] Total: 50873 W: 9768 L: 9700 D: 31405 No functional change. --- src/syzygy/tbprobe.cpp | 2 +- src/thread.h | 4 +- src/thread_win32.h | 70 ------------------------------ src/thread_win32_osx.h | 112 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 115 insertions(+), 73 deletions(-) delete mode 100644 src/thread_win32.h create mode 100644 src/thread_win32_osx.h diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 01bbc7e..7864486 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -32,7 +32,7 @@ #include "../movegen.h" #include "../position.h" #include "../search.h" -#include "../thread_win32.h" +#include "../thread_win32_osx.h" #include "../types.h" #include "../uci.h" diff --git a/src/thread.h b/src/thread.h index 686441c..af50660 100644 --- a/src/thread.h +++ b/src/thread.h @@ -32,7 +32,7 @@ #include "pawns.h" #include "position.h" #include "search.h" -#include "thread_win32.h" +#include "thread_win32_osx.h" /// Thread class keeps together all the thread-related stuff. 
We use @@ -46,7 +46,7 @@ class Thread { ConditionVariable cv; size_t idx; bool exit = false, searching = true; // Set before starting std::thread - std::thread stdThread; + NativeThread stdThread; public: explicit Thread(size_t); diff --git a/src/thread_win32.h b/src/thread_win32.h deleted file mode 100644 index 5c914df..0000000 --- a/src/thread_win32.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2019 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef THREAD_WIN32_H_INCLUDED -#define THREAD_WIN32_H_INCLUDED - -/// STL thread library used by mingw and gcc when cross compiling for Windows -/// relies on libwinpthread. Currently libwinpthread implements mutexes directly -/// on top of Windows semaphores. Semaphores, being kernel objects, require kernel -/// mode transition in order to lock or unlock, which is very slow compared to -/// interlocked operations (about 30% slower on bench test). To work around this -/// issue, we define our wrappers to the low level Win32 calls. We use critical -/// sections to support Windows XP and older versions. 
Unfortunately, cond_wait() -/// is racy between unlock() and WaitForSingleObject() but they have the same -/// speed performance as the SRW locks. - -#include <condition_variable> -#include <mutex> - -#if defined(_WIN32) && !defined(_MSC_VER) - -#ifndef NOMINMAX -# define NOMINMAX // Disable macros min() and max() -#endif - -#define WIN32_LEAN_AND_MEAN -#include <windows.h> -#undef WIN32_LEAN_AND_MEAN -#undef NOMINMAX - -/// Mutex and ConditionVariable struct are wrappers of the low level locking -/// machinery and are modeled after the corresponding C++11 classes. - -struct Mutex { - Mutex() { InitializeCriticalSection(&cs); } - ~Mutex() { DeleteCriticalSection(&cs); } - void lock() { EnterCriticalSection(&cs); } - void unlock() { LeaveCriticalSection(&cs); } - -private: - CRITICAL_SECTION cs; -}; - -typedef std::condition_variable_any ConditionVariable; - -#else // Default case: use STL classes - -typedef std::mutex Mutex; -typedef std::condition_variable ConditionVariable; - -#endif - -#endif // #ifndef THREAD_WIN32_H_INCLUDED diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h new file mode 100644 index 0000000..8890054 --- /dev/null +++ b/src/thread_win32_osx.h @@ -0,0 +1,112 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2008 Tord Romstad (Glaurung author) + Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad + Copyright (C) 2015-2019 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef THREAD_WIN32_OSX_H_INCLUDED
+#define THREAD_WIN32_OSX_H_INCLUDED
+
+/// STL thread library used by mingw and gcc when cross compiling for Windows
+/// relies on libwinpthread. Currently libwinpthread implements mutexes directly
+/// on top of Windows semaphores. Semaphores, being kernel objects, require kernel
+/// mode transition in order to lock or unlock, which is very slow compared to
+/// interlocked operations (about 30% slower on bench test). To work around this
+/// issue, we define our wrappers to the low level Win32 calls. We use critical
+/// sections to support Windows XP and older versions. Unfortunately, cond_wait()
+/// is racy between unlock() and WaitForSingleObject() but they have the same
+/// speed performance as the SRW locks.
+
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+
+#if defined(_WIN32) && !defined(_MSC_VER)
+
+#ifndef NOMINMAX
+#  define NOMINMAX // Disable macros min() and max()
+#endif
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#undef NOMINMAX
+
+/// Mutex and ConditionVariable struct are wrappers of the low level locking
+/// machinery and are modeled after the corresponding C++11 classes.
+
+struct Mutex {
+  Mutex() { InitializeCriticalSection(&cs); }
+  ~Mutex() { DeleteCriticalSection(&cs); }
+  void lock() { EnterCriticalSection(&cs); }    // lock()/unlock() let std lock guards wrap a Mutex
+  void unlock() { LeaveCriticalSection(&cs); }
+
+private:
+  CRITICAL_SECTION cs; // Win32 critical section that implements the lock
+};
+
+typedef std::condition_variable_any ConditionVariable; // _any: works with the custom Mutex above
+
+#else // Default case: use STL classes
+
+typedef std::mutex Mutex;
+typedef std::condition_variable ConditionVariable;
+
+#endif
+
+/// On OSX threads other than the main thread are created with a reduced stack
+/// size of 512KB by default, this is dangerously low for deep searches, so
+/// adjust it to TH_STACK_SIZE. The implementation calls pthread_create() with
+/// proper stack size parameter.
+
+#if defined(__APPLE__)
+
+#include <pthread.h>
+
+static const size_t TH_STACK_SIZE = 2 * 1024 * 1024; // 2 MB stack for each NativeThread
+
+template <class T, class P = std::pair<T*, void(T::*)()>> // P = (object, member function) pair
+void* start_routine(void* ptr)
+{
+   P* p = reinterpret_cast<P*>(ptr); // ptr is the pair allocated in NativeThread's constructor
+   (p->first->*(p->second))(); // Call member function pointer
+   delete p; // The new thread owns the pair and frees it once the call returns
+   return NULL;
+}
+
+class NativeThread {
+
+   pthread_t thread; // Handle filled in by pthread_create(), used by join()
+
+public:
+  template<class T, class P = std::pair<T*, void(T::*)()>>
+  explicit NativeThread(void(T::*fun)(), T* obj) { // Starts a thread running (obj->*fun)()
+    pthread_attr_t attr_storage, *attr = &attr_storage;
+    pthread_attr_init(attr);
+    pthread_attr_setstacksize(attr, TH_STACK_SIZE);
+    pthread_create(&thread, attr, start_routine<T>, new P(obj, fun));
+  }
+  void join() { pthread_join(thread, NULL); } // Blocks until the thread function has returned
+};
+
+#else // Default case: use STL classes
+
+typedef std::thread NativeThread;
+
+#endif
+
+#endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED
-- 
1.7.0.4