1/*
2 * Copyright 2010-2012 PathScale, Inc. All rights reserved.
3 * Copyright 2021 David Chisnall. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 *    this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 *    this list of conditions and the following disclaimer in the documentation
13 *    and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
16 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/**
29 * guard.cc: Functions for thread-safe static initialisation.
30 *
 * Static values in C++ can be initialised lazily on their first use.  This file
32 * contains functions that are used to ensure that two threads attempting to
33 * initialize the same static do not call the constructor twice.  This is
34 * important because constructors can have side effects, so calling the
35 * constructor twice may be very bad.
36 *
37 * Statics that require initialisation are protected by a 64-bit value.  Any
38 * platform that can do 32-bit atomic test and set operations can use this
39 * value as a low-overhead lock.  Because statics (in most sane code) are
40 * accessed far more times than they are initialised, this lock implementation
41 * is heavily optimised towards the case where the static has already been
42 * initialised.
43 */
44#include "atomic.h"
45#include <assert.h>
46#include <pthread.h>
47#include <stdint.h>
48#include <stdlib.h>
49
// Some (older) GCC versions do not predefine __LITTLE_ENDIAN__, so derive it
// here when the compiler has not already provided it.
#if !defined(__LITTLE_ENDIAN__)
// Preferred source of truth: the compiler-provided __BYTE_ORDER__ macro.
#	if defined(__BYTE_ORDER__)
#		if __ORDER_LITTLE_ENDIAN__ == __BYTE_ORDER__
#			define __LITTLE_ENDIAN__
#		endif
// Without __BYTE_ORDER__, fall back to recognising x86, which is always
// little endian (ARM is special cased separately where the guard type is
// selected).  Every other target is assumed to be big endian.
#	elif defined(__i386) || defined(__x86_64)
#		define __LITTLE_ENDIAN__
#	endif
#endif
64
65/*
66 * The Itanium C++ ABI defines guard words that are 64-bit (32-bit on AArch32)
 * values with one bit defined to indicate that the guarded variable is
 * initialised and another bit to indicate that it's currently locked
 * (initialisation in progress).  The bit to use depends on the byte order of
 * the target.
70 *
71 * On many 32-bit platforms, 64-bit atomics are unavailable (or slow) and so we
72 * treat the two halves of the 64-bit word as independent values and establish
73 * an ordering on them such that the guard word is never modified unless the
74 * lock word is in the locked state.  This means that we can do double-checked
75 * locking by loading the guard word and, if it is not initialised, trying to
76 * transition the lock word from the unlocked to locked state, and then
77 * manipulate the guard word.
78 */
79namespace
80{
81	/**
82	 * The state of the guard variable when an attempt is made to lock it.
83	 */
84	enum class GuardState
85	{
86		/**
87		 * The lock is not held but is not needed because initialisation is
88		 * one.
89		 */
90		InitDone,
91
92		/**
93		 * Initialisation is not done but the lock is held by the caller.
94		 */
95		InitLockSucceeded,
96
97		/**
98		 * Attempting to acquire the lock failed.
99		 */
100		InitLockFailed
101	};
102
103	/**
104	 * Class encapsulating a single atomic word being used to represent the
105	 * guard.  The word size is defined by the type of `GuardWord`.  The bit
106	 * used to indicate the locked state is `1<<LockedBit`, the bit used to
107	 * indicate the initialised state is `1<<InitBit`.
108	 */
109	template<typename GuardWord, int LockedBit, int InitBit>
110	struct SingleWordGuard
111	{
112		/**
113		 * The value indicating that the lock bit is set (and no other bits).
114		 */
115		static constexpr GuardWord locked = static_cast<GuardWord>(1)
116		                                    << LockedBit;
117
118		/**
119		 * The value indicating that the initialised bit is set (and all other
120		 * bits are zero).
121		 */
122		static constexpr GuardWord initialised = static_cast<GuardWord>(1)
123		                                         << InitBit;
124
125		/**
126		 * The guard variable.
127		 */
128		atomic<GuardWord> val;
129
130		public:
131		/**
132		 * Release the lock and set the initialised state.  In the single-word
133		 * implementation here, these are both done by a single store.
134		 */
135		void unlock(bool isInitialised)
136		{
137			val.store(isInitialised ? initialised : 0, memory_order::release);
138#ifndef NDEBUG
139			GuardWord init_state = initialised;
140			assert(*reinterpret_cast<uint8_t*>(&init_state) != 0);
141#endif
142		}
143
144		/**
145		 * Try to acquire the lock.  This has a tri-state return, indicating
146		 * either that the lock was acquired, it wasn't acquired because it was
147		 * contended, or it wasn't acquired because the guarded variable is
148		 * already initialised.
149		 */
150		GuardState try_lock()
151		{
152			GuardWord old = 0;
153			// Try to acquire the lock, assuming that we are in the state where
154			// the lock is not held and the variable is not initialised (so the
155			// expected value is 0).
156			if (val.compare_exchange(old, locked))
157			{
158				return GuardState::InitLockSucceeded;
159			}
160			// If the CAS failed and the old value indicates that this is
161			// initialised, return that initialisation is done and skip further
162			// retries.
163			if (old == initialised)
164			{
165				return GuardState::InitDone;
166			}
167			// Otherwise, report failure.
168			return GuardState::InitLockFailed;
169		}
170
171		/**
172		 * Check whether the guard indicates that the variable is initialised.
173		 */
174		bool is_initialised()
175		{
176			return (val.load(memory_order::acquire) & initialised) ==
177			       initialised;
178		}
179	};
180
181	/**
182	 * Class encapsulating using two 32-bit atomic values to represent a 64-bit
183	 * guard variable.
184	 */
185	template<int LockedBit, int InitBit>
186	class DoubleWordGuard
187	{
188		/**
189		 * The value of `lock_word` when the lock is held.
190		 */
191		static constexpr uint32_t locked = static_cast<uint32_t>(1)
192		                                   << LockedBit;
193
194		/**
195		 * The value of `init_word` when the guarded variable is initialised.
196		 */
197		static constexpr uint32_t initialised = static_cast<uint32_t>(1)
198		                                        << InitBit;
199
200		/**
201		 * The word used for the initialised flag.  This is always the first
202		 * word irrespective of endian because the generated code compares the
203		 * first byte in memory against 0.
204		 */
205		atomic<uint32_t> init_word;
206
207		/**
208		 * The word used for the lock.
209		 */
210		atomic<uint32_t> lock_word;
211
212		public:
213		/**
214		 * Try to acquire the lock.  This has a tri-state return, indicating
215		 * either that the lock was acquired, it wasn't acquired because it was
216		 * contended, or it wasn't acquired because the guarded variable is
217		 * already initialised.
218		 */
219		GuardState try_lock()
220		{
221			uint32_t old = 0;
222			// Try to acquire the lock
223			if (lock_word.compare_exchange(old, locked))
224			{
225				// If we succeeded, check if initialisation has happened.  In
226				// this version, we don't have atomic manipulation of both the
227				// lock and initialised bits together.  Instead, we have an
228				// ordering rule that the initialised bit is only ever updated
229				// with the lock held.
230				if (is_initialised())
231				{
232					// If another thread did manage to initialise this, release
233					// the lock and notify the caller that initialisation is
234					// done.
235					lock_word.store(0, memory_order::release);
236					return GuardState::InitDone;
237				}
238				return GuardState::InitLockSucceeded;
239			}
240			return GuardState::InitLockFailed;
241		}
242
243		/**
244		 * Set the initialised state and release the lock.  In this
245		 * implementation, this is ordered, not atomic: the initialise bit is
246		 * set while the lock is held.
247		 */
248		void unlock(bool isInitialised)
249		{
250			init_word.store(isInitialised ? initialised : 0,
251			                  memory_order::release);
252			lock_word.store(0, memory_order::release);
253			assert((*reinterpret_cast<uint8_t*>(this) != 0) == isInitialised);
254		}
255
256		/**
257		 * Return whether the guarded variable is initialised.
258		 */
259		bool is_initialised()
260		{
261			return (init_word.load(memory_order::acquire) & initialised) ==
262			       initialised;
263		}
264	};
265
266	// Check that the two implementations are the correct size.
267	static_assert(sizeof(SingleWordGuard<uint32_t, 31, 0>) == sizeof(uint32_t),
268	              "Single-word 32-bit guard must be 32 bits");
269	static_assert(sizeof(SingleWordGuard<uint64_t, 63, 0>) == sizeof(uint64_t),
270	              "Single-word 64-bit guard must be 64 bits");
271	static_assert(sizeof(DoubleWordGuard<31, 0>) == sizeof(uint64_t),
272	              "Double-word guard must be 64 bits");
273
274#ifdef __arm__
275	/**
276	 * The Arm PCS defines a variant of the Itanium ABI with 32-bit lock words.
277	 */
278	using Guard = SingleWordGuard<uint32_t, 31, 0>;
279#elif defined(_LP64)
280#	if defined(__LITTLE_ENDIAN__)
281	/**
282	 * On little-endian 64-bit platforms the guard word is a single 64-bit
283	 * atomic with the lock in the high bit and the initialised flag in the low
284	 * bit.
285	 */
286	using Guard = SingleWordGuard<uint64_t, 63, 0>;
287#	else
288	/**
289	 * On bit-endian 64-bit platforms, the guard word is a single 64-bit atomic
290	 * with the lock in the low bit and the initialised bit in the highest
291	 * byte.
292	 */
293	using Guard = SingleWordGuard<uint64_t, 0, 56>;
294#	endif
295#else
296#	if defined(__LITTLE_ENDIAN__)
297	/**
298	 * 32-bit platforms use the same layout as 64-bit.
299	 */
300	using Guard = DoubleWordGuard<31, 0>;
301#	else
302	/**
303	 * 32-bit platforms use the same layout as 64-bit.
304	 */
305	using Guard = DoubleWordGuard<0, 24>;
306#	endif
307#endif
308
309} // namespace
310
311/**
312 * Acquires a lock on a guard, returning 0 if the object has already been
313 * initialised, and 1 if it has not.  If the object is already constructed then
314 * this function just needs to read a byte from memory and return.
315 */
316extern "C" int __cxa_guard_acquire(Guard *guard_object)
317{
318	// Check if this is already initialised.  If so, we don't have to do
319	// anything.
320	if (guard_object->is_initialised())
321	{
322		return 0;
323	}
324	// Spin trying to acquire the lock.  If we fail to acquire the lock the
325	// first time then another thread will *probably* initialise it, but if the
326	// constructor throws an exception then we may have to try again in this
327	// thread.
328	for (;;)
329	{
330		// Try to acquire the lock.
331		switch (guard_object->try_lock())
332		{
333			// If we failed to acquire the lock but another thread has
334			// initialised the lock while we were waiting, return immediately
335			// indicating that initialisation is not required.
336			case GuardState::InitDone:
337				return 0;
338			// If we acquired the lock, return immediately to start
339			// initialisation.
340			case GuardState::InitLockSucceeded:
341				return 1;
342			// If we didn't acquire the lock, pause and retry.
343			case GuardState::InitLockFailed:
344				break;
345		}
346		sched_yield();
347	}
348}
349
350/**
351 * Releases the lock without marking the object as initialised.  This function
352 * is called if initialising a static causes an exception to be thrown.
353 */
354extern "C" void __cxa_guard_abort(Guard *guard_object)
355{
356	guard_object->unlock(false);
357}
358
359/**
360 * Releases the guard and marks the object as initialised.  This function is
361 * called after successful initialisation of a static.
362 */
363extern "C" void __cxa_guard_release(Guard *guard_object)
364{
365	guard_object->unlock(true);
366}
367