Diffstat (limited to 'sys/arm/include/atomic.h')
-rw-r--r--	sys/arm/include/atomic.h	1111
1 file changed, 1111 insertions(+), 0 deletions(-)
diff --git a/sys/arm/include/atomic.h b/sys/arm/include/atomic.h
new file mode 100644
index 000000000000..f66953710615
--- /dev/null
+++ b/sys/arm/include/atomic.h
@@ -0,0 +1,1111 @@
+/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */
+
+/*-
+ * SPDX-License-Identifier: BSD-4-Clause
+ *
+ * Copyright (C) 2003-2004 Olivier Houchard
+ * Copyright (C) 1994-1997 Mark Brinicombe
+ * Copyright (C) 1994 Brini
+ * All rights reserved.
+ *
+ * This code is derived from software written for Brini by Mark Brinicombe
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Brini.
+ * 4. The name of Brini may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MACHINE_ATOMIC_H_
+#define _MACHINE_ATOMIC_H_
+
+#include <sys/atomic_common.h>
+
+#define isb() __asm __volatile("isb" : : : "memory")
+#define dsb() __asm __volatile("dsb" : : : "memory")
+#define dmb() __asm __volatile("dmb" : : : "memory")
+
+#define mb() dmb()
+#define wmb() dmb()
+#define rmb() dmb()
+
+#define ARM_HAVE_ATOMIC64
+
+#define ATOMIC_ACQ_REL_LONG(NAME) \
+static __inline void \
+atomic_##NAME##_acq_long(__volatile u_long *p, u_long v) \
+{ \
+ atomic_##NAME##_long(p, v); \
+ dmb(); \
+} \
+ \
+static __inline void \
+atomic_##NAME##_rel_long(__volatile u_long *p, u_long v) \
+{ \
+ dmb(); \
+ atomic_##NAME##_long(p, v); \
+}
+
+#define ATOMIC_ACQ_REL(NAME, WIDTH) \
+static __inline void \
+atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
+{ \
+ atomic_##NAME##_##WIDTH(p, v); \
+ dmb(); \
+} \
+ \
+static __inline void \
+atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
+{ \
+ dmb(); \
+ atomic_##NAME##_##WIDTH(p, v); \
+}
+
+static __inline void
+atomic_add_32(volatile uint32_t *p, uint32_t val)
+{
+ uint32_t tmp = 0, tmp2 = 0;
+
+ __asm __volatile(
+ "1: ldrex %0, [%2] \n"
+ " add %0, %0, %3 \n"
+ " strex %1, %0, [%2] \n"
+ " cmp %1, #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : "=&r" (tmp), "+r" (tmp2)
+ ,"+r" (p), "+r" (val) : : "cc", "memory");
+}
+
+static __inline void
+atomic_add_64(volatile uint64_t *p, uint64_t val)
+{
+ uint64_t tmp;
+ uint32_t exflag;
+
+ __asm __volatile(
+ "1: \n"
+ " ldrexd %Q[tmp], %R[tmp], [%[ptr]] \n"
+ " adds %Q[tmp], %Q[val] \n"
+ " adc %R[tmp], %R[tmp], %R[val] \n"
+ " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]] \n"
+ " teq %[exf], #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : [exf] "=&r" (exflag),
+ [tmp] "=&r" (tmp)
+ : [ptr] "r" (p),
+ [val] "r" (val)
+ : "cc", "memory");
+}
+
+static __inline void
+atomic_add_long(volatile u_long *p, u_long val)
+{
+
+ atomic_add_32((volatile uint32_t *)p, val);
+}
+
+ATOMIC_ACQ_REL(add, 32)
+ATOMIC_ACQ_REL(add, 64)
+ATOMIC_ACQ_REL_LONG(add)
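+
+/*
+ * For reference, the ATOMIC_ACQ_REL* macros above generate the _acq_ and
+ * _rel_ variants by wrapping the relaxed primitive in a dmb: the barrier
+ * follows the operation for acquire semantics and precedes it for release
+ * semantics.  A sketch of what ATOMIC_ACQ_REL(add, 32) expands to
+ * (illustrative only, not literal preprocessor output):
+ *
+ *	static __inline void
+ *	atomic_add_acq_32(__volatile uint32_t *p, uint32_t v)
+ *	{
+ *		atomic_add_32(p, v);
+ *		dmb();
+ *	}
+ *
+ *	static __inline void
+ *	atomic_add_rel_32(__volatile uint32_t *p, uint32_t v)
+ *	{
+ *		dmb();
+ *		atomic_add_32(p, v);
+ *	}
+ */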
+
+static __inline void
+atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
+{
+ uint32_t tmp = 0, tmp2 = 0;
+
+ __asm __volatile(
+ "1: ldrex %0, [%2] \n"
+ " bic %0, %0, %3 \n"
+ " strex %1, %0, [%2] \n"
+ " cmp %1, #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
+ : : "cc", "memory");
+}
+
+static __inline void
+atomic_clear_64(volatile uint64_t *p, uint64_t val)
+{
+ uint64_t tmp;
+ uint32_t exflag;
+
+ __asm __volatile(
+ "1: \n"
+ " ldrexd %Q[tmp], %R[tmp], [%[ptr]] \n"
+ " bic %Q[tmp], %Q[val] \n"
+ " bic %R[tmp], %R[val] \n"
+ " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]] \n"
+ " teq %[exf], #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : [exf] "=&r" (exflag),
+ [tmp] "=&r" (tmp)
+ : [ptr] "r" (p),
+ [val] "r" (val)
+ : "cc", "memory");
+}
+
+static __inline void
+atomic_clear_long(volatile u_long *address, u_long setmask)
+{
+
+ atomic_clear_32((volatile uint32_t *)address, setmask);
+}
+
+ATOMIC_ACQ_REL(clear, 32)
+ATOMIC_ACQ_REL(clear, 64)
+ATOMIC_ACQ_REL_LONG(clear)
+
+#define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF) \
+ { \
+ TYPE tmp; \
+ \
+ __asm __volatile( \
+ "1: ldrex" SUF " %[tmp], [%[ptr]] \n" \
+ " ldr" SUF " %[ret], [%[oldv]] \n" \
+ " teq %[tmp], %[ret] \n" \
+ " ittee ne \n" \
+ " str" SUF "ne %[tmp], [%[oldv]] \n" \
+ " movne %[ret], #0 \n" \
+ " strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
+ " eorseq %[ret], #1 \n" \
+ " beq 1b \n" \
+ : [ret] "=&r" (RET), \
+ [tmp] "=&r" (tmp) \
+ : [ptr] "r" (_ptr), \
+ [oldv] "r" (_old), \
+ [newv] "r" (_new) \
+ : "cc", "memory"); \
+ }
+
+#define ATOMIC_FCMPSET_CODE64(RET) \
+ { \
+ uint64_t cmp, tmp; \
+ \
+ __asm __volatile( \
+ "1: ldrexd %Q[tmp], %R[tmp], [%[ptr]] \n" \
+ " ldrd %Q[cmp], %R[cmp], [%[oldv]] \n" \
+ " teq %Q[tmp], %Q[cmp] \n" \
+ " it eq \n" \
+ " teqeq %R[tmp], %R[cmp] \n" \
+ " ittee ne \n" \
+ " movne %[ret], #0 \n" \
+ " strdne %[cmp], [%[oldv]] \n" \
+ " strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
+ " eorseq %[ret], #1 \n" \
+ " beq 1b \n" \
+ : [ret] "=&r" (RET), \
+ [cmp] "=&r" (cmp), \
+ [tmp] "=&r" (tmp) \
+ : [ptr] "r" (_ptr), \
+ [oldv] "r" (_old), \
+ [newv] "r" (_new) \
+ : "cc", "memory"); \
+ }
+
+static __inline int
+atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
+{
+ int ret;
+
+ ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
+ return (ret);
+}
+#define atomic_fcmpset_8 atomic_fcmpset_8
+
+static __inline int
+atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
+{
+ int ret;
+
+ ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
+ dmb();
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
+{
+ int ret;
+
+ dmb();
+ ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
+{
+ int ret;
+
+ ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
+ return (ret);
+}
+#define atomic_fcmpset_16 atomic_fcmpset_16
+
+static __inline int
+atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
+{
+ int ret;
+
+ ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
+ dmb();
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
+{
+ int ret;
+
+ dmb();
+ ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
+{
+ int ret;
+
+ ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
+{
+ int ret;
+
+ ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
+ dmb();
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
+{
+ int ret;
+
+ dmb();
+ ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
+{
+ int ret;
+
+ ATOMIC_FCMPSET_CODE(ret, u_long, "");
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
+{
+ int ret;
+
+ ATOMIC_FCMPSET_CODE(ret, u_long, "");
+ dmb();
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
+{
+ int ret;
+
+ dmb();
+ ATOMIC_FCMPSET_CODE(ret, u_long, "");
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
+{
+ int ret;
+
+ ATOMIC_FCMPSET_CODE64(ret);
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
+{
+ int ret;
+
+ ATOMIC_FCMPSET_CODE64(ret);
+ dmb();
+ return (ret);
+}
+
+static __inline int
+atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
+{
+ int ret;
+
+ dmb();
+ ATOMIC_FCMPSET_CODE64(ret);
+ return (ret);
+}
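+
+/*
+ * Typical caller-side use of the fcmpset primitives, as a sketch ("flags"
+ * and "mask" are hypothetical caller variables, not part of this header):
+ * on a compare failure the current value is written back through the
+ * pointer-to-old argument, so the retry loop does not need to reload it.
+ *
+ *	uint32_t old, new;
+ *
+ *	old = flags;
+ *	do {
+ *		new = old | mask;
+ *	} while (atomic_fcmpset_32(&flags, &old, new) == 0);
+ */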
+
+#define ATOMIC_CMPSET_CODE(RET, SUF) \
+ { \
+ __asm __volatile( \
+ "1: ldrex" SUF " %[ret], [%[ptr]] \n" \
+ " teq %[ret], %[oldv] \n" \
+ " itee ne \n" \
+ " movne %[ret], #0 \n" \
+ " strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
+ " eorseq %[ret], #1 \n" \
+ " beq 1b \n" \
+ : [ret] "=&r" (RET) \
+ : [ptr] "r" (_ptr), \
+ [oldv] "r" (_old), \
+ [newv] "r" (_new) \
+ : "cc", "memory"); \
+ }
+
+#define ATOMIC_CMPSET_CODE64(RET) \
+ { \
+ uint64_t tmp; \
+ \
+ __asm __volatile( \
+ "1: ldrexd %Q[tmp], %R[tmp], [%[ptr]] \n" \
+ " teq %Q[tmp], %Q[oldv] \n" \
+ " it eq \n" \
+ " teqeq %R[tmp], %R[oldv] \n" \
+ " itee ne \n" \
+ " movne %[ret], #0 \n" \
+ " strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
+ " eorseq %[ret], #1 \n" \
+ " beq 1b \n" \
+ : [ret] "=&r" (RET), \
+ [tmp] "=&r" (tmp) \
+ : [ptr] "r" (_ptr), \
+ [oldv] "r" (_old), \
+ [newv] "r" (_new) \
+ : "cc", "memory"); \
+ }
+
+static __inline int
+atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
+{
+ int ret;
+
+ ATOMIC_CMPSET_CODE(ret, "b");
+ return (ret);
+}
+#define atomic_cmpset_8 atomic_cmpset_8
+
+static __inline int
+atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
+{
+ int ret;
+
+ ATOMIC_CMPSET_CODE(ret, "b");
+ dmb();
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
+{
+ int ret;
+
+ dmb();
+ ATOMIC_CMPSET_CODE(ret, "b");
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
+{
+ int ret;
+
+ ATOMIC_CMPSET_CODE(ret, "h");
+ return (ret);
+}
+#define atomic_cmpset_16 atomic_cmpset_16
+
+static __inline int
+atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
+{
+ int ret;
+
+ ATOMIC_CMPSET_CODE(ret, "h");
+ dmb();
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
+{
+ int ret;
+
+ dmb();
+ ATOMIC_CMPSET_CODE(ret, "h");
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
+{
+ int ret;
+
+ ATOMIC_CMPSET_CODE(ret, "");
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
+{
+ int ret;
+
+ ATOMIC_CMPSET_CODE(ret, "");
+ dmb();
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
+{
+ int ret;
+
+ dmb();
+ ATOMIC_CMPSET_CODE(ret, "");
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
+{
+ int ret;
+
+ ATOMIC_CMPSET_CODE(ret, "");
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
+{
+ int ret;
+
+ ATOMIC_CMPSET_CODE(ret, "");
+ dmb();
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
+{
+ int ret;
+
+ dmb();
+ ATOMIC_CMPSET_CODE(ret, "");
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
+{
+ int ret;
+
+ ATOMIC_CMPSET_CODE64(ret);
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
+{
+ int ret;
+
+ ATOMIC_CMPSET_CODE64(ret);
+ dmb();
+ return (ret);
+}
+
+static __inline int
+atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
+{
+ int ret;
+
+ dmb();
+ ATOMIC_CMPSET_CODE64(ret);
+ return (ret);
+}
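+
+/*
+ * Unlike fcmpset, cmpset takes the expected value by value and simply
+ * returns 0 on failure, so a retry loop re-reads the location itself.
+ * A minimal sketch ("counter" is a hypothetical variable):
+ *
+ *	uint32_t old;
+ *
+ *	do {
+ *		old = counter;
+ *	} while (atomic_cmpset_32(&counter, old, old + 1) == 0);
+ */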
+
+static __inline uint32_t
+atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
+{
+ uint32_t tmp = 0, tmp2 = 0, ret = 0;
+
+ __asm __volatile(
+ "1: ldrex %0, [%3] \n"
+ " add %1, %0, %4 \n"
+ " strex %2, %1, [%3] \n"
+ " cmp %2, #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
+ : : "cc", "memory");
+ return (ret);
+}
+
+static __inline uint64_t
+atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
+{
+ uint64_t ret, tmp;
+ uint32_t exflag;
+
+ __asm __volatile(
+ "1: \n"
+ " ldrexd %Q[ret], %R[ret], [%[ptr]] \n"
+ " adds %Q[tmp], %Q[ret], %Q[val] \n"
+ " adc %R[tmp], %R[ret], %R[val] \n"
+ " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]] \n"
+ " teq %[exf], #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : [ret] "=&r" (ret),
+ [exf] "=&r" (exflag),
+ [tmp] "=&r" (tmp)
+ : [ptr] "r" (p),
+ [val] "r" (val)
+ : "cc", "memory");
+ return (ret);
+}
+
+static __inline u_long
+atomic_fetchadd_long(volatile u_long *p, u_long val)
+{
+
+ return (atomic_fetchadd_32((volatile uint32_t *)p, val));
+}
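+
+/*
+ * fetchadd returns the value the location held before the addition, which
+ * makes it convenient for ticket counters and statistics.  Illustrative
+ * use only ("tickets" is a hypothetical variable):
+ *
+ *	uint32_t my_ticket;
+ *
+ *	my_ticket = atomic_fetchadd_32(&tickets, 1);
+ */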
+
+static __inline uint32_t
+atomic_load_acq_32(const volatile uint32_t *p)
+{
+ uint32_t v;
+
+ v = *p;
+ dmb();
+ return (v);
+}
+
+static __inline uint64_t
+atomic_load_64(const volatile uint64_t *p)
+{
+ uint64_t ret;
+
+ /*
+ * The only way to atomically load 64 bits is with LDREXD which puts the
+ * exclusive monitor into the exclusive state, so reset it to open state
+ * with CLREX because we don't actually need to store anything.
+ */
+ __asm __volatile(
+ "ldrexd %Q[ret], %R[ret], [%[ptr]] \n"
+ "clrex \n"
+ : [ret] "=&r" (ret)
+ : [ptr] "r" (p)
+ : "cc", "memory");
+ return (ret);
+}
+
+static __inline uint64_t
+atomic_load_acq_64(const volatile uint64_t *p)
+{
+ uint64_t ret;
+
+ ret = atomic_load_64(p);
+ dmb();
+ return (ret);
+}
+
+static __inline u_long
+atomic_load_acq_long(const volatile u_long *p)
+{
+ u_long v;
+
+ v = *p;
+ dmb();
+ return (v);
+}
+
+static __inline uint32_t
+atomic_readandclear_32(volatile uint32_t *p)
+{
+ uint32_t ret, tmp = 0, tmp2 = 0;
+
+ __asm __volatile(
+ "1: ldrex %0, [%3] \n"
+ " mov %1, #0 \n"
+ " strex %2, %1, [%3] \n"
+ " cmp %2, #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
+ : : "cc", "memory");
+ return (ret);
+}
+
+static __inline uint64_t
+atomic_readandclear_64(volatile uint64_t *p)
+{
+ uint64_t ret, tmp;
+ uint32_t exflag;
+
+ __asm __volatile(
+ "1: \n"
+ " ldrexd %Q[ret], %R[ret], [%[ptr]] \n"
+ " mov %Q[tmp], #0 \n"
+ " mov %R[tmp], #0 \n"
+ " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]] \n"
+ " teq %[exf], #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : [ret] "=&r" (ret),
+ [exf] "=&r" (exflag),
+ [tmp] "=&r" (tmp)
+ : [ptr] "r" (p)
+ : "cc", "memory");
+ return (ret);
+}
+
+static __inline u_long
+atomic_readandclear_long(volatile u_long *p)
+{
+
+ return (atomic_readandclear_32((volatile uint32_t *)p));
+}
+
+static __inline void
+atomic_set_32(volatile uint32_t *address, uint32_t setmask)
+{
+ uint32_t tmp = 0, tmp2 = 0;
+
+ __asm __volatile(
+ "1: ldrex %0, [%2] \n"
+ " orr %0, %0, %3 \n"
+ " strex %1, %0, [%2] \n"
+ " cmp %1, #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
+ : : "cc", "memory");
+}
+
+static __inline void
+atomic_set_64(volatile uint64_t *p, uint64_t val)
+{
+ uint64_t tmp;
+ uint32_t exflag;
+
+ __asm __volatile(
+ "1: \n"
+ " ldrexd %Q[tmp], %R[tmp], [%[ptr]] \n"
+ " orr %Q[tmp], %Q[val] \n"
+ " orr %R[tmp], %R[val] \n"
+ " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]] \n"
+ " teq %[exf], #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : [exf] "=&r" (exflag),
+ [tmp] "=&r" (tmp)
+ : [ptr] "r" (p),
+ [val] "r" (val)
+ : "cc", "memory");
+}
+
+static __inline void
+atomic_set_long(volatile u_long *address, u_long setmask)
+{
+
+ atomic_set_32((volatile uint32_t *)address, setmask);
+}
+
+ATOMIC_ACQ_REL(set, 32)
+ATOMIC_ACQ_REL(set, 64)
+ATOMIC_ACQ_REL_LONG(set)
+
+static __inline void
+atomic_subtract_32(volatile uint32_t *p, uint32_t val)
+{
+ uint32_t tmp = 0, tmp2 = 0;
+
+ __asm __volatile(
+ "1: ldrex %0, [%2] \n"
+ " sub %0, %0, %3 \n"
+ " strex %1, %0, [%2] \n"
+ " cmp %1, #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
+ : : "cc", "memory");
+}
+
+static __inline void
+atomic_subtract_64(volatile uint64_t *p, uint64_t val)
+{
+ uint64_t tmp;
+ uint32_t exflag;
+
+ __asm __volatile(
+ "1: \n"
+ " ldrexd %Q[tmp], %R[tmp], [%[ptr]] \n"
+ " subs %Q[tmp], %Q[val] \n"
+ " sbc %R[tmp], %R[tmp], %R[val] \n"
+ " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]] \n"
+ " teq %[exf], #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : [exf] "=&r" (exflag),
+ [tmp] "=&r" (tmp)
+ : [ptr] "r" (p),
+ [val] "r" (val)
+ : "cc", "memory");
+}
+
+static __inline void
+atomic_subtract_long(volatile u_long *p, u_long val)
+{
+
+ atomic_subtract_32((volatile uint32_t *)p, val);
+}
+
+ATOMIC_ACQ_REL(subtract, 32)
+ATOMIC_ACQ_REL(subtract, 64)
+ATOMIC_ACQ_REL_LONG(subtract)
+
+static __inline void
+atomic_store_64(volatile uint64_t *p, uint64_t val)
+{
+ uint64_t tmp;
+ uint32_t exflag;
+
+ /*
+ * The only way to atomically store 64 bits is with STREXD, which will
+	 * succeed only if paired up with a preceding LDREXD using the same
+ * address, so we read and discard the existing value before storing.
+ */
+ __asm __volatile(
+ "1: \n"
+ " ldrexd %Q[tmp], %R[tmp], [%[ptr]] \n"
+ " strexd %[exf], %Q[val], %R[val], [%[ptr]] \n"
+ " teq %[exf], #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : [tmp] "=&r" (tmp),
+ [exf] "=&r" (exflag)
+ : [ptr] "r" (p),
+ [val] "r" (val)
+ : "cc", "memory");
+}
+
+static __inline void
+atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
+{
+
+ dmb();
+ *p = v;
+}
+
+static __inline void
+atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
+{
+
+ dmb();
+ atomic_store_64(p, val);
+}
+
+static __inline void
+atomic_store_rel_long(volatile u_long *p, u_long v)
+{
+
+ dmb();
+ *p = v;
+}
+
+static __inline int
+atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
+{
+ int newv, oldv, result;
+
+ __asm __volatile(
+ " mov ip, #1 \n"
+ " lsl ip, ip, %[bit] \n"
+ /* Done with %[bit] as input, reuse below as output. */
+ "1: \n"
+ " ldrex %[oldv], [%[ptr]] \n"
+ " bic %[newv], %[oldv], ip \n"
+ " strex %[bit], %[newv], [%[ptr]] \n"
+ " teq %[bit], #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ " ands %[bit], %[oldv], ip \n"
+ " it ne \n"
+ " movne %[bit], #1 \n"
+ : [bit] "=&r" (result),
+ [oldv] "=&r" (oldv),
+ [newv] "=&r" (newv)
+ : [ptr] "r" (ptr),
+ "[bit]" (bit & 0x1f)
+ : "cc", "ip", "memory");
+
+ return (result);
+}
+
+static __inline int
+atomic_testandclear_int(volatile u_int *p, u_int v)
+{
+
+ return (atomic_testandclear_32((volatile uint32_t *)p, v));
+}
+
+static __inline int
+atomic_testandclear_long(volatile u_long *p, u_int v)
+{
+
+ return (atomic_testandclear_32((volatile uint32_t *)p, v));
+}
+
+static __inline int
+atomic_testandclear_64(volatile uint64_t *p, u_int v)
+{
+ volatile uint32_t *p32;
+
+ p32 = (volatile uint32_t *)p;
+ /*
+	 * Assumes little-endian byte order;
+	 * atomic_testandclear_32() uses only the low 5 bits of v.
+ */
+ if ((v & 0x20) != 0)
+ p32++;
+ return (atomic_testandclear_32(p32, v));
+}
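+
+/*
+ * Worked example of the word selection above, assuming little-endian
+ * layout: for v = 37, bit 5 of v is set, so the operation targets the
+ * high 32-bit word (p32 + 1), and the 32-bit primitive then masks v down
+ * to 37 & 0x1f == 5 within that word.  atomic_testandset_64() below uses
+ * the same selection.
+ */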
+
+static __inline int
+atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
+{
+ int newv, oldv, result;
+
+ __asm __volatile(
+ " mov ip, #1 \n"
+ " lsl ip, ip, %[bit] \n"
+ /* Done with %[bit] as input, reuse below as output. */
+ "1: \n"
+ " ldrex %[oldv], [%[ptr]] \n"
+ " orr %[newv], %[oldv], ip \n"
+ " strex %[bit], %[newv], [%[ptr]] \n"
+ " teq %[bit], #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ " ands %[bit], %[oldv], ip \n"
+ " it ne \n"
+ " movne %[bit], #1 \n"
+ : [bit] "=&r" (result),
+ [oldv] "=&r" (oldv),
+ [newv] "=&r" (newv)
+ : [ptr] "r" (ptr),
+ "[bit]" (bit & 0x1f)
+ : "cc", "ip", "memory");
+
+ return (result);
+}
+
+static __inline int
+atomic_testandset_int(volatile u_int *p, u_int v)
+{
+
+ return (atomic_testandset_32((volatile uint32_t *)p, v));
+}
+
+static __inline int
+atomic_testandset_long(volatile u_long *p, u_int v)
+{
+
+ return (atomic_testandset_32((volatile uint32_t *)p, v));
+}
+
+static __inline int
+atomic_testandset_acq_long(volatile u_long *p, u_int v)
+{
+ int ret;
+
+ ret = atomic_testandset_32((volatile uint32_t *)p, v);
+ dmb();
+ return (ret);
+}
+
+static __inline int
+atomic_testandset_64(volatile uint64_t *p, u_int v)
+{
+ volatile uint32_t *p32;
+
+ p32 = (volatile uint32_t *)p;
+ /*
+	 * Assumes little-endian byte order;
+	 * atomic_testandset_32() uses only the low 5 bits of v.
+ */
+ if ((v & 0x20) != 0)
+ p32++;
+ return (atomic_testandset_32(p32, v));
+}
+
+static __inline uint32_t
+atomic_swap_32(volatile uint32_t *p, uint32_t v)
+{
+ uint32_t ret, exflag;
+
+ __asm __volatile(
+ "1: ldrex %[ret], [%[ptr]] \n"
+ " strex %[exf], %[val], [%[ptr]] \n"
+ " teq %[exf], #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : [ret] "=&r" (ret),
+ [exf] "=&r" (exflag)
+ : [val] "r" (v),
+ [ptr] "r" (p)
+ : "cc", "memory");
+ return (ret);
+}
+
+static __inline u_long
+atomic_swap_long(volatile u_long *p, u_long v)
+{
+
+ return (atomic_swap_32((volatile uint32_t *)p, v));
+}
+
+static __inline uint64_t
+atomic_swap_64(volatile uint64_t *p, uint64_t v)
+{
+ uint64_t ret;
+ uint32_t exflag;
+
+ __asm __volatile(
+ "1: ldrexd %Q[ret], %R[ret], [%[ptr]] \n"
+ " strexd %[exf], %Q[val], %R[val], [%[ptr]] \n"
+ " teq %[exf], #0 \n"
+ " it ne \n"
+ " bne 1b \n"
+ : [ret] "=&r" (ret),
+ [exf] "=&r" (exflag)
+ : [val] "r" (v),
+ [ptr] "r" (p)
+ : "cc", "memory");
+ return (ret);
+}
+
+#undef ATOMIC_ACQ_REL
+#undef ATOMIC_ACQ_REL_LONG
+
+static __inline void
+atomic_thread_fence_acq(void)
+{
+
+ dmb();
+}
+
+static __inline void
+atomic_thread_fence_rel(void)
+{
+
+ dmb();
+}
+
+static __inline void
+atomic_thread_fence_acq_rel(void)
+{
+
+ dmb();
+}
+
+static __inline void
+atomic_thread_fence_seq_cst(void)
+{
+
+ dmb();
+}
+
+#define atomic_add_ptr atomic_add_32
+#define atomic_add_acq_ptr atomic_add_acq_32
+#define atomic_add_rel_ptr atomic_add_rel_32
+#define atomic_subtract_ptr atomic_subtract_32
+#define atomic_subtract_acq_ptr atomic_subtract_acq_32
+#define atomic_subtract_rel_ptr atomic_subtract_rel_32
+#define atomic_clear_ptr atomic_clear_32
+#define atomic_clear_acq_ptr atomic_clear_acq_32
+#define atomic_clear_rel_ptr atomic_clear_rel_32
+#define atomic_set_ptr atomic_set_32
+#define atomic_set_acq_ptr atomic_set_acq_32
+#define atomic_set_rel_ptr atomic_set_rel_32
+#define atomic_fcmpset_ptr atomic_fcmpset_32
+#define atomic_fcmpset_acq_ptr atomic_fcmpset_acq_32
+#define atomic_fcmpset_rel_ptr atomic_fcmpset_rel_32
+#define atomic_cmpset_ptr atomic_cmpset_32
+#define atomic_cmpset_acq_ptr atomic_cmpset_acq_32
+#define atomic_cmpset_rel_ptr atomic_cmpset_rel_32
+#define atomic_fetchadd_ptr atomic_fetchadd_32
+#define atomic_readandclear_ptr atomic_readandclear_32
+#define atomic_load_acq_ptr atomic_load_acq_32
+#define atomic_store_rel_ptr atomic_store_rel_32
+#define atomic_swap_ptr atomic_swap_32
+#define atomic_testandset_ptr atomic_testandset_32
+#define atomic_testandclear_ptr atomic_testandclear_32
+
+#define atomic_add_int atomic_add_32
+#define atomic_add_acq_int atomic_add_acq_32
+#define atomic_add_rel_int atomic_add_rel_32
+#define atomic_subtract_int atomic_subtract_32
+#define atomic_subtract_acq_int atomic_subtract_acq_32
+#define atomic_subtract_rel_int atomic_subtract_rel_32
+#define atomic_clear_int atomic_clear_32
+#define atomic_clear_acq_int atomic_clear_acq_32
+#define atomic_clear_rel_int atomic_clear_rel_32
+#define atomic_set_int atomic_set_32
+#define atomic_set_acq_int atomic_set_acq_32
+#define atomic_set_rel_int atomic_set_rel_32
+#define atomic_fcmpset_int atomic_fcmpset_32
+#define atomic_fcmpset_acq_int atomic_fcmpset_acq_32
+#define atomic_fcmpset_rel_int atomic_fcmpset_rel_32
+#define atomic_cmpset_int atomic_cmpset_32
+#define atomic_cmpset_acq_int atomic_cmpset_acq_32
+#define atomic_cmpset_rel_int atomic_cmpset_rel_32
+#define atomic_fetchadd_int atomic_fetchadd_32
+#define atomic_readandclear_int atomic_readandclear_32
+#define atomic_load_acq_int atomic_load_acq_32
+#define atomic_store_rel_int atomic_store_rel_32
+#define atomic_swap_int atomic_swap_32
+
+#include <sys/_atomic_subword.h>
+
+#define atomic_set_short atomic_set_16
+#define atomic_clear_short atomic_clear_16
+
+#endif /* _MACHINE_ATOMIC_H_ */