diff options
Diffstat (limited to 'MdePkg/Library/BaseMemoryLibOptDxe/Arm')
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareGuid.S | 65 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareGuid.asm | 70 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S | 138 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm | 140 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S | 175 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm | 147 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/MemLibGuid.c | 165 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.S | 148 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.asm | 147 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMemGeneric.c | 142 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S | 89 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm | 96 |
12 files changed, 1522 insertions, 0 deletions
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareGuid.S b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareGuid.S new file mode 100644 index 0000000000000..abda83a1f18cf --- /dev/null +++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareGuid.S @@ -0,0 +1,65 @@ +// +// Copyright (c) 2016, Linaro Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of the Linaro nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//
+
+  .text
+  .thumb
+  .syntax unified
+  .align  5
+
+// InternalMemCompareGuid
+//   In:   r0 = address of the first 16-byte GUID
+//         r1 = address of the second 16-byte GUID, or 0 to test the first
+//              GUID for all-zero contents
+//   Out:  r0 = 1 when the four words match (or are all zero), 0 otherwise
+//   r4 and the return address are pushed on entry and restored by each pop;
+//   r2, r3, ip and lr are used as scratch.
+ASM_GLOBAL ASM_PFX(InternalMemCompareGuid)
+ASM_PFX(InternalMemCompareGuid):
+  push    {r4, lr}
+  ldr     r2, [r0]              // r2 = first GUID, word 0
+  ldr     r3, [r0, #4]          // r3 = first GUID, word 1
+  ldr     r4, [r0, #8]          // r4 = first GUID, word 2
+  ldr     r0, [r0, #12]         // r0 = first GUID, word 3 (pointer is dead now)
+  cbz     r1, 1f                // no second GUID: do the all-zero test
+  ldr     ip, [r1]
+  ldr     lr, [r1, #4]
+  cmp     r2, ip
+  it      eq
+  cmpeq.n r3, lr                // word 1 is only compared when word 0 matched
+  beq     0f
+  movs    r0, #0                // mismatch in word 0 or 1
+  pop     {r4, pc}
+
+0: ldr     r2, [r1, #8]
+  ldr     r3, [r1, #12]
+  cmp     r4, r2
+  it      eq
+  cmpeq.n r0, r3                // word 3 is only compared when word 2 matched
+  bne     2f                    // flags are NE, so label 2 stores 0
+  movs    r0, #1                // all four words matched
+  pop     {r4, pc}
+
+1: orrs    r2, r2, r3            // fold the four words of the first GUID together
+  orrs    r4, r4, r0
+  movs    r0, #1                // provisional result; flags are rewritten below
+  orrs    r2, r2, r4            // Z is set only if every word was zero
+2: it      ne
+  movne.n r0, #0                // non-zero word / mismatch => return 0
+  pop     {r4, pc}
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareGuid.asm b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareGuid.asm
new file mode 100644
index 0000000000000..0373404ea9d59
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareGuid.asm
@@ -0,0 +1,70 @@
+;
+; Copyright (c) 2016, Linaro Limited
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+; * Neither the name of the Linaro nor the
+; names of its contributors may be used to endorse or promote products
+; derived from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+; HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+
+  EXPORT  InternalMemCompareGuid
+  THUMB
+  AREA    CompareGuid, CODE, READONLY, CODEALIGN, ALIGN=5
+
+; InternalMemCompareGuid
+;   In:   r0 = address of the first 16-byte GUID
+;         r1 = address of the second 16-byte GUID, or 0 to test the first
+;              GUID for all-zero contents
+;   Out:  r0 = 1 when the four words match (or are all zero), 0 otherwise
+;   r4 and the return address are pushed on entry and restored by each pop;
+;   r2, r3, ip and lr are used as scratch.
+InternalMemCompareGuid
+  push    {r4, lr}
+  ldr     r2, [r0]              ; r2 = first GUID, word 0
+  ldr     r3, [r0, #4]          ; r3 = first GUID, word 1
+  ldr     r4, [r0, #8]          ; r4 = first GUID, word 2
+  ldr     r0, [r0, #12]         ; r0 = first GUID, word 3 (pointer is dead now)
+  cbz     r1, L1                ; no second GUID: do the all-zero test
+  ldr     ip, [r1]
+  ldr     lr, [r1, #4]
+  cmp     r2, ip
+  it      eq
+  cmpeq   r3, lr                ; word 1 is only compared when word 0 matched
+  beq     L0
+  movs    r0, #0                ; mismatch in word 0 or 1
+  pop     {r4, pc}
+
+L0
+  ldr     r2, [r1, #8]
+  ldr     r3, [r1, #12]
+  cmp     r4, r2
+  it      eq
+  cmpeq   r0, r3                ; word 3 is only compared when word 2 matched
+  bne     L2                    ; flags are NE, so L2 stores 0
+  movs    r0, #1                ; all four words matched
+  pop     {r4, pc}
+
+L1
+  orrs    r2, r2, r3            ; fold the four words of the first GUID together
+  orrs    r4, r4, r0
+  movs    r0, #1                ; provisional result; flags are rewritten below
+  orrs    r2, r2, r4            ; Z is set only if every word was zero
+
+L2
+  it      ne
+  movne   r0, #0                ; non-zero word / mismatch => return 0
+  pop     {r4, pc}
+
+  END
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S
new file mode 100644
index 0000000000000..763f54a8b8029
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S
@@ -0,0 +1,138 @@
+//
+// Copyright (c) 2013 - 2016, Linaro Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of the Linaro nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+// Parameters and result.
+#define src1 r0
+#define src2 r1
+#define limit r2
+#define result r0
+
+// Internal variables.
+#define data1 r3
+#define data2 r4
+#define limit_wd r5
+#define diff r6
+#define tmp1 r7
+#define tmp2 r12
+#define pos r8
+#define mask r14
+
+  .text
+  .thumb
+  .syntax unified
+  .align  5
+
+// InternalMemCompareMem
+//   In:   src1 (r0), src2 (r1) = buffers to compare, limit (r2) = byte count
+//   Out:  result (r0) = 0 when the first `limit` bytes are equal, otherwise a
+//         non-zero value derived from the first differing position
+//   Three paths: word-at-a-time when both pointers are word aligned, the same
+//   loop entered via .Lmutual_align when they share a non-zero misalignment,
+//   and a byte loop (.Lmisaligned4) when their low two address bits differ.
+//   NOTE(review): the byte loop loads before it tests the count, so limit == 0
+//   is presumably filtered out by the caller -- confirm.
+ASM_GLOBAL ASM_PFX(InternalMemCompareMem)
+ASM_PFX(InternalMemCompareMem):
+  push    {r4-r8, lr}
+  eor     tmp1, src1, src2
+  tst     tmp1, #3
+  bne     .Lmisaligned4
+  ands    tmp1, src1, #3
+  bne     .Lmutual_align
+  add     limit_wd, limit, #3
+  nop.w
+  lsr     limit_wd, limit_wd, #2      // Word count, rounded up.
+
+  // Start of performance-critical section -- one 32B cache line.
+.Lloop_aligned:
+  ldr     data1, [src1], #4
+  ldr     data2, [src2], #4
+.Lstart_realigned:
+  subs    limit_wd, limit_wd, #1
+  eor     diff, data1, data2          // Non-zero if differences found.
+  cbnz    diff, 0f
+  bne     .Lloop_aligned              // Flags still come from the subs above.
+  // End of performance-critical section -- one 32B cache line.
+
+  // Not reached the limit, must have found a diff.
+0: cbnz    limit_wd, .Lnot_limit
+
+  // Limit % 4 == 0 => all bytes significant.
+  ands    limit, limit, #3
+  beq     .Lnot_limit
+
+  lsl     limit, limit, #3            // Bytes -> bits.
+  mov     mask, #~0
+  lsl     mask, mask, limit           // Ones over the bytes beyond the limit.
+  bic     data1, data1, mask
+  bic     data2, data2, mask
+
+  orr     diff, diff, mask
+
+.Lnot_limit:
+  rev     diff, diff
+  rev     data1, data1
+  rev     data2, data2
+
+  // The MS-non-zero bit of DIFF marks either the first bit
+  // that is different, or the end of the significant data.
+  // Shifting left now will bring the critical information into the
+  // top bits.
+  clz     pos, diff
+  lsl     data1, data1, pos
+  lsl     data2, data2, pos
+
+  // But we need to zero-extend (char is unsigned) the value and then
+  // perform a signed 32-bit subtraction.
+  // Only the top four bits of each word take part, so the result carries the
+  // ordering of the first difference rather than a literal byte difference.
+  lsr     data1, data1, #28
+  sub     result, data1, data2, lsr #28
+  pop     {r4-r8, pc}
+
+.Lmutual_align:
+  // Sources are mutually aligned, but are not currently at an
+  // alignment boundary. Round down the addresses and then mask off
+  // the bytes that precede the start point.
+  bic     src1, src1, #3
+  bic     src2, src2, #3
+  add     limit, limit, tmp1          // Adjust the limit for the extra.
+  lsl     tmp1, tmp1, #3              // Bytes beyond alignment -> bits.
+  ldr     data1, [src1], #4
+  rsb     tmp1, tmp1, #32             // 32 - (bits beyond alignment).
+  ldr     data2, [src2], #4
+  mov     tmp2, #~0
+
+  // Little-endian. Early bytes are at LSB.
+  lsr     tmp2, tmp2, tmp1            // Ones over the bytes before the start.
+  add     limit_wd, limit, #3
+  orr     data1, data1, tmp2          // Force the leading bytes equal in both.
+  orr     data2, data2, tmp2
+  lsr     limit_wd, limit_wd, #2
+  b       .Lstart_realigned
+
+.Lmisaligned4:
+  sub     limit, limit, #1
+1:
+  // Perhaps we can do better than this.
+  ldrb    data1, [src1], #1
+  ldrb    data2, [src2], #1
+  subs    limit, limit, #1
+  it      cs
+  cmpcs.n data1, data2                // Skipped once the count has run out.
+  beq     1b
+  sub     result, data1, data2
+  pop     {r4-r8, pc}
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm
new file mode 100644
index 0000000000000..4c72dcf38597a
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm
@@ -0,0 +1,140 @@
+;
+; Copyright (c) 2013 - 2016, Linaro Limited
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+; * Neither the name of the Linaro nor the
+; names of its contributors may be used to endorse or promote products
+; derived from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+
+; Parameters and result.
+#define src1 r0
+#define src2 r1
+#define limit r2
+#define result r0
+
+; Internal variables.
+#define data1 r3
+#define data2 r4
+#define limit_wd r5
+#define diff r6
+#define tmp1 r7
+#define tmp2 r12
+#define pos r8
+#define mask r14
+
+  EXPORT  InternalMemCompareMem
+  THUMB
+  AREA    CompareMem, CODE, READONLY
+
+; InternalMemCompareMem
+;   In:   src1 (r0), src2 (r1) = buffers to compare, limit (r2) = byte count
+;   Out:  result (r0) = 0 when the first `limit` bytes are equal, otherwise a
+;         non-zero value derived from the first differing position
+;   Three paths: word-at-a-time when both pointers are word aligned, the same
+;   loop entered via Lmutual_align when they share a non-zero misalignment,
+;   and a byte loop (Lmisaligned4) when their low two address bits differ.
+;   NOTE(review): the byte loop loads before it tests the count, so limit == 0
+;   is presumably filtered out by the caller -- confirm.
+InternalMemCompareMem
+  push    {r4-r8, lr}
+  eor     tmp1, src1, src2
+  tst     tmp1, #3
+  bne     Lmisaligned4
+  ands    tmp1, src1, #3
+  bne     Lmutual_align
+  add     limit_wd, limit, #3
+  nop.w
+  lsr     limit_wd, limit_wd, #2      ; Word count, rounded up.
+
+  ; Start of performance-critical section -- one 32B cache line.
+Lloop_aligned
+  ldr     data1, [src1], #4
+  ldr     data2, [src2], #4
+Lstart_realigned
+  subs    limit_wd, limit_wd, #1
+  eor     diff, data1, data2          ; Non-zero if differences found.
+  cbnz    diff, L0
+  bne     Lloop_aligned               ; Flags still come from the subs above.
+  ; End of performance-critical section -- one 32B cache line.
+
+  ; Not reached the limit, must have found a diff.
+L0
+  cbnz    limit_wd, Lnot_limit
+
+  ; Limit % 4 == 0 => all bytes significant.
+  ands    limit, limit, #3
+  beq     Lnot_limit
+
+  lsl     limit, limit, #3            ; Bytes -> bits.
+  mov     mask, #~0
+  lsl     mask, mask, limit           ; Ones over the bytes beyond the limit.
+  bic     data1, data1, mask
+  bic     data2, data2, mask
+
+  orr     diff, diff, mask
+
+Lnot_limit
+  rev     diff, diff
+  rev     data1, data1
+  rev     data2, data2
+
+  ; The MS-non-zero bit of DIFF marks either the first bit
+  ; that is different, or the end of the significant data.
+  ; Shifting left now will bring the critical information into the
+  ; top bits.
+  clz     pos, diff
+  lsl     data1, data1, pos
+  lsl     data2, data2, pos
+
+  ; But we need to zero-extend (char is unsigned) the value and then
+  ; perform a signed 32-bit subtraction.
+  lsr     data1, data1, #28
+  sub     result, data1, data2, lsr #28
+  pop     {r4-r8, pc}
+
+Lmutual_align
+  ; Sources are mutually aligned, but are not currently at an
+  ; alignment boundary. Round down the addresses and then mask off
+  ; the bytes that precede the start point.
+  bic     src1, src1, #3
+  bic     src2, src2, #3
+  add     limit, limit, tmp1          ; Adjust the limit for the extra.
+  lsl     tmp1, tmp1, #3              ; Bytes beyond alignment -> bits.
+  ldr     data1, [src1], #4
+  ; A register-specified shift takes its amount from the low byte of the
+  ; register, so the count must be the positive value 32 - bits (8, 16 or 24);
+  ; a negated count would shift everything out and leave the mask empty.
+  rsb     tmp1, tmp1, #32             ; 32 - (bits beyond alignment).
+  ldr     data2, [src2], #4
+  mov     tmp2, #~0
+
+  ; Little-endian. Early bytes are at LSB.
+  lsr     tmp2, tmp2, tmp1            ; Ones over the bytes before the start.
+  add     limit_wd, limit, #3
+  orr     data1, data1, tmp2          ; Force the leading bytes equal in both.
+  orr     data2, data2, tmp2
+  lsr     limit_wd, limit_wd, #2
+  b       Lstart_realigned
+
+Lmisaligned4
+  sub     limit, limit, #1
+L1
+  ; Perhaps we can do better than this.
+  ldrb    data1, [src1], #1
+  ldrb    data2, [src2], #1
+  subs    limit, limit, #1
+  it      cs
+  cmpcs   data1, data2                ; Skipped once the count has run out.
+  beq     L1
+  sub     result, data1, data2
+  pop     {r4-r8, pc}
+
+  END
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S
new file mode 100644
index 0000000000000..32c104999d224
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S
@@ -0,0 +1,175 @@
+#------------------------------------------------------------------------------
+#
+# CopyMem() worker for ARM
+#
+# This file started out as C code that did 64 bit moves if the buffer was
+# 32-bit aligned, else it does a byte copy. It also does a byte copy for
+# any trailing bytes. It was updated to do 32-byte copies using stm/ldm.
+#
+# Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
+# Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
+# This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution. The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+#------------------------------------------------------------------------------
+
+  .text
+  .thumb
+  .syntax unified
+
+/**
+  Copy Length bytes from Source to Destination. Overlap is OK.
+
+  This implementation copies forwards, except when the destination starts at
+  or inside the source range, in which case it copies backwards from the end.
+  It moves 32 bytes per iteration with ldm/stm while the "optimized" flag in
+  r0 is set (both pointers 16-byte aligned and at least 32 bytes remaining)
+  and falls back to a byte loop otherwise.
+
+  NOTE(review): the backward byte loop decrements the count before testing
+  it, so Length == 0 with Destination == Source is presumably filtered out by
+  the caller -- confirm.
+
+  @param Destination Target of copy
+  @param Source Place to copy from
+  @param Length Number of bytes to copy
+
+  @return Destination
+
+
+VOID *
+EFIAPI
+InternalMemCopyMem (
+  OUT VOID *DestinationBuffer,
+  IN CONST VOID *SourceBuffer,
+  IN UINTN Length
+  )
+**/
+ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
+ASM_PFX(InternalMemCopyMem):
+  push    {r4-r11, lr}
+  // Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length)
+  mov     r11, r0
+  mov     r10, r0
+  mov     r12, r2
+  mov     r14, r1
+
+  cmp     r11, r1
+  // If (dest < source)
+  bcc     memcopy_check_optim_default
+
+  // If (source + length < dest), evaluated as length < dest - source
+  rsb     r3, r1, r11
+  cmp     r12, r3
+  bcc     memcopy_check_optim_default
+  b       memcopy_check_optim_overlap
+
+memcopy_check_optim_default:
+  // Check if we can use an optimized path ((length >= 32) && destination 16-byte aligned && source 16-byte aligned) for the memcopy (optimized path if r0 == 1)
+  tst     r0, #0xF
+  it      ne
+  movne.n r0, #0
+  bne     memcopy_default
+  tst     r1, #0xF
+  it      ne
+  movne.n r3, #0
+  it      eq
+  moveq.n r3, #1
+  cmp     r2, #31
+  it      ls
+  movls.n r0, #0
+  bls     memcopy_default
+  and     r0, r3, #1
+  b       memcopy_default
+
+memcopy_check_optim_overlap:
+  // r10 = dest_end, r14 = source_end
+  add     r10, r11, r12
+  add     r14, r12, r1
+
+  // Are we in the optimized case ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)
+  cmp     r2, #31
+  it      ls
+  movls.n r0, #0
+  it      hi
+  movhi.n r0, #1
+  tst     r10, #0xF
+  it      ne
+  movne.n r0, #0
+  tst     r14, #0xF
+  it      ne
+  movne.n r0, #0
+  b       memcopy_overlapped
+
+memcopy_overlapped_non_optim:
+  // We read 1 byte from the end of the source buffer
+  sub     r3, r14, #1
+  sub     r12, r12, #1
+  ldrb    r3, [r3, #0]
+  sub     r2, r10, #1
+  cmp     r12, #0
+  // We write 1 byte at the end of the dest buffer
+  sub     r10, r10, #1
+  sub     r14, r14, #1
+  strb    r3, [r2, #0]
+  bne     memcopy_overlapped_non_optim    // flags from the cmp r12, #0 above
+  b       memcopy_end
+
+// r10 = dest_end, r14 = source_end
+memcopy_overlapped:
+  // Are we in the optimized case ?
+  cmp     r0, #0
+  beq     memcopy_overlapped_non_optim
+
+  // Optimized Overlapped - Read 32 bytes
+  sub     r14, r14, #32
+  sub     r12, r12, #32
+  cmp     r12, #31
+  ldmia   r14, {r2-r9}
+
+  // If length is less than 32 then disable optim
+  it      ls
+  movls.n r0, #0
+
+  cmp     r12, #0
+
+  // Optimized Overlapped - Write 32 bytes
+  sub     r10, r10, #32
+  stmia   r10, {r2-r9}
+
+  // while (length != 0)
+  bne     memcopy_overlapped
+  b       memcopy_end
+
+memcopy_default_non_optim:
+  // Byte copy
+  ldrb    r3, [r14], #1
+  sub     r12, r12, #1
+  strb    r3, [r10], #1
+
+memcopy_default:
+  cmp     r12, #0
+  beq     memcopy_end
+
+// r10 = dest, r14 = source
+memcopy_default_loop:
+  cmp     r0, #0
+  beq     memcopy_default_non_optim
+
+  // Optimized memcopy - Read 32 Bytes
+  sub     r12, r12, #32
+  cmp     r12, #31
+  ldmia   r14!, {r2-r9}
+
+  // If length is less than 32 then disable optim
+  it      ls
+  movls.n r0, #0
+
+  cmp     r12, #0
+
+  // Optimized memcopy - Write 32 Bytes
+  stmia   r10!, {r2-r9}
+
+  // while (length != 0)
+  bne     memcopy_default_loop
+
+memcopy_end:
+  mov     r0, r11                 // return the original destination pointer
+  pop     {r4-r11, pc}
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm
new file mode 100644
index 0000000000000..f5447405fbf1b
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm
@@ -0,0 +1,147 @@
+;------------------------------------------------------------------------------
+;
+; CopyMem() worker for ARM
+;
+; This file started out as C code that did 64 bit moves if the buffer was
+; 32-bit aligned, else it does a byte copy. It also does a byte copy for
+; any trailing bytes. It was updated to do 32-byte copies using stm/ldm.
+;
+; Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
+; Copyright (c) 2016, Linaro Ltd.
All rights reserved.<BR>
+; This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+;------------------------------------------------------------------------------
+
+  EXPORT  InternalMemCopyMem
+  AREA    CopyMem, CODE, READONLY
+  THUMB
+
+; InternalMemCopyMem
+;   In:   r0 = destination, r1 = source, r2 = length in bytes
+;   Out:  r0 = destination
+;   Copies forwards, except when the destination starts at or inside the
+;   source range, in which case it copies backwards from the end. Moves
+;   32 bytes per iteration with ldm/stm while the "optimized" flag in r0 is
+;   set (both pointers 16-byte aligned and at least 32 bytes remaining) and
+;   falls back to a byte loop otherwise.
+;   NOTE(review): the backward byte loop decrements the count before testing
+;   it, so length == 0 with destination == source is presumably filtered out
+;   by the caller -- confirm.
+InternalMemCopyMem
+  stmfd   sp!, {r4-r11, lr}
+  // Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length)
+  mov     r11, r0
+  mov     r10, r0
+  mov     r12, r2
+  mov     r14, r1
+
+memcopy_check_overlapped
+  cmp     r11, r1
+  // If (dest < source)
+  bcc     memcopy_check_optim_default
+
+  // If (source + length < dest), evaluated as length < dest - source
+  rsb     r3, r1, r11
+  cmp     r12, r3
+  bcc     memcopy_check_optim_default
+  b       memcopy_check_optim_overlap
+
+memcopy_check_optim_default
+  // Check if we can use an optimized path ((length >= 32) && destination 16-byte aligned && source 16-byte aligned) for the memcopy (optimized path if r0 == 1)
+  tst     r0, #0xF
+  movne   r0, #0
+  bne     memcopy_default
+  tst     r1, #0xF
+  movne   r3, #0
+  moveq   r3, #1
+  cmp     r2, #31
+  movls   r0, #0
+  andhi   r0, r3, #1
+  b       memcopy_default
+
+memcopy_check_optim_overlap
+  // r10 = dest_end, r14 = source_end
+  add     r10, r11, r12
+  add     r14, r12, r1
+
+  // Are we in the optimized case ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)
+  cmp     r2, #31
+  movls   r0, #0
+  movhi   r0, #1
+  tst     r10, #0xF
+  movne   r0, #0
+  tst     r14, #0xF
+  movne   r0, #0
+  b       memcopy_overlapped
+
+memcopy_overlapped_non_optim
+  // We read 1 byte from the end of the source buffer
+  sub     r3, r14, #1
+  sub     r12, r12, #1
+  ldrb    r3, [r3, #0]
+  sub     r2, r10, #1
+  cmp     r12, #0
+  // We write 1 byte at the end of the dest buffer
+  sub     r10, r10, #1
+  sub     r14, r14, #1
+  strb    r3, [r2, #0]
+  bne     memcopy_overlapped_non_optim
+  b       memcopy_end
+
+// r10 = dest_end, r14 = source_end
+memcopy_overlapped
+  // Are we in the optimized case ?
+  cmp     r0, #0
+  beq     memcopy_overlapped_non_optim
+
+  // Optimized Overlapped - Read 32 bytes
+  sub     r14, r14, #32
+  sub     r12, r12, #32
+  cmp     r12, #31
+  ldmia   r14, {r2-r9}
+
+  // If length is less than 32 then disable optim
+  movls   r0, #0
+
+  cmp     r12, #0
+
+  // Optimized Overlapped - Write 32 bytes
+  sub     r10, r10, #32
+  stmia   r10, {r2-r9}
+
+  // while (length != 0)
+  bne     memcopy_overlapped
+  b       memcopy_end
+
+memcopy_default_non_optim
+  // Byte copy
+  ldrb    r3, [r14], #1
+  sub     r12, r12, #1
+  strb    r3, [r10], #1
+
+memcopy_default
+  cmp     r12, #0
+  beq     memcopy_end
+
+// r10 = dest, r14 = source
+memcopy_default_loop
+  cmp     r0, #0
+  beq     memcopy_default_non_optim
+
+  // Optimized memcopy - Read 32 Bytes
+  sub     r12, r12, #32
+  cmp     r12, #31
+  ldmia   r14!, {r2-r9}
+
+  // If length is less than 32 then disable optim
+  movls   r0, #0
+
+  cmp     r12, #0
+
+  // Optimized memcopy - Write 32 Bytes
+  stmia   r10!, {r2-r9}
+
+  // while (length != 0)
+  bne     memcopy_default_loop
+
+memcopy_end
+  mov     r0, r11
+  ldmfd   sp!, {r4-r11, pc}
+
+  END
+
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/MemLibGuid.c b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/MemLibGuid.c
new file mode 100644
index 0000000000000..b2942f31c3f5c
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/MemLibGuid.c
@@ -0,0 +1,165 @@
+/** @file
+  Implementation of GUID functions for ARM and AARCH64
+
+  Copyright (c) 2006 - 2016, Intel Corporation. All rights reserved.<BR>
+  Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
+  This program and the accompanying materials
+  are licensed and made available under the terms and conditions of the BSD License
+  which accompanies this distribution. The full text of the license may be found at
+  http://opensource.org/licenses/bsd-license.php.
+
+  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+  WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+
+**/
+
+#include "MemLibInternals.h"
+
+/**
+  Internal function to compare two GUIDs.
+
+  This function compares Guid1 to Guid2. If the GUIDs are identical then TRUE is returned.
+  If there are any bit differences in the two GUIDs, then FALSE is returned.
+
+  IsZeroGuid() in this file passes NULL as Guid2 to test Guid1 against the
+  zero GUID, so the implementation has to accept a NULL Guid2.
+
+  @param Guid1 A pointer to a 128 bit GUID.
+  @param Guid2 A pointer to a 128 bit GUID, or NULL to compare Guid1 with
+               the zero GUID.
+
+  @retval TRUE Guid1 and Guid2 are identical.
+  @retval FALSE Guid1 and Guid2 are not identical.
+
+**/
+BOOLEAN
+EFIAPI
+InternalMemCompareGuid (
+  IN CONST GUID *Guid1,
+  IN CONST GUID *Guid2
+  );
+
+/**
+  Copies a source GUID to a destination GUID.
+
+  This function copies the contents of the 128-bit GUID specified by SourceGuid to
+  DestinationGuid, and returns DestinationGuid.
+
+  If DestinationGuid is NULL, then ASSERT().
+  If SourceGuid is NULL, then ASSERT().
+
+  @param DestinationGuid The pointer to the destination GUID.
+  @param SourceGuid The pointer to the source GUID.
+
+  @return DestinationGuid.
+
+**/
+GUID *
+EFIAPI
+CopyGuid (
+  OUT GUID *DestinationGuid,
+  IN CONST GUID *SourceGuid
+  )
+{
+  ASSERT (DestinationGuid != NULL);
+  ASSERT (SourceGuid != NULL);
+
+  return InternalMemCopyMem (DestinationGuid, SourceGuid, sizeof (GUID));
+}
+
+/**
+  Compares two GUIDs.
+
+  This function compares Guid1 to Guid2. If the GUIDs are identical then TRUE is returned.
+  If there are any bit differences in the two GUIDs, then FALSE is returned.
+
+  If Guid1 is NULL, then ASSERT().
+  If Guid2 is NULL, then ASSERT().
+
+  @param Guid1 A pointer to a 128 bit GUID.
+  @param Guid2 A pointer to a 128 bit GUID.
+
+  @retval TRUE Guid1 and Guid2 are identical.
+  @retval FALSE Guid1 and Guid2 are not identical.
+
+**/
+BOOLEAN
+EFIAPI
+CompareGuid (
+  IN CONST GUID *Guid1,
+  IN CONST GUID *Guid2
+  )
+{
+  ASSERT (Guid1 != NULL);
+  ASSERT (Guid2 != NULL);
+
+  return InternalMemCompareGuid (Guid1, Guid2);
+}
+
+/**
+  Scans a target buffer for a GUID, and returns a pointer to the matching GUID
+  in the target buffer.
+
+  This function searches the target buffer specified by Buffer and Length from
+  the lowest address to the highest address at 128-bit increments for the 128-bit
+  GUID value that matches Guid. If a match is found, then a pointer to the matching
+  GUID in the target buffer is returned. If no match is found, then NULL is returned.
+  If Length is 0, then NULL is returned.
+
+  If Length > 0 and Buffer is NULL, then ASSERT().
+  If Buffer is not aligned on a 32-bit boundary, then ASSERT().
+  If Length is not aligned on a 128-bit boundary, then ASSERT().
+  If Length is greater than (MAX_ADDRESS - Buffer + 1), then ASSERT().
+
+  @param Buffer The pointer to the target buffer to scan.
+  @param Length The number of bytes in Buffer to scan.
+  @param Guid The value to search for in the target buffer.
+
+  @return A pointer to the matching Guid in the target buffer or NULL otherwise.
+
+**/
+VOID *
+EFIAPI
+ScanGuid (
+  IN CONST VOID *Buffer,
+  IN UINTN Length,
+  IN CONST GUID *Guid
+  )
+{
+  CONST GUID *GuidPtr;
+
+  //
+  // NOTE(review): the header documents an ASSERT for (Length > 0 && Buffer == NULL),
+  // but none of the checks below tests for it -- confirm which one is intended.
+  //
+  ASSERT (((UINTN)Buffer & (sizeof (Guid->Data1) - 1)) == 0);
+  ASSERT (Length <= (MAX_ADDRESS - (UINTN)Buffer + 1));
+  ASSERT ((Length & (sizeof (*GuidPtr) - 1)) == 0);
+
+  //
+  // Buffer is reused as the end-of-range sentinel: one past the last whole GUID.
+  //
+  GuidPtr = (GUID*)Buffer;
+  Buffer = GuidPtr + Length / sizeof (*GuidPtr);
+  while (GuidPtr < (CONST GUID*)Buffer) {
+    if (InternalMemCompareGuid (GuidPtr, Guid)) {
+      return (VOID*)GuidPtr;
+    }
+    GuidPtr++;
+  }
+  return NULL;
+}
+
+/**
+  Checks if the given GUID is a zero GUID.
+
+  This function checks whether the given GUID is a zero GUID. If the GUID is
+  identical to a zero GUID then TRUE is returned. Otherwise, FALSE is returned.
+
+  If Guid is NULL, then ASSERT().
+
+  @param Guid The pointer to a 128 bit GUID.
+
+  @retval TRUE Guid is a zero GUID.
+  @retval FALSE Guid is not a zero GUID.
+
+**/
+BOOLEAN
+EFIAPI
+IsZeroGuid (
+  IN CONST GUID *Guid
+  )
+{
+  ASSERT (Guid != NULL);
+
+  //
+  // A NULL second argument selects the compare-against-zero path.
+  //
+  return InternalMemCompareGuid (Guid, NULL);
+}
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.S
new file mode 100644
index 0000000000000..5502f286537e9
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.S
@@ -0,0 +1,148 @@
+// Copyright (c) 2010-2011, Linaro Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * Neither the name of Linaro Limited nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+//
+// Written by Dave Gilbert <david.gilbert@linaro.org>
+//
+// This memchr routine is optimised on a Cortex-A9 and should work on
+// all ARMv7 processors. It has a fast path for short sizes, and has
+// an optimised path for large data sets; the worst case is finding the
+// match early in a large data set.
+//
+
+
+// 2011-02-07 david.gilbert@linaro.org
+// Extracted from local git a5b438d861
+// 2011-07-14 david.gilbert@linaro.org
+// Import endianness fix from local git ea786f1b
+// 2011-12-07 david.gilbert@linaro.org
+// Removed unneeded cbz from align loop
+
+// this lets us check a flag in a 00/ff byte easily in either endianness
+#define CHARTSTMASK(c) 1<<(c*8)
+
+  .text
+  .thumb
+  .syntax unified
+
+  .type ASM_PFX(InternalMemScanMem8), %function
+ASM_GLOBAL ASM_PFX(InternalMemScanMem8)
+ASM_PFX(InternalMemScanMem8):
+  // r0 = start of memory to scan
+  // r1 = length
+  // r2 = character to look for
+  // returns r0 = pointer to character or NULL if not found
+  // r3 is scratch throughout; r4-r7 are pushed only around the 8-byte loop
+  uxtb    r2, r2                // Don't think we can trust the caller to actually pass a char
+
+  // NOTE(review): blt is a signed test, so a length with the top bit set also
+  // takes the byte-at-a-time path below -- confirm that is acceptable.
+  cmp     r1, #16               // If it's short don't bother with anything clever
+  blt     20f
+
+  tst     r0, #7                // If it's already aligned skip the next bit
+  beq     10f
+
+  // Work up to an aligned point
+5:
+  ldrb    r3, [r0],#1
+  subs    r1, r1, #1
+  cmp     r3, r2
+  beq     50f                   // If it matches exit found
+  tst     r0, #7
+  bne     5b                    // If not aligned yet then do next byte
+
+10:
+  // At this point, we are aligned, we know we have at least 8 bytes to work with
+  push    {r4-r7}
+  orr     r2, r2, r2, lsl #8    // expand the match word across to all bytes
+  orr     r2, r2, r2, lsl #16
+  bic     r4, r1, #7            // Byte count covered by whole double words
+  mvns    r7, #0                // all F's
+  movs    r3, #0
+
+15:
+  ldmia   r0!, {r5,r6}
+  subs    r4, r4, #8
+  eor     r5, r5, r2            // Get it so that r5,r6 have 00's where the bytes match the target
+  eor     r6, r6, r2
+  uadd8   r5, r5, r7            // Parallel add 0xff - sets the GE bits for anything that wasn't 0
+  sel     r5, r3, r7            // bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
+  uadd8   r6, r6, r7            // Parallel add 0xff - sets the GE bits for anything that wasn't 0
+  sel     r6, r5, r7            // chained....bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
+  cbnz    r6, 60f
+  bne     15b                   // (Flags from the subs above) If not run out of bytes then go around again
+
+  pop     {r4-r7}
+  and     r2, r2, #0xff         // Get r2 back to a single character from the expansion above
+  and     r1, r1, #7            // Leave the count remaining as the number after the double words have been done
+
+20:
+  cbz     r1, 40f               // 0 length or hit the end already then not found
+
+21: // Post aligned section, or just a short call
+  ldrb    r3, [r0], #1
+  subs    r1, r1, #1
+  eor     r3, r3, r2            // r3 = 0 if match - doesn't break flags from sub
+  cbz     r3, 50f
+  bne     21b                   // on r1 flags
+
+40:
+  movs    r0, #0                // not found
+  bx      lr
+
+50:
+  subs    r0, r0, #1            // found: undo the post-increment of the load
+  bx      lr
+
+60: // We're here because the fast path found a hit - now we have to track down exactly which word it was
+  // r0 points to the start of the double word after the one that was tested
+  // r5 has the 00/ff pattern for the first word, r6 has the chained value
+  subs    r0, r0, #3
+  cmp     r5, #0
+  it      eq
+  moveq.n r5, r6                // the end is in the 2nd word
+  it      ne
+  subne.n r0, r0, #4            // or 2nd byte of 1st word
+
+  // r0 currently points to the 2nd byte of the word containing the hit;
+  // label 61 subtracts one again, so r0 is kept one past the candidate byte
+  tst     r5, #CHARTSTMASK(0)   // 1st character
+  bne     61f
+  adds    r0, r0, #1
+  tst     r5, #CHARTSTMASK(1)   // 2nd character
+  bne     61f
+  adds    r0, r0 ,#1
+  tst     r5, #(3 << 15)        // 2nd & 3rd character (2nd is known to be 00 here)
+  // If not the 3rd must be the last one
+  it      eq
+  addeq.n r0, r0, #1
+
+61:
+  pop     {r4-r7}
+  subs    r0, r0, #1
+  bx      lr
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.asm b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.asm
new file mode 100644
index 0000000000000..bb489f14cf7ee
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.asm
@@ -0,0 +1,147 @@
+; Copyright (c) 2010-2011, Linaro Limited
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; * Neither the name of Linaro Limited nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +; HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +; + +; +; Written by Dave Gilbert <david.gilbert@linaro.org> +; +; This memchr routine is optimised on a Cortex-A9 and should work on +; all ARMv7 processors. It has a fast path for short sizes, and has +; an optimised path for large data sets; the worst case is finding the +; match early in a large data set. +; + + +; 2011-02-07 david.gilbert@linaro.org +; Extracted from local git a5b438d861 +; 2011-07-14 david.gilbert@linaro.org +; Import endianness fix from local git ea786f1b +; 2011-12-07 david.gilbert@linaro.org +; Removed unneeded cbz from align loop + +; this lets us check a flag in a 00/ff byte easily in either endianness +#define CHARTSTMASK(c) 1<<(c*8) + + EXPORT InternalMemScanMem8 + AREA ScanMem, CODE, READONLY + THUMB + +InternalMemScanMem8 + ; r0 = start of memory to scan + ; r1 = length + ; r2 = character to look for + ; returns r0 = pointer to character or NULL if not found + uxtb r2, r2 ; Don't think we can trust the caller to actually pass a char + + cmp r1, #16 ; If it's short don't bother with anything clever + blt L20 ; signed compare: a length of 0x80000000 or more also takes the byte-at-a-time path + + tst r0, #7 ; If it's already aligned skip the next bit + beq L10 + + ; Work up to an aligned point +L5 + ldrb r3, [r0],#1 + subs r1, r1, #1 + cmp r3, r2 + beq L50 ; If it matches exit found + tst r0, #7 + bne L5 ; If not aligned yet then do next byte + +L10 + ; At this point, we are aligned, we know we have at least 8 bytes to work with + push {r4-r7} + orr r2, r2, r2, lsl #8 ; expand the match word 
across to all bytes + orr r2, r2, r2, lsl #16 + bic r4, r1, #7 ; Byte count rounded down to a whole number of double words + mvns r7, #0 ; all F's + movs r3, #0 + +L15 + ldmia r0!, {r5,r6} + subs r4, r4, #8 + eor r5, r5, r2 ; Get it so that r5,r6 have 00's where the bytes match the target + eor r6, r6, r2 + uadd8 r5, r5, r7 ; Parallel add 0xff - sets the GE bits for anything that wasn't 0 + sel r5, r3, r7 ; bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION + uadd8 r6, r6, r7 ; Parallel add 0xff - sets the GE bits for anything that wasn't 0 + sel r6, r5, r7 ; chained....bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION + cbnz r6, L60 ; a non-zero chained value means one of these 8 bytes matched + bne L15 ; (Flags from the subs above) If not run out of bytes then go around again + + pop {r4-r7} + and r2, r2, #0xff ; Get r2 back to a single character from the expansion above + and r1, r1, #7 ; Leave the count remaining as the number after the double words have been done + +L20 + cbz r1, L40 ; 0 length or hit the end already then not found + +L21 ; Post aligned section, or just a short call + ldrb r3, [r0], #1 + subs r1, r1, #1 + eor r3, r3, r2 ; r3 = 0 if match - doesn't break flags from sub + cbz r3, L50 + bne L21 ; on r1 flags + +L40 + movs r0, #0 ; not found + bx lr + +L50 + subs r0, r0, #1 ; found + bx lr + +L60 ; We're here because the fast path found a hit - now we have to track down exactly which word it was + ; r0 points to the start of the double word after the one that was tested + ; r5 has the 00/ff pattern for the first word, r6 has the chained value + cmp r5, #0 + itte eq + moveq r5, r6 ; the hit is in the 2nd word + subeq r0, r0, #3 ; Points to 2nd byte of 2nd word + subne r0, r0, #7 ; or 2nd byte of 1st word + + ; r0 currently points to the 2nd byte of the word containing the hit (L61 subtracts 1 again) + tst r5, #CHARTSTMASK(0) ; 1st character + bne L61 + adds r0, r0, #1 + tst r5, #CHARTSTMASK(1) ; 2nd character + ittt eq + addeq r0, r0 ,#1 + tsteq r5, #(3 << 15) ; 2nd & 3rd character + ; If not the 3rd must be the last 
one + addeq r0, r0, #1 + +L61 + pop {r4-r7} ; restore the registers pushed at L10 + subs r0, r0, #1 ; step back onto the matching byte + bx lr + + END + diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMemGeneric.c b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMemGeneric.c new file mode 100644 index 0000000000000..e48db7278aa00 --- /dev/null +++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMemGeneric.c @@ -0,0 +1,142 @@ +/** @file + Architecture Independent Base Memory Library Implementation. + + The following BaseMemoryLib instances contain the same copy of this file: + BaseMemoryLib + PeiMemoryLib + UefiMemoryLib + + Copyright (c) 2006 - 2016, Intel Corporation. All rights reserved.<BR> + This program and the accompanying materials + are licensed and made available under the terms and conditions of the BSD License + which accompanies this distribution. The full text of the license may be found at + http://opensource.org/licenses/bsd-license.php. + + THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, + WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. + +**/ + +#include "../MemLibInternals.h" + +/** + Scans a target buffer for a 16-bit value, and returns a pointer to the + matching 16-bit value in the target buffer. + + @param Buffer The pointer to the target buffer to scan. + @param Length The count of 16-bit values to scan. Must be non-zero. + @param Value The value to search for in the target buffer. + + @return The pointer to the first occurrence, or NULL if not found. + +**/ +CONST VOID * +EFIAPI +InternalMemScanMem16 ( + IN CONST VOID *Buffer, + IN UINTN Length, + IN UINT16 Value + ) +{ + CONST UINT16 *Pointer; + + Pointer = (CONST UINT16*)Buffer; + do { + if (*Pointer == Value) { + return Pointer; + } + ++Pointer; + } while (--Length != 0); /* relies on Length being non-zero on entry, as documented above */ + return NULL; +} + +/** + Scans a target buffer for a 32-bit value, and returns a pointer to the + matching 32-bit value in the target buffer. + + @param Buffer The pointer to the target buffer to scan. 
+ @param Length The count of 32-bit values to scan. Must be non-zero. + @param Value The value to search for in the target buffer. + + @return The pointer to the first occurrence, or NULL if not found. + +**/ +CONST VOID * +EFIAPI +InternalMemScanMem32 ( + IN CONST VOID *Buffer, + IN UINTN Length, + IN UINT32 Value + ) +{ + CONST UINT32 *Pointer; + + Pointer = (CONST UINT32*)Buffer; + do { + if (*Pointer == Value) { + return Pointer; + } + ++Pointer; + } while (--Length != 0); /* relies on Length being non-zero on entry, as documented above */ + return NULL; +} + +/** + Scans a target buffer for a 64-bit value, and returns a pointer to the + matching 64-bit value in the target buffer. + + @param Buffer The pointer to the target buffer to scan. + @param Length The count of 64-bit values to scan. Must be non-zero. + @param Value The value to search for in the target buffer. + + @return The pointer to the first occurrence, or NULL if not found. + +**/ +CONST VOID * +EFIAPI +InternalMemScanMem64 ( + IN CONST VOID *Buffer, + IN UINTN Length, + IN UINT64 Value + ) +{ + CONST UINT64 *Pointer; + + Pointer = (CONST UINT64*)Buffer; + do { + if (*Pointer == Value) { + return Pointer; + } + ++Pointer; + } while (--Length != 0); /* relies on Length being non-zero on entry, as documented above */ + return NULL; +} + +/** + Checks whether the contents of a buffer are all zeros. + + @param Buffer The pointer to the buffer to be checked. + @param Length The size of the buffer (in bytes) to be checked. + + @retval TRUE Contents of the buffer are all zeros. + @retval FALSE Contents of the buffer are not all zeros. 
+ +**/ +BOOLEAN +EFIAPI +InternalMemIsZeroBuffer ( + IN CONST VOID *Buffer, + IN UINTN Length + ) +{ + CONST UINT8 *BufferData; + UINTN Index; + + BufferData = Buffer; + for (Index = 0; Index < Length; Index++) { + if (BufferData[Index] != 0) { + return FALSE; + } + } + return TRUE; +} diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S new file mode 100644 index 0000000000000..41cf81a8e20f1 --- /dev/null +++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S @@ -0,0 +1,90 @@ +#------------------------------------------------------------------------------ +# +# Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR> +# +# This program and the accompanying materials are licensed and made available +# under the terms and conditions of the BSD License which accompanies this +# distribution. The full text of the license may be found at +# http://opensource.org/licenses/bsd-license.php +# +# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +# +#------------------------------------------------------------------------------ + + .text + .thumb + .syntax unified + .align 5 +ASM_GLOBAL ASM_PFX(InternalMemSetMem16) +ASM_PFX(InternalMemSetMem16): + uxth r2, r2 // r0 = buffer, r1 = count of 16-bit values, r2 = value (low halfword used) + lsl r1, r1, #1 // element count -> byte count + orr r2, r2, r2, lsl #16 // replicate the halfword into both halves of r2 + b 0f + +ASM_GLOBAL ASM_PFX(InternalMemSetMem32) +ASM_PFX(InternalMemSetMem32): + lsl r1, r1, #2 // r1 = count of 32-bit values -> byte count + b 0f + +ASM_GLOBAL ASM_PFX(InternalMemSetMem64) +ASM_PFX(InternalMemSetMem64): + lsl r1, r1, #3 // r1 = count of 64-bit values -> byte count + b 1f // skips the r3 := r2 copy; presumably the 64-bit value arrives in r2:r3 + + .align 5 +ASM_GLOBAL ASM_PFX(InternalMemSetMem) +ASM_PFX(InternalMemSetMem): + uxtb r2, r2 // r0 = buffer, r1 = byte count, r2 = value (low byte used) + orr r2, r2, r2, lsl #8 // replicate the byte across all four bytes of r2 + orr r2, r2, r2, lsl #16 + b 0f + +ASM_GLOBAL ASM_PFX(InternalMemZeroMem) +ASM_PFX(InternalMemZeroMem): + movs r2, #0 // r0 = buffer, r1 = byte count; fill pattern is zero +0: mov r3, r2 // r2:r3 now hold the 8-byte fill pattern + +1: push {r4, lr} // common fill code; r0 is never written, so the buffer pointer is returned unchanged + cmp r1, #16 // fewer than 16 bytes of input? 
+ add r1, r1, r0 // r1 := dst + length + add lr, r0, #16 + blt 2f + bic lr, lr, #15 // align output pointer + + str r2, [r0] // potentially unaligned store of 4 bytes + str r3, [r0, #4] // potentially unaligned store of 4 bytes + str r2, [r0, #8] // potentially unaligned store of 4 bytes + str r3, [r0, #12] // potentially unaligned store of 4 bytes + beq 1f // exactly 16 bytes (flags still from the cmp above): done + +0: add lr, lr, #16 // advance the output pointer by 16 bytes + subs r4, r1, lr // past the output? + blt 3f // break out of the loop + strd r2, r3, [lr, #-16] // aligned store of 16 bytes + strd r2, r3, [lr, #-8] + bne 0b // goto beginning of loop +1: pop {r4, pc} + +2: subs r4, r1, lr // short request: r4 := length - 16 +3: adds r4, r4, #16 // r4 := number of bytes still to store (0..15) + subs r1, r1, #8 // r1 := end - 8 + cmp r4, #4 // between 4 and 15 bytes? + blt 4f + cmp r4, #8 // between 8 and 15 bytes? + sub r4, lr, #16 // r4 := address of the first byte still to store + str r2, [r4] // overlapping store of 4 + (4 + 4) + 4 bytes + it gt + strgt.n r3, [r4, #4] + it gt + strgt.n r2, [r1] + str r3, [r1, #4] + pop {r4, pc} + +4: cbz r4, 5f // nothing left (zero-length request): store nothing + cmp r4, #2 // 2 or 3 bytes? + strb r2, [lr, #-16] // store 1 byte + it ge + strhge.n r2, [r1, #6] // store 2 bytes +5: pop {r4, pc} diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm new file mode 100644 index 0000000000000..000c2a22fad60 --- /dev/null +++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm @@ -0,0 +1,98 @@ +;------------------------------------------------------------------------------ +; +; Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR> +; +; This program and the accompanying materials are licensed and made available +; under the terms and conditions of the BSD License which accompanies this +; distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. 
+; +;------------------------------------------------------------------------------ + + EXPORT InternalMemZeroMem + EXPORT InternalMemSetMem + EXPORT InternalMemSetMem16 + EXPORT InternalMemSetMem32 + EXPORT InternalMemSetMem64 + + AREA SetMem, CODE, READONLY, CODEALIGN, ALIGN=5 + THUMB + +InternalMemSetMem16 + uxth r2, r2 ; r0 = buffer, r1 = count of 16-bit values, r2 = value (low halfword used) + lsl r1, r1, #1 ; element count -> byte count + orr r2, r2, r2, lsl #16 ; replicate the halfword into both halves of r2 + b B0 + +InternalMemSetMem32 + lsl r1, r1, #2 ; r1 = count of 32-bit values -> byte count + b B0 + +InternalMemSetMem64 + lsl r1, r1, #3 ; r1 = count of 64-bit values -> byte count + b B1 ; skips the r3 := r2 copy; presumably the 64-bit value arrives in r2:r3 + + ALIGN 32 +InternalMemSetMem + uxtb r2, r2 ; r0 = buffer, r1 = byte count, r2 = value (low byte used) + orr r2, r2, r2, lsl #8 ; replicate the byte across all four bytes of r2 + orr r2, r2, r2, lsl #16 + b B0 + +InternalMemZeroMem + movs r2, #0 ; r0 = buffer, r1 = byte count; fill pattern is zero +B0 + mov r3, r2 ; r2:r3 now hold the 8-byte fill pattern + +B1 + push {r4, lr} ; common fill code; r0 is never written, so the buffer pointer is returned unchanged + cmp r1, #16 ; fewer than 16 bytes of input? + add r1, r1, r0 ; r1 := dst + length + add lr, r0, #16 + blt L2 + bic lr, lr, #15 ; align output pointer + + str r2, [r0] ; potentially unaligned store of 4 bytes + str r3, [r0, #4] ; potentially unaligned store of 4 bytes + str r2, [r0, #8] ; potentially unaligned store of 4 bytes + str r3, [r0, #12] ; potentially unaligned store of 4 bytes + beq L1 ; exactly 16 bytes (flags still from the cmp above): done + +L0 + add lr, lr, #16 ; advance the output pointer by 16 bytes + subs r4, r1, lr ; past the output? + blt L3 ; break out of the loop + strd r2, r3, [lr, #-16] ; aligned store of 16 bytes + strd r2, r3, [lr, #-8] + bne L0 ; goto beginning of loop +L1 + pop {r4, pc} + +L2 + subs r4, r1, lr ; short request: r4 := length - 16 +L3 + adds r4, r4, #16 ; r4 := number of bytes still to store (0..15) + subs r1, r1, #8 ; r1 := end - 8 + cmp r4, #4 ; between 4 and 15 bytes? + blt L4 + cmp r4, #8 ; between 8 and 15 bytes? + str r2, [lr, #-16] ; overlapping store of 4 + (4 + 4) + 4 bytes + itt gt + strgt r3, [lr, #-12] + strgt r2, [r1] + str r3, [r1, #4] + pop {r4, pc} + +L4 + cbz r4, L5 ; nothing left (zero-length request): store nothing + cmp r4, #2 ; 2 or 3 bytes? + strb r2, [lr, #-16] ; store 1 byte + it ge + strhge r2, [r1, #6] ; store 2 bytes +L5 + pop {r4, pc} + + END |