From 416ada0f75bab22b084a1776deb229cd4a669c4d Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Thu, 1 Jun 2017 20:58:49 +0000 Subject: Vendor import of clang trunk r304460: https://llvm.org/svn/llvm-project/cfe/trunk@304460 --- test/CodeGen/arm_neon_intrinsics.c | 1238 ++++++++++++++++----------------- test/CodeGen/ubsan-pointer-overflow.m | 171 +++++ 2 files changed, 789 insertions(+), 620 deletions(-) create mode 100644 test/CodeGen/ubsan-pointer-overflow.m (limited to 'test/CodeGen') diff --git a/test/CodeGen/arm_neon_intrinsics.c b/test/CodeGen/arm_neon_intrinsics.c index ae7c78e08f86..62888dd73339 100644 --- a/test/CodeGen/arm_neon_intrinsics.c +++ b/test/CodeGen/arm_neon_intrinsics.c @@ -3,8 +3,6 @@ // RUN: -disable-O0-optnone -emit-llvm -o - %s \ // RUN: | opt -S -mem2reg | FileCheck %s -// REQUIRES: long-tests - #include // CHECK-LABEL: @test_vaba_s8( @@ -3481,11 +3479,11 @@ float32_t test_vgetq_lane_f32(float32x4_t a) { } // CHECK-LABEL: @test_vgetq_lane_f16( -// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 8 +// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16 // CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2 -// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 8 +// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16 // CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>* -// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 8 +// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 @@ -4543,7 +4541,7 @@ poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) { } // CHECK-LABEL: @test_vld2q_u8( -// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> uint8x16x2_t test_vld2q_u8(uint8_t const * a) { @@ -4551,7 +4549,7 @@ uint8x16x2_t test_vld2q_u8(uint8_t const * a) { } // CHECK-LABEL: @test_vld2q_u16( -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> @@ -4560,7 +4558,7 @@ uint16x8x2_t test_vld2q_u16(uint16_t const * a) { } // CHECK-LABEL: @test_vld2q_u32( -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* // CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> @@ -4569,7 +4567,7 @@ uint32x4x2_t test_vld2q_u32(uint32_t const * a) { } // CHECK-LABEL: @test_vld2q_s8( -// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> int8x16x2_t test_vld2q_s8(int8_t const * a) { @@ -4577,7 +4575,7 @@ int8x16x2_t test_vld2q_s8(int8_t const * a) { } // CHECK-LABEL: @test_vld2q_s16( -// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> @@ -4586,7 +4584,7 @@ int16x8x2_t test_vld2q_s16(int16_t const * a) { } // CHECK-LABEL: @test_vld2q_s32( -// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* // CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> @@ -4595,7 +4593,7 @@ int32x4x2_t test_vld2q_s32(int32_t const * a) { } // CHECK-LABEL: @test_vld2q_f16( -// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> @@ -4604,7 +4602,7 @@ float16x8x2_t test_vld2q_f16(float16_t const * a) { } // CHECK-LABEL: @test_vld2q_f32( -// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* // CHECK: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float> @@ -4613,7 +4611,7 @@ float32x4x2_t test_vld2q_f32(float32_t const * a) { } // CHECK-LABEL: @test_vld2q_p8( -// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> poly8x16x2_t test_vld2q_p8(poly8_t const * a) { @@ -4621,7 +4619,7 @@ poly8x16x2_t test_vld2q_p8(poly8_t const * a) { } // CHECK-LABEL: @test_vld2q_p16( -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> @@ -4840,24 +4838,24 @@ poly16x4x2_t test_vld2_dup_p16(poly16_t const * a) { } // CHECK-LABEL: @test_vld2q_lane_u16( -// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -4867,24 +4865,24 @@ uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) { } // CHECK-LABEL: @test_vld2q_lane_u32( -// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> @@ -4894,24 +4892,24 @@ uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) { } // CHECK-LABEL: @test_vld2q_lane_s16( -// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -4921,24 +4919,24 @@ int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) { } // CHECK-LABEL: @test_vld2q_lane_s32( -// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> @@ -4948,24 +4946,24 @@ int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) { } // CHECK-LABEL: @test_vld2q_lane_f16( -// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -4975,24 +4973,24 @@ float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) { } // CHECK-LABEL: @test_vld2q_lane_f32( -// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> @@ -5002,24 +5000,24 @@ float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) { } // CHECK-LABEL: @test_vld2q_lane_p16( -// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -5284,7 +5282,7 @@ poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) { } // CHECK-LABEL: @test_vld3q_u8( -// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> uint8x16x3_t test_vld3q_u8(uint8_t const * a) { @@ -5292,7 +5290,7 @@ uint8x16x3_t test_vld3q_u8(uint8_t const * a) { } // CHECK-LABEL: @test_vld3q_u16( -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> @@ -5301,7 +5299,7 @@ uint16x8x3_t test_vld3q_u16(uint16_t const * a) { } // CHECK-LABEL: @test_vld3q_u32( -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* // CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> @@ -5310,7 +5308,7 @@ uint32x4x3_t test_vld3q_u32(uint32_t const * a) { } // CHECK-LABEL: @test_vld3q_s8( -// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> int8x16x3_t test_vld3q_s8(int8_t const * a) { @@ -5318,7 +5316,7 @@ int8x16x3_t test_vld3q_s8(int8_t const * a) { } // CHECK-LABEL: @test_vld3q_s16( -// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> @@ -5327,7 +5325,7 @@ int16x8x3_t test_vld3q_s16(int16_t const * a) { } // CHECK-LABEL: @test_vld3q_s32( -// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* // CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> @@ -5336,7 +5334,7 @@ int32x4x3_t test_vld3q_s32(int32_t const * a) { } // CHECK-LABEL: @test_vld3q_f16( -// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> @@ -5345,7 +5343,7 @@ float16x8x3_t test_vld3q_f16(float16_t const * a) { } // CHECK-LABEL: @test_vld3q_f32( -// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* // CHECK: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> @@ -5354,7 +5352,7 @@ float32x4x3_t test_vld3q_f32(float32_t const * a) { } // CHECK-LABEL: @test_vld3q_p8( -// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> poly8x16x3_t test_vld3q_p8(poly8_t const * a) { @@ -5362,7 +5360,7 @@ poly8x16x3_t test_vld3q_p8(poly8_t const * a) { } // CHECK-LABEL: @test_vld3q_p16( -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> @@ -5581,28 +5579,28 @@ poly16x4x3_t test_vld3_dup_p16(poly16_t const * a) { } // CHECK-LABEL: @test_vld3q_lane_u16( -// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -5613,28 +5611,28 @@ uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) { } // CHECK-LABEL: @test_vld3q_lane_u32( -// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> @@ -5645,28 +5643,28 @@ uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) { } // CHECK-LABEL: @test_vld3q_lane_s16( -// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -5677,28 +5675,28 @@ int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) { } // CHECK-LABEL: @test_vld3q_lane_s32( -// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> @@ -5709,28 +5707,28 @@ int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) { } // CHECK-LABEL: @test_vld3q_lane_f16( -// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -5741,28 +5739,28 @@ float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) { } // CHECK-LABEL: @test_vld3q_lane_f32( -// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> @@ -5773,28 +5771,28 @@ float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) { } // CHECK-LABEL: @test_vld3q_lane_p16( -// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -6104,7 +6102,7 @@ poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) { } // CHECK-LABEL: @test_vld4q_u8( -// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* // CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> uint8x16x4_t test_vld4q_u8(uint8_t const * a) { @@ -6112,7 +6110,7 @@ uint8x16x4_t test_vld4q_u8(uint8_t const * a) { } // CHECK-LABEL: @test_vld4q_u16( -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> @@ -6121,7 +6119,7 @@ uint16x8x4_t test_vld4q_u16(uint16_t const * a) { } // CHECK-LABEL: @test_vld4q_u32( -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* // CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> @@ -6130,7 +6128,7 @@ uint32x4x4_t test_vld4q_u32(uint32_t const * a) { } // CHECK-LABEL: @test_vld4q_s8( -// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* // CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> int8x16x4_t test_vld4q_s8(int8_t const * a) { @@ -6138,7 +6136,7 @@ int8x16x4_t test_vld4q_s8(int8_t const * a) { } // CHECK-LABEL: @test_vld4q_s16( -// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> @@ -6147,7 +6145,7 @@ int16x8x4_t test_vld4q_s16(int16_t const * a) { } // CHECK-LABEL: @test_vld4q_s32( -// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* // CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> @@ -6156,7 +6154,7 @@ int32x4x4_t test_vld4q_s32(int32_t const * a) { } // CHECK-LABEL: @test_vld4q_f16( -// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> @@ -6165,7 +6163,7 @@ float16x8x4_t test_vld4q_f16(float16_t const * a) { } // CHECK-LABEL: @test_vld4q_f32( -// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* // CHECK: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> @@ -6174,7 +6172,7 @@ float32x4x4_t test_vld4q_f32(float32_t const * a) { } // CHECK-LABEL: @test_vld4q_p8( -// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* // CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> poly8x16x4_t test_vld4q_p8(poly8_t const * a) { @@ -6182,7 +6180,7 @@ poly8x16x4_t test_vld4q_p8(poly8_t const * a) { } // CHECK-LABEL: @test_vld4q_p16( -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 8 +// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> @@ -6401,32 +6399,32 @@ poly16x4x4_t test_vld4_dup_p16(poly16_t const * a) { } // CHECK-LABEL: @test_vld4q_lane_u16( -// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -6438,32 +6436,32 @@ uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) { } // CHECK-LABEL: @test_vld4q_lane_u32( -// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> @@ -6475,32 +6473,32 @@ uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) { } // CHECK-LABEL: @test_vld4q_lane_s16( -// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -6512,32 +6510,32 @@ int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) { } // CHECK-LABEL: @test_vld4q_lane_s32( -// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> @@ -6549,32 +6547,32 @@ int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) { } // CHECK-LABEL: @test_vld4q_lane_f16( -// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -6586,32 +6584,32 @@ float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) { } // CHECK-LABEL: @test_vld4q_lane_f32( -// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP12:%.*]] = bitcast <4 x float> [[TMP11]] to <16 x i8> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> @@ -6623,32 +6621,32 @@ float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) { } // CHECK-LABEL: @test_vld4q_lane_p16( -// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 +// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> @@ -14549,21 +14547,21 @@ float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) { // CHECK-LABEL: @test_vsetq_lane_f16( // CHECK: [[__REINT_248:%.*]] = alloca half, align 2 -// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 8 -// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 8 +// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16 +// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16 // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 // CHECK: store half [[TMP0]], half* [[__REINT_248]], align 2 -// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 8 +// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16 // CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16* // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 // CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 3 -// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 8 +// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>* -// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16 // CHECK: ret <8 x half> [[TMP8]] float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) { return vsetq_lane_f16(*a, b, 3); @@ -16194,20 +16192,20 @@ void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) { } // CHECK-LABEL: @test_vst2q_u8( -// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1) // CHECK: ret void void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) { @@ -16215,22 +16213,22 @@ void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) { } // CHECK-LABEL: @test_vst2q_u16( -// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -16241,22 +16239,22 @@ void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) { } // CHECK-LABEL: @test_vst2q_u32( -// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -16267,20 +16265,20 @@ void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) { } // CHECK-LABEL: @test_vst2q_s8( -// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1) // CHECK: ret void void test_vst2q_s8(int8_t * a, int8x16x2_t b) { @@ -16288,22 +16286,22 @@ void test_vst2q_s8(int8_t * a, int8x16x2_t b) { } // CHECK-LABEL: @test_vst2q_s16( -// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -16314,22 +16312,22 @@ void test_vst2q_s16(int16_t * a, int16x8x2_t b) { } // CHECK-LABEL: @test_vst2q_s32( -// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -16340,22 +16338,22 @@ void test_vst2q_s32(int32_t * a, int32x4x2_t b) { } // CHECK-LABEL: @test_vst2q_f16( -// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -16366,22 +16364,22 @@ void test_vst2q_f16(float16_t * a, float16x8x2_t b) { } // CHECK-LABEL: @test_vst2q_f32( -// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> @@ -16392,20 +16390,20 @@ void test_vst2q_f32(float32_t * a, float32x4x2_t b) { } // CHECK-LABEL: @test_vst2q_p8( -// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1) // CHECK: ret void void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) { @@ -16413,22 +16411,22 @@ void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) { } // CHECK-LABEL: @test_vst2q_p16( -// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -16736,22 +16734,22 @@ void test_vst2_p16(poly16_t * a, poly16x4x2_t b) { } // CHECK-LABEL: @test_vst2q_lane_u16( -// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -16762,22 +16760,22 @@ void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) { } // CHECK-LABEL: @test_vst2q_lane_u32( -// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -16788,22 +16786,22 @@ void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) { } // CHECK-LABEL: @test_vst2q_lane_s16( -// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -16814,22 +16812,22 @@ void test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) { } // CHECK-LABEL: @test_vst2q_lane_s32( -// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -16840,22 +16838,22 @@ void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) { } // CHECK-LABEL: @test_vst2q_lane_f16( -// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -16866,22 +16864,22 @@ void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) { } // CHECK-LABEL: @test_vst2q_lane_f32( -// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> @@ -16892,22 +16890,22 @@ void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) { } // CHECK-LABEL: @test_vst2q_lane_p16( -// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* -// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 +// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -17163,23 +17161,23 @@ void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) { } // CHECK-LABEL: @test_vst3q_u8( -// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1) // CHECK: ret void void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) { @@ -17187,26 +17185,26 @@ void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) { } // CHECK-LABEL: @test_vst3q_u16( -// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -17218,26 +17216,26 @@ void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) { } // CHECK-LABEL: @test_vst3q_u32( -// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -17249,23 +17247,23 @@ void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) { } // CHECK-LABEL: @test_vst3q_s8( -// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1) // CHECK: ret void void test_vst3q_s8(int8_t * a, int8x16x3_t b) { @@ -17273,26 +17271,26 @@ void test_vst3q_s8(int8_t * a, int8x16x3_t b) { } // CHECK-LABEL: @test_vst3q_s16( -// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -17304,26 +17302,26 @@ void test_vst3q_s16(int16_t * a, int16x8x3_t b) { } // CHECK-LABEL: @test_vst3q_s32( -// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -17335,26 +17333,26 @@ void test_vst3q_s32(int32_t * a, int32x4x3_t b) { } // CHECK-LABEL: @test_vst3q_f16( -// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -17366,26 +17364,26 @@ void test_vst3q_f16(float16_t * a, float16x8x3_t b) { } // CHECK-LABEL: @test_vst3q_f32( -// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> @@ -17397,23 +17395,23 @@ void test_vst3q_f32(float32_t * a, float32x4x3_t b) { } // CHECK-LABEL: @test_vst3q_p8( -// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1) // CHECK: ret void void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) { @@ -17421,26 +17419,26 @@ void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) { } // CHECK-LABEL: @test_vst3q_p16( -// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -17803,26 +17801,26 @@ void test_vst3_p16(poly16_t * a, poly16x4x3_t b) { } // CHECK-LABEL: @test_vst3q_lane_u16( -// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -17834,26 +17832,26 @@ void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) { } // CHECK-LABEL: @test_vst3q_lane_u32( -// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -17865,26 +17863,26 @@ void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) { } // CHECK-LABEL: @test_vst3q_lane_s16( -// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -17896,26 +17894,26 @@ void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) { } // CHECK-LABEL: @test_vst3q_lane_s32( -// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -17927,26 +17925,26 @@ void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) { } // CHECK-LABEL: @test_vst3q_lane_f16( -// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -17958,26 +17956,26 @@ void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) { } // CHECK-LABEL: @test_vst3q_lane_f32( -// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> @@ -17989,26 +17987,26 @@ void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) { } // CHECK-LABEL: @test_vst3q_lane_p16( -// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* -// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8 +// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -18309,26 +18307,26 @@ void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) { } // CHECK-LABEL: @test_vst4q_u8( -// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1) // CHECK: ret void void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) { @@ -18336,30 +18334,30 @@ void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) { } // CHECK-LABEL: @test_vst4q_u16( -// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -18372,30 +18370,30 @@ void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) { } // CHECK-LABEL: @test_vst4q_u32( -// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -18408,26 +18406,26 @@ void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) { } // CHECK-LABEL: @test_vst4q_s8( -// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1) // CHECK: ret void void test_vst4q_s8(int8_t * a, int8x16x4_t b) { @@ -18435,30 +18433,30 @@ void test_vst4q_s8(int8_t * a, int8x16x4_t b) { } // CHECK-LABEL: @test_vst4q_s16( -// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -18471,30 +18469,30 @@ void test_vst4q_s16(int16_t * a, int16x8x4_t b) { } // CHECK-LABEL: @test_vst4q_s32( -// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -18507,30 +18505,30 @@ void test_vst4q_s32(int32_t * a, int32x4x4_t b) { } // CHECK-LABEL: @test_vst4q_f16( -// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -18543,30 +18541,30 @@ void test_vst4q_f16(float16_t * a, float16x8x4_t b) { } // CHECK-LABEL: @test_vst4q_f32( -// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> @@ -18579,26 +18577,26 @@ void test_vst4q_f32(float32_t * a, float32x4x4_t b) { } // CHECK-LABEL: @test_vst4q_p8( -// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1) // CHECK: ret void void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) { @@ -18606,30 +18604,30 @@ void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) { } // CHECK-LABEL: @test_vst4q_p16( -// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -19047,30 +19045,30 @@ void test_vst4_p16(poly16_t * a, poly16x4x4_t b) { } // CHECK-LABEL: @test_vst4q_lane_u16( -// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -19083,30 +19081,30 @@ void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) { } // CHECK-LABEL: @test_vst4q_lane_u32( -// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -19119,30 +19117,30 @@ void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) { } // CHECK-LABEL: @test_vst4q_lane_s16( -// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -19155,30 +19153,30 @@ void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) { } // CHECK-LABEL: @test_vst4q_lane_s32( -// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> @@ -19191,30 +19189,30 @@ void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) { } // CHECK-LABEL: @test_vst4q_lane_f16( -// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -19227,30 +19225,30 @@ void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) { } // CHECK-LABEL: @test_vst4q_lane_f32( -// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> @@ -19263,30 +19261,30 @@ void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) { } // CHECK-LABEL: @test_vst4q_lane_p16( -// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 8 -// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 8 +// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 +// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* -// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8 +// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 -// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8 +// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> @@ -20631,7 +20629,7 @@ poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { } // CHECK-LABEL: @test_vtrnq_s8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -20641,14 +20639,14 @@ poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !30 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { return vtrnq_s8(a, b); } // CHECK-LABEL: @test_vtrnq_s16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> @@ -20660,14 +20658,14 @@ int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !33 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { return vtrnq_s16(a, b); } // CHECK-LABEL: @test_vtrnq_s32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> @@ -20679,14 +20677,14 @@ int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !36 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { return vtrnq_s32(a, b); } // CHECK-LABEL: @test_vtrnq_u8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -20696,14 +20694,14 @@ int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !39 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { return vtrnq_u8(a, b); } // CHECK-LABEL: @test_vtrnq_u16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> @@ -20715,14 +20713,14 @@ uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !42 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { return vtrnq_u16(a, b); } // CHECK-LABEL: @test_vtrnq_u32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> @@ -20734,14 +20732,14 @@ uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !45 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { return vtrnq_u32(a, b); } // CHECK-LABEL: @test_vtrnq_f32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> @@ -20753,14 +20751,14 @@ uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { // CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], !noalias !48 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { return vtrnq_f32(a, b); } // CHECK-LABEL: @test_vtrnq_p8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -20770,14 +20768,14 @@ float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !51 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { return vtrnq_p8(a, b); } // CHECK-LABEL: @test_vtrnq_p16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> @@ -20789,7 +20787,7 @@ poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !54 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { return vtrnq_p16(a, b); @@ -21125,7 +21123,7 @@ poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { } // CHECK-LABEL: @test_vuzpq_s8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -21135,14 +21133,14 @@ poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !84 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { return vuzpq_s8(a, b); } // CHECK-LABEL: @test_vuzpq_s16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> @@ -21154,14 +21152,14 @@ int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !87 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { return vuzpq_s16(a, b); } // CHECK-LABEL: @test_vuzpq_s32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> @@ -21173,14 +21171,14 @@ int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !90 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { return vuzpq_s32(a, b); } // CHECK-LABEL: @test_vuzpq_u8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -21190,14 +21188,14 @@ int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !93 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { return vuzpq_u8(a, b); } // CHECK-LABEL: @test_vuzpq_u16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> @@ -21209,14 +21207,14 @@ uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !96 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { return vuzpq_u16(a, b); } // CHECK-LABEL: @test_vuzpq_u32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> @@ -21228,14 +21226,14 @@ uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !99 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { return vuzpq_u32(a, b); } // CHECK-LABEL: @test_vuzpq_f32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> @@ -21247,14 +21245,14 @@ uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { // CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], !noalias !102 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { return vuzpq_f32(a, b); } // CHECK-LABEL: @test_vuzpq_p8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -21264,14 +21262,14 @@ float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !105 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { return vuzpq_p8(a, b); } // CHECK-LABEL: @test_vuzpq_p16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> @@ -21283,7 +21281,7 @@ poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !108 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) { return vuzpq_p16(a, b); @@ -21455,7 +21453,7 @@ poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { } // CHECK-LABEL: @test_vzipq_s8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -21465,14 +21463,14 @@ poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !138 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { return vzipq_s8(a, b); } // CHECK-LABEL: @test_vzipq_s16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> @@ -21484,14 +21482,14 @@ int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !141 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { return vzipq_s16(a, b); } // CHECK-LABEL: @test_vzipq_s32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> @@ -21503,14 +21501,14 @@ int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !144 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { return vzipq_s32(a, b); } // CHECK-LABEL: @test_vzipq_u8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -21520,14 +21518,14 @@ int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !147 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) { return vzipq_u8(a, b); } // CHECK-LABEL: @test_vzipq_u16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> @@ -21539,14 +21537,14 @@ uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) { // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !150 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { return vzipq_u16(a, b); } // CHECK-LABEL: @test_vzipq_u32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> @@ -21558,14 +21556,14 @@ uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !153 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { return vzipq_u32(a, b); } // CHECK-LABEL: @test_vzipq_f32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> @@ -21577,14 +21575,14 @@ uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { // CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], !noalias !156 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { return vzipq_f32(a, b); } // CHECK-LABEL: @test_vzipq_p8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -21594,14 +21592,14 @@ float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !159 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { return vzipq_p8(a, b); } // CHECK-LABEL: @test_vzipq_p16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 8 +// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> @@ -21613,7 +21611,7 @@ poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !162 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) { return vzipq_p16(a, b); diff --git a/test/CodeGen/ubsan-pointer-overflow.m b/test/CodeGen/ubsan-pointer-overflow.m new file mode 100644 index 000000000000..c83d527e624e --- /dev/null +++ b/test/CodeGen/ubsan-pointer-overflow.m @@ -0,0 +1,171 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -w -emit-llvm -o - %s -fsanitize=pointer-overflow | FileCheck %s + +// CHECK-LABEL: define void @unary_arith +void unary_arith(char *p) { + // CHECK: [[BASE:%.*]] = ptrtoint i8* {{.*}} to i64, !nosanitize + // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], 1, !nosanitize + // CHECK-NEXT: [[POSVALID:%.*]] = icmp uge i64 [[COMPGEP]], [[BASE]], !nosanitize + // CHECK-NEXT: [[NEGVALID:%.*]] = icmp ult i64 [[COMPGEP]], [[BASE]], !nosanitize + // CHECK-NEXT: [[DIFFVALID:%.*]] = select i1 true, i1 [[POSVALID]], i1 [[NEGVALID]], !nosanitize + // CHECK-NEXT: [[VALID:%.*]] = and i1 true, [[DIFFVALID]], !nosanitize + // CHECK-NEXT: br i1 [[VALID]]{{.*}}, !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize + ++p; + + // CHECK: ptrtoint i8* {{.*}} to i64, !nosanitize + // CHECK-NEXT: add i64 {{.*}}, -1, !nosanitize + // CHECK: select i1 false{{.*}}, !nosanitize + // CHECK-NEXT: and i1 true{{.*}}, !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + --p; + + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + p++; + + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + p--; +} + +// CHECK-LABEL: define void @binary_arith +void binary_arith(char *p, int i) { + // CHECK: [[SMUL:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %{{.*}}), !nosanitize + // CHECK-NEXT: [[SMULOFLOW:%.*]] = extractvalue { i64, i1 } [[SMUL]], 1, !nosanitize + // CHECK-NEXT: [[OFFSETOFLOW:%.*]] = or i1 false, [[SMULOFLOW]], !nosanitize + // CHECK-NEXT: [[SMULVAL:%.*]] = extractvalue { i64, i1 } [[SMUL]], 0, !nosanitize + // CHECK-NEXT: [[BASE:%.*]] = ptrtoint i8* {{.*}} to i64, !nosanitize + // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], [[SMULVAL]], !nosanitize + // CHECK-NEXT: [[POSVALID:%.*]] = icmp uge i64 [[COMPGEP]], [[BASE]], !nosanitize + // CHECK-NEXT: [[NEGVALID:%.*]] = icmp ult i64 [[COMPGEP]], [[BASE]], !nosanitize + // CHECK-NEXT: [[POSOFFSET:%.*]] = icmp sge i64 [[SMULVAL]], 0, !nosanitize + // CHECK-NEXT: [[OFFSETVALID:%.*]] = xor i1 [[OFFSETOFLOW]], true, !nosanitize + // CHECK-NEXT: [[DIFFVALID:%.*]] = select i1 [[POSOFFSET]], i1 [[POSVALID]], i1 [[NEGVALID]], !nosanitize + // CHECK-NEXT: [[VALID:%.*]] = and i1 [[OFFSETVALID]], [[DIFFVALID]], !nosanitize + // CHECK-NEXT: br i1 [[VALID]]{{.*}}, !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize + p + i; + + // CHECK: [[OFFSET:%.*]] = sub i64 0, {{.*}} + // CHECK-NEXT: getelementptr inbounds {{.*}} [[OFFSET]] + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + p - i; +} + +// CHECK-LABEL: define void @fixed_len_array +void fixed_len_array(int k) { + // CHECK: getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARR:%.*]], i64 0, i64 [[IDXPROM:%.*]] + // CHECK-NEXT: [[SMUL:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 40, i64 [[IDXPROM]]), !nosanitize + // CHECK-NEXT: [[SMULOFLOW:%.*]] = extractvalue { i64, i1 } [[SMUL]], 1, !nosanitize + // CHECK-NEXT: [[OFFSETOFLOW:%.*]] = or i1 false, [[SMULOFLOW]], !nosanitize + // CHECK-NEXT: [[SMULVAL:%.*]] = extractvalue { i64, i1 } [[SMUL]], 0, !nosanitize + // CHECK-NEXT: [[BASE:%.*]] = ptrtoint [10 x [10 x i32]]* [[ARR]] to i64, !nosanitize + // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], [[SMULVAL]], !nosanitize + // CHECK-NEXT: [[POSVALID:%.*]] = icmp uge i64 [[COMPGEP]], [[BASE]], !nosanitize + // CHECK-NEXT: [[NEGVALID:%.*]] = icmp ult i64 [[COMPGEP]], [[BASE]], !nosanitize + // CHECK-NEXT: [[POSOFFSET:%.*]] = icmp sge i64 [[SMULVAL]], 0, !nosanitize + // CHECK-NEXT: [[OFFSETVALID:%.*]] = xor i1 [[OFFSETOFLOW]], true, !nosanitize + // CHECK-NEXT: [[DIFFVALID:%.*]] = select i1 [[POSOFFSET]], i1 [[POSVALID]], i1 [[NEGVALID]], !nosanitize + // CHECK-NEXT: [[VALID:%.*]] = and i1 [[OFFSETVALID]], [[DIFFVALID]], !nosanitize + // CHECK-NEXT: br i1 [[VALID]]{{.*}}, !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize + + // CHECK: getelementptr inbounds [10 x i32], [10 x i32]* {{.*}}, i64 0, i64 [[IDXPROM1:%.*]] + // CHECK-NEXT: @llvm.smul.with.overflow.i64(i64 4, i64 [[IDXPROM1]]), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + + int arr[10][10]; + arr[k][k]; +} + +// CHECK-LABEL: define void @variable_len_array +void variable_len_array(int n, int k) { + // CHECK: getelementptr inbounds i32, i32* {{.*}}, i64 [[IDXPROM:%.*]] + // CHECK-NEXT: @llvm.smul.with.overflow.i64(i64 4, i64 [[IDXPROM]]), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + + // CHECK: getelementptr inbounds i32, i32* {{.*}}, i64 [[IDXPROM1:%.*]] + // CHECK-NEXT: @llvm.smul.with.overflow.i64(i64 4, i64 [[IDXPROM1]]), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + + int arr[n][n]; + arr[k][k]; +} + +// CHECK-LABEL: define void @pointer_array +void pointer_array(int **arr, int k) { + // CHECK: @llvm.smul.with.overflow.i64(i64 8, i64 {{.*}}), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + + // CHECK: @llvm.smul.with.overflow.i64(i64 4, i64 {{.*}}), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + + arr[k][k]; +} + +struct S1 { + int pad1; + union { + char leaf; + struct S1 *link; + } u; + struct S1 *arr; +}; + +// TODO: Currently, structure GEPs are not checked, so there are several +// potentially unsafe GEPs here which we don't instrument. +// +// CHECK-LABEL: define void @struct_index +void struct_index(struct S1 *p) { + // CHECK: getelementptr inbounds %struct.S1, %struct.S1* [[P:%.*]], i64 10 + // CHECK-NEXT: [[BASE:%.*]] = ptrtoint %struct.S1* [[P]] to i64, !nosanitize + // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], 240, !nosanitize + // CHECK: @__ubsan_handle_pointer_overflow{{.*}} i64 [[BASE]], i64 [[COMPGEP]]) {{.*}}, !nosanitize + + // CHECK-NOT: @__ubsan_handle_pointer_overflow + + p->arr[10].u.link->u.leaf; +} + +typedef void (*funcptr_t)(void); + +// CHECK-LABEL: define void @function_pointer_arith +void function_pointer_arith(funcptr_t *p, int k) { + // CHECK: add i64 {{.*}}, 8, !nosanitize + // CHECK: @__ubsan_handle_pointer_overflow{{.*}} + ++p; + + // CHECK: @llvm.smul.with.overflow.i64(i64 8, i64 {{.*}}), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + p + k; +} + +// CHECK-LABEL: define void @variable_len_array_arith +void variable_len_array_arith(int n, int k) { + int vla[n]; + int (*p)[n] = &vla; + + // CHECK: getelementptr inbounds i32, i32* {{.*}}, i64 [[INC:%.*]] + // CHECK: @llvm.smul.with.overflow.i64(i64 4, i64 [[INC]]), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + ++p; + + // CHECK: getelementptr inbounds i32, i32* {{.*}}, i64 [[IDXPROM:%.*]] + // CHECK: @llvm.smul.with.overflow.i64(i64 4, i64 [[IDXPROM]]), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + p + k; +} + +// CHECK-LABEL: define void @objc_id +void objc_id(id *p) { + // CHECK: add i64 {{.*}}, 8, !nosanitize + // CHECK: @__ubsan_handle_pointer_overflow{{.*}} + p++; +} + +// CHECK-LABEL: define void @dont_emit_checks_for_no_op_GEPs +// CHECK-NOT: __ubsan_handle_pointer_overflow +void dont_emit_checks_for_no_op_GEPs(char *p) { + &p[0]; + + int arr[10][10]; + &arr[0][0]; +} -- cgit v1.3