; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx -interleaved-access -S | FileCheck %s

; This file tests the function `llvm::lowerInterleavedLoad/Store`.
;
; NOTE(review): the constant shuffle masks were missing from this copy of the
; test and have been restored. The load masks follow from the expected output
; (four row loads followed by a 4x4 transpose). The store masks of the
; revMask / arbitraryMask tests cannot be derived from the expected output
; and should be confirmed against the upstream test before regenerating.

; Factor-4 interleaved load of doubles: the four stride-4 shuffles (lane
; offsets 0..3) feed an fadd chain. Expected result: four <4 x double> loads
; and a shuffle-based 4x4 transpose replacing the wide load.
define <4 x double> @load_factorf64_4(<16 x double>* %ptr) {
; CHECK-LABEL: @load_factorf64_4(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x double>* %ptr to <4 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[TMP2]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 16
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x double>, <4 x double>* [[TMP6]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 3
; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[TMP8]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP9]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP11]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP13]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP11]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP13]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[ADD1:%.*]] = fadd <4 x double> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd <4 x double> [[ADD1]], [[TMP15]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd <4 x double> [[ADD2]], [[TMP17]]
; CHECK-NEXT:    ret <4 x double> [[ADD3]]
;
  %wide.vec = load <16 x double>, <16 x double>* %ptr, align 16
  %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %strided.v1 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %strided.v2 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  %add1 = fadd <4 x double> %strided.v0, %strided.v1
  %add2 = fadd <4 x double> %add1, %strided.v2
  %add3 = fadd <4 x double> %add2, %strided.v3
  ret <4 x double> %add3
}

; Same factor-4 interleaved load as above, with i64 elements and an integer
; add chain.
define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) {
; CHECK-LABEL: @load_factori64_4(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i64>* %ptr to <4 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[TMP4]], align 16
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[TMP6]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 3
; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i64>, <4 x i64>* [[TMP8]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP7]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP9]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> [[TMP11]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP13]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> [[TMP11]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP13]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[ADD1:%.*]] = add <4 x i64> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[ADD2:%.*]] = add <4 x i64> [[ADD1]], [[TMP15]]
; CHECK-NEXT:    [[ADD3:%.*]] = add <4 x i64> [[ADD2]], [[TMP17]]
; CHECK-NEXT:    ret <4 x i64> [[ADD3]]
;
  %wide.vec = load <16 x i64>, <16 x i64>* %ptr, align 16
  %strided.v0 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %strided.v1 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %strided.v2 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %strided.v3 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  %add1 = add <4 x i64> %strided.v0, %strided.v1
  %add2 = add <4 x i64> %add1, %strided.v2
  %add3 = add <4 x i64> %add2, %strided.v3
  ret <4 x i64> %add3
}

; Both shuffles select the same stride-4 elements (lane offset 0), so after
; lowering the fmul multiplies the first transposed vector by itself.
define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
; CHECK-LABEL: @load_factorf64_1(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x double>* %ptr to <4 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[TMP2]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 16
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x double>, <4 x double>* [[TMP6]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 3
; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[TMP8]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP9]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP11]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP13]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP11]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP13]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[MUL:%.*]] = fmul <4 x double> [[TMP14]], [[TMP14]]
; CHECK-NEXT:    ret <4 x double> [[MUL]]
;
  %wide.vec = load <16 x double>, <16 x double>* %ptr, align 16
  %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %mul = fmul <4 x double> %strided.v0, %strided.v3
  ret <4 x double> %mul
}

; Factor-4 interleaved store of doubles: two concatenating shuffles followed
; by an interleaving shuffle and a wide store. The expected output mirrors the
; input, i.e. the IR is left unchanged.
define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
; CHECK-LABEL: @store_factorf64_4(
; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; CHECK-NEXT:    store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
  ret void
}

; Same factor-4 interleaved store as above, with i64 elements. The expected
; output mirrors the input.
define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) {
; CHECK-LABEL: @store_factori64_4(
; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; CHECK-NEXT:    store <16 x i64> [[INTERLEAVED_VEC]], <16 x i64>* [[PTR:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i64> %interleaved.vec, <16 x i64>* %ptr, align 16
  ret void
}

; Factor-4 interleaved store whose interleave mask visits the four source
; vectors in reverse order. The expected output mirrors the input.
; NOTE(review): the interleave mask below was reconstructed; confirm it
; against the upstream test.
define void @store_factorf64_4_revMask(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
; CHECK-LABEL: @store_factorf64_4_revMask(
; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
; CHECK-NEXT:    store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
  ret void
}

; Factor-4 interleaved store built from <16 x double> operands, where the
; interleave mask picks four-element runs at arbitrary offsets of the
; concatenated sources. The expected output mirrors the input.
; NOTE(review): the interleave mask below was reconstructed; confirm it
; against the upstream test.
define void @store_factorf64_4_arbitraryMask(<16 x double>* %ptr, <16 x double> %v0, <16 x double> %v1, <16 x double> %v2, <16 x double> %v3) {
; CHECK-LABEL: @store_factorf64_4_arbitraryMask(
; CHECK-NEXT:    [[S0:%.*]] = shufflevector <16 x double> [[V0:%.*]], <16 x double> [[V1:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <16 x double> [[V2:%.*]], <16 x double> [[V3:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
; CHECK-NEXT:    store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s0 = shufflevector <16 x double> %v0, <16 x double> %v1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %s1 = shufflevector <16 x double> %v2, <16 x double> %v3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %interleaved.vec = shufflevector <32 x double> %s0, <32 x double> %s1, <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
  ret void
}