; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-vectorize -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" ; This test checks vector GEP before scatter. ; The code bellow crashed due to destroyed SSA while incorrect vectorization of ; the GEP. @d = global [10 x [10 x i32]] zeroinitializer, align 16 @c = external global i32, align 4 @a = external global i32, align 4 @b = external global i64, align 8 ; Function Attrs: norecurse nounwind ssp uwtable define void @_Z3fn1v() #0 { ; CHECK-LABEL: @_Z3fn1v( ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX:%.*]].next, %vector.body ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [ ; CHECK-NEXT: [[SHL:%.*]] = shl i64 %index, 1 ; CHECK-NEXT: %offset.idx = add i64 [[SHL]], 8 ; CHECK-NEXT: [[IND00:%.*]] = add i64 %offset.idx, 0 ; CHECK-NEXT: [[IND02:%.*]] = add i64 %offset.idx, 2 ; CHECK-NEXT: [[IND04:%.*]] = add i64 %offset.idx, 4 ; CHECK-NEXT: [[IND06:%.*]] = add i64 %offset.idx, 6 ; CHECK-NEXT: [[IND08:%.*]] = add i64 %offset.idx, 8 ; CHECK-NEXT: [[IND10:%.*]] = add i64 %offset.idx, 10 ; CHECK-NEXT: [[IND12:%.*]] = add i64 %offset.idx, 12 ; CHECK-NEXT: [[IND14:%.*]] = add i64 %offset.idx, 14 ; CHECK-NEXT: [[IND16:%.*]] = add i64 %offset.idx, 16 ; CHECK-NEXT: [[IND18:%.*]] = add i64 %offset.idx, 18 ; CHECK-NEXT: [[IND20:%.*]] = add i64 %offset.idx, 20 ; CHECK-NEXT: [[IND22:%.*]] = add i64 %offset.idx, 22 ; CHECK-NEXT: [[IND24:%.*]] = add i64 %offset.idx, 24 ; CHECK-NEXT: [[IND26:%.*]] = add i64 %offset.idx, 26 ; CHECK-NEXT: [[IND28:%.*]] = add i64 %offset.idx, 28 ; CHECK-NEXT: [[IND30:%.*]] = add i64 %offset.idx, 30 ; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i64> , [[VEC_IND]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND00]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND02]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND04]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND06]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND08]] ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND10]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND12]] ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND14]] ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND16]] ; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND18]] ; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND20]] ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND22]] ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND24]] ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND26]] ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND28]] ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND30]] ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2 ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3 ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4 ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5 ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6 ; CHECK-NEXT: [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7 ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8 ; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9 ; CHECK-NEXT: [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10 ; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11 ; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12 ; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13 ; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14 ; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15 ; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]] ; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i64> [[TMP59]], i32 0 ; CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP12]], i64 [[TMP61]], i64 0 ; CHECK-NEXT: [[TMP65:%.*]] = extractelement <16 x i64> [[TMP59]], i32 1 ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP15]], i64 [[TMP65]], i64 0 ; CHECK-NEXT: [[TMP69:%.*]] = extractelement <16 x i64> [[TMP59]], i32 2 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP18]], i64 [[TMP69]], i64 0 ; CHECK-NEXT: [[TMP73:%.*]] = extractelement <16 x i64> [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP21]], i64 [[TMP73]], i64 0 ; CHECK-NEXT: [[TMP77:%.*]] = extractelement <16 x i64> [[TMP59]], i32 4 ; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP24]], i64 [[TMP77]], i64 0 ; CHECK-NEXT: [[TMP81:%.*]] = extractelement <16 x i64> [[TMP59]], i32 5 ; CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP27]], i64 [[TMP81]], i64 0 ; CHECK-NEXT: [[TMP85:%.*]] = extractelement <16 x i64> [[TMP59]], i32 6 ; CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP30]], i64 [[TMP85]], i64 0 ; CHECK-NEXT: [[TMP89:%.*]] = extractelement <16 x i64> [[TMP59]], i32 7 ; CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP33]], i64 [[TMP89]], i64 0 ; CHECK-NEXT: [[TMP93:%.*]] = extractelement <16 x i64> [[TMP59]], i32 8 ; CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP36]], i64 [[TMP93]], i64 0 ; CHECK-NEXT: [[TMP97:%.*]] = extractelement <16 x i64> [[TMP59]], i32 9 ; CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP39]], i64 [[TMP97]], i64 0 ; CHECK-NEXT: [[TMP101:%.*]] = extractelement <16 x i64> [[TMP59]], i32 10 ; CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP42]], i64 [[TMP101]], i64 0 ; CHECK-NEXT: [[TMP105:%.*]] = extractelement <16 x i64> [[TMP59]], i32 11 ; CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP45]], i64 [[TMP105]], i64 0 ; CHECK-NEXT: [[TMP109:%.*]] = extractelement <16 x i64> [[TMP59]], i32 12 ; CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP48]], i64 [[TMP109]], i64 0 ; CHECK-NEXT: [[TMP113:%.*]] = extractelement <16 x i64> [[TMP59]], i32 13 ; CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP51]], i64 [[TMP113]], i64 0 ; CHECK-NEXT: [[TMP117:%.*]] = extractelement <16 x i64> [[TMP59]], i32 14 ; CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP54]], i64 [[TMP117]], i64 0 ; CHECK-NEXT: [[TMP121:%.*]] = extractelement <16 x i64> [[TMP59]], i32 15 ; CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP57]], i64 [[TMP121]], i64 0 ; CHECK-NEXT: [[VECTORGEP:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP58]], <16 x i64> [[TMP59]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> [[VECTORGEP]], i32 16, <16 x i1> ) ; CHECK: [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], ; CHECK: [[STEP_ADD4:%.*]] = add <16 x i64> [[VEC_IND3]], entry: %0 = load i32, i32* @c, align 4 %cmp34 = icmp sgt i32 %0, 8 br i1 %cmp34, label %for.body.lr.ph, label %for.cond.cleanup for.body.lr.ph: ; preds = %entry %1 = load i32, i32* @a, align 4 %tobool = icmp eq i32 %1, 0 %2 = load i64, i64* @b, align 8 %mul = mul i64 %2, 4063299859190 %tobool6 = icmp eq i64 %mul, 0 %3 = sext i32 %0 to i64 br i1 %tobool, label %for.body.us.preheader, label %for.body.preheader for.body.preheader: ; preds = %for.body.lr.ph br label %for.body for.body.us.preheader: ; preds = %for.body.lr.ph br label %for.body.us for.body.us: ; preds = %for.body.us.preheader, %for.cond.cleanup4.us-lcssa.us.us %indvars.iv78 = phi i64 [ %indvars.iv.next79, %for.cond.cleanup4.us-lcssa.us.us ], [ 8, %for.body.us.preheader ] %indvars.iv70 = phi i64 [ %indvars.iv.next71, %for.cond.cleanup4.us-lcssa.us.us ], [ 0, %for.body.us.preheader ] %4 = sub nsw i64 8, %indvars.iv78 %add.ptr.us = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv78 %5 = add nsw i64 %4, %indvars.iv70 %arraydecay.us.us.us = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %5, i64 0 br i1 %tobool6, label %for.body5.us.us.us.preheader, label %for.body5.us.us48.preheader for.body5.us.us48.preheader: ; preds = %for.body.us store i32 8, i32* %arraydecay.us.us.us, align 16 %indvars.iv.next66 = or i64 %indvars.iv70, 1 %6 = add nsw i64 %4, %indvars.iv.next66 %arraydecay.us.us55.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %6, i64 0 store i32 8, i32* %arraydecay.us.us55.1, align 8 br label %for.cond.cleanup4.us-lcssa.us.us for.body5.us.us.us.preheader: ; preds = %for.body.us store i32 7, i32* %arraydecay.us.us.us, align 16 %indvars.iv.next73 = or i64 %indvars.iv70, 1 %7 = add nsw i64 %4, %indvars.iv.next73 %arraydecay.us.us.us.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %7, i64 0 store i32 7, i32* %arraydecay.us.us.us.1, align 8 br label %for.cond.cleanup4.us-lcssa.us.us for.cond.cleanup4.us-lcssa.us.us: ; preds = %for.body5.us.us48.preheader, %for.body5.us.us.us.preheader %indvars.iv.next79 = add nuw nsw i64 %indvars.iv78, 2 %cmp.us = icmp slt i64 %indvars.iv.next79, %3 %indvars.iv.next71 = add nuw nsw i64 %indvars.iv70, 2 br i1 %cmp.us, label %for.body.us, label %for.cond.cleanup.loopexit for.cond.cleanup.loopexit: ; preds = %for.cond.cleanup4.us-lcssa.us.us br label %for.cond.cleanup for.cond.cleanup.loopexit99: ; preds = %for.body br label %for.cond.cleanup for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit99, %for.cond.cleanup.loopexit, %entry ret void for.body: ; preds = %for.body.preheader, %for.body %indvars.iv95 = phi i64 [ %indvars.iv.next96, %for.body ], [ 8, %for.body.preheader ] %indvars.iv87 = phi i64 [ %indvars.iv.next88, %for.body ], [ 0, %for.body.preheader ] %8 = sub nsw i64 8, %indvars.iv95 %add.ptr = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv95 %9 = add nsw i64 %8, %indvars.iv87 %arraydecay.us31 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %9, i64 0 store i32 8, i32* %arraydecay.us31, align 16 %indvars.iv.next90 = or i64 %indvars.iv87, 1 %10 = add nsw i64 %8, %indvars.iv.next90 %arraydecay.us31.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %10, i64 0 store i32 8, i32* %arraydecay.us31.1, align 8 %indvars.iv.next96 = add nuw nsw i64 %indvars.iv95, 2 %cmp = icmp slt i64 %indvars.iv.next96, %3 %indvars.iv.next88 = add nuw nsw i64 %indvars.iv87, 2 br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit99 } attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }