aboutsummaryrefslogtreecommitdiff
path: root/math
diff options
context:
space:
mode:
author Yuri Victorovich <yuri@FreeBSD.org> 2020-09-21 17:19:26 +0000
committer Yuri Victorovich <yuri@FreeBSD.org> 2020-09-21 17:19:26 +0000
commit 56b91bfece0d9424b0514504d831ed59a88dbcaf (patch)
tree 632de7035452f97aef42755e018a7021c027d672 /math
parent f870f6d380e69f6adb327d1a293e97cebc75a224 (diff)
download ports-56b91bfece0d9424b0514504d831ed59a88dbcaf.tar.gz
ports-56b91bfece0d9424b0514504d831ed59a88dbcaf.zip
math/libxsmm: Update 1.10 -> 1.16.1
Notes
Notes: svn path=/head/; revision=549464
Diffstat (limited to 'math')
-rw-r--r-- math/libxsmm/Makefile 6
-rw-r--r-- math/libxsmm/distinfo 6
-rw-r--r-- math/libxsmm/files/patch-Makefile 15
-rw-r--r-- math/libxsmm/pkg-plist 291
4 files changed, 147 insertions, 171 deletions
diff --git a/math/libxsmm/Makefile b/math/libxsmm/Makefile
index c92fefe71b37..1e9f35e67580 100644
--- a/math/libxsmm/Makefile
+++ b/math/libxsmm/Makefile
@@ -1,8 +1,7 @@
# $FreeBSD$
PORTNAME= libxsmm
-DISTVERSION= 1.10
-PORTREVISION= 6
+DISTVERSION= 1.16.1
CATEGORIES= math science
MAINTAINER= yuri@FreeBSD.org
@@ -27,4 +26,7 @@ LDFLAGS+= -lm
BINARY_ALIAS= python=${PYTHON_CMD}
+post-install: # https://github.com/hfp/libxsmm/issues/413
+ ${RMDIR} ${STAGEDIR}${PREFIX}/share/modules
+
.include <bsd.port.mk>
diff --git a/math/libxsmm/distinfo b/math/libxsmm/distinfo
index 24d5c4934b75..bdeca8009303 100644
--- a/math/libxsmm/distinfo
+++ b/math/libxsmm/distinfo
@@ -1,3 +1,3 @@
-TIMESTAMP = 1542251689
-SHA256 (hfp-libxsmm-1.10_GH0.tar.gz) = 2904f7983719fd5c5af081121c1d028d45b10b854aec9a9e67996a0602631abc
-SIZE (hfp-libxsmm-1.10_GH0.tar.gz) = 2856599
+TIMESTAMP = 1600706738
+SHA256 (hfp-libxsmm-1.16.1_GH0.tar.gz) = 93dc7a3ec40401988729ddb2c6ea2294911261f7e6cd979cf061b5c3691d729d
+SIZE (hfp-libxsmm-1.16.1_GH0.tar.gz) = 2977275
diff --git a/math/libxsmm/files/patch-Makefile b/math/libxsmm/files/patch-Makefile
index 1ede558698c0..307e238c1555 100644
--- a/math/libxsmm/files/patch-Makefile
+++ b/math/libxsmm/files/patch-Makefile
@@ -1,11 +1,20 @@
---- Makefile.orig 2018-11-15 03:24:29 UTC
+--- Makefile.orig 2020-06-26 14:58:46 UTC
+++ Makefile
-@@ -1683,7 +1683,7 @@ endif
+@@ -27,6 +27,8 @@ CFLAGS = $(RPM_OPT_FLAGS)
+ CXXFLAGS = $(RPM_OPT_FLAGS)
+ FCFLAGS = $(RPM_OPT_FLAGS)
+
++CFLAGS += -fcommon # multiple definition of `libxsmm_scratch_pools' (and other symbols) https://github.com/hfp/libxsmm/issues/412
++
+ # THRESHOLD problem size (M x N x K) determining when to use BLAS
+ # A value of zero (0) populates a default threshold
+ THRESHOLD ?= 0
+@@ -1611,7 +1613,7 @@ endif
endif
.PHONY: install-all
-install-all: install samples
+install-all: install
- ifneq ($(abspath $(INSTALL_ROOT)),$(abspath .))
+ ifneq ($(PREFIX),$(ABSDIR))
@echo
@echo "LIBXSMM installing samples..."
diff --git a/math/libxsmm/pkg-plist b/math/libxsmm/pkg-plist
index 2de39ce88c16..643b1cd6dbdd 100644
--- a/math/libxsmm/pkg-plist
+++ b/math/libxsmm/pkg-plist
@@ -1,44 +1,20 @@
-bin/libxsmm_conv_generator
-bin/libxsmm_convwino_generator
bin/libxsmm_gemm_generator
include/libxsmm.f
include/libxsmm.h
include/libxsmm.mod
include/libxsmm/generator_common.c
include/libxsmm/generator_common.h
-include/libxsmm/generator_convolution.c
-include/libxsmm/generator_convolution_common.c
-include/libxsmm/generator_convolution_common.h
-include/libxsmm/generator_convolution_forward_avx512.c
-include/libxsmm/generator_convolution_forward_avx512.h
-include/libxsmm/generator_convolution_weight_update_avx512.c
-include/libxsmm/generator_convolution_weight_update_avx512.h
-include/libxsmm/generator_convolution_winograd.c
-include/libxsmm/generator_convolution_winograd_forward_avx512.c
-include/libxsmm/generator_convolution_winograd_forward_avx512.h
-include/libxsmm/generator_convolution_winograd_weight_update_avx512.c
-include/libxsmm/generator_convolution_winograd_weight_update_avx512.h
include/libxsmm/generator_gemm.c
include/libxsmm/generator_gemm_avx2_microkernel.c
include/libxsmm/generator_gemm_avx2_microkernel.h
include/libxsmm/generator_gemm_avx512_microkernel.c
include/libxsmm/generator_gemm_avx512_microkernel.h
-include/libxsmm/generator_gemm_avx512_microkernel_nofsdbcst.c
-include/libxsmm/generator_gemm_avx512_microkernel_nofsdbcst.h
include/libxsmm/generator_gemm_avx_microkernel.c
include/libxsmm/generator_gemm_avx_microkernel.h
include/libxsmm/generator_gemm_common.c
include/libxsmm/generator_gemm_common.h
-include/libxsmm/generator_gemm_imci_avx512.c
-include/libxsmm/generator_gemm_imci_avx512.h
-include/libxsmm/generator_gemm_imci_microkernel.c
-include/libxsmm/generator_gemm_imci_microkernel.h
include/libxsmm/generator_gemm_noarch.c
include/libxsmm/generator_gemm_noarch.h
-include/libxsmm/generator_gemm_rm_ac_soa.c
-include/libxsmm/generator_gemm_rm_ac_soa.h
-include/libxsmm/generator_gemm_rm_bc_soa.c
-include/libxsmm/generator_gemm_rm_bc_soa.h
include/libxsmm/generator_gemm_sse3_avx_avx2_avx512.c
include/libxsmm/generator_gemm_sse3_avx_avx2_avx512.h
include/libxsmm/generator_gemm_sse3_microkernel.c
@@ -46,10 +22,22 @@ include/libxsmm/generator_gemm_sse3_microkernel.h
include/libxsmm/generator_matcopy.c
include/libxsmm/generator_matcopy_avx_avx512.c
include/libxsmm/generator_matcopy_avx_avx512.h
+include/libxsmm/generator_mateltwise.c
+include/libxsmm/generator_mateltwise_avx_avx512.c
+include/libxsmm/generator_mateltwise_avx_avx512.h
+include/libxsmm/generator_packed.c
include/libxsmm/generator_packed_aux.h
+include/libxsmm/generator_packed_gemm_ac_rm_avx_avx2_avx512.c
+include/libxsmm/generator_packed_gemm_ac_rm_avx_avx2_avx512.h
+include/libxsmm/generator_packed_gemm_avx_avx512.c
+include/libxsmm/generator_packed_gemm_avx_avx512.h
+include/libxsmm/generator_packed_gemm_bc_rm_avx_avx2_avx512.c
+include/libxsmm/generator_packed_gemm_bc_rm_avx_avx2_avx512.h
+include/libxsmm/generator_packed_gemmnn.h
+include/libxsmm/generator_packed_getrf_avx_avx512.c
+include/libxsmm/generator_packed_getrf_avx_avx512.h
include/libxsmm/generator_packed_trmm_avx_avx512.c
include/libxsmm/generator_packed_trmm_avx_avx512.h
-include/libxsmm/generator_packed_trsm.c
include/libxsmm/generator_packed_trsm_avx_avx512.c
include/libxsmm/generator_packed_trsm_avx_avx512.h
include/libxsmm/generator_packed_trsm_dmacros.h
@@ -61,6 +49,8 @@ include/libxsmm/generator_spgemm_csc_bsparse.c
include/libxsmm/generator_spgemm_csc_bsparse.h
include/libxsmm/generator_spgemm_csc_bsparse_soa.c
include/libxsmm/generator_spgemm_csc_bsparse_soa.h
+include/libxsmm/generator_spgemm_csc_csparse_soa.c
+include/libxsmm/generator_spgemm_csc_csparse_soa.h
include/libxsmm/generator_spgemm_csc_reader.c
include/libxsmm/generator_spgemm_csc_reader.h
include/libxsmm/generator_spgemm_csr_asparse.c
@@ -78,41 +68,38 @@ include/libxsmm/generator_transpose_avx_avx512.c
include/libxsmm/generator_transpose_avx_avx512.h
include/libxsmm/generator_x86_instructions.c
include/libxsmm/generator_x86_instructions.h
-include/libxsmm/libxsmm_bgemm.c
-include/libxsmm/libxsmm_bgemm_types.h
+include/libxsmm/libxsmm_blocked_gemm.c
+include/libxsmm/libxsmm_blocked_gemm_types.h
include/libxsmm/libxsmm_cpuid_x86.c
+include/libxsmm/libxsmm_diff.h
include/libxsmm/libxsmm_dnn.c
+include/libxsmm/libxsmm_dnn_convolution.c
include/libxsmm/libxsmm_dnn_convolution_backward.c
include/libxsmm/libxsmm_dnn_convolution_backward.h
include/libxsmm/libxsmm_dnn_convolution_forward.c
include/libxsmm/libxsmm_dnn_convolution_forward.h
include/libxsmm/libxsmm_dnn_convolution_weight_update.c
include/libxsmm/libxsmm_dnn_convolution_weight_update.h
-include/libxsmm/libxsmm_dnn_convolution_winograd_backward.c
-include/libxsmm/libxsmm_dnn_convolution_winograd_backward.h
-include/libxsmm/libxsmm_dnn_convolution_winograd_forward.c
-include/libxsmm/libxsmm_dnn_convolution_winograd_forward.h
-include/libxsmm/libxsmm_dnn_convolution_winograd_weight_update.c
-include/libxsmm/libxsmm_dnn_convolution_winograd_weight_update.h
-include/libxsmm/libxsmm_dnn_dryruns.c
-include/libxsmm/libxsmm_dnn_dryruns.h
include/libxsmm/libxsmm_dnn_elementwise.c
include/libxsmm/libxsmm_dnn_elementwise.h
include/libxsmm/libxsmm_dnn_fullyconnected.c
-include/libxsmm/libxsmm_dnn_fullyconnected_backward.c
-include/libxsmm/libxsmm_dnn_fullyconnected_backward.h
+include/libxsmm/libxsmm_dnn_fullyconnected_backward_weight_update.c
+include/libxsmm/libxsmm_dnn_fullyconnected_backward_weight_update.h
include/libxsmm/libxsmm_dnn_fullyconnected_forward.c
include/libxsmm/libxsmm_dnn_fullyconnected_forward.h
-include/libxsmm/libxsmm_dnn_fullyconnected_weight_update.c
-include/libxsmm/libxsmm_dnn_fullyconnected_weight_update.h
include/libxsmm/libxsmm_dnn_fusedbatchnorm.c
include/libxsmm/libxsmm_dnn_fusedbatchnorm_backward.c
include/libxsmm/libxsmm_dnn_fusedbatchnorm_backward.h
include/libxsmm/libxsmm_dnn_fusedbatchnorm_forward.c
include/libxsmm/libxsmm_dnn_fusedbatchnorm_forward.h
-include/libxsmm/libxsmm_dnn_grucell.c
-include/libxsmm/libxsmm_dnn_handle.c
-include/libxsmm/libxsmm_dnn_handle.h
+include/libxsmm/libxsmm_dnn_fusedgroupnorm.c
+include/libxsmm/libxsmm_dnn_fusedgroupnorm_backward.c
+include/libxsmm/libxsmm_dnn_fusedgroupnorm_backward.h
+include/libxsmm/libxsmm_dnn_fusedgroupnorm_forward.c
+include/libxsmm/libxsmm_dnn_fusedgroupnorm_forward.h
+include/libxsmm/libxsmm_dnn_optimizer.c
+include/libxsmm/libxsmm_dnn_optimizer_sgd.c
+include/libxsmm/libxsmm_dnn_optimizer_sgd.h
include/libxsmm/libxsmm_dnn_pooling.c
include/libxsmm/libxsmm_dnn_pooling_backward.c
include/libxsmm/libxsmm_dnn_pooling_backward.h
@@ -123,19 +110,21 @@ include/libxsmm/libxsmm_dnn_rnncell_backward_weight_update.c
include/libxsmm/libxsmm_dnn_rnncell_backward_weight_update.h
include/libxsmm/libxsmm_dnn_rnncell_forward.c
include/libxsmm/libxsmm_dnn_rnncell_forward.h
-include/libxsmm/libxsmm_dnn_setup.c
-include/libxsmm/libxsmm_dnn_setup.h
+include/libxsmm/libxsmm_dnn_softmaxloss.c
+include/libxsmm/libxsmm_dnn_softmaxloss_backward.c
+include/libxsmm/libxsmm_dnn_softmaxloss_backward.h
+include/libxsmm/libxsmm_dnn_softmaxloss_forward.c
+include/libxsmm/libxsmm_dnn_softmaxloss_forward.h
+include/libxsmm/libxsmm_dnn_tensor.c
include/libxsmm/libxsmm_ext.c
include/libxsmm/libxsmm_ext.h
-include/libxsmm/libxsmm_ext_bgemm.c
+include/libxsmm/libxsmm_ext_blocked_gemm.c
include/libxsmm/libxsmm_ext_gemm.c
-include/libxsmm/libxsmm_ext_trans.c
+include/libxsmm/libxsmm_ext_xcopy.c
include/libxsmm/libxsmm_fsspmdm.c
include/libxsmm/libxsmm_gemm.c
include/libxsmm/libxsmm_gemm.h
include/libxsmm/libxsmm_generator.c
-include/libxsmm/libxsmm_generator_convolution_driver.c
-include/libxsmm/libxsmm_generator_convolution_winograd_driver.c
include/libxsmm/libxsmm_generator_gemm_driver.c
include/libxsmm/libxsmm_hash.c
include/libxsmm/libxsmm_hash.h
@@ -143,10 +132,12 @@ include/libxsmm/libxsmm_main.c
include/libxsmm/libxsmm_main.h
include/libxsmm/libxsmm_malloc.c
include/libxsmm/libxsmm_math.c
+include/libxsmm/libxsmm_memory.c
include/libxsmm/libxsmm_mhd.c
include/libxsmm/libxsmm_perf.c
include/libxsmm/libxsmm_perf.h
include/libxsmm/libxsmm_python.c
+include/libxsmm/libxsmm_rng.c
include/libxsmm/libxsmm_spmdm.c
include/libxsmm/libxsmm_spmdm_begin.h
include/libxsmm/libxsmm_spmdm_begin_avx2.h
@@ -156,127 +147,90 @@ include/libxsmm/libxsmm_sync.c
include/libxsmm/libxsmm_timer.c
include/libxsmm/libxsmm_trace.c
include/libxsmm/libxsmm_trace.h
-include/libxsmm/libxsmm_trans.c
-include/libxsmm/libxsmm_trans.h
+include/libxsmm/libxsmm_xcopy.c
+include/libxsmm/libxsmm_xcopy.h
include/libxsmm/perf_jitdump.h
-include/libxsmm/template/kernel_repeat.tpl.c
include/libxsmm/template/libxsmm.f
include/libxsmm/template/libxsmm.h
-include/libxsmm/template/libxsmm_bgemm.tpl.c
-include/libxsmm/template/libxsmm_bgemm_convert_b_to_a.tpl.c
-include/libxsmm/template/libxsmm_bgemm_copyin_a.tpl.c
-include/libxsmm/template/libxsmm_bgemm_copyin_b.tpl.c
-include/libxsmm/template/libxsmm_bgemm_copyin_c.tpl.c
-include/libxsmm/template/libxsmm_bgemm_copyout_c.tpl.c
-include/libxsmm/template/libxsmm_bgemm_transpose_b.tpl.c
+include/libxsmm/template/libxsmm_blocked_gemm.tpl.c
+include/libxsmm/template/libxsmm_blocked_gemm_convert_b_to_a.tpl.c
+include/libxsmm/template/libxsmm_blocked_gemm_copyin_a.tpl.c
+include/libxsmm/template/libxsmm_blocked_gemm_copyin_b.tpl.c
+include/libxsmm/template/libxsmm_blocked_gemm_copyin_c.tpl.c
+include/libxsmm/template/libxsmm_blocked_gemm_copyout_c.tpl.c
+include/libxsmm/template/libxsmm_blocked_gemm_transpose_b.tpl.c
include/libxsmm/template/libxsmm_config.h
-include/libxsmm/template/libxsmm_dnn_bwd_custom_custom_padding.tpl.c
-include/libxsmm/template/libxsmm_dnn_bwd_custom_custom_padding_img_par.tpl.c
-include/libxsmm/template/libxsmm_dnn_bwd_fuse_postconv_ops_externally.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_custom_custom_inlined.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_custom_custom_inlined_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_custom_custom_input_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_custom_custom_input_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_custom_custom_input_trans_alpha6_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_custom_custom_output_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_custom_custom_output_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_custom_custom_output_trans_alpha6_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_nhwc_custom_inlined.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_nhwc_custom_inlined_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_nhwc_custom_input_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_nhwc_custom_input_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_nhwc_custom_input_trans_alpha6_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_nhwc_custom_output_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_nhwc_custom_output_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_nhwc_custom_output_trans_alpha6_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_weight_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_backward_weight_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_custom_custom_inlined.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_custom_custom_inlined_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_custom_custom_input_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_custom_custom_input_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_custom_custom_input_trans_alpha6_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_custom_custom_output_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_custom_custom_output_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_custom_custom_output_trans_alpha6_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_nhwc_custom_inlined.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_nhwc_custom_inlined_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_nhwc_custom_input_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_nhwc_custom_input_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_nhwc_custom_input_trans_alpha6_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_nhwc_custom_output_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_nhwc_custom_output_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_nhwc_custom_output_trans_alpha6_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_weight_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_weight_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_forward_weight_trans_alpha6_avx512.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_custom_custom_deloutput_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_custom_custom_deloutput_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_custom_custom_inlined.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_custom_custom_inlined_knm.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_custom_custom_input_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_custom_custom_input_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_delweight_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_delweight_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_nhwc_custom_deloutput_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_nhwc_custom_deloutput_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_nhwc_custom_inlined.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_nhwc_custom_inlined_knm.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_nhwc_custom_input_trans_alpha4.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolution_winograd_weight_update_nhwc_custom_input_trans_alpha6.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_fwd_custom_custom.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_fwd_custom_custom_bf16.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_fwd_custom_custom_img_par.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_fwd_nhwc_custom.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_fwd_nhwc_custom_img_par.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_fwd_nhwc_rsck.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_fwd_nhwc_rsck_img_par.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_upd_custom_custom.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_upd_custom_custom_bf16.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_upd_custom_custom_fma_opt.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_upd_custom_custom_opt.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_dryrun_upd_custom_custom_opt_bf16.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_bwd_custom_custom_2.tpl.c
+include/libxsmm/template/libxsmm_dnn_bf16_macros_define.tpl.c
+include/libxsmm/template/libxsmm_dnn_bf16_macros_undefine.tpl.c
+include/libxsmm/template/libxsmm_dnn_convolve_st_bwd_custom_custom_fallback_generic.tpl.c
include/libxsmm/template/libxsmm_dnn_convolve_st_bwd_custom_custom_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_convolve_st_bwd_custom_custom_generic_bf16.tpl.c
+include/libxsmm/template/libxsmm_dnn_convolve_st_bwd_nhwc_custom-rsck_fallback_generic.tpl.c
include/libxsmm/template/libxsmm_dnn_convolve_st_bwd_nhwc_custom-rsck_generic.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_bwd_via_fwd_custom_custom_stream.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_fwd_custom_custom.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_fwd_custom_custom_2.tpl.c
include/libxsmm/template/libxsmm_dnn_convolve_st_fwd_custom_custom_generic.tpl.c
include/libxsmm/template/libxsmm_dnn_convolve_st_fwd_custom_custom_generic_bf16.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_fwd_custom_custom_stream.tpl.c
+include/libxsmm/template/libxsmm_dnn_convolve_st_fwd_custom_custom_generic_i8i32.tpl.c
+include/libxsmm/template/libxsmm_dnn_convolve_st_fwd_custom_custom_generic_i8i8.tpl.c
include/libxsmm/template/libxsmm_dnn_convolve_st_fwd_nhwc_custom-rsck_generic.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_fwd_stream.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_upd_custom_custom.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_upd_custom_custom_2.tpl.c
include/libxsmm/template/libxsmm_dnn_convolve_st_upd_custom_custom_generic.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_upd_custom_custom_stream.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_upd_custom_custom_stream_bf16.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_upd_custom_custom_stream_fma_opt.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_upd_custom_custom_stream_lp.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_upd_custom_custom_stream_opt.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_upd_custom_custom_stream_opt_bf16.tpl.c
-include/libxsmm/template/libxsmm_dnn_convolve_st_upd_custom_custom_stream_opt_lp.tpl.c
+include/libxsmm/template/libxsmm_dnn_convolve_st_upd_custom_custom_generic_bf16.tpl.c
include/libxsmm/template/libxsmm_dnn_convolve_st_upd_nhwc_custom-rsck_generic.tpl.c
-include/libxsmm/template/libxsmm_dnn_fullyconnected_st_bwd_custom_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_fullyconnected_st_bwdupd_custom_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_fullyconnected_st_bwdupd_ncnc_kcck_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_fullyconnected_st_bwdupd_ncnc_kcck_generic_bf16.tpl.c
include/libxsmm/template/libxsmm_dnn_fullyconnected_st_fwd_custom_generic.tpl.c
-include/libxsmm/template/libxsmm_dnn_fullyconnected_st_upd_custom_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_fullyconnected_st_fwd_ncnc_kcck_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_fullyconnected_st_fwd_ncnc_kcck_generic_bf16.tpl.c
include/libxsmm/template/libxsmm_dnn_fusedbatchnorm_st_bwd_custom_f32_bf16_c16_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedbatchnorm_st_bwd_custom_f32_bf16_c32_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedbatchnorm_st_bwd_custom_f32_bf16_c64_avx512.tpl.c
include/libxsmm/template/libxsmm_dnn_fusedbatchnorm_st_bwd_custom_generic.tpl.c
include/libxsmm/template/libxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c16_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c32_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c64_avx512.tpl.c
include/libxsmm/template/libxsmm_dnn_fusedbatchnorm_st_fwd_custom_generic.tpl.c
-include/libxsmm/template/libxsmm_dnn_fwd_custom_custom_bias.tpl.c
-include/libxsmm/template/libxsmm_dnn_fwd_custom_custom_bias_img_par.tpl.c
-include/libxsmm/template/libxsmm_dnn_fwd_custom_custom_padding.tpl.c
-include/libxsmm/template/libxsmm_dnn_fwd_custom_custom_padding_img_par.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedgroupnorm_st_bwd_custom_f32_bf16_c16_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedgroupnorm_st_bwd_custom_f32_bf16_c32_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedgroupnorm_st_bwd_custom_f32_bf16_c64_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedgroupnorm_st_bwd_custom_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c16_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c32_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c64_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_fusedgroupnorm_st_fwd_custom_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_optimizer_sgd_st_generic.tpl.c
include/libxsmm/template/libxsmm_dnn_pooling_st_bwd_custom_f32_bf16_c16_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_pooling_st_bwd_custom_f32_bf16_c32_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_pooling_st_bwd_custom_f32_bf16_c64_avx512.tpl.c
include/libxsmm/template/libxsmm_dnn_pooling_st_bwd_custom_generic.tpl.c
include/libxsmm/template/libxsmm_dnn_pooling_st_fwd_custom_f32_bf16_c16_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_pooling_st_fwd_custom_f32_bf16_c32_avx512.tpl.c
+include/libxsmm/template/libxsmm_dnn_pooling_st_fwd_custom_f32_bf16_c64_avx512.tpl.c
include/libxsmm/template/libxsmm_dnn_pooling_st_fwd_custom_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_gru_bwdupd_nc_ck_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_gru_bwdupd_nc_kcck.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_gru_fwd_nc_ck_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_gru_fwd_nc_kcck.tpl.c
include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_ck_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_ck_generic_bf16.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_bf16.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_core.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_core_bf16.tpl.c
include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic_bf16.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_bf16.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused_bf16.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused_bf16.tpl.c
include/libxsmm/template/libxsmm_dnn_rnncell_st_rnn_bwdupd_nc_ck_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_rnn_bwdupd_nc_kcck.tpl.c
include/libxsmm/template/libxsmm_dnn_rnncell_st_rnn_fwd_nc_ck_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_rnn_fwd_nc_kcck.tpl.c
+include/libxsmm/template/libxsmm_dnn_rnncell_st_rnn_fwd_ncnc_kcck.tpl.c
+include/libxsmm/template/libxsmm_dnn_softmaxloss_st_bwd_ncnc_generic.tpl.c
+include/libxsmm/template/libxsmm_dnn_softmaxloss_st_fwd_ncnc_generic.tpl.c
include/libxsmm/template/libxsmm_dnn_tensor_bias_copy_in_nchw.tpl.c
include/libxsmm/template/libxsmm_dnn_tensor_bias_copy_out_nchw.tpl.c
include/libxsmm/template/libxsmm_dnn_tensor_buffer_copy_in_nchw.tpl.c
@@ -285,26 +239,33 @@ include/libxsmm/template/libxsmm_dnn_tensor_filter_copy_in_kcrs.tpl.c
include/libxsmm/template/libxsmm_dnn_tensor_filter_copy_out_kcrs.tpl.c
include/libxsmm/template/libxsmm_dnn_zero_rim_st_input_custom.tpl.c
include/libxsmm/template/libxsmm_dnn_zero_rim_st_input_nhwc.tpl.c
+include/libxsmm/template/libxsmm_internal_gru_bwdupd_fused_eltwise_1.tpl.c
+include/libxsmm/template/libxsmm_internal_gru_bwdupd_fused_eltwise_2.tpl.c
+include/libxsmm/template/libxsmm_internal_lstm_bwdupd_fused_eltwise.tpl.c
+include/libxsmm/template/libxsmm_internal_lstm_bwdupd_fused_eltwise_reformat.tpl.c
+include/libxsmm/template/libxsmm_internal_lstm_bwdupd_fused_eltwise_reformat_bf16.tpl.c
+include/libxsmm/template/libxsmm_internal_lstm_fwd_fused_eltwise.tpl.c
+include/libxsmm/template/libxsmm_internal_lstm_fwd_fused_eltwise_bf16.tpl.c
include/libxsmm/template/libxsmm_matdiff.tpl.c
include/libxsmm/template/libxsmm_spmdm_compute_bfloat16_thread.tpl.c
include/libxsmm/template/libxsmm_spmdm_compute_fp32_thread.tpl.c
include/libxsmm/template/libxsmm_spmdm_createSparseSlice_bfloat16_thread.tpl.c
include/libxsmm/template/libxsmm_spmdm_createSparseSlice_fp32_thread.tpl.c
+include/libxsmm/template/libxsmm_version.h
include/libxsmm/template/transpose.tpl.c
-include/libxsmm/template/transpose_lp_input.tpl.c
-include/libxsmm/template/transpose_lp_input_remainder.tpl.c
-include/libxsmm/template/transpose_lp_input_resizer.tpl.c
-include/libxsmm/template/transpose_lp_input_resizer_remainder.tpl.c
-include/libxsmm/template/transpose_lp_output.tpl.c
-include/libxsmm_bgemm.h
+include/libxsmm_blocked_gemm.h
include/libxsmm_config.h
include/libxsmm_cpuid.h
include/libxsmm_dnn.h
+include/libxsmm_dnn_convolution.h
include/libxsmm_dnn_fullyconnected.h
include/libxsmm_dnn_fusedbatchnorm.h
-include/libxsmm_dnn_grucell.h
+include/libxsmm_dnn_fusedgroupnorm.h
+include/libxsmm_dnn_optimizer.h
include/libxsmm_dnn_pooling.h
include/libxsmm_dnn_rnncell.h
+include/libxsmm_dnn_softmaxloss.h
+include/libxsmm_dnn_tensor.h
include/libxsmm_frontend.h
include/libxsmm_fsspmdm.h
include/libxsmm_generator.h
@@ -312,41 +273,47 @@ include/libxsmm_intrinsics_x86.h
include/libxsmm_macros.h
include/libxsmm_malloc.h
include/libxsmm_math.h
+include/libxsmm_memory.h
include/libxsmm_mhd.h
+include/libxsmm_rng.h
include/libxsmm_source.h
include/libxsmm_spmdm.h
include/libxsmm_sync.h
include/libxsmm_timer.h
include/libxsmm_typedefs.h
-lib/libxsmm.pc
+include/libxsmm_version.h
lib/libxsmm.so
lib/libxsmm.so.1
-lib/libxsmm.so.1.10.0
-lib/libxsmmext.pc
+lib/libxsmm.so.1.16.1
lib/libxsmmext.so
lib/libxsmmext.so.1
-lib/libxsmmext.so.1.10.0
-lib/libxsmmf.pc
+lib/libxsmmext.so.1.16.1
lib/libxsmmf.so
lib/libxsmmf.so.1
-lib/libxsmmf.so.1.10.0
+lib/libxsmmf.so.1.16.1
lib/libxsmmgen.so
lib/libxsmmgen.so.1
-lib/libxsmmgen.so.1.10.0
+lib/libxsmmgen.so.1.16.1
lib/libxsmmnoblas.so
lib/libxsmmnoblas.so.1
-lib/libxsmmnoblas.so.1.10.0
+lib/libxsmmnoblas.so.1.16.1
+libdata/pkgconfig/libxsmm.pc
+libdata/pkgconfig/libxsmmext.pc
+libdata/pkgconfig/libxsmmf.pc
+libdata/pkgconfig/libxsmmnoblas.pc
%%DATADIR%%/CONTRIBUTING.md
%%DATADIR%%/LICENSE.md
%%DATADIR%%/README.md
+%%DATADIR%%/SECURITY.md
+%%DATADIR%%/build.txt
%%DATADIR%%/cp2k.md
-%%DATADIR%%/cp2k.pdf
%%DATADIR%%/gxm.md
%%DATADIR%%/index.md
%%DATADIR%%/libxsmm.pdf
%%DATADIR%%/libxsmm_aux.md
%%DATADIR%%/libxsmm_be.md
%%DATADIR%%/libxsmm_dl.md
+%%DATADIR%%/libxsmm_fortran.md
%%DATADIR%%/libxsmm_mm.md
%%DATADIR%%/libxsmm_prof.md
%%DATADIR%%/libxsmm_samples.md
@@ -354,6 +321,4 @@ lib/libxsmmnoblas.so.1.10.0
%%DATADIR%%/libxsmm_tune.md
%%DATADIR%%/tensorflow.md
%%DATADIR%%/tensorflow.pdf
-%%DATADIR%%/tfserving.md
-%%DATADIR%%/tfserving.pdf
%%DATADIR%%/version.txt