245 files changed, 29187 insertions, 6990 deletions
diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc index c82cb0f31335..3a6a5e2b13e4 100644 --- a/ObsoleteFiles.inc +++ b/ObsoleteFiles.inc @@ -190,6 +190,12 @@ OLD_FILES+=usr/lib/clang/7.0.1/lib/freebsd/libclang_rt.ubsan_standalone_cxx-x86_ OLD_DIRS+=usr/lib/clang/7.0.1/lib/freebsd OLD_DIRS+=usr/lib/clang/7.0.1/lib OLD_DIRS+=usr/lib/clang/7.0.1 +# 20190131: pfil(9) changed +OLD_FILES+=usr/share/man/man9/pfil_hook_get.9 +OLD_FILES+=usr/share/man/man9/pfil_rlock.9 +OLD_FILES+=usr/share/man/man9/pfil_runlock.9 +OLD_FILES+=usr/share/man/man9/pfil_wlock.9 +OLD_FILES+=usr/share/man/man9/pfil_wunlock.9 # 20190126: adv(4) / adw(4) removal OLD_FILES+=usr/share/man/man4/adv.4.gz OLD_FILES+=usr/share/man/man4/adw.4.gz @@ -37,6 +37,13 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 13.x IS SLOW: prerequisites and upgrading, if you are not already using clang 3.5.0 or higher. +20190131: + Iflib is no longer unconditionally compiled into the kernel. Drivers + using iflib and statically compiled into the kernel, now require + the 'device iflib' config option. For the same drivers loaded as + modules on kernels not having 'device iflib', the iflib.ko module + is loaded automatically. + 20181230: r342635 changes the way efibootmgr(8) works by requiring users to add the -b (bootnum) parameter for commands where the bootnum was previously diff --git a/contrib/elftoolchain/readelf/readelf.c b/contrib/elftoolchain/readelf/readelf.c index 5c202ca87b3f..a53f65f6af77 100644 --- a/contrib/elftoolchain/readelf/readelf.c +++ b/contrib/elftoolchain/readelf/readelf.c @@ -220,6 +220,11 @@ struct mips_option { const char *desc; }; +struct flag_desc { + uint64_t flag; + const char *desc; +}; + static void add_dumpop(struct readelf *re, size_t si, const char *sn, int op, int t); static const char *aeabi_adv_simd_arch(uint64_t simd); @@ -293,6 +298,7 @@ static void dump_dwarf_ranges_foreach(struct readelf *re, Dwarf_Die die, static void dump_dwarf_str(struct readelf *re); static void dump_eflags(struct readelf *re, uint64_t e_flags); static void dump_elf(struct readelf *re); +static void dump_flags(struct flag_desc *fd, uint64_t flags); static void dump_dyn_val(struct readelf *re, GElf_Dyn *dyn, uint32_t stab); static void dump_dynamic(struct readelf *re); static void dump_liblist(struct readelf *re); @@ -307,6 +313,8 @@ static void dump_mips_specific_info(struct readelf *re); static void dump_notes(struct readelf *re); static void dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off); +static void dump_notes_data(const char *name, uint32_t type, const char *buf, + size_t sz); static void dump_svr4_hash(struct section *s); static void dump_svr4_hash64(struct readelf *re, struct section *s); static void dump_gnu_hash(struct readelf *re, struct section *s); @@ -2721,6 +2729,58 @@ dump_arch_dyn_val(struct readelf *re, GElf_Dyn *dyn) } static void +dump_flags(struct flag_desc *desc, uint64_t val) +{ + struct flag_desc *fd; + + for (fd = desc; fd->flag != 0; fd++) { + if (val & fd->flag) { + val &= ~fd->flag; + printf(" %s", fd->desc); + } + } + if (val != 0) + printf(" unknown (0x%jx)", (uintmax_t)val); +} + +static struct flag_desc dt_flags[] = { + { DF_ORIGIN, "ORIGIN" }, + { DF_SYMBOLIC, "SYMBOLIC" }, + { DF_TEXTREL, "TEXTREL" }, + { DF_BIND_NOW, "BIND_NOW" }, + { DF_STATIC_TLS, "STATIC_TLS" }, + { 0, NULL } +}; + +static struct flag_desc dt_flags_1[] = { + { DF_1_BIND_NOW, "NOW" }, + { DF_1_GLOBAL, "GLOBAL" }, + { 0x4, "GROUP" }, + { DF_1_NODELETE, "NODELETE" }, + { DF_1_LOADFLTR, "LOADFLTR" }, + { 0x20, 
"INITFIRST" }, + { DF_1_NOOPEN, "NOOPEN" }, + { DF_1_ORIGIN, "ORIGIN" }, + { 0x100, "DIRECT" }, + { DF_1_INTERPOSE, "INTERPOSE" }, + { DF_1_NODEFLIB, "NODEFLIB" }, + { 0x1000, "NODUMP" }, + { 0x2000, "CONFALT" }, + { 0x4000, "ENDFILTEE" }, + { 0x8000, "DISPRELDNE" }, + { 0x10000, "DISPRELPND" }, + { 0x20000, "NODIRECT" }, + { 0x40000, "IGNMULDEF" }, + { 0x80000, "NOKSYMS" }, + { 0x100000, "NOHDR" }, + { 0x200000, "EDITED" }, + { 0x400000, "NORELOC" }, + { 0x800000, "SYMINTPOSE" }, + { 0x1000000, "GLOBAUDIT" }, + { 0, NULL } +}; + +static void dump_dyn_val(struct readelf *re, GElf_Dyn *dyn, uint32_t stab) { const char *name; @@ -2804,6 +2864,12 @@ dump_dyn_val(struct readelf *re, GElf_Dyn *dyn, uint32_t stab) case DT_GNU_PRELINKED: printf(" %s\n", timestamp(dyn->d_un.d_val)); break; + case DT_FLAGS: + dump_flags(dt_flags, dyn->d_un.d_val); + break; + case DT_FLAGS_1: + dump_flags(dt_flags_1, dyn->d_un.d_val); + break; default: printf("\n"); } @@ -3422,6 +3488,53 @@ dump_notes(struct readelf *re) } } +static struct flag_desc note_feature_ctl_flags[] = { + { NT_FREEBSD_FCTL_ASLR_DISABLE, "ASLR_DISABLE" }, + { 0, NULL } +}; + +static void +dump_notes_data(const char *name, uint32_t type, const char *buf, size_t sz) +{ + size_t i; + const uint32_t *ubuf; + + /* Note data is at least 4-byte aligned. */ + if (((uintptr_t)buf & 3) != 0) { + warnx("bad note data alignment"); + goto unknown; + } + ubuf = (const uint32_t *)(const void *)buf; + + if (strcmp(name, "FreeBSD") == 0) { + switch (type) { + case NT_FREEBSD_ABI_TAG: + if (sz != 4) + goto unknown; + printf(" ABI tag: %u\n", ubuf[0]); + return; + /* NT_FREEBSD_NOINIT_TAG carries no data, treat as unknown. */ + case NT_FREEBSD_ARCH_TAG: + if (sz != 4) + goto unknown; + printf(" Arch tag: %x\n", ubuf[0]); + return; + case NT_FREEBSD_FEATURE_CTL: + if (sz != 4) + goto unknown; + printf(" Features:"); + dump_flags(note_feature_ctl_flags, ubuf[0]); + printf("\n"); + return; + } + } +unknown: + printf(" description data:"); + for (i = 0; i < sz; i++) + printf(" %02x", (unsigned char)buf[i]); + printf("\n"); +} + static void dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off) { @@ -3438,7 +3551,9 @@ dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off) return; } note = (Elf_Note *)(uintptr_t) buf; - name = (char *)(uintptr_t)(note + 1); + buf += sizeof(Elf_Note); + name = buf; + buf += roundup2(note->n_namesz, 4); /* * The name field is required to be nul-terminated, and * n_namesz includes the terminating nul in observed @@ -3456,8 +3571,8 @@ dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off) printf(" %-13s %#010jx", name, (uintmax_t) note->n_descsz); printf(" %s\n", note_type(name, re->ehdr.e_type, note->n_type)); - buf += sizeof(Elf_Note) + roundup2(note->n_namesz, 4) + - roundup2(note->n_descsz, 4); + dump_notes_data(name, note->n_type, buf, note->n_descsz); + buf += roundup2(note->n_descsz, 4); } } diff --git a/contrib/ipfilter/arc4random.c b/contrib/ipfilter/arc4random.c index 499428702a81..bdb6b2d9ba9a 100644 --- a/contrib/ipfilter/arc4random.c +++ b/contrib/ipfilter/arc4random.c @@ -7,7 +7,7 @@ * * Dan Moschuk */ -#if !defined(SOLARIS2) && !defined(__osf__) +#if !defined(SOLARIS2) # include <sys/cdefs.h> #endif @@ -16,26 +16,16 @@ #ifdef __FreeBSD__ # include <sys/kernel.h> #endif -#if !defined(__osf__) # include <sys/random.h> -#endif #ifdef __FreeBSD__ # include <sys/libkern.h> #endif #include <sys/lock.h> -#ifndef __osf__ # include <sys/mutex.h> -#endif #include 
<sys/time.h> -#if defined(SOLARIS2) && (SOLARIS2 < 9) -# include <netinet/in_systm.h> -#endif #include <sys/socket.h> #include <net/if.h> -#ifdef __osf__ -# include <net/route.h> -#endif #include <netinet/in.h> #include <netinet/ip.h> #include "netinet/ip_compat.h" diff --git a/contrib/ipfilter/ip_dstlist.c b/contrib/ipfilter/ip_dstlist.c index ce2e72e8130f..99c7a22668df 100644 --- a/contrib/ipfilter/ip_dstlist.c +++ b/contrib/ipfilter/ip_dstlist.c @@ -9,9 +9,6 @@ # define KERNEL 1 # define _KERNEL 1 #endif -#if defined(__osf__) -# define _PROTO_NET_H_ -#endif #include <sys/errno.h> #include <sys/types.h> #include <sys/param.h> @@ -21,9 +18,6 @@ # include <stdlib.h> # include <string.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #else @@ -33,14 +27,12 @@ struct file; # endif #endif #include <sys/time.h> -#if !defined(linux) # include <sys/protosw.h> -#endif #include <sys/socket.h> -#if defined(_KERNEL) && (!defined(__SVR4) && !defined(__svr4__)) +#if defined(_KERNEL) && !defined(__SVR4) # include <sys/mbuf.h> #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include <sys/filio.h> # include <sys/byteorder.h> # ifdef _KERNEL @@ -49,7 +41,7 @@ struct file; # include <sys/stream.h> # include <sys/kmem.h> #endif -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include <sys/malloc.h> #endif diff --git a/contrib/ipfilter/ip_fil.c b/contrib/ipfilter/ip_fil.c index 32cba4cdd372..794d7e205bb3 100644 --- a/contrib/ipfilter/ip_fil.c +++ b/contrib/ipfilter/ip_fil.c @@ -25,24 +25,10 @@ struct rtentry; static void ipf_setifpaddr __P((struct ifnet *, char *)); void init_ifp __P((void)); -#if defined(__sgi) && (IRIX < 60500) -static int no_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *)); -static int write_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *)); -#else -# if TRU64 >= 1885 -static int no_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *, struct rtentry *, char *)); -static int write_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *, struct rtentry *, char *)); -# else static int no_output __P((struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *)); static int write_output __P((struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *)); -# endif -#endif struct ifaddr { struct sockaddr_storage ifa_addr; @@ -123,17 +109,8 @@ ipf_forgetifp(softc, ifp) static int -#if defined(__sgi) && (IRIX < 60500) -no_output(ifp, m, s) -#else -# if TRU64 >= 1885 -no_output (ifp, m, s, rt, cp) - char *cp; -# else no_output(ifp, m, s, rt) -# endif struct rtentry *rt; -#endif struct ifnet *ifp; struct mbuf *m; struct sockaddr *s; @@ -143,17 +120,8 @@ no_output(ifp, m, s, rt) static int -#if defined(__sgi) && (IRIX < 60500) -write_output(ifp, m, s) -#else -# if TRU64 >= 1885 -write_output (ifp, m, s, rt, cp) - char *cp; -# else write_output(ifp, m, s, rt) -# endif struct rtentry *rt; -#endif struct ifnet *ifp; struct mbuf *m; struct sockaddr *s; @@ -167,8 +135,7 @@ write_output(ifp, m, s, rt) ip = MTOD(mb, ip_t *); #if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || defined(linux) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) + defined(__FreeBSD__) sprintf(fname, "/tmp/%s", ifp->if_xname); #else sprintf(fname, "/tmp/%s%d", ifp->if_name, ifp->if_unit); @@ -189,42 +156,26 @@ ipf_setifpaddr(ifp, addr) struct ifnet *ifp; char *addr; { 
-#ifdef __sgi - struct in_ifaddr *ifa; -#else struct ifaddr *ifa; -#endif -#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) +#if defined(__NetBSD__) || defined(__FreeBSD__) if (ifp->if_addrlist.tqh_first != NULL) #else -# ifdef __sgi - if (ifp->in_ifaddr != NULL) -# else if (ifp->if_addrlist != NULL) -# endif #endif return; ifa = (struct ifaddr *)malloc(sizeof(*ifa)); -#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) +#if defined(__NetBSD__) || defined(__FreeBSD__) ifp->if_addrlist.tqh_first = ifa; #else -# ifdef __sgi - ifp->in_ifaddr = ifa; -# else ifp->if_addrlist = ifa; -# endif #endif if (ifa != NULL) { struct sockaddr_in *sin; -#ifdef __sgi - sin = (struct sockaddr_in *)&ifa->ia_addr; -#else sin = (struct sockaddr_in *)&ifa->ifa_addr; -#endif #ifdef USE_INET6 if (index(addr, ':') != NULL) { struct sockaddr_in6 *sin6; @@ -263,8 +214,7 @@ get_unit(name, family) struct ifnet *ifp, **ifpp, **old_ifneta; char *addr; #if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || defined(linux) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) + defined(__FreeBSD__) if (!*name) return NULL; @@ -333,12 +283,11 @@ get_unit(name, family) } ifp = ifneta[nifs - 1]; -#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) +#if defined(__NetBSD__) || defined(__FreeBSD__) TAILQ_INIT(&ifp->if_addrlist); #endif #if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || defined(linux) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) + defined(__FreeBSD__) (void) strncpy(ifp->if_xname, name, sizeof(ifp->if_xname)); #else s = name + strlen(name) - 1; @@ -375,8 +324,7 @@ get_ifname(ifp) { static char ifname[LIFNAMSIZ]; -#if defined(__OpenBSD__) || defined(__NetBSD__) || defined(linux) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) +#if defined(__NetBSD__) || defined(__FreeBSD__) sprintf(ifname, "%s", ifp->if_xname); #else if (ifp->if_unit != -1) @@ -397,8 +345,7 @@ init_ifp() int fd; #if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || defined(linux) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) + defined(__FreeBSD__) for (ifpp = ifneta; ifpp && (ifp = *ifpp); ifpp++) { ifp->if_output = (void *)write_output; sprintf(fname, "/tmp/%s", ifp->if_xname); @@ -717,20 +664,12 @@ ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask) i6addr_t *inp, *inpmask; { struct ifnet *ifp = ifptr; -#ifdef __sgi - struct in_ifaddr *ifa; -#else struct ifaddr *ifa; -#endif -#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) +#if defined(__NetBSD__) || defined(__FreeBSD__) ifa = ifp->if_addrlist.tqh_first; #else -# ifdef __sgi - ifa = (struct in_ifaddr *)ifp->in_ifaddr; -# else ifa = ifp->if_addrlist; -# endif #endif if (ifa != NULL) { if (v == 4) { @@ -738,11 +677,7 @@ ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask) mask.sin_addr.s_addr = 0xffffffff; -#ifdef __sgi - sin = (struct sockaddr_in *)&ifa->ia_addr; -#else sin = (struct sockaddr_in *)&ifa->ifa_addr; -#endif return ipf_ifpfillv4addr(atype, sin, &mask, &inp->in4, &inpmask->in4); diff --git a/contrib/ipfilter/ip_fil_compat.c b/contrib/ipfilter/ip_fil_compat.c index d0b356f76904..271c2e065738 100644 --- a/contrib/ipfilter/ip_fil_compat.c +++ b/contrib/ipfilter/ip_fil_compat.c @@ -9,15 +9,12 @@ # define KERNEL 1 # define _KERNEL 1 #endif -#if 
defined(__osf__) -# define _PROTO_NET_H_ -#endif #include <sys/param.h> #include <sys/errno.h> #include <sys/types.h> #include <sys/time.h> #include <sys/file.h> -#if __FreeBSD_version >= 220000 && defined(_KERNEL) +#if defined(__FreeBSD_version) && defined(_KERNEL) # include <sys/fcntl.h> # include <sys/filio.h> #else @@ -26,17 +23,10 @@ #if !defined(_KERNEL) # include <string.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #endif #include <sys/socket.h> -#if (defined(__osf__) || defined(AIX) || defined(__hpux) || defined(__sgi)) && defined(_KERNEL) -# include "radix_ipf_local.h" -# define _RADIX_H_ -#endif #include <net/if.h> #if defined(__FreeBSD__) # include <sys/cdefs.h> @@ -44,7 +34,7 @@ struct file; #endif #if defined(_KERNEL) # include <sys/systm.h> -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include <sys/mbuf.h> # endif #endif @@ -348,9 +338,6 @@ typedef struct fr_info_4_1_32 { void *fin_qpi; char fin_ifname[LIFNAMSIZ]; #endif -#ifdef __sgi - void *fin_hbuf; -#endif } fr_info_4_1_32_t; typedef struct fr_info_4_1_24 { @@ -389,9 +376,6 @@ typedef struct fr_info_4_1_24 { void *fin_qpi; char fin_ifname[LIFNAMSIZ]; #endif -#ifdef __sgi - void *fin_hbuf; -#endif } fr_info_4_1_24_t; typedef struct fr_info_4_1_23 { @@ -429,9 +413,6 @@ typedef struct fr_info_4_1_23 { void *fin_qpi; char fin_ifname[LIFNAMSIZ]; #endif -#ifdef __sgi - void *fin_hbuf; -#endif } fr_info_4_1_23_t; typedef struct fr_info_4_1_11 { @@ -468,9 +449,6 @@ typedef struct fr_info_4_1_11 { void *fin_qpi; char fin_ifname[LIFNAMSIZ]; #endif -#ifdef __sgi - void *fin_hbuf; -#endif } fr_info_4_1_11_t; /* ------------------------------------------------------------------------ */ @@ -2678,9 +2656,6 @@ fr_info_4_1_32_to_current(old, current) fin->fin_qfm = old->fin_qfm; fin->fin_qpi = old->fin_qpi; #endif -#ifdef __sgi - fin->fin_hbuf = old->fin_hbuf; -#endif } @@ -2719,9 +2694,6 @@ fr_info_4_1_24_to_current(old, current) fin->fin_qfm = old->fin_qfm; fin->fin_qpi = old->fin_qpi; #endif -#ifdef __sgi - fin->fin_hbuf = old->fin_hbuf; -#endif } @@ -2759,9 +2731,6 @@ fr_info_4_1_23_to_current(old, current) fin->fin_qfm = old->fin_qfm; fin->fin_qpi = old->fin_qpi; #endif -#ifdef __sgi - fin->fin_hbuf = fin->fin_hbuf; -#endif } @@ -2799,9 +2768,6 @@ fr_info_4_1_11_to_current(old, current) fin->fin_qfm = old->fin_qfm; fin->fin_qpi = old->fin_qpi; #endif -#ifdef __sgi - fin->fin_hbuf = fin->fin_hbuf; -#endif } @@ -4078,9 +4044,6 @@ fr_info_current_to_4_1_24(current, old) old->fin_qpi = fin->fin_qpi; old->fin_ifname[0] = '\0'; #endif -#ifdef __sgi - old->fin_hbuf = fin->fin_hbuf; -#endif } @@ -4121,9 +4084,6 @@ fr_info_current_to_4_1_23(current, old) old->fin_qpi = fin->fin_qpi; old->fin_ifname[0] = '\0'; #endif -#ifdef __sgi - old->fin_hbuf = fin->fin_hbuf; -#endif } @@ -4164,9 +4124,6 @@ fr_info_current_to_4_1_11(current, old) old->fin_qpi = fin->fin_qpi; old->fin_ifname[0] = '\0'; #endif -#ifdef __sgi - old->fin_hbuf = fin->fin_hbuf; -#endif } diff --git a/contrib/ipfilter/ipf.h b/contrib/ipfilter/ipf.h index 695325a2788f..f5617334a0ba 100644 --- a/contrib/ipfilter/ipf.h +++ b/contrib/ipfilter/ipf.h @@ -12,11 +12,6 @@ #ifndef __IPF_H__ #define __IPF_H__ -#if defined(__osf__) -# define radix_mask ipf_radix_mask -# define radix_node ipf_radix_node -# define radix_node_head ipf_radix_node_head -#endif #include <sys/param.h> #include <sys/types.h> @@ -31,9 +26,6 @@ # define _KERNEL # define KERNEL #endif -#ifdef __OpenBSD__ -struct file; -#endif #include 
<sys/uio.h> #ifdef ADD_KERNEL # undef _KERNEL @@ -188,9 +180,8 @@ typedef struct proxyrule { } proxyrule_t; -#if defined(__NetBSD__) || defined(__OpenBSD__) || \ - (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) || \ - SOLARIS || defined(__sgi) || defined(__osf__) || defined(linux) +#if defined(__NetBSD__) || defined(__FreeBSD_version) || \ + SOLARIS # include <stdarg.h> typedef int (* ioctlfunc_t) __P((int, ioctlcmd_t, ...)); #else @@ -200,13 +191,6 @@ typedef int (* addfunc_t) __P((int, ioctlfunc_t, void *)); typedef int (* copyfunc_t) __P((void *, void *, size_t)); -/* - * SunOS4 - */ -#if defined(sun) && !defined(__SVR4) && !defined(__svr4__) -extern int ioctl __P((int, int, void *)); -#endif - extern char thishost[]; extern char flagset[]; extern u_char flags[]; diff --git a/contrib/ipfilter/ipsend/.OLD/ip_compat.h b/contrib/ipfilter/ipsend/.OLD/ip_compat.h deleted file mode 100644 index b5b8f0741c25..000000000000 --- a/contrib/ipfilter/ipsend/.OLD/ip_compat.h +++ /dev/null @@ -1,244 +0,0 @@ -/* $FreeBSD$ */ - -/* - * (C)opyright 1995 by Darren Reed. - * - * This code may be freely distributed as long as it retains this notice - * and is not changed in any way. The author accepts no responsibility - * for the use of this software. I hate legaleese, don't you ? - * - * @(#)ip_compat.h 1.2 12/7/95 - */ - -/* - * These #ifdef's are here mainly for linux, but who knows, they may - * not be in other places or maybe one day linux will grow up and some - * of these will turn up there too. - */ -#ifndef ICMP_UNREACH -# define ICMP_UNREACH ICMP_DEST_UNREACH -#endif -#ifndef ICMP_SOURCEQUENCH -# define ICMP_SOURCEQUENCH ICMP_SOURCE_QUENCH -#endif -#ifndef ICMP_TIMXCEED -# define ICMP_TIMXCEED ICMP_TIME_EXCEEDED -#endif -#ifndef ICMP_PARAMPROB -# define ICMP_PARAMPROB ICMP_PARAMETERPROB -#endif -#ifndef IPVERSION -# define IPVERSION 4 -#endif -#ifndef IPOPT_MINOFF -# define IPOPT_MINOFF 4 -#endif -#ifndef IPOPT_COPIED -# define IPOPT_COPIED(x) ((x)&0x80) -#endif -#ifndef IPOPT_EOL -# define IPOPT_EOL 0 -#endif -#ifndef IPOPT_NOP -# define IPOPT_NOP 1 -#endif -#ifndef IP_MF -# define IP_MF ((u_short)0x2000) -#endif -#ifndef ETHERTYPE_IP -# define ETHERTYPE_IP ((u_short)0x0800) -#endif -#ifndef TH_FIN -# define TH_FIN 0x01 -#endif -#ifndef TH_SYN -# define TH_SYN 0x02 -#endif -#ifndef TH_RST -# define TH_RST 0x04 -#endif -#ifndef TH_PUSH -# define TH_PUSH 0x08 -#endif -#ifndef TH_ACK -# define TH_ACK 0x10 -#endif -#ifndef TH_URG -# define TH_URG 0x20 -#endif -#ifndef IPOPT_EOL -# define IPOPT_EOL 0 -#endif -#ifndef IPOPT_NOP -# define IPOPT_NOP 1 -#endif -#ifndef IPOPT_RR -# define IPOPT_RR 7 -#endif -#ifndef IPOPT_TS -# define IPOPT_TS 68 -#endif -#ifndef IPOPT_SECURITY -# define IPOPT_SECURITY 130 -#endif -#ifndef IPOPT_LSRR -# define IPOPT_LSRR 131 -#endif -#ifndef IPOPT_SATID -# define IPOPT_SATID 136 -#endif -#ifndef IPOPT_SSRR -# define IPOPT_SSRR 137 -#endif -#ifndef IPOPT_SECUR_UNCLASS -# define IPOPT_SECUR_UNCLASS ((u_short)0x0000) -#endif -#ifndef IPOPT_SECUR_CONFID -# define IPOPT_SECUR_CONFID ((u_short)0xf135) -#endif -#ifndef IPOPT_SECUR_EFTO -# define IPOPT_SECUR_EFTO ((u_short)0x789a) -#endif -#ifndef IPOPT_SECUR_MMMM -# define IPOPT_SECUR_MMMM ((u_short)0xbc4d) -#endif -#ifndef IPOPT_SECUR_RESTR -# define IPOPT_SECUR_RESTR ((u_short)0xaf13) -#endif -#ifndef IPOPT_SECUR_SECRET -# define IPOPT_SECUR_SECRET ((u_short)0xd788) -#endif -#ifndef IPOPT_SECUR_TOPSECRET -# define IPOPT_SECUR_TOPSECRET ((u_short)0x6bc5) -#endif - -#ifdef linux -# if LINUX < 0200 -# define icmp 
icmphdr -# define icmp_type type -# define icmp_code code -# endif - -/* - * From /usr/include/netinet/ip_var.h - * !%@#!$@# linux... - */ -struct ipovly { - caddr_t ih_next, ih_prev; /* for protocol sequence q's */ - u_char ih_x1; /* (unused) */ - u_char ih_pr; /* protocol */ - short ih_len; /* protocol length */ - struct in_addr ih_src; /* source internet address */ - struct in_addr ih_dst; /* destination internet address */ -}; - -typedef struct { - __u16 th_sport; - __u16 th_dport; - __u32 th_seq; - __u32 th_ack; -# if defined(__i386__) || defined(__MIPSEL__) || defined(__alpha__) ||\ - defined(vax) - __u8 th_res:4; - __u8 th_off:4; -#else - __u8 th_off:4; - __u8 th_res:4; -#endif - __u8 th_flags; - __u16 th_win; - __u16 th_sum; - __u16 th_urp; -} tcphdr_t; - -typedef struct { - __u16 uh_sport; - __u16 uh_dport; - __s16 uh_ulen; - __u16 uh_sum; -} udphdr_t; - -typedef struct { -# if defined(__i386__) || defined(__MIPSEL__) || defined(__alpha__) ||\ - defined(vax) - __u8 ip_hl:4; - __u8 ip_v:4; -# else - __u8 ip_hl:4; - __u8 ip_v:4; -# endif - __u8 ip_tos; - __u16 ip_len; - __u16 ip_id; - __u16 ip_off; - __u8 ip_ttl; - __u8 ip_p; - __u16 ip_sum; - struct in_addr ip_src; - struct in_addr ip_dst; -} ip_t; - -typedef struct { - __u8 ether_dhost[6]; - __u8 ether_shost[6]; - __u16 ether_type; -} ether_header_t; - -typedef struct icmp { - u_char icmp_type; /* type of message, see below */ - u_char icmp_code; /* type sub code */ - u_short icmp_cksum; /* ones complement cksum of struct */ - union { - u_char ih_pptr; /* ICMP_PARAMPROB */ - struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ - struct ih_idseq { - n_short icd_id; - n_short icd_seq; - } ih_idseq; - int ih_void; - } icmp_hun; -#define icmp_pptr icmp_hun.ih_pptr -#define icmp_gwaddr icmp_hun.ih_gwaddr -#define icmp_id icmp_hun.ih_idseq.icd_id -#define icmp_seq icmp_hun.ih_idseq.icd_seq -#define icmp_void icmp_hun.ih_void - union { - struct id_ts { - n_time its_otime; - n_time its_rtime; - n_time its_ttime; - } id_ts; - struct id_ip { - ip_t idi_ip; - /* options and then 64 bits of data */ - } id_ip; - u_long id_mask; - char id_data[1]; - } icmp_dun; -#define icmp_otime icmp_dun.id_ts.its_otime -#define icmp_rtime icmp_dun.id_ts.its_rtime -#define icmp_ttime icmp_dun.id_ts.its_ttime -#define icmp_ip icmp_dun.id_ip.idi_ip -#define icmp_mask icmp_dun.id_mask -#define icmp_data icmp_dun.id_data -} icmphdr_t; - -# define bcopy(a,b,c) memmove(b,a,c) -# define bcmp(a,b,c) memcmp(a,b,c) - -# define ifnet device - -#else - -typedef struct udphdr udphdr_t; -typedef struct tcphdr tcphdr_t; -typedef struct ip ip_t; -typedef struct ether_header ether_header_t; - -#endif - -#if defined(__SVR4) || defined(__svr4__) -# define bcopy(a,b,c) memmove(b,a,c) -# define bcmp(a,b,c) memcmp(a,b,c) -# define bzero(a,b) memset(a,0,b) -#endif diff --git a/contrib/ipfilter/ipsend/44arp.c b/contrib/ipfilter/ipsend/44arp.c index 9215959395ab..80521ad15084 100644 --- a/contrib/ipfilter/ipsend/44arp.c +++ b/contrib/ipfilter/ipsend/44arp.c @@ -10,9 +10,7 @@ #include <net/if.h> #include <net/if_dl.h> #include <net/if_types.h> -#ifndef __osf__ # include <net/route.h> -#endif #include <netinet/in.h> #include <netinet/if_ether.h> #include <arpa/inet.h> diff --git a/contrib/ipfilter/ipsend/arp.c b/contrib/ipfilter/ipsend/arp.c index 58a1523e5db5..05f255ea47d2 100644 --- a/contrib/ipfilter/ipsend/arp.c +++ b/contrib/ipfilter/ipsend/arp.c @@ -88,7 +88,6 @@ int arp(ip, ether) sin = (struct sockaddr_in *)&ar.arp_pa; sin->sin_family = AF_INET; bcopy(ip, (char 
*)&sin->sin_addr.s_addr, 4); -#ifndef hpux if ((hp = gethostbyaddr(ip, 4, AF_INET))) # if SOLARIS && (SOLARIS2 >= 10) if (!(ether_hostton(hp->h_name, (struct ether_addr *)ether))) @@ -96,7 +95,6 @@ int arp(ip, ether) if (!(ether_hostton(hp->h_name, ether))) # endif goto savearp; -#endif if (sfd == -1) if ((sfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1) diff --git a/contrib/ipfilter/ipsend/dlcommon.c b/contrib/ipfilter/ipsend/dlcommon.c index 55bc9423ab15..8a8cbf6a6a94 100644 --- a/contrib/ipfilter/ipsend/dlcommon.c +++ b/contrib/ipfilter/ipsend/dlcommon.c @@ -20,11 +20,7 @@ typedef unsigned long ulong; #include <sys/types.h> #include <sys/stream.h> #include <sys/stropts.h> -#ifdef __osf__ -# include <sys/dlpihdr.h> -#else # include <sys/dlpi.h> -#endif #include <sys/signal.h> #include <stdio.h> #include <string.h> diff --git a/contrib/ipfilter/ipsend/ip.c b/contrib/ipfilter/ipsend/ip.c index 4f2eaed3a9b9..c1bb73f0b169 100644 --- a/contrib/ipfilter/ipsend/ip.c +++ b/contrib/ipfilter/ipsend/ip.c @@ -17,11 +17,9 @@ static const char rcsid[] = "@(#)$Id$"; #include <netinet/in.h> #include <netinet/ip.h> #include <sys/param.h> -#ifndef linux # include <net/route.h> # include <netinet/if_ether.h> # include <netinet/ip_var.h> -#endif #include <errno.h> #include <stdio.h> #include <stdlib.h> diff --git a/contrib/ipfilter/ipsend/ipresend.c b/contrib/ipfilter/ipsend/ipresend.c index 7520a0e5bf55..ea0b4211c101 100644 --- a/contrib/ipfilter/ipsend/ipresend.c +++ b/contrib/ipfilter/ipsend/ipresend.c @@ -18,9 +18,7 @@ static const char rcsid[] = "@(#)$Id$"; #include <arpa/inet.h> #include <netinet/in_systm.h> #include <netinet/ip.h> -#ifndef linux #include <netinet/ip_var.h> -#endif #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -37,9 +35,6 @@ extern struct ipread pcap, iphex, iptext; int opts = 0; #ifndef DEFAULT_DEVICE -# ifdef linux -char default_device[] = "eth0"; -# else # ifdef sun char default_device[] = "le0"; # else @@ -49,15 +44,10 @@ char default_device[] = "ln0"; # ifdef __bsdi__ char default_device[] = "ef0"; # else -# ifdef __sgi -char default_device[] = "ec0"; -# else char default_device[] = "lan0"; -# endif # endif # endif # endif -# endif #else char default_device[] = DEFAULT_DEVICE; #endif diff --git a/contrib/ipfilter/ipsend/ipsend.c b/contrib/ipfilter/ipsend/ipsend.c index 3df5c071e2e3..95a1bb1e5c78 100644 --- a/contrib/ipfilter/ipsend/ipsend.c +++ b/contrib/ipfilter/ipsend/ipsend.c @@ -21,14 +21,10 @@ static const char rcsid[] = "@(#)$Id$"; #include <netdb.h> #include <string.h> #include <netinet/ip.h> -#ifndef linux # include <netinet/ip_var.h> -#endif #include "ipsend.h" #include "ipf.h" -#ifndef linux # include <netinet/udp_var.h> -#endif extern char *optarg; @@ -37,27 +33,15 @@ extern void iplang __P((FILE *)); char options[68]; int opts; -#ifdef linux -char default_device[] = "eth0"; -#else # ifdef ultrix char default_device[] = "ln0"; # else # ifdef __bsdi__ char default_device[] = "ef0"; # else -# ifdef __sgi -char default_device[] = "ec0"; -# else -# ifdef __hpux -char default_device[] = "lan0"; -# else char default_device[] = "le0"; -# endif /* __hpux */ -# endif /* __sgi */ # endif /* __bsdi__ */ # endif /* ultrix */ -#endif /* linux */ static void usage __P((char *)); diff --git a/contrib/ipfilter/ipsend/ipsend.h b/contrib/ipfilter/ipsend/ipsend.h index 75a0496e7f83..f409e89c656e 100644 --- a/contrib/ipfilter/ipsend/ipsend.h +++ b/contrib/ipfilter/ipsend/ipsend.h @@ -26,9 +26,6 @@ #include <net/if.h> #include "ipf.h" -#ifdef linux -#include <linux/sockios.h> 
-#endif /* XXX: The following is needed by tcpip.h */ #include <netinet/ip_var.h> #include "netinet/tcpip.h" @@ -49,11 +46,7 @@ extern u_32_t buildopts __P((char *, char *, int)); extern int addipopt __P((char *, struct ipopt_names *, int, char *)); extern int initdevice __P((char *, int)); extern int sendip __P((int, char *, int)); -#ifdef linux -extern struct sock *find_tcp __P((int, struct tcpiphdr *)); -#else extern struct tcpcb *find_tcp __P((int, struct tcpiphdr *)); -#endif extern int ip_resend __P((char *, int, struct ipread *, struct in_addr, char *)); extern void ip_test1 __P((char *, int, ip_t *, struct in_addr, int)); diff --git a/contrib/ipfilter/ipsend/ipsopt.c b/contrib/ipfilter/ipsend/ipsopt.c index a2cc4d04aad1..7f9ab5e32d79 100644 --- a/contrib/ipfilter/ipsend/ipsopt.c +++ b/contrib/ipfilter/ipsend/ipsopt.c @@ -20,9 +20,7 @@ static const char rcsid[] = "@(#)$Id$"; #include <stdio.h> #include <string.h> #include <stdlib.h> -#ifndef linux #include <netinet/ip_var.h> -#endif #include <netinet/tcp.h> #include <arpa/inet.h> #include "ipsend.h" diff --git a/contrib/ipfilter/ipsend/iptest.c b/contrib/ipfilter/ipsend/iptest.c index c6cfb1c75a4a..bc93106c8b89 100644 --- a/contrib/ipfilter/ipsend/iptest.c +++ b/contrib/ipfilter/ipsend/iptest.c @@ -18,12 +18,7 @@ static const char rcsid[] = "@(#)$Id$"; #include <arpa/inet.h> #include <netinet/in_systm.h> #include <netinet/ip.h> -#ifndef linux #include <netinet/ip_var.h> -#endif -#ifdef linux -#include <linux/sockios.h> -#endif #include <stdio.h> #include <netdb.h> #include <unistd.h> @@ -36,9 +31,6 @@ extern char *optarg; extern int optind; char options[68]; -#ifdef linux -char default_device[] = "eth0"; -#else # ifdef sun char default_device[] = "le0"; # else @@ -48,15 +40,10 @@ char default_device[] = "ln0"; # ifdef __bsdi__ char default_device[] = "ef0"; # else -# ifdef __sgi -char default_device[] = "ec0"; -# else char default_device[] = "lan0"; -# endif # endif # endif # endif -#endif static void usage __P((char *)); int main __P((int, char **)); diff --git a/contrib/ipfilter/ipsend/iptests.c b/contrib/ipfilter/ipsend/iptests.c index 0ca02db0b04d..af8772cc2097 100644 --- a/contrib/ipfilter/ipsend/iptests.c +++ b/contrib/ipfilter/ipsend/iptests.c @@ -21,7 +21,6 @@ static const char rcsid[] = "@(#)$Id$"; typedef int boolean_t; #endif #include <sys/time.h> -#if !defined(__osf__) # ifdef __NetBSD__ # include <machine/lock.h> # include <machine/mutex.h> @@ -37,7 +36,6 @@ typedef int boolean_t; # endif # undef _KERNEL # undef KERNEL -#endif #if !defined(solaris) && !defined(linux) && !defined(__sgi) # include <nlist.h> # include <sys/user.h> @@ -66,24 +64,13 @@ typedef int boolean_t; #endif #include <netinet/in_systm.h> #include <sys/socket.h> -#ifdef __hpux -# define _NET_ROUTE_INCLUDED -#endif #include <net/if.h> -#if defined(linux) && (LINUX >= 0200) -# include <asm/atomic.h> -#endif -#if !defined(linux) # if defined(__FreeBSD__) # include "radix_ipf.h" # endif # if !defined(solaris) # include <net/route.h> # endif -#else -# define __KERNEL__ /* because there's a macro not wrapped by this */ -# include <net/route.h> /* in this file :-/ */ -#endif #include <netinet/in.h> #include <arpa/inet.h> #include <netinet/ip.h> @@ -94,20 +81,13 @@ typedef int boolean_t; #include <unistd.h> #include <stdlib.h> #include <string.h> -#ifdef __hpux -# undef _NET_ROUTE_INCLUDED -#endif -#if !defined(linux) # include <netinet/ip_var.h> # if !defined(__hpux) && !defined(solaris) # include <netinet/in_pcb.h> # endif -#endif #include "ipsend.h" -#if 
!defined(linux) && !defined(__hpux) # include <netinet/tcp_timer.h> # include <netinet/tcp_var.h> -#endif #if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 106000000) # define USE_NANOSLEEP #endif @@ -951,9 +931,7 @@ void ip_test5(dev, mtu, ip, gwip, ptest) int nfd, i; t = (tcphdr_t *)((char *)ip + (IP_HL(ip) << 2)); -#if !defined(linux) && !defined(__osf__) t->th_x2 = 0; -#endif TCP_OFF_A(t, 0); t->th_sport = htons(1); t->th_dport = htons(1); diff --git a/contrib/ipfilter/ipsend/resend.c b/contrib/ipfilter/ipsend/resend.c index 8fd289ed562e..3401673ff3a1 100644 --- a/contrib/ipfilter/ipsend/resend.c +++ b/contrib/ipfilter/ipsend/resend.c @@ -19,10 +19,8 @@ static const char rcsid[] = "@(#)$Id$"; #include <arpa/inet.h> #include <netinet/in_systm.h> #include <netinet/ip.h> -#ifndef linux # include <netinet/ip_var.h> # include <netinet/if_ether.h> -#endif #include <stdio.h> #include <netdb.h> #include <string.h> diff --git a/contrib/ipfilter/ipsend/sdlpi.c b/contrib/ipfilter/ipsend/sdlpi.c index 1aee2e4108c6..cd540337b2fa 100644 --- a/contrib/ipfilter/ipsend/sdlpi.c +++ b/contrib/ipfilter/ipsend/sdlpi.c @@ -25,14 +25,7 @@ # include <sys/pfmod.h> # include <sys/bufmod.h> #endif -#ifdef __osf__ -# include <sys/dlpihdr.h> -#else # include <sys/dlpi.h> -#endif -#ifdef __hpux -# include <sys/dlpi_ext.h> -#endif #include <net/if.h> #include <netinet/in.h> diff --git a/contrib/ipfilter/ipsend/sock.c b/contrib/ipfilter/ipsend/sock.c index d9361dcd44e0..d7eae8a13196 100644 --- a/contrib/ipfilter/ipsend/sock.c +++ b/contrib/ipfilter/ipsend/sock.c @@ -29,7 +29,6 @@ typedef int boolean_t; #else # include <sys/dir.h> #endif -#if !defined(__osf__) # ifdef __NetBSD__ # include <machine/lock.h> # endif @@ -50,7 +49,6 @@ typedef int boolean_t; # undef _KERNEL # undef KERNEL # endif -#endif #include <nlist.h> #include <sys/user.h> #include <sys/socket.h> @@ -74,9 +72,7 @@ typedef int boolean_t; #include <netinet/ip.h> #include <netinet/tcp.h> #include <net/if.h> -#ifndef __osf__ # include <net/route.h> -#endif #include <netinet/ip_var.h> #define _WANT_INPCB #include <netinet/in_pcb.h> diff --git a/contrib/ipfilter/lib/getifname.c b/contrib/ipfilter/lib/getifname.c index 88cad329f1e3..dfba83b83c99 100644 --- a/contrib/ipfilter/lib/getifname.c +++ b/contrib/ipfilter/lib/getifname.c @@ -25,9 +25,6 @@ char *getifname(ptr) # include <sys/mutex.h> # include <sys/condvar.h> # endif -# ifdef __hpux -# include "compat.h" -# endif # include "../pfil/qif.h" char *ifname; qif_t qif; diff --git a/contrib/ipfilter/lib/getproto.c b/contrib/ipfilter/lib/getproto.c index 6c52cd3b7677..f57fe06358fb 100644 --- a/contrib/ipfilter/lib/getproto.c +++ b/contrib/ipfilter/lib/getproto.c @@ -23,14 +23,6 @@ int getproto(name) if (*s == '\0') return atoi(name); -#ifdef _AIX51 - /* - * For some bogus reason, "ip" is 252 in /etc/protocols on AIX 5 - * The IANA has doubled up on the definition of 0 - it is now also - * used for IPv6 hop-opts, so we can no longer rely on /etc/protocols - * providing the correct name->number mapping - */ -#endif if (!strcasecmp(name, "ip")) return 0; diff --git a/contrib/ipfilter/lib/inet_addr.c b/contrib/ipfilter/lib/inet_addr.c index c7ae44375a5e..8667c2b33038 100644 --- a/contrib/ipfilter/lib/inet_addr.c +++ b/contrib/ipfilter/lib/inet_addr.c @@ -72,7 +72,6 @@ static const char rcsid[] = "@(#)$Id: inet_addr.c,v 1.8.2.3 2004/12/09 19:41:20 # define __P(x) () # endif #endif -#ifndef linux int inet_aton __P((const char *, struct in_addr *)); /* @@ -189,7 +188,6 @@ inet_aton(cp, addr) addr->s_addr = 
htonl(val); return (1); } -#endif /* these are compatibility routines, not needed on recent BSD releases */ diff --git a/contrib/ipfilter/lib/kmem.c b/contrib/ipfilter/lib/kmem.c index d895bafd0540..de97512cf5d3 100644 --- a/contrib/ipfilter/lib/kmem.c +++ b/contrib/ipfilter/lib/kmem.c @@ -18,9 +18,7 @@ #include <string.h> #include <fcntl.h> #include <sys/file.h> -#if !defined(__sgi) && !defined(__hpux) && !defined(__osf__) && !defined(linux) && !defined(_AIX51) #include <kvm.h> -#endif #include <fcntl.h> #include <sys/socket.h> #include <sys/ioctl.h> @@ -29,9 +27,6 @@ #include <netinet/in_systm.h> #include <netinet/ip.h> #include <net/if.h> -#if defined(linux) || defined(__osf__) || defined(__sgi) || defined(__hpux) -# include <stdlib.h> -#endif #include "kmem.h" @@ -46,82 +41,8 @@ static const char rcsid[] = "@(#)$Id$"; -#if !defined(__sgi) && !defined(__hpux) && !defined(__osf__) && \ - !defined(linux) && !defined(_AIX51) -/* - * For all platforms where there is a libkvm and a kvm_t, we use that... - */ static kvm_t *kvm_f = NULL; -#else -/* - *...and for the others (HP-UX, IRIX, Tru64), we have to provide our own. - */ - -typedef int * kvm_t; - -static kvm_t kvm_f = NULL; -static char *kvm_errstr = NULL; - -kvm_t kvm_open __P((char *, char *, char *, int, char *)); -int kvm_read __P((kvm_t, u_long, char *, size_t)); - -kvm_t kvm_open(kernel, core, swap, mode, errstr) - char *kernel, *core, *swap; - int mode; - char *errstr; -{ - kvm_t k; - int fd; - - kvm_errstr = errstr; - - if (core == NULL) - core = "/dev/kmem"; - - fd = open(core, mode); - if (fd == -1) - return NULL; - k = malloc(sizeof(*k)); - if (k == NULL) - return NULL; - *k = fd; - return k; -} - -int kvm_read(kvm, pos, buffer, size) - kvm_t kvm; - u_long pos; - char *buffer; - size_t size; -{ - int r = 0, left; - char *bufp; - - if (lseek(*kvm, pos, 0) == -1) { - if (kvm_errstr != NULL) { - fprintf(stderr, "%s", kvm_errstr); - perror("lseek"); - } - return -1; - } - - for (bufp = buffer, left = size; left > 0; bufp += r, left -= r) { - r = read(*kvm, bufp, left); -#ifdef __osf__ - /* - * Tru64 returns "0" for successful operation, not the number - * of bytes read. - */ - if (r == 0) - r = left; -#endif - if (r <= 0) - return -1; - } - return r; -} -#endif /* !defined(__sgi) && !defined(__hpux) && !defined(__osf__) */ int openkmem(kern, core) char *kern, *core; diff --git a/contrib/ipfilter/lib/printproto.c b/contrib/ipfilter/lib/printproto.c index d411bfa00421..879da12d7857 100644 --- a/contrib/ipfilter/lib/printproto.c +++ b/contrib/ipfilter/lib/printproto.c @@ -27,14 +27,6 @@ printproto(pr, p, np) PRINTF("udp"); else if (np->in_flags & IPN_ICMPQUERY) PRINTF("icmp"); -#ifdef _AIX51 - /* - * To make up for "ip = 252" and "hopopt = 0" in /etc/protocols - * The IANA has doubled up on the definition of 0 - it is now - * also used for IPv6 hop-opts, so we can no longer rely on - * /etc/protocols providing the correct name->number mapping. 
- */ -#endif else if (np->in_pr[0] == 0) PRINTF("ip"); else if (pr != NULL) @@ -42,11 +34,6 @@ printproto(pr, p, np) else PRINTF("%d", np->in_pr[0]); } else { -#ifdef _AIX51 - if (p == 0) - PRINTF("ip"); - else -#endif if (pr != NULL) PRINTF("%s", pr->p_name); else diff --git a/contrib/ipfilter/md5.c b/contrib/ipfilter/md5.c index 35756cdde7cc..6ac639935902 100644 --- a/contrib/ipfilter/md5.c +++ b/contrib/ipfilter/md5.c @@ -35,16 +35,11 @@ *********************************************************************** */ -#if defined(linux) && defined(_KERNEL) -extern void *memcpy(void *, const void *, unsigned long); -# define bcopy(a,b,c) memcpy(b,a,c) -#else -# if defined(_KERNEL) && !defined(__sgi) +# if defined(_KERNEL) # include <sys/systm.h> # else # include <string.h> # endif -#endif #include "md5.h" diff --git a/contrib/ipfilter/ml_ipl.c b/contrib/ipfilter/ml_ipl.c deleted file mode 100644 index aaf61a419c06..000000000000 --- a/contrib/ipfilter/ml_ipl.c +++ /dev/null @@ -1,164 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - */ -/* - * 29/12/94 Added code from Marc Huber <huber@fzi.de> to allow it to allocate - * its own major char number! Way cool patch! - */ -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <sys/file.h> -#include <sys/conf.h> -#include <sys/syslog.h> -#include <sys/buf.h> -#include <sys/param.h> -#include <sys/errno.h> -#include <sys/uio.h> -#include <sys/vnode.h> -#include <sundev/mbvar.h> -#include <sun/autoconf.h> -#include <sun/vddrv.h> -#if defined(sun4c) || defined(sun4m) -#include <sun/openprom.h> -#endif - -#ifndef IPL_NAME -#define IPL_NAME "/dev/ipf" -#endif - -extern int ipfattach(), ipfopen(), ipfclose(), ipfioctl(), ipfread(); -extern int nulldev(), ipfidentify(), errno; - -struct cdevsw ipfdevsw = -{ - ipfopen, ipfclose, ipfread, nulldev, - ipfioctl, nulldev, nulldev, nulldev, - 0, nulldev, -}; - - -struct dev_ops ipf_ops = -{ - 1, - ipfidentify, - ipfattach, - ipfopen, - ipfclose, - ipfread, - NULL, /* write */ - NULL, /* strategy */ - NULL, /* dump */ - 0, /* psize */ - ipfioctl, - NULL, /* reset */ - NULL /* mmap */ -}; - -int ipf_major = 0; - -#ifdef sun4m -struct vdldrv vd = -{ - VDMAGIC_PSEUDO, - "ipf", - &ipf_ops, - NULL, - &ipfdevsw, - 0, - 0, - NULL, - NULL, - NULL, - 0, - 1, -}; -#else /* sun4m */ -struct vdldrv vd = -{ - VDMAGIC_PSEUDO, /* magic */ - "ipf", /* name */ -#ifdef sun4c - &ipf_ops, /* dev_ops */ -#else - NULL, /* struct mb_ctlr *mb_ctlr */ - NULL, /* struct mb_driver *mb_driver */ - NULL, /* struct mb_device *mb_device */ - 0, /* num ctlrs */ - 1, /* numdevs */ -#endif /* sun4c */ - NULL, /* bdevsw */ - &ipfdevsw, /* cdevsw */ - 0, /* block major */ - 0, /* char major */ -}; -#endif /* sun4m */ - -extern int vd_unuseddev(); -extern struct cdevsw cdevsw[]; -extern int nchrdev; - -xxxinit(fc, vdp, vdi, vds) - u_int fc; - struct vddrv *vdp; - caddr_t vdi; - struct vdstat *vds; -{ - struct vdlinkage *v; - int i; - - switch (fc) - { - case VDLOAD: - while (ipf_major < nchrdev && - cdevsw[ipf_major].d_open != vd_unuseddev) - ipf_major++; - if (ipf_major == nchrdev) - return ENODEV; - vd.Drv_charmajor = ipf_major; - vdp->vdd_vdtab = (struct vdlinkage *)&vd; - return ipf_attach(vdi); - case VDUNLOAD: - return unload(vdp, vdi); - - case VDSTAT: - return 0; - - default: - return EIO; - } -} - -static unload(vdp, vdi) - struct vddrv *vdp; - struct vdioctl_unload *vdi; -{ - int i; - - (void) vn_remove(IPL_NAME, UIO_SYSSPACE, 
FILE); - return ipfdetach(); -} - - -static int ipf_attach(vdi) -struct vdioctl_load *vdi; -{ - struct vnode *vp; - struct vattr vattr; - int error = 0, fmode = S_IFCHR|0600; - - (void) vn_remove(IPL_NAME, UIO_SYSSPACE, FILE); - vattr_null(&vattr); - vattr.va_type = MFTOVT(fmode); - vattr.va_mode = (fmode & 07777); - vattr.va_rdev = ipf_major<<8; - - error = vn_create(IPL_NAME, UIO_SYSSPACE, &vattr, EXCL, 0, &vp); - if (error == 0) - VN_RELE(vp); - return ipfattach(0); -} diff --git a/contrib/ipfilter/mlf_ipl.c b/contrib/ipfilter/mlf_ipl.c deleted file mode 100644 index 93995af956f0..000000000000 --- a/contrib/ipfilter/mlf_ipl.c +++ /dev/null @@ -1,596 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - */ -/* - * 29/12/94 Added code from Marc Huber <huber@fzi.de> to allow it to allocate - * its own major char number! Way cool patch! - */ - - -#include <sys/param.h> - -#ifdef IPFILTER_LKM -# ifndef __FreeBSD_cc_version -# include <osreldate.h> -# else -# if __FreeBSD_cc_version < 430000 -# include <osreldate.h> -# endif -# endif -# define ACTUALLY_LKM_NOT_KERNEL -#else -# ifndef __FreeBSD_cc_version -# include <sys/osreldate.h> -# else -# if __FreeBSD_cc_version < 430000 -# include <sys/osreldate.h> -# endif -# endif -#endif -#include <sys/systm.h> -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) -# ifndef ACTUALLY_LKM_NOT_KERNEL -# include "opt_devfs.h" -# endif -# include <sys/conf.h> -# include <sys/kernel.h> -# ifdef DEVFS -# include <sys/devfsext.h> -# endif /*DEVFS*/ -#endif -#include <sys/conf.h> -#include <sys/file.h> -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) -# include <sys/lock.h> -#endif -#include <sys/stat.h> -#include <sys/proc.h> -#include <sys/kernel.h> -#include <sys/vnode.h> -#include <sys/namei.h> -#include <sys/malloc.h> -#include <sys/mount.h> -#include <sys/exec.h> -#include <sys/mbuf.h> -#if BSD >= 199506 -# include <sys/sysctl.h> -#endif -#if (__FreeBSD_version >= 300000) -# include <sys/socket.h> -#endif -#include <net/if.h> -#include <netinet/in_systm.h> -#include <netinet/in.h> -#include <netinet/ip.h> -#include <net/route.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/tcpip.h> -#include <sys/sysent.h> -#include <sys/lkm.h> -#include "netinet/ipl.h" -#include "netinet/ip_compat.h" -#include "netinet/ip_fil.h" -#include "netinet/ip_state.h" -#include "netinet/ip_nat.h" -#include "netinet/ip_auth.h" -#include "netinet/ip_frag.h" - - -#if !defined(VOP_LEASE) && defined(LEASE_CHECK) -#define VOP_LEASE LEASE_CHECK -#endif - -int xxxinit __P((struct lkm_table *, int, int)); - -#ifdef SYSCTL_OID -int sysctl_ipf_int SYSCTL_HANDLER_ARGS; -# define SYSCTL_IPF(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ - ptr, val, sysctl_ipf_int, "I", descr); -# define CTLFLAG_OFF 0x00800000 /* IPFilter must be disabled */ -# define CTLFLAG_RWO (CTLFLAG_RW|CTLFLAG_OFF) -SYSCTL_NODE(_net_inet, OID_AUTO, ipf, CTLFLAG_RW, 0, "IPF"); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &ipf_flags, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_pass, CTLFLAG_RW, &ipf_pass, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &ipf_active, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_chksrc, CTLFLAG_RW, &ipf_chksrc, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_minttl, CTLFLAG_RW, &ipf_minttl, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpidletimeout, 
CTLFLAG_RWO, - &ipf_tcpidletimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcphalfclosed, CTLFLAG_RWO, - &ipf_tcphalfclosed, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosewait, CTLFLAG_RWO, - &ipf_tcpclosewait, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcplastack, CTLFLAG_RWO, - &ipf_tcplastack, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcptimeout, CTLFLAG_RWO, - &ipf_tcptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosed, CTLFLAG_RWO, - &ipf_tcpclosed, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udptimeout, CTLFLAG_RWO, - &ipf_udptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_icmptimeout, CTLFLAG_RWO, - &ipf_icmptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_defnatage, CTLFLAG_RWO, - &ipf_defnatage, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_ipfrttl, CTLFLAG_RW, - &ipf_ipfrttl, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_running, CTLFLAG_RD, - &ipf_running, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_statesize, CTLFLAG_RWO, - &ipf_statesize, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_statemax, CTLFLAG_RWO, - &ipf_statemax, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_authsize, CTLFLAG_RWO, - &ipf_authsize, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_authused, CTLFLAG_RD, - &ipf_authused, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_defaultauthage, CTLFLAG_RW, - &ipf_defaultauthage, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ippr_ftp_pasvonly, CTLFLAG_RW, - &ippr_ftp_pasvonly, 0, ""); -#endif - -#ifdef DEVFS -static void *ipf_devfs[IPL_LOGSIZE]; -#endif - -#if !defined(__FreeBSD_version) || (__FreeBSD_version < 220000) -int ipf_major = 0; - -static struct cdevsw ipfdevsw = -{ - ipfopen, /* open */ - ipfclose, /* close */ - ipfread, /* read */ - (void *)nullop, /* write */ - ipfioctl, /* ioctl */ - (void *)nullop, /* stop */ - (void *)nullop, /* reset */ - (void *)NULL, /* tty */ - (void *)nullop, /* select */ - (void *)nullop, /* mmap */ - NULL /* strategy */ -}; - -MOD_DEV(IPL_VERSION, LM_DT_CHAR, -1, &ipfdevsw); - -extern struct cdevsw cdevsw[]; -extern int vd_unuseddev __P((void)); -extern int nchrdev; -#else - -static struct cdevsw ipf_cdevsw = { - ipfopen, ipfclose, ipfread, nowrite, /* 79 */ - ipfioctl, nostop, noreset, nodevtotty, -#if (__FreeBSD_version >= 300000) - seltrue, nommap, nostrategy, "ipf", -#else - noselect, nommap, nostrategy, "ipf", -#endif - NULL, -1 -}; -#endif - -static void ipf_drvinit __P((void *)); - -#ifdef ACTUALLY_LKM_NOT_KERNEL -static int if_ipf_unload __P((struct lkm_table *, int)); -static int if_ipf_load __P((struct lkm_table *, int)); -static int if_ipf_remove __P((void)); -static int ipf_major = CDEV_MAJOR; - -static int ipfaction __P((struct lkm_table *, int)); -static char *ipf_devfiles[] = { IPL_NAME, IPL_NAT, IPL_STATE, IPL_AUTH, - IPL_SCAN, IPL_SYNC, IPL_POOL, NULL }; - -extern int lkmenodev __P((void)); - -static int ipfaction(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ -#if !defined(__FreeBSD_version) || (__FreeBSD_version < 220000) - int i = ipf_major; - struct lkm_dev *args = lkmtp->private.lkm_dev; -#endif - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - -#if !defined(__FreeBSD_version) || (__FreeBSD_version < 220000) - for (i = 0; i < nchrdev; i++) - if (cdevsw[i].d_open == lkmenodev || - cdevsw[i].d_open == ipfopen) - break; - if (i == nchrdev) { - printf("IP Filter: No free cdevsw slots\n"); - return ENODEV; - } - - ipf_major = i; - args->lkm_offset = i; /* slot in cdevsw[] */ 
-#endif - printf("IP Filter: loaded into slot %d\n", ipf_major); - err = if_ipf_load(lkmtp, cmd); - if (!err) - ipf_drvinit((void *)NULL); - return err; - break; - case LKM_E_UNLOAD : - err = if_ipf_unload(lkmtp, cmd); - if (!err) { - printf("IP Filter: unloaded from slot %d\n", - ipf_major); -#ifdef DEVFS - if (ipf_devfs[IPL_LOGIPF]) - devfs_remove_dev(ipf_devfs[IPL_LOGIPF]); - if (ipf_devfs[IPL_LOGNAT]) - devfs_remove_dev(ipf_devfs[IPL_LOGNAT]); - if (ipf_devfs[IPL_LOGSTATE]) - devfs_remove_dev(ipf_devfs[IPL_LOGSTATE]); - if (ipf_devfs[IPL_LOGAUTH]) - devfs_remove_dev(ipf_devfs[IPL_LOGAUTH]); - if (ipf_devfs[IPL_LOGSCAN]) - devfs_remove_dev(ipf_devfs[IPL_LOGSCAN]); - if (ipf_devfs[IPL_LOGSYNC]) - devfs_remove_dev(ipf_devfs[IPL_LOGSYNC]); - if (ipf_devfs[IPL_LOGLOOKUP]) - devfs_remove_dev(ipf_devfs[IPL_LOGLOOKUP]); -#endif - } - return err; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return 0; -} - - -static int if_ipf_remove __P((void)) -{ - char *name; - struct nameidata nd; - int error, i; - - for (i = 0; (name = ipf_devfiles[i]); i++) { - NDINIT(&nd, DELETE, LOCKPARENT, UIO_SYSSPACE, name, curproc); - if ((error = namei(&nd))) - return (error); - VOP_LEASE(nd.ni_vp, curproc, curproc->p_ucred, LEASE_WRITE); -#if (__FreeBSD_version >= 300000) - VOP_LOCK(nd.ni_vp, LK_RETRY | LK_EXCLUSIVE, curproc); - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); - (void) VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp != NULLVP) - vput(nd.ni_vp); -#else - VOP_LOCK(nd.ni_vp); - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); - (void) VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); -#endif - } - - return 0; -} - - -static int if_ipf_unload(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - int error = 0; - - error = ipfdetach(); - if (!error) - error = if_ipf_remove(); - return error; -} - - -static int if_ipf_load(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - struct nameidata nd; - struct vattr vattr; - int error = 0, fmode = S_IFCHR|0600, i; - char *name; - - error = ipfattach(); - if (error) - return error; - (void) if_ipf_remove(); - - for (i = 0; (name = ipf_devfiles[i]); i++) { - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, name, curproc); - if ((error = namei(&nd))) - return error; - if (nd.ni_vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); - return (EEXIST); - } - VATTR_NULL(&vattr); - vattr.va_type = VCHR; - vattr.va_mode = (fmode & 07777); - vattr.va_rdev = (ipf_major << 8) | i; - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); - error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); -#if (__FreeBSD_version >= 300000) - vput(nd.ni_dvp); -#endif - if (error) - return error; - } - return 0; -} - -#endif /* actually LKM */ - -#if defined(__FreeBSD_version) && (__FreeBSD_version < 220000) -/* - * strlen isn't present in 2.1.* kernels. 
- */ -size_t strlen(string) - char *string; -{ - register char *s; - - for (s = string; *s; s++) - ; - return (size_t)(s - string); -} - - -int xxxinit(lkmtp, cmd, ver) - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfaction, ipfaction, ipfaction); -} -#else /* __FREEBSD_version >= 220000 */ -# ifdef IPFILTER_LKM -# include <sys/exec.h> - -# if (__FreeBSD_version >= 300000) -MOD_DEV(if_ipf, LM_DT_CHAR, CDEV_MAJOR, &ipf_cdevsw); -# else -MOD_DECL(if_ipf); - - -static struct lkm_dev _module = { - LM_DEV, - LKM_VERSION, - IPL_VERSION, - CDEV_MAJOR, - LM_DT_CHAR, - { (void *)&ipf_cdevsw } -}; -# endif - - -int if_ipf __P((struct lkm_table *, int, int)); - - -int if_ipf(lkmtp, cmd, ver) - struct lkm_table *lkmtp; - int cmd, ver; -{ -# if (__FreeBSD_version >= 300000) - MOD_DISPATCH(if_ipf, lkmtp, cmd, ver, ipfaction, ipfaction, ipfaction); -# else - DISPATCH(lkmtp, cmd, ver, ipfaction, ipfaction, ipfaction); -# endif -} -# endif /* IPFILTER_LKM */ -static ipf_devsw_installed = 0; - -static void ipf_drvinit __P((void *unused)) -{ - dev_t dev; -# ifdef DEVFS - void **tp = ipf_devfs; -# endif - - if (!ipf_devsw_installed ) { - dev = makedev(CDEV_MAJOR, 0); - cdevsw_add(&dev, &ipf_cdevsw, NULL); - ipf_devsw_installed = 1; - -# ifdef DEVFS - tp[IPL_LOGIPF] = devfs_add_devswf(&ipf_cdevsw, IPL_LOGIPF, - DV_CHR, 0, 0, 0600, "ipf"); - tp[IPL_LOGNAT] = devfs_add_devswf(&ipf_cdevsw, IPL_LOGNAT, - DV_CHR, 0, 0, 0600, "ipnat"); - tp[IPL_LOGSTATE] = devfs_add_devswf(&ipf_cdevsw, IPL_LOGSTATE, - DV_CHR, 0, 0, 0600, - "ipstate"); - tp[IPL_LOGAUTH] = devfs_add_devswf(&ipf_cdevsw, IPL_LOGAUTH, - DV_CHR, 0, 0, 0600, - "ipauth"); -# endif - } -} - - -#ifdef SYSCTL_IPF -int -sysctl_ipf_int SYSCTL_HANDLER_ARGS -{ - int error = 0; - - if (arg1) - error = SYSCTL_OUT(req, arg1, sizeof(int)); - else - error = SYSCTL_OUT(req, &arg2, sizeof(int)); - - if (error || !req->newptr) - return (error); - - if (!arg1) - error = EPERM; - else { - if ((oidp->oid_kind & CTLFLAG_OFF) && (ipf_running > 0)) - error = EBUSY; - else - error = SYSCTL_IN(req, arg1, sizeof(int)); - } - return (error); -} -#endif - - -# if defined(IPFILTER_LKM) || \ - defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) -SYSINIT(ipfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,ipf_drvinit,NULL) -# endif /* IPFILTER_LKM */ -#endif /* _FreeBSD_version */ - - -/* - * routines below for saving IP headers to buffer - */ -int ipfopen(dev, flags -#if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) -, devtype, p) - int devtype; -# if (__FreeBSD_version >= 500024) - struct thread *p; -# else - struct proc *p; -# endif /* __FreeBSD_version >= 500024 */ -#else -) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - int flags; -{ - u_int unit = GET_MINOR(dev); - - if (IPL_LOGMAX < unit) - unit = ENXIO; - else - unit = 0; - return unit; -} - - -int ipfclose(dev, flags -#if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) -, devtype, p) - int devtype; -# if (__FreeBSD_version >= 500024) - struct thread *p; -# else - struct proc *p; -# endif /* __FreeBSD_version >= 500024 */ -#else -) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - int flags; -{ - u_int unit = GET_MINOR(dev); - - if (IPL_LOGMAX < unit) - unit = ENXIO; - else - unit = 0; - return unit; -} - -/* - * ipfread/ipflog - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. 
- */ -#if (BSD >= 199306) -int ipfread(dev, uio, ioflag) - int ioflag; -#else -int ipfread(dev, uio) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - register struct uio *uio; -{ - u_int unit = GET_MINOR(dev); - - if (unit < 0) - return ENXIO; - - if (ipf_running < 1) - return EIO; - - if (unit == IPL_LOGSYNC) - return ipfsync_read(uio); - -#ifdef IPFILTER_LOG - return ipflog_read(unit, uio); -#else - return ENXIO; -#endif -} - - -/* - * ipfwrite - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -#if (BSD >= 199306) -int ipfwrite(dev, uio, ioflag) - int ioflag; -#else -int ipfwrite(dev, uio) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - register struct uio *uio; -{ - - if (ipf_running < 1) - return EIO; - - if (GET_MINOR(dev) == IPL_LOGSYNC) - return ipfsync_write(uio); - return ENXIO; -} diff --git a/contrib/ipfilter/mlf_rule.c b/contrib/ipfilter/mlf_rule.c deleted file mode 100644 index babd2c64a93b..000000000000 --- a/contrib/ipfilter/mlf_rule.c +++ /dev/null @@ -1,168 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - */ -/* - * 29/12/94 Added code from Marc Huber <huber@fzi.de> to allow it to allocate - * its own major char number! Way cool patch! - */ - - -#include <sys/param.h> - -#if defined(__FreeBSD__) && (__FreeBSD__ > 1) -# ifdef IPFILTER_LKM -# include <osreldate.h> -# define ACTUALLY_LKM_NOT_KERNEL -# else -# include <sys/osreldate.h> -# endif -#endif -#include <sys/systm.h> -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) -# include <sys/conf.h> -# include <sys/kernel.h> -# ifdef DEVFS -# include <sys/devfsext.h> -# endif /*DEVFS*/ -#endif -#include <sys/conf.h> -#include <sys/file.h> -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) -# include <sys/lock.h> -#endif -#include <sys/stat.h> -#include <sys/proc.h> -#include <sys/kernel.h> -#include <sys/vnode.h> -#include <sys/namei.h> -#include <sys/malloc.h> -#include <sys/mount.h> -#include <sys/exec.h> -#include <sys/mbuf.h> -#if BSD >= 199506 -# include <sys/sysctl.h> -#endif -#if (__FreeBSD_version >= 300000) -# include <sys/socket.h> -#endif -#if (__FreeBSD_version >= 199511) -#include <net/if.h> -#include <netinet/in_systm.h> -#include <netinet/in.h> -#include <netinet/ip.h> -#include <net/route.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/tcpip.h> -#endif -#if (__FreeBSD__ > 1) -# include <sys/sysent.h> -#endif -#include <sys/lkm.h> -#include "netinet/ip_compat.h" -#include "netinet/ip_fil.h" -#include "netinet/ip_rules.h" - - -int xxxinit __P((struct lkm_table *, int, int)); - -#if !defined(__FreeBSD_version) || (__FreeBSD_version < 220000) -MOD_DEV(IPL_VERSION, LM_DT_CHAR, -1, &ipldevsw); -#endif - -static int ipfrule_ioctl __P((struct lkm_table *, int)); - -#if defined(__FreeBSD_version) && (__FreeBSD_version < 220000) - -int xxxinit(lkmtp, cmd, ver) - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfrule_ioctl, ipfrule_ioctl, ipfrule_ioctl); -} -#else /* __FREEBSD_version >= 220000 */ -# ifdef IPFILTER_LKM -# include <sys/exec.h> - -# if (__FreeBSD_version >= 300000) -MOD_MISC(ipfrule); -# else -MOD_DECL(ipfrule); - - -static struct lkm_misc _module = { - LM_MISC, - LKM_VERSION, - "IP Filter rules", - 0, -}; -# endif - - -int ipfrule 
__P((struct lkm_table *, int, int)); - - -int ipfrule(lkmtp, cmd, ver) - struct lkm_table *lkmtp; - int cmd, ver; -{ -# if (__FreeBSD_version >= 300000) - MOD_DISPATCH(ipfrule, lkmtp, cmd, ver, ipfrule_ioctl, ipfrule_ioctl, - ipfrule_ioctl); -# else - DISPATCH(lkmtp, cmd, ver, ipfrule_ioctl, ipfrule_ioctl, ipfrule_ioctl); -# endif -} -# endif /* IPFILTER_LKM */ - - -int ipfrule_load(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - return ipfrule_add(); -} - - -int ipfrule_unload(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - return ipfrule_remove(); -} - - -static int ipfrule_ioctl(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - - err = ipfrule_load(lkmtp, cmd); - if (!err) - ipf_refcnt++; - break; - case LKM_E_UNLOAD : - err = ipfrule_unload(lkmtp, cmd); - if (!err) - ipf_refcnt--; - break; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return err; -} -#endif /* _FreeBSD_version */ diff --git a/contrib/ipfilter/mlfk_ipl.c b/contrib/ipfilter/mlfk_ipl.c deleted file mode 100644 index ba1f44f0c105..000000000000 --- a/contrib/ipfilter/mlfk_ipl.c +++ /dev/null @@ -1,529 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - */ - - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/module.h> -#include <sys/conf.h> -#include <sys/socket.h> -#include <sys/sysctl.h> -#include <sys/select.h> -#if __FreeBSD_version >= 500000 -# include <sys/selinfo.h> -#endif -#include <net/if.h> -#include <netinet/in_systm.h> -#include <netinet/in.h> - - -#include "netinet/ipl.h" -#include "netinet/ip_compat.h" -#include "netinet/ip_fil.h" -#include "netinet/ip_state.h" -#include "netinet/ip_nat.h" -#include "netinet/ip_auth.h" -#include "netinet/ip_frag.h" -#include "netinet/ip_sync.h" - -extern ipf_main_softc_t ipfmain; - -#if __FreeBSD_version >= 502116 -static struct cdev *ipf_devs[IPL_LOGSIZE]; -#else -static dev_t ipf_devs[IPL_LOGSIZE]; -#endif - -#if 0 -static int sysctl_ipf_int ( SYSCTL_HANDLER_ARGS ); -#endif -static int ipf_modload(void); -static int ipf_modunload(void); - -#if (__FreeBSD_version >= 500024) -# if (__FreeBSD_version >= 502116) -static int ipfopen __P((struct cdev*, int, int, struct thread *)); -static int ipfclose __P((struct cdev*, int, int, struct thread *)); -# else -static int ipfopen __P((dev_t, int, int, struct thread *)); -static int ipfclose __P((dev_t, int, int, struct thread *)); -# endif /* __FreeBSD_version >= 502116 */ -#else -static int ipfopen __P((dev_t, int, int, struct proc *)); -static int ipfclose __P((dev_t, int, int, struct proc *)); -#endif -#if (__FreeBSD_version >= 502116) -static int ipfread __P((struct cdev*, struct uio *, int)); -static int ipfwrite __P((struct cdev*, struct uio *, int)); -#else -static int ipfread __P((dev_t, struct uio *, int)); -static int ipfwrite __P((dev_t, struct uio *, int)); -#endif /* __FreeBSD_version >= 502116 */ - - - -SYSCTL_DECL(_net_inet); -#define SYSCTL_IPF(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ - ptr, val, sysctl_ipf_int, "I", descr); -#define CTLFLAG_OFF 0x00800000 /* IPFilter must be disabled */ -#define CTLFLAG_RWO (CTLFLAG_RW|CTLFLAG_OFF) -SYSCTL_NODE(_net_inet, OID_AUTO, ipf, CTLFLAG_RW, 0, "IPF"); -#if 0 -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &ipf_flags, 0, ""); -SYSCTL_IPF(_net_inet_ipf, 
OID_AUTO, ipf_pass, CTLFLAG_RW, &ipf_pass, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &ipf_active, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpidletimeout, CTLFLAG_RWO, - &ipf_tcpidletimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcphalfclosed, CTLFLAG_RWO, - &ipf_tcphalfclosed, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosewait, CTLFLAG_RWO, - &ipf_tcpclosewait, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcplastack, CTLFLAG_RWO, - &ipf_tcplastack, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcptimeout, CTLFLAG_RWO, - &ipf_tcptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosed, CTLFLAG_RWO, - &ipf_tcpclosed, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udptimeout, CTLFLAG_RWO, - &ipf_udptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udpacktimeout, CTLFLAG_RWO, - &ipf_udpacktimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_icmptimeout, CTLFLAG_RWO, - &ipf_icmptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_defnatage, CTLFLAG_RWO, - &ipf_nat_defage, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_ipfrttl, CTLFLAG_RW, - &ipf_ipfrttl, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_running, CTLFLAG_RD, - &ipf_running, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_statesize, CTLFLAG_RWO, - &ipf_state_size, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_statemax, CTLFLAG_RWO, - &ipf_state_max, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_nattable_sz, CTLFLAG_RWO, - &ipf_nat_table_sz, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_natrules_sz, CTLFLAG_RWO, - &ipf_nat_maprules_sz, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_rdrrules_sz, CTLFLAG_RWO, - &ipf_nat_rdrrules_sz, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_hostmap_sz, CTLFLAG_RWO, - &ipf_nat_hostmap_sz, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_authsize, CTLFLAG_RWO, - &ipf_auth_size, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_authused, CTLFLAG_RD, - &ipf_auth_used, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_defaultauthage, CTLFLAG_RW, - &ipf_auth_defaultage, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_chksrc, CTLFLAG_RW, &ipf_chksrc, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_minttl, CTLFLAG_RW, &ipf_minttl, 0, ""); -#endif - -#define CDEV_MAJOR 79 -#include <sys/poll.h> -#if __FreeBSD_version >= 500043 -# include <sys/select.h> -static int ipfpoll(struct cdev *dev, int events, struct thread *td); - -static struct cdevsw ipf_cdevsw = { -#if __FreeBSD_version >= 502103 - .d_version = D_VERSION, - .d_flags = 0, /* D_NEEDGIANT - Should be SMP safe */ -#endif - .d_open = ipfopen, - .d_close = ipfclose, - .d_read = ipfread, - .d_write = ipfwrite, - .d_ioctl = ipfioctl, - .d_poll = ipfpoll, - .d_name = "ipf", -#if __FreeBSD_version < 600000 - .d_maj = CDEV_MAJOR, -#endif -}; -#else -static int ipfpoll(dev_t dev, int events, struct proc *td); - -static struct cdevsw ipf_cdevsw = { - /* open */ ipfopen, - /* close */ ipfclose, - /* read */ ipfread, - /* write */ ipfwrite, - /* ioctl */ ipfioctl, - /* poll */ ipfpoll, - /* mmap */ nommap, - /* strategy */ nostrategy, - /* name */ "ipf", - /* maj */ CDEV_MAJOR, - /* dump */ nodump, - /* psize */ nopsize, - /* flags */ 0, -# if (__FreeBSD_version < 500043) - /* bmaj */ -1, -# endif -# if (__FreeBSD_version >= 430000) - /* kqfilter */ NULL -# endif -}; -#endif - -static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, IPAUTH_NAME, - IPSYNC_NAME, IPSCAN_NAME, IPLOOKUP_NAME, NULL }; - - -static int -ipfilter_modevent(module_t mod, int type, 
void *unused) -{ - int error = 0; - - switch (type) - { - case MOD_LOAD : - error = ipf_modload(); - break; - - case MOD_UNLOAD : - error = ipf_modunload(); - break; - default: - error = EINVAL; - break; - } - return error; -} - - -static int -ipf_modload() -{ - char *defpass, *c, *str; - int i, j, error; - - if (ipf_load_all() != 0) - return EIO; - - if (ipf_create_all(&ipfmain) == NULL) - return EIO; - - error = ipfattach(&ipfmain); - if (error) - return error; - - for (i = 0; i < IPL_LOGSIZE; i++) - ipf_devs[i] = NULL; - - for (i = 0; (str = ipf_devfiles[i]); i++) { - c = NULL; - for(j = strlen(str); j > 0; j--) - if (str[j] == '/') { - c = str + j + 1; - break; - } - if (!c) - c = str; - ipf_devs[i] = make_dev(&ipf_cdevsw, i, 0, 0, 0600, c); - } - - error = ipf_pfil_hook(); - if (error != 0) - return error; - ipf_event_reg(); - - if (FR_ISPASS(ipfmain.ipf_pass)) - defpass = "pass"; - else if (FR_ISBLOCK(ipfmain.ipf_pass)) - defpass = "block"; - else - defpass = "no-match -> block"; - - printf("%s initialized. Default = %s all, Logging = %s%s\n", - ipfilter_version, defpass, -#ifdef IPFILTER_LOG - "enabled", -#else - "disabled", -#endif -#ifdef IPFILTER_COMPILED - " (COMPILED)" -#else - "" -#endif - ); - return 0; -} - - -static int -ipf_modunload() -{ - int error, i; - - if (ipfmain.ipf_refcnt) - return EBUSY; - - error = ipf_pfil_unhook(); - if (error != 0) - return error; - - if (ipfmain.ipf_running >= 0) { - error = ipfdetach(&ipfmain); - if (error != 0) - return error; - - ipf_destroy_all(&ipfmain); - ipf_unload_all(); - } else - error = 0; - - ipfmain.ipf_running = -2; - - for (i = 0; ipf_devfiles[i]; i++) { - if (ipf_devs[i] != NULL) - destroy_dev(ipf_devs[i]); - } - - printf("%s unloaded\n", ipfilter_version); - - return error; -} - - -static moduledata_t ipfiltermod = { - "ipfilter", - ipfilter_modevent, - 0 -}; - - -DECLARE_MODULE(ipfilter, ipfiltermod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); -#ifdef MODULE_VERSION -MODULE_VERSION(ipfilter, 1); -#endif - - -#if 0 -#ifdef SYSCTL_IPF -int -sysctl_ipf_int ( SYSCTL_HANDLER_ARGS ) -{ - int error = 0; - - if (arg1) - error = SYSCTL_OUT(req, arg1, sizeof(int)); - else - error = SYSCTL_OUT(req, &arg2, sizeof(int)); - - if (error || !req->newptr) - return (error); - - if (!arg1) - error = EPERM; - else { - if ((oidp->oid_kind & CTLFLAG_OFF) && (ipfmain.ipf_running > 0)) - error = EBUSY; - else - error = SYSCTL_IN(req, arg1, sizeof(int)); - } - return (error); -} -#endif -#endif - - -static int -#if __FreeBSD_version >= 500043 -ipfpoll(struct cdev *dev, int events, struct thread *td) -#else -ipfpoll(dev_t dev, int events, struct proc *td) -#endif -{ - u_int unit = GET_MINOR(dev); - int revents; - - if (unit < 0 || unit > IPL_LOGMAX) - return 0; - - revents = 0; - - switch (unit) - { - case IPL_LOGIPF : - case IPL_LOGNAT : - case IPL_LOGSTATE : -#ifdef IPFILTER_LOG - if ((events & (POLLIN | POLLRDNORM)) && ipf_log_canread(&ipfmain, unit)) - revents |= events & (POLLIN | POLLRDNORM); -#endif - break; - case IPL_LOGAUTH : - if ((events & (POLLIN | POLLRDNORM)) && ipf_auth_waiting(&ipfmain)) - revents |= events & (POLLIN | POLLRDNORM); - break; - case IPL_LOGSYNC : - if ((events & (POLLIN | POLLRDNORM)) && ipf_sync_canread(&ipfmain)) - revents |= events & (POLLIN | POLLRDNORM); - if ((events & (POLLOUT | POLLWRNORM)) && ipf_sync_canwrite(&ipfmain)) - revents |= events & (POLLOUT | POLLWRNORM); - break; - case IPL_LOGSCAN : - case IPL_LOGLOOKUP : - default : - break; - } - - if ((revents == 0) && ((events & (POLLIN|POLLRDNORM)) != 0)) - 
selrecord(td, &ipfmain.ipf_selwait[unit]); - - return revents; -} - - -/* - * routines below for saving IP headers to buffer - */ -static int ipfopen(dev, flags -#if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) -, devtype, p) - int devtype; -# if (__FreeBSD_version >= 500024) - struct thread *p; -# else - struct proc *p; -# endif /* __FreeBSD_version >= 500024 */ -#else -) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - int flags; -{ - u_int unit = GET_MINOR(dev); - int error; - - if (IPL_LOGMAX < unit) - error = ENXIO; - else { - switch (unit) - { - case IPL_LOGIPF : - case IPL_LOGNAT : - case IPL_LOGSTATE : - case IPL_LOGAUTH : - case IPL_LOGLOOKUP : - case IPL_LOGSYNC : -#ifdef IPFILTER_SCAN - case IPL_LOGSCAN : -#endif - error = 0; - break; - default : - error = ENXIO; - break; - } - } - return error; -} - - -static int ipfclose(dev, flags -#if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) -, devtype, p) - int devtype; -# if (__FreeBSD_version >= 500024) - struct thread *p; -# else - struct proc *p; -# endif /* __FreeBSD_version >= 500024 */ -#else -) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - int flags; -{ - u_int unit = GET_MINOR(dev); - - if (IPL_LOGMAX < unit) - unit = ENXIO; - else - unit = 0; - return unit; -} - -/* - * ipfread/ipflog - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -#if (BSD >= 199306) -static int ipfread(dev, uio, ioflag) - int ioflag; -#else -static int ipfread(dev, uio) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - struct uio *uio; -{ - u_int unit = GET_MINOR(dev); - - if (unit < 0) - return ENXIO; - - if (ipfmain.ipf_running < 1) - return EIO; - - if (unit == IPL_LOGSYNC) - return ipf_sync_read(&ipfmain, uio); - -#ifdef IPFILTER_LOG - return ipf_log_read(&ipfmain, unit, uio); -#else - return ENXIO; -#endif -} - - -/* - * ipfwrite - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -#if (BSD >= 199306) -static int ipfwrite(dev, uio, ioflag) - int ioflag; -#else -static int ipfwrite(dev, uio) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - struct uio *uio; -{ - - if (ipfmain.ipf_running < 1) - return EIO; - - if (GET_MINOR(dev) == IPL_LOGSYNC) - return ipf_sync_write(&ipfmain, uio); - return ENXIO; -} diff --git a/contrib/ipfilter/mlh_rule.c b/contrib/ipfilter/mlh_rule.c deleted file mode 100644 index cc2a74c86264..000000000000 --- a/contrib/ipfilter/mlh_rule.c +++ /dev/null @@ -1,114 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. 
- * - */ -/* #pragma ident "@(#)solaris.c 1.12 6/5/96 (C) 1995 Darren Reed"*/ - -/*typedef unsigned int spustate_t;*/ -struct uio; - -#include <sys/types.h> -#include <sys/cmn_err.h> -#include <sys/kernel.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/conf.h> -#include <sys/callout.h> -#include <sys/moddefs.h> -#include <sys/io.h> -#include <sys/wsio.h> -#include <sys/param.h> -#include <sys/errno.h> -#include <sys/byteorder.h> -#include <sys/socket.h> -#include <sys/stropts.h> -#include <net/if.h> -#include <net/af.h> -#include <net/route.h> -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/if_ether.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/udp.h> -#include <netinet/tcpip.h> -#include <netinet/ip_icmp.h> - -#include "ip_compat.h" -#include "ip_fil.h" -#include "ip_rules.h" - - -/* - * Driver Header - */ -static drv_info_t ipf_drv_info = { - "IP Filter Rules", /* type */ - "pseudo", /* class */ - DRV_PSEUDO|DRV_SAVE_CONF|DRV_MP_SAFE, /* flags */ - -1, /* b_major */ - -1, /* c_major */ - NULL, /* cdio */ - NULL, /* gio_private */ - NULL, /* cdio_private */ -}; - - -extern struct mod_operations gio_mod_ops; -static drv_info_t ipf_drv_info; -extern struct mod_conf_data ipf_conf_data; - -static struct mod_type_data ipf_drv_link = { - IPL_VERSION, (void *)NULL -}; - -static struct modlink ipf_mod_link[] = { - { &gio_mod_ops, (void *)&ipf_drv_link }, - { NULL, (void *)NULL } -}; - -struct modwrapper ipf_wrapper = { - MODREV, - ipf_load, - ipf_unload, - (void (*)())NULL, - (void *)&ipf_conf_data, - ipf_mod_link -}; - - -static int ipf_load(void *arg) -{ - int i; - - i = ipfrule_add(); - if (!i) - ipf_refcnt--; -#ifdef IPFDEBUG - printf("IP Filter Rules: ipfrule_add() = %d\n", i); -#endif - if (!i) - cmn_err(CE_CONT, "IP Filter Rules: Loaded\n"); - return i; -} - - -static int ipf_unload(void *arg) -{ - int i; - - i = ipfrule_remove(); - if (!i) - ipf_refcnt--; -#ifdef IPFDEBUG - printf("IP Filter Rules: ipfrule_remove() = %d\n", i); -#endif - if (!i) - cmn_err(CE_CONT, "IP Filter Rules: Unloaded\n"); - return i; -} diff --git a/contrib/ipfilter/mln_ipl.c b/contrib/ipfilter/mln_ipl.c deleted file mode 100644 index 28b54071634d..000000000000 --- a/contrib/ipfilter/mln_ipl.c +++ /dev/null @@ -1,355 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - * - */ -/* - * 29/12/94 Added code from Marc Huber <huber@fzi.de> to allow it to allocate - * its own major char number! Way cool patch! - */ - - -#include <sys/param.h> - -/* - * Post NetBSD 1.2 has the PFIL interface for packet filters. This turns - * on those hooks. We don't need any special mods with this! 
- */ -#if (defined(NetBSD) && (NetBSD > 199609) && (NetBSD <= 1991011)) || \ - (defined(NetBSD1_2) && NetBSD1_2 > 1) -# define NETBSD_PF -#endif - -#include <sys/systm.h> -#include <sys/conf.h> -#include <sys/file.h> -#include <sys/stat.h> -#include <sys/proc.h> -#include <sys/uio.h> -#include <sys/kernel.h> -#include <sys/vnode.h> -#include <sys/namei.h> -#include <sys/malloc.h> -#include <sys/mount.h> -#include <sys/exec.h> -#include <sys/mbuf.h> -#include <net/if.h> -#include <netinet/in_systm.h> -#include <netinet/in.h> -#include <netinet/ip.h> -#include <net/route.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/tcpip.h> -#include <sys/lkm.h> -#include <sys/poll.h> -#include <sys/select.h> -#include "ipl.h" -#include "ip_compat.h" -#include "ip_fil.h" -#include "ip_auth.h" -#include "ip_state.h" -#include "ip_nat.h" -#include "ip_sync.h" - -#if !defined(__NetBSD_Version__) || __NetBSD_Version__ < 103050000 -#define vn_lock(v,f) VOP_LOCK(v) -#endif - -#if !defined(VOP_LEASE) && defined(LEASE_CHECK) -#define VOP_LEASE LEASE_CHECK -#endif - - -extern int lkmenodev __P((void)); - -#if NetBSD >= 199706 -int ipflkm_lkmentry __P((struct lkm_table *, int, int)); -#else -int xxxinit __P((struct lkm_table *, int, int)); -#endif -static int ipf_unload __P((void)); -static int ipf_load __P((void)); -static int ipf_remove __P((void)); -static int ipfaction __P((struct lkm_table *, int)); -static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, - IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, - IPLOOKUP_NAME, NULL }; - -int ipf_major = 0; -extern ipf_main_softc_t ipfmain; -extern const struct cdevsw ipl_cdevsw; - -#if defined(__NetBSD__) && (__NetBSD_Version__ >= 106080000) -MOD_DEV(IPL_VERSION, "ipf", NULL, -1, &ipl_cdevsw, -1); -#else -MOD_DEV(IPL_VERSION, LM_DT_CHAR, -1, &ipldevsw); -#endif - -extern int vd_unuseddev __P((void)); -extern struct cdevsw cdevsw[]; -extern int nchrdev; - - -int -#if NetBSD >= 199706 -ipflkm_lkmentry(lkmtp, cmd, ver) -#else -xxxinit(lkmtp, cmd, ver) -#endif - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfaction, ipfaction, ipfaction); -} - - -static int -ipfaction(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ -#if !defined(__NetBSD__) || (__NetBSD_Version__ < 106080000) - int i; -#endif - struct lkm_dev *args = lkmtp->private.lkm_dev; - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - -#if defined(__NetBSD__) && (__NetBSD_Version__ >= 106080000) -# if (__NetBSD_Version__ < 200000000) - err = devsw_attach(args->lkm_devname, - args->lkm_bdev, &args->lkm_bdevmaj, - args->lkm_cdev, &args->lkm_cdevmaj); - if (err != 0) - return (err); -# endif - ipf_major = args->lkm_cdevmaj; -#else - for (i = 0; i < nchrdev; i++) - if (cdevsw[i].d_open == (dev_type_open((*)))lkmenodev || - cdevsw[i].d_open == ipfopen) - break; - if (i == nchrdev) { - printf("IP Filter: No free cdevsw slots\n"); - return ENODEV; - } - - ipf_major = i; - args->lkm_offset = i; /* slot in cdevsw[] */ -#endif - printf("IP Filter: loaded into slot %d\n", ipf_major); - return ipf_load(); - case LKM_E_UNLOAD : -#if defined(__NetBSD__) && (__NetBSD_Version__ >= 106080000) - devsw_detach(args->lkm_bdev, args->lkm_cdev); - args->lkm_bdevmaj = -1; - args->lkm_cdevmaj = -1; -#endif - err = ipf_unload(); - if (!err) - printf("IP Filter: unloaded from slot %d\n", - ipf_major); - break; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return err; -} - - -static int -ipf_remove() -{ - 
char *name; - struct nameidata nd; - int error, i; - - for (i = 0; (name = ipf_devfiles[i]); i++) { -#if (__NetBSD_Version__ > 106009999) -# if (__NetBSD_Version__ > 399001400) -# if (__NetBSD_Version__ > 499001400) - NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, UIO_SYSSPACE, - name); -# else - NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, UIO_SYSSPACE, - name, curlwp); -# endif -# else - NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, UIO_SYSSPACE, - name, curproc); -# endif -#else - NDINIT(&nd, DELETE, LOCKPARENT, UIO_SYSSPACE, name, curproc); -#endif - if ((error = namei(&nd))) - return (error); -#if (__NetBSD_Version__ > 399001400) -# if (__NetBSD_Version__ > 399002000) -# if (__NetBSD_Version__ < 499001400) - VOP_LEASE(nd.ni_dvp, curlwp, curlwp->l_cred, LEASE_WRITE); -# endif -# else - VOP_LEASE(nd.ni_dvp, curlwp, curlwp->l_proc->p_ucred, LEASE_WRITE); -# endif -#else - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); -#endif -#if !defined(__NetBSD_Version__) || (__NetBSD_Version__ < 106000000) - vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY); -#endif -#if (__NetBSD_Version__ >= 399002000) -# if (__NetBSD_Version__ < 499001400) - VOP_LEASE(nd.ni_vp, curlwp, curlwp->l_cred, LEASE_WRITE); -# endif -#else -# if (__NetBSD_Version__ > 399001400) - VOP_LEASE(nd.ni_vp, curlwp, curlwp->l_proc->p_ucred, LEASE_WRITE); -# else - VOP_LEASE(nd.ni_vp, curproc, curproc->p_ucred, LEASE_WRITE); -# endif -#endif - (void) VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - } - return 0; -} - - -static int -ipf_unload() -{ - int error = 0; - - /* - * Unloading - remove the filter rule check from the IP - * input/output stream. - */ - if (ipfmain.ipf_refcnt) - error = EBUSY; - else if (ipfmain.ipf_running >= 0) { - error = ipfdetach(&ipfmain); - if (error == 0) { - ipf_destroy_all(&ipfmain); - ipf_unload_all(); - } - } - - if (error == 0) { - ipfmain.ipf_running = -2; - error = ipf_remove(); - printf("%s unloaded\n", ipfilter_version); - } - return error; -} - - -static int -ipf_load() -{ - struct nameidata nd; - struct vattr vattr; - int error = 0, fmode = S_IFCHR|0600, i; - char *name; - - /* - * XXX Remove existing device nodes prior to creating new ones - * XXX using the assigned LKM device slot's major number. In a - * XXX perfect world we could use the ones specified by cdevsw[]. 
- */ - (void)ipf_remove(); - - bzero((char *)&ipfmain, sizeof(ipfmain)); - error = ipf_load_all(); - if (error != 0) - return error; - if (ipf_create_all(&ipfmain) == NULL) { - ipf_unload_all(); - return EIO; - } - - error = ipfattach(&ipfmain); - if (error != 0) { - (void) ipf_unload(); - return error; - } - - for (i = 0; (error == 0) && (name = ipf_devfiles[i]); i++) { -#if (__NetBSD_Version__ > 399001400) -# if (__NetBSD_Version__ > 499001400) - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, name); -# else - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, name, curlwp); -# endif -#else - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, name, curproc); -#endif - if ((error = namei(&nd))) - break; - if (nd.ni_vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); - error = EEXIST; - break; - } - VATTR_NULL(&vattr); - vattr.va_type = VCHR; - vattr.va_mode = (fmode & 07777); - vattr.va_rdev = (ipf_major << 8) | i; -#if (__NetBSD_Version__ > 399001400) -# if (__NetBSD_Version__ >= 399002000) -# if (__NetBSD_Version__ < 499001400) - VOP_LEASE(nd.ni_dvp, curlwp, curlwp->l_cred, LEASE_WRITE); -# endif -# else - VOP_LEASE(nd.ni_dvp, curlwp, curlwp->l_proc->p_ucred, LEASE_WRITE); -# endif -#else - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); -#endif - error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); - if (error == 0) - vput(nd.ni_vp); - } - - if (error == 0) { - char *defpass; - - if (FR_ISPASS(ipfmain.ipf_pass)) - defpass = "pass"; - else if (FR_ISBLOCK(ipfmain.ipf_pass)) - defpass = "block"; - else - defpass = "no-match -> block"; - - printf("%s initialized. Default = %s all, Logging = %s%s\n", - ipfilter_version, defpass, -#ifdef IPFILTER_LOG - "enabled", -#else - "disabled", -#endif -#ifdef IPFILTER_COMPILED - " (COMPILED)" -#else - "" -#endif - ); - ipfmain.ipf_running = 1; - } - return error; -} diff --git a/contrib/ipfilter/mln_rule.c b/contrib/ipfilter/mln_rule.c deleted file mode 100644 index 2df3376816b4..000000000000 --- a/contrib/ipfilter/mln_rule.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. 
- * - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/conf.h> -#include <sys/proc.h> -#include <sys/ioctl.h> -#include <sys/kernel.h> -#include <sys/mbuf.h> -#include <sys/exec.h> -#include <sys/socket.h> -#include <net/if.h> -#include <netinet/in_systm.h> -#include <netinet/in.h> -#include <netinet/ip.h> -#include <net/route.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/tcpip.h> -#include <sys/lkm.h> -#include "ip_compat.h" -#include "ip_fil.h" -#include "ip_rules.h" - - -static int ipfruleaction __P((struct lkm_table *, int)); - -#ifdef IPFILTER_LKM -# if NetBSD >= 199706 -int ipfrule_lkmentry __P((struct lkm_table *, int, int)); -# else -int xxxinit __P((struct lkm_table *, int, int)); -# endif - - -MOD_MISC("IPFilter Rules"); - -# if NetBSD >= 199706 -int ipfrule_lkmentry(lkmtp, cmd, ver) -# else -int xxxinit(lkmtp, cmd, ver) -# endif - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfruleaction, ipfruleaction, ipfruleaction); -} - -static int ipfruleaction(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - - err = ipfrule_add(); - if (!err) - ipf_refcnt++; - break; - case LKM_E_UNLOAD : - err = ipfrule_remove(); - if (!err) - ipf_refcnt--; - break; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return err; -} -#endif /* IPFILTER_LKM */ diff --git a/contrib/ipfilter/mlo_ipl.c b/contrib/ipfilter/mlo_ipl.c deleted file mode 100644 index 35556fa33f54..000000000000 --- a/contrib/ipfilter/mlo_ipl.c +++ /dev/null @@ -1,364 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - * - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/conf.h> -#include <sys/file.h> -#include <sys/stat.h> -#include <sys/proc.h> -#include <sys/uio.h> -#include <sys/kernel.h> -#include <sys/vnode.h> -#include <sys/namei.h> -#include <sys/malloc.h> -#include <sys/mount.h> -#include <sys/exec.h> -#include <sys/mbuf.h> -#include <net/if.h> -#include <netinet/in_systm.h> -#include <netinet/in.h> -#include <netinet/ip.h> -#include <net/route.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/tcpip.h> -#include <sys/lkm.h> -#include "ipl.h" -#include "ip_compat.h" -#include "ip_fil.h" - -#define vn_lock(v,f) VOP_LOCK(v) - -#if !defined(VOP_LEASE) && defined(LEASE_CHECK) -#define VOP_LEASE LEASE_CHECK -#endif - - -extern int lkmenodev __P((void)); - -#if OpenBSD >= 200311 -int if_ipf_lkmentry __P((struct lkm_table *, int, int)); -#else -int if_ipf __P((struct lkm_table *, int, int)); -#endif -static int ipf_unload __P((void)); -static int ipf_load __P((void)); -static int ipf_remove __P((void)); -static int ipfaction __P((struct lkm_table *, int)); -static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, - IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, - IPLOOKUP_NAME, NULL }; - - -struct cdevsw ipfdevsw = -{ - ipfopen, /* open */ - ipfclose, /* close */ - ipfread, /* read */ - (void *)nullop, /* write */ - ipfioctl, /* ioctl */ - (void *)nullop, /* stop */ - (void *)NULL, /* tty */ - (void *)nullop, /* select */ - (void *)nullop, /* mmap */ - NULL /* strategy */ -}; - -int ipf_major = 0; - -MOD_DEV(IPL_VERSION, LM_DT_CHAR, -1, &ipfdevsw); - -extern int vd_unuseddev __P((void)); -extern struct cdevsw cdevsw[]; -extern int nchrdev; - - -#if OpenBSD >= 200311 -int if_ipf_lkmentry (lkmtp, cmd, ver) 
-#else -int if_ipf(lkmtp, cmd, ver) -#endif - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfaction, ipfaction, ipfaction); -} - -int lkmexists __P((struct lkm_table *)); /* defined in /sys/kern/kern_lkm.c */ - -static int ipfaction(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - int i; - struct lkm_dev *args = lkmtp->private.lkm_dev; - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - - for (i = 0; i < nchrdev; i++) - if (cdevsw[i].d_open == (dev_type_open((*)))lkmenodev || - cdevsw[i].d_open == ipfopen) - break; - if (i == nchrdev) { - printf("IP Filter: No free cdevsw slots\n"); - return ENODEV; - } - - ipf_major = i; - args->lkm_offset = i; /* slot in cdevsw[] */ - printf("IP Filter: loaded into slot %d\n", ipf_major); - return ipf_load(); - case LKM_E_UNLOAD : - err = ipf_unload(); - if (!err) - printf("IP Filter: unloaded from slot %d\n", - ipf_major); - break; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return err; -} - - -static int ipf_remove() -{ - struct nameidata nd; - int error, i; - char *name; - - for (i = 0; (name = ipf_devfiles[i]); i++) { -#if OpenBSD >= 200311 - NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_SYSSPACE, - name, curproc); -#else - NDINIT(&nd, DELETE, LOCKPARENT, UIO_SYSSPACE, name, curproc); -#endif - if ((error = namei(&nd))) - return (error); - VOP_LEASE(nd.ni_vp, curproc, curproc->p_ucred, LEASE_WRITE); -#if OpenBSD < 200311 - VOP_LOCK(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY, curproc); - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); -#else - (void)uvm_vnp_uncache(nd.ni_vp); - - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); - VOP_LEASE(nd.ni_vp, curproc, curproc->p_ucred, LEASE_WRITE); -#endif - (void) VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - } - return 0; -} - - -static int ipf_unload() -{ - int error = 0; - - /* - * Unloading - remove the filter rule check from the IP - * input/output stream. - */ - if (ipf_refcnt) - error = EBUSY; - else if (ipf_running >= 0) - error = ipfdetach(); - - if (error == 0) { - ipf_running = -2; - error = ipf_remove(); - printf("%s unloaded\n", ipfilter_version); - } - return error; -} - - -static int ipf_load() -{ - struct nameidata nd; - struct vattr vattr; - int error = 0, fmode = S_IFCHR|0600, i; - char *name; - - /* - * XXX Remove existing device nodes prior to creating new ones - * XXX using the assigned LKM device slot's major number. In a - * XXX perfect world we could use the ones specified by cdevsw[]. - */ - (void)ipf_remove(); - - error = ipfattach(); - - for (i = 0; (error == 0) && (name = ipf_devfiles[i]); i++) { - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, name, curproc); - if ((error = namei(&nd))) - break; - if (nd.ni_vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); - error = EEXIST; - break; - } - VATTR_NULL(&vattr); - vattr.va_type = VCHR; - vattr.va_mode = (fmode & 07777); - vattr.va_rdev = (ipf_major << 8) | i; - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); - error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); - } - - if (error == 0) { - char *defpass; - - if (FR_ISPASS(ipf_pass)) - defpass = "pass"; - else if (FR_ISBLOCK(ipf_pass)) - defpass = "block"; - else - defpass = "no-match -> block"; - - printf("%s initialized. 
Default = %s all, Logging = %s%s\n", - ipfilter_version, defpass, -#ifdef IPFILTER_LOG - "enabled", -#else - "disabled", -#endif -#ifdef IPFILTER_COMPILED - " (COMPILED)" -#else - "" -#endif - ); - ipf_running = 1; - } - return error; -} - - -/* - * routines below for saving IP headers to buffer - */ -int -ipfopen(dev, flags, devtype, p) - dev_t dev; - int flags; - int devtype; - struct proc *p; -{ - u_int min = GET_MINOR(dev); - int error; - - if (IPL_LOGMAX < min) { - error = ENXIO; - } else { - switch (unit) - { - case IPL_LOGIPF : - case IPL_LOGNAT : - case IPL_LOGSTATE : - case IPL_LOGAUTH : - case IPL_LOGLOOKUP : - case IPL_LOGSYNC : -#ifdef IPFILTER_SCAN - case IPL_LOGSCAN : -#endif - error = 0; - break; - default : - error = ENXIO; - break; - } - } - return error; -} - - -int -ipfclose(dev, flags, devtype, p) - dev_t dev; - int flags; - int devtype; - struct proc *p; -{ - u_int min = GET_MINOR(dev); - - if (IPL_LOGMAX < min) - min = ENXIO; - else - min = 0; - return min; -} - - -/* - * ipfread/ipflog - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -int -ipfread(dev, uio, ioflag) - dev_t dev; - register struct uio *uio; - int ioflag; -{ - - if (ipf_running < 1) - return EIO; - - if (GET_MINOR(dev) == IPL_LOGSYNC) - return ipfsync_read(uio); - -#ifdef IPFILTER_LOG - return ipflog_read(GET_MINOR(dev), uio); -#else - return ENXIO; -#endif -} - - -/* - * ipfwrite - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -int -#if (BSD >= 199306) -ipfwrite(dev, uio, ioflag) - int ioflag; -#else -ipfwrite(dev, uio) -#endif - dev_t dev; - register struct uio *uio; -{ - - if (ipf_running < 1) - return EIO; - - if (GET_MINOR(dev) == IPL_LOGSYNC) - return ipfsync_write(uio); - return ENXIO; -} diff --git a/contrib/ipfilter/mlo_rule.c b/contrib/ipfilter/mlo_rule.c deleted file mode 100644 index dbd4305970ee..000000000000 --- a/contrib/ipfilter/mlo_rule.c +++ /dev/null @@ -1,80 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. 
- * - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/conf.h> -#include <sys/proc.h> -#include <sys/ioctl.h> -#include <sys/kernel.h> -#include <sys/mbuf.h> -#include <sys/exec.h> -#include <sys/socket.h> -#include <net/if.h> -#include <netinet/in_systm.h> -#include <netinet/in.h> -#include <netinet/ip.h> -#include <net/route.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/tcpip.h> -#include <sys/lkm.h> -#include "ip_compat.h" -#include "ip_fil.h" -#include "ip_rules.h" - - -#ifdef IPFILTER_LKM - -static int ipfruleaction __P((struct lkm_table *, int)); - -int ipfrule __P((struct lkm_table *, int, int)); - - -MOD_MISC("IPFilter Rules"); - -int ipfrule(lkmtp, cmd, ver) - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfruleaction, ipfruleaction, ipfruleaction); -} - -int lkmexists __P((struct lkm_table *)); /* defined in /sys/kern/kern_lkm.c */ - -static int ipfruleaction(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - - err = ipfrule_add(); - if (!err) - ipf_refcnt++; - break; - case LKM_E_UNLOAD : - err = ipfrule_remove(); - if (!err) - ipf_refcnt--; - break; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return err; -} -#endif /* IPFILTER_LKM */ diff --git a/contrib/ipfilter/mls_ipl.c b/contrib/ipfilter/mls_ipl.c deleted file mode 100644 index 4388b617e631..000000000000 --- a/contrib/ipfilter/mls_ipl.c +++ /dev/null @@ -1,351 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - */ -/* - * 29/12/94 Added code from Marc Huber <huber@fzi.de> to allow it to allocate - * its own major char number! Way cool patch! 
- */ -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <sys/file.h> -#include <sys/socket.h> -#include <sys/conf.h> -#include <sys/syslog.h> -#include <sys/buf.h> -#include <sys/mbuf.h> -#include <sys/param.h> -#include <sys/errno.h> -#include <sys/uio.h> -#include <sys/vnode.h> -#include <sundev/mbvar.h> -#include <sun/autoconf.h> -#include <sun/vddrv.h> -#if defined(sun4c) || defined(sun4m) -# include <sun/openprom.h> -#endif -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/tcpip.h> -#include <net/if.h> -#include "ipl.h" -#include "ip_compat.h" -#include "ip_fil.h" - - -#if !defined(lint) -static const char sccsid[] = "@(#)mls_ipl.c 2.6 10/15/95 (C) 1993-2000 Darren Reed"; -static const char rcsid[] = "@(#)$Id$"; -#endif - -extern int ipfdetach __P((void)); -#ifndef IPFILTER_LOG -#define ipfread nulldev -#endif -extern int nulldev __P((void)); -extern int errno; - -extern int nodev __P((void)); - -static int unload __P((void)); -static int ipf_attach __P((void)); -int xxxinit __P((u_int, struct vddrv *, caddr_t, struct vdstat *)); -static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, - IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, - IPLOOKUP_NAME, NULL }; -static int ipfopen __P((dev_t, int)); -static int ipfclose __P((dev_t, int)); -static int ipfread __P((dev_t, struct uio *)); -static int ipfwrite __P((dev_t, struct uio *)); - - -struct cdevsw ipfdevsw = -{ - ipfopen, ipfclose, ipfread, nulldev, - ipfioctl, nulldev, nulldev, nulldev, - 0, nulldev, -}; - - -struct dev_ops ipf_ops = -{ - 1, - ipfidentify, - ipfattach, - ipfopen, - ipfclose, - ipfread, - ipfwrite, - NULL, /* strategy */ - NULL, /* dump */ - 0, /* psize */ - ipfioctl, - NULL, /* reset */ - NULL /* mmap */ -}; - -int ipf_major = 0; - -#ifdef sun4m -struct vdldrv vd = -{ - VDMAGIC_PSEUDO, - IPL_VERSION, - &ipf_ops, - NULL, - &ipfdevsw, - 0, - 0, - NULL, - NULL, - NULL, - 0, - 1, -}; -#else /* sun4m */ -struct vdldrv vd = -{ - VDMAGIC_PSEUDO, /* magic */ - IPL_VERSION, -#ifdef sun4c - &ipf_ops, /* dev_ops */ -#else - NULL, /* struct mb_ctlr *mb_ctlr */ - NULL, /* struct mb_driver *mb_driver */ - NULL, /* struct mb_device *mb_device */ - 0, /* num ctlrs */ - 1, /* numdevs */ -#endif /* sun4c */ - NULL, /* bdevsw */ - &ipfdevsw, /* cdevsw */ - 0, /* block major */ - 0, /* char major */ -}; -#endif /* sun4m */ - -extern int vd_unuseddev __P((void)); -extern struct cdevsw cdevsw[]; -extern int nchrdev; - -xxxinit(fc, vdp, data, vds) - u_int fc; - struct vddrv *vdp; - caddr_t data; - struct vdstat *vds; -{ - struct vdioctl_load *vdi = (struct vdioctl_load *)data; - - switch (fc) - { - case VDLOAD: - { - struct vdconf *vdc; - if (vdi && vdi->vdi_userconf) - for (vdc = vdi->vdi_userconf; vdc->vdc_type; vdc++) - if (vdc->vdc_type == VDCCHARMAJOR) { - ipf_major = vdc->vdc_data; - break; - } - - if (!ipf_major) { - while (ipf_major < nchrdev && - cdevsw[ipf_major].d_open != vd_unuseddev) - ipf_major++; - if (ipf_major == nchrdev) - return ENODEV; - } - vdp->vdd_vdtab = (struct vdlinkage *)&vd; - vd.Drv_charmajor = ipf_major; - return ipf_attach(); - } - case VDUNLOAD: - return unload(); - case VDSTAT: - return 0; - default: - return EIO; - } -} - - -static int -unload() -{ - int err = 0, i; - char *name; - - if (ipf_refcnt != 0) - err = EBUSY; - else if (ipf_running >= 0) - err = ipfdetach(); - if (err) - return err; - - ipf_running = -2; - for (i = 0; (name = ipf_devfiles[i]); i++) - (void) 
vn_remove(name, UIO_SYSSPACE, FILE); - printf("%s unloaded\n", ipfilter_version); - return 0; -} - - -static int -ipf_attach() -{ - struct vnode *vp; - struct vattr vattr; - int error = 0, fmode = S_IFCHR|0600, i; - char *name; - - error = ipfattach(); - if (error) - return error; - - for (i = 0; (name = ipf_devfiles[i]); i++) { - (void) vn_remove(name, UIO_SYSSPACE, FILE); - vattr_null(&vattr); - vattr.va_type = MFTOVT(fmode); - vattr.va_mode = (fmode & 07777); - vattr.va_rdev = (ipf_major << 8) | i; - - error = vn_create(name, UIO_SYSSPACE, &vattr, EXCL, 0, &vp); - if (error) { - printf("IP Filter: vn_create(%s) = %d\n", name, error); - break; - } else { - VN_RELE(vp); - } - } - - if (error == 0) { - char *defpass; - - if (FR_ISPASS(ipf_pass)) - defpass = "pass"; - else if (FR_ISBLOCK(ipf_pass)) - defpass = "block"; - else - defpass = "no-match -> block"; - - printf("%s initialized. Default = %s all, Logging = %s%s\n", - ipfilter_version, defpass, -#ifdef IPFILTER_LOG - "enabled", -#else - "disabled", -#endif -#ifdef IPFILTER_COMPILED - " (COMPILED)" -#else - "" -#endif - ); - ipf_running = 1; - } - return error; -} - - -/* - * routines below for saving IP headers to buffer - */ -static int -ipfopen(dev, flags) - dev_t dev; - int flags; -{ - u_int unit = GET_MINOR(dev); - int error; - - if (IPL_LOGMAX < unit) { - error = ENXIO; - } else { - switch (unit) - { - case IPL_LOGIPF : - case IPL_LOGNAT : - case IPL_LOGSTATE : - case IPL_LOGAUTH : - case IPL_LOGLOOKUP : - case IPL_LOGSYNC : -#ifdef IPFILTER_SCAN - case IPL_LOGSCAN : -#endif - error = 0; - break; - default : - error = ENXIO; - break; - } - } - return error; -} - - -static int -ipfclose(dev, flags) - dev_t dev; - int flags; -{ - u_int unit = GET_MINOR(dev); - - if (IPL_LOGMAX < unit) - unit = ENXIO; - else - unit = 0; - return unit; -} - - -/* - * ipfread/ipflog - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -static int -ipfread(dev, uio) - dev_t dev; - register struct uio *uio; -{ - - if (ipf_running < 1) { - ipfmain.ipf_interror = 130006; - return EIO; - } - -#ifdef IPFILTER_LOG - return ipflog_read(GET_MINOR(dev), uio); -#else - ipfmain.ipf_interror = 130007; - return ENXIO; -#endif -} - - -/* - * ipfwrite - */ -static int -ipfwrite(dev, uio) - dev_t dev; - register struct uio *uio; -{ - - if (ipf_running < 1) { - ipfmain.ipf_interror = 130008; - return EIO; - } - - if (getminor(dev) == IPL_LOGSYNC) - return ipfsync_write(uio); - ipfmain.ipf_interror = 130009; - return ENXIO; -} diff --git a/contrib/ipfilter/mls_rule.c b/contrib/ipfilter/mls_rule.c deleted file mode 100644 index e37df0c89314..000000000000 --- a/contrib/ipfilter/mls_rule.c +++ /dev/null @@ -1,116 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - */ -/* - * 29/12/94 Added code from Marc Huber <huber@fzi.de> to allow it to allocate - * its own major char number! Way cool patch! 
- */ -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <sys/file.h> -#include <sys/socket.h> -#include <sys/conf.h> -#include <sys/syslog.h> -#include <sys/buf.h> -#include <sys/mbuf.h> -#include <sys/param.h> -#include <sys/errno.h> -#include <sys/uio.h> -#include <sys/vnode.h> -#include <sundev/mbvar.h> -#include <sun/autoconf.h> -#include <sun/vddrv.h> -#if defined(sun4c) || defined(sun4m) -# include <sun/openprom.h> -#endif -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/tcpip.h> -#include <net/if.h> -#include "ip_compat.h" -#include "ip_fil.h" -#include "ip_rules.h" - - -extern int errno; - - -int xxxinit __P((u_int, struct vddrv *, caddr_t, struct vdstat *)); - -int ipl_major = 0; - -#ifdef sun4m -struct vdldrv vd = -{ - VDMAGIC_USER, - "IP Filter rules", - NULL, - NULL, - NULL, - 0, - 0, - NULL, - NULL, - NULL, - 0, - 1, -}; -#else /* sun4m */ -struct vdldrv vd = -{ - VDMAGIC_USER, /* magic */ - "IP Filter rules", -#ifdef sun4c - NULL, /* dev_ops */ -#else - NULL, /* struct mb_ctlr *mb_ctlr */ - NULL, /* struct mb_driver *mb_driver */ - NULL, /* struct mb_device *mb_device */ - 0, /* num ctlrs */ - 1, /* numdevs */ -#endif /* sun4c */ - NULL, /* bdevsw */ - NULL, /* cdevsw */ - 0, /* block major */ - 0, /* char major */ -}; -#endif /* sun4m */ - - -xxxinit(fc, vdp, data, vds) - u_int fc; - struct vddrv *vdp; - caddr_t data; - struct vdstat *vds; -{ - struct vdioctl_load *vdi = (struct vdioctl_load *)data; - int err; - - switch (fc) - { - case VDLOAD: - err = ipfrule_add(); - if (!err) - ipf_refcnt++; - break; - case VDUNLOAD: - err = ipfrule_remove(); - if (!err) - ipf_refcnt--; - break; - case VDSTAT: - err = 0; - break; - default: - err = EIO; - break; - } -} diff --git a/contrib/ipfilter/mlso_rule.c b/contrib/ipfilter/mlso_rule.c deleted file mode 100644 index a9395f2d2f71..000000000000 --- a/contrib/ipfilter/mlso_rule.c +++ /dev/null @@ -1,130 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. 
- */ -#pragma ident "@(#)$Id$" - -#include <sys/systm.h> -#include <sys/types.h> -#include <sys/param.h> -#include <sys/errno.h> -#include <sys/uio.h> -#include <sys/buf.h> -#include <sys/modctl.h> -#include <sys/open.h> -#include <sys/kmem.h> -#include <sys/conf.h> -#include <sys/cmn_err.h> -#include <sys/stat.h> -#include <sys/cred.h> -#include <sys/dditypes.h> -#include <sys/stream.h> -#include <sys/poll.h> -#include <sys/autoconf.h> -#include <sys/byteorder.h> -#include <sys/socket.h> -#include <sys/dlpi.h> -#include <sys/stropts.h> -#include <sys/sockio.h> -#include <net/if.h> -#if SOLARIS2 >= 6 -# include <net/if_types.h> -#endif -#include <net/af.h> -#include <net/route.h> -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/if_ether.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/udp.h> -#include <netinet/tcpip.h> -#include <netinet/ip_icmp.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> -#include "ip_compat.h" -#include "ip_fil.h" -#include "ip_rules.h" - -char _depends_on[] = "drv/ipf"; - - -extern ipf_main_softc_t ipfmain; -extern struct mod_ops mod_miscops; -static struct modlmisc ipfrulemod = { - &mod_miscops, - "IP Filter rules" -}; - -static struct modlinkage modlink1 = { - MODREV_1, - &ipfrulemod, - NULL -}; - - -int _init() -{ - int ipfruleinst; - - ipfruleinst = mod_install(&modlink1); -#ifdef IPFRULEDEBUG - cmn_err(CE_NOTE, "IP Filter Rules: _init() = %d", ipfruleinst); -#endif - - if (ipfruleinst == 0) { - if (ipfmain.ipf_running >= 0) { - ipfruleinst = ipfrule_add(); - if (!ipfruleinst) - ipfmain.ipf_refcnt++; - else { - cmn_err(CE_NOTE, - "IP Filter Rules: ipfrule_add failed"); - ipfruleinst = -1; - } - } else - ipfruleinst = -1; - } - if (ipfruleinst == 0) - cmn_err(CE_CONT, "IP Filter Rules: loaded\n"); - return ipfruleinst; -} - - -int _fini(void) -{ - int ipfruleinst; - - ipfruleinst = mod_remove(&modlink1); -#ifdef IPFRULEDEBUG - cmn_err(CE_NOTE, "IP Filter Rules: _fini() = %d", ipfruleinst); -#endif - if (ipfruleinst == 0) { - ipfruleinst = ipfrule_remove(); - if (!ipfruleinst) - ipfmain.ipf_refcnt--; - else - ipfruleinst = -1; - } - if (ipfruleinst == 0) - cmn_err(CE_CONT, "IP Filter Rules: unloaded\n"); - return ipfruleinst; -} - - -int _info(modinfop) - struct modinfo *modinfop; -{ - int ipfruleinst; - - ipfruleinst = mod_info(&modlink1, modinfop); -#ifdef IPFRULEDEBUG - cmn_err(CE_NOTE, "IP Filter Rules: _info(%x) = %x", - modinfop, ipfruleinst); -#endif - return ipfruleinst; -} diff --git a/contrib/ipfilter/tools/ipf.c b/contrib/ipfilter/tools/ipf.c index 166063173b20..0551108488e0 100644 --- a/contrib/ipfilter/tools/ipf.c +++ b/contrib/ipfilter/tools/ipf.c @@ -5,15 +5,6 @@ * * See the IPFILTER.LICENCE file for details on licencing. */ -#ifdef __FreeBSD__ -# ifndef __FreeBSD_cc_version -# include <osreldate.h> -# else -# if __FreeBSD_cc_version < 430000 -# include <osreldate.h> -# endif -# endif -#endif #include "ipf.h" #include <fcntl.h> #include <ctype.h> diff --git a/contrib/ipfilter/tools/ipfs.c b/contrib/ipfilter/tools/ipfs.c index 43abd748f59f..7a2fe0252498 100644 --- a/contrib/ipfilter/tools/ipfs.c +++ b/contrib/ipfilter/tools/ipfs.c @@ -5,15 +5,6 @@ * * See the IPFILTER.LICENCE file for details on licencing. 
*/ -#ifdef __FreeBSD__ -# ifndef __FreeBSD_cc_version -# include <osreldate.h> -# else -# if __FreeBSD_cc_version < 430000 -# include <osreldate.h> -# endif -# endif -#endif #include <stdio.h> #include <unistd.h> #include <string.h> diff --git a/contrib/ipfilter/tools/ipfstat.c b/contrib/ipfilter/tools/ipfstat.c index 3f0060189f23..e18eecaabe28 100644 --- a/contrib/ipfilter/tools/ipfstat.c +++ b/contrib/ipfilter/tools/ipfstat.c @@ -5,58 +5,26 @@ * * See the IPFILTER.LICENCE file for details on licencing. */ -#ifdef __FreeBSD__ -# ifndef __FreeBSD_cc_version -# include <osreldate.h> -# else -# if __FreeBSD_cc_version < 430000 -# include <osreldate.h> -# endif -# endif -#endif #include <sys/ioctl.h> #include <ctype.h> #include <fcntl.h> -#ifdef linux -# include <linux/a.out.h> -#else # include <nlist.h> -#endif #include <ctype.h> -#if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +#if defined(sun) && defined(__SVR4) # include <stddef.h> #endif #include "ipf.h" #include "netinet/ipl.h" -#if defined(STATETOP) -# if defined(_BSDI_VERSION) -# undef STATETOP -# endif -# if defined(__FreeBSD__) && \ - (!defined(__FreeBSD_version) || (__FreeBSD_version < 430000)) -# undef STATETOP -# endif -# if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 105000000) -# undef STATETOP -# endif -# if defined(sun) -# if defined(__svr4__) || defined(__SVR4) +#if defined(STATETOP) +# if defined(sun) && defined(__SVR4) # include <sys/select.h> -# else -# undef STATETOP /* NOT supported on SunOS4 */ -# endif # endif -#endif -#if defined(STATETOP) && !defined(linux) # include <netinet/ip_var.h> # include <netinet/tcp_fsm.h> -#endif -#ifdef STATETOP # include <ctype.h> # include <signal.h> # include <time.h> -# if SOLARIS || defined(__NetBSD__) || defined(_BSDI_VERSION) || \ - defined(__sgi) +# if SOLARIS || defined(__NetBSD__) # ifdef ERR # undef ERR # endif @@ -66,7 +34,7 @@ # endif /* SOLARIS */ #endif /* STATETOP */ #include "kmem.h" -#if defined(__NetBSD__) || (__OpenBSD__) +#if defined(__NetBSD__) # include <paths.h> #endif @@ -75,9 +43,6 @@ static const char sccsid[] = "@(#)fils.c 1.21 4/20/96 (C) 1993-2000 Darren Reed" static const char rcsid[] = "@(#)$Id$"; #endif -#ifdef __hpux -# define nlist nlist64 -#endif extern char *optarg; extern int optind; diff --git a/contrib/ipfilter/tools/ipftest.c b/contrib/ipfilter/tools/ipftest.c index 378523d3bdf3..f9d45f71cbb2 100644 --- a/contrib/ipfilter/tools/ipftest.c +++ b/contrib/ipfilter/tools/ipftest.c @@ -43,9 +43,6 @@ void dumprules __P((frentry_t *)); void drain_log __P((char *)); void fixv4sums __P((mb_t *, ip_t *)); -#if defined(__NetBSD__) || defined(__OpenBSD__) || SOLARIS || \ - (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) || \ - defined(__osf__) || defined(linux) int ipftestioctl __P((int, ioctlcmd_t, ...)); int ipnattestioctl __P((int, ioctlcmd_t, ...)); int ipstatetestioctl __P((int, ioctlcmd_t, ...)); @@ -53,15 +50,6 @@ int ipauthtestioctl __P((int, ioctlcmd_t, ...)); int ipscantestioctl __P((int, ioctlcmd_t, ...)); int ipsynctestioctl __P((int, ioctlcmd_t, ...)); int ipooltestioctl __P((int, ioctlcmd_t, ...)); -#else -int ipftestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipnattestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipstatetestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipauthtestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipsynctestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipscantestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipooltestioctl __P((dev_t, ioctlcmd_t, void *)); -#endif static ioctlfunc_t 
iocfunctions[IPL_LOGSIZE] = { ipftestioctl, ipnattestioctl, @@ -292,15 +280,7 @@ main(argc,argv) ipf_state_flush(softc, 1, 0); if (dir && (ifp != NULL) && IP_V(ip) && (m != NULL)) -#if defined(__sgi) && (IRIX < 60500) - (*ifp->if_output)(ifp, (void *)m, NULL); -#else -# if TRU64 >= 1885 - (*ifp->if_output)(ifp, (void *)m, NULL, 0, 0); -# else (*ifp->if_output)(ifp, (void *)m, NULL, 0); -# endif -#endif while ((m != NULL) && (m != &mb)) { n = m->mb_next; @@ -351,9 +331,6 @@ main(argc,argv) } -#if defined(__NetBSD__) || defined(__OpenBSD__) || SOLARIS || \ - (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) || \ - defined(__osf__) || defined(linux) int ipftestioctl(int dev, ioctlcmd_t cmd, ...) { caddr_t data; @@ -513,141 +490,6 @@ int ipooltestioctl(int dev, ioctlcmd_t cmd, ...) } return 0; } -#else -int ipftestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGIPF, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(IPF,%#x,%p) = %d (%d)\n", - cmd, data, i, softc->ipf_interror); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipnattestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGNAT, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(NAT,%#x,%p) = %d\n", cmd, data, i); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipstatetestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGSTATE, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(STATE,%#x,%p) = %d\n", cmd, data, i); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipauthtestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGAUTH, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(AUTH,%#x,%p) = %d\n", cmd, data, i); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipsynctestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGSYNC, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(SYNC,%#x,%p) = %d\n", cmd, data, i); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipscantestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGSCAN, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(SCAN,%#x,%p) = %d\n", cmd, data, i); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipooltestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGLOOKUP, cmd, data, FWRITE|FREAD); - if (opts & OPT_DEBUG) - fprintf(stderr, "ipfioctl(POOL,%#x,%p) = %d (%d)\n", - cmd, data, i, softc->ipf_interror); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} -#endif int kmemcpy(addr, offset, size) diff --git a/contrib/ipfilter/tools/ipmon.c b/contrib/ipfilter/tools/ipmon.c index 1c52e7fd87ac..4e4d9cc28f9e 100644 --- a/contrib/ipfilter/tools/ipmon.c +++ 
b/contrib/ipfilter/tools/ipmon.c @@ -20,12 +20,7 @@ static const char rcsid[] = "@(#)$Id$"; #endif -#if defined(sun) && !defined(SOLARIS2) -#define STRERROR(x) sys_errlist[x] -extern char *sys_errlist[]; -#else #define STRERROR(x) strerror(x) -#endif extern int optind; extern char *optarg; @@ -116,11 +111,7 @@ char *reasons[] = { #ifdef MENTAT static char *pidfile = "/etc/opt/ipf/ipmon.pid"; #else -# if BSD >= 199306 static char *pidfile = "/var/run/ipmon.pid"; -# else -static char *pidfile = "/etc/ipmon.pid"; -# endif #endif static char line[2048]; @@ -138,11 +129,7 @@ static char *icmpname __P((u_int, u_int)); static char *icmpname6 __P((u_int, u_int)); static icmp_type_t *find_icmptype __P((int, icmp_type_t *, size_t)); static icmp_subtype_t *find_icmpsubtype __P((int, icmp_subtype_t *, size_t)); -#ifdef __hpux -static struct tm *get_tm __P((u_32_t)); -#else static struct tm *get_tm __P((time_t)); -#endif char *portlocalname __P((int, char *, u_int)); int main __P((int, char *[])); @@ -400,11 +387,6 @@ static void init_tabs() if (protocols[0]) free(protocols[0]); protocols[0] = strdup("ip"); -#if defined(_AIX51) - if (protocols[252]) - free(protocols[252]); - protocols[252] = NULL; -#endif } if (udp_ports != NULL) { @@ -643,11 +625,7 @@ void dumphex(log, dopts, buf, len) static struct tm *get_tm(sec) -#ifdef __hpux - u_32_t sec; -#else time_t sec; -#endif { struct tm *tm; time_t t; @@ -1123,10 +1101,6 @@ static void print_ipflog(conf, buf, blen) sprintf(t, "%dx ", ipl->ipl_count); t += strlen(t); } -#if (defined(MENTAT) || \ - (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603))) || defined(linux) { char ifname[sizeof(ipf->fl_ifname) + 1]; @@ -1134,28 +1108,13 @@ static void print_ipflog(conf, buf, blen) ifname[sizeof(ipf->fl_ifname)] = '\0'; sprintf(t, "%s", ifname); t += strlen(t); -# if defined(MENTAT) || defined(linux) -# if defined(linux) - /* - * On Linux, the loopback interface is just "lo", not "lo0". 
- */ - if (strcmp(ifname, "lo") != 0) -# endif +# if defined(MENTAT) if (ISALPHA(*(t - 1))) { sprintf(t, "%d", ipf->fl_unit); t += strlen(t); } # endif } -#else - for (len = 0; len < 3; len++) - if (ipf->fl_ifname[len] == '\0') - break; - if (ipf->fl_ifname[len]) - len++; - sprintf(t, "%*.*s%u", len, len, ipf->fl_ifname, ipf->fl_unit); - t += strlen(t); -#endif if ((ipf->fl_group[0] == (char)~0) && (ipf->fl_group[1] == '\0')) strcat(t, " @-1:"); else if (ipf->fl_group[0] == '\0') diff --git a/contrib/ipfilter/tools/ipnat.c b/contrib/ipfilter/tools/ipnat.c index c3a715698036..54a71653f3e2 100644 --- a/contrib/ipfilter/tools/ipnat.c +++ b/contrib/ipfilter/tools/ipnat.c @@ -12,7 +12,7 @@ #include <fcntl.h> #include <errno.h> #include <sys/types.h> -#if !defined(__SVR4) && !defined(__svr4__) +#if !defined(__SVR4) #include <strings.h> #else #include <sys/byteorder.h> @@ -28,7 +28,7 @@ #undef _KERNEL #include <sys/socket.h> #include <sys/ioctl.h> -#if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +#if defined(sun) && defined(__SVR4) # include <sys/ioccom.h> # include <sys/sysmacros.h> #endif @@ -42,25 +42,13 @@ #include <arpa/inet.h> #include <resolv.h> #include <ctype.h> -#if defined(linux) -# include <linux/a.out.h> -#else # include <nlist.h> -#endif #include "ipf.h" #include "netinet/ipl.h" #include "kmem.h" -#ifdef __hpux -# define nlist nlist64 -#endif -#if defined(sun) && !SOLARIS2 -# define STRERROR(x) sys_errlist[x] -extern char *sys_errlist[]; -#else # define STRERROR(x) strerror(x) -#endif #if !defined(lint) static const char sccsid[] ="@(#)ipnat.c 1.9 6/5/96 (C) 1993 Darren Reed"; diff --git a/contrib/ipfilter/tools/ipnat_y.y b/contrib/ipfilter/tools/ipnat_y.y index 39e6a92bdf67..e24641306634 100644 --- a/contrib/ipfilter/tools/ipnat_y.y +++ b/contrib/ipfilter/tools/ipnat_y.y @@ -6,15 +6,6 @@ * See the IPFILTER.LICENCE file for details on licencing. 
*/ %{ -#ifdef __FreeBSD__ -# ifndef __FreeBSD_cc_version -# include <osreldate.h> -# else -# if __FreeBSD_cc_version < 430000 -# include <osreldate.h> -# endif -# endif -#endif #include <stdio.h> #include <unistd.h> #include <string.h> diff --git a/contrib/ipfilter/tools/ippool.c b/contrib/ipfilter/tools/ippool.c index ea2ef910cb68..01d8fe236e5a 100644 --- a/contrib/ipfilter/tools/ippool.c +++ b/contrib/ipfilter/tools/ippool.c @@ -9,9 +9,7 @@ #include <sys/time.h> #include <sys/param.h> #include <sys/socket.h> -#if defined(BSD) && (BSD >= 199306) # include <sys/cdefs.h> -#endif #include <sys/ioctl.h> #include <net/if.h> @@ -26,11 +24,7 @@ #include <netdb.h> #include <ctype.h> #include <unistd.h> -#ifdef linux -# include <linux/a.out.h> -#else # include <nlist.h> -#endif #include "ipf.h" #include "netinet/ipl.h" diff --git a/contrib/ipfilter/tools/ippool_y.y b/contrib/ipfilter/tools/ippool_y.y index 2c7574f9432d..2a9d8ee3b079 100644 --- a/contrib/ipfilter/tools/ippool_y.y +++ b/contrib/ipfilter/tools/ippool_y.y @@ -10,9 +10,7 @@ #include <sys/time.h> #include <sys/param.h> #include <sys/socket.h> -#if defined(BSD) && (BSD >= 199306) # include <sys/cdefs.h> -#endif #include <sys/ioctl.h> #include <net/if.h> diff --git a/lib/libc/tests/stdlib/Makefile b/lib/libc/tests/stdlib/Makefile index bb9542b185ed..9f7afa112491 100644 --- a/lib/libc/tests/stdlib/Makefile +++ b/lib/libc/tests/stdlib/Makefile @@ -2,6 +2,7 @@ .include <src.opts.mk> +ATF_TESTS_C+= dynthr_test ATF_TESTS_C+= heapsort_test ATF_TESTS_C+= mergesort_test ATF_TESTS_C+= qsort_test @@ -62,4 +63,6 @@ LIBADD.${t}+= netbsd util LIBADD.strtod_test+= m +SUBDIR+= dynthr_mod + .include <bsd.test.mk> diff --git a/lib/libc/tests/stdlib/dynthr_mod/Makefile b/lib/libc/tests/stdlib/dynthr_mod/Makefile new file mode 100644 index 000000000000..b2a93b3af6d6 --- /dev/null +++ b/lib/libc/tests/stdlib/dynthr_mod/Makefile @@ -0,0 +1,11 @@ +# $FreeBSD$ + +SHLIB_NAME= dynthr_mod.so +SHLIBDIR= ${TESTSDIR} +SRCS= dynthr_mod.c +LIBADD= pthread + +TESTSDIR:= ${TESTSBASE}/${RELDIR:C/libc\/tests/libc/:H} + + +.include <bsd.lib.mk> diff --git a/lib/libc/tests/stdlib/dynthr_mod/dynthr_mod.c b/lib/libc/tests/stdlib/dynthr_mod/dynthr_mod.c new file mode 100644 index 000000000000..c455808f6b92 --- /dev/null +++ b/lib/libc/tests/stdlib/dynthr_mod/dynthr_mod.c @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2019 Andrew Gierth + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Though this file is initially distributed under the 2-clause BSD license, + * the author grants permission for its redistribution under alternative + * licenses as set forth at <https://rhodiumtoad.github.io/RELICENSE.txt>. + * This paragraph and the RELICENSE.txt file are not part of the license and + * may be omitted in redistributions. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> + +static pthread_t thr; + +static void * +mod_thread(void *ptr) +{ + char *volatile dummy; + + dummy = malloc(500); + return (NULL); +} + +void +mod_main(int op) +{ + int rc; + + switch (op) { + case 1: + rc = pthread_create(&thr, NULL, mod_thread, NULL); + if (rc != 0) + _exit(1); + break; + case 0: + pthread_join(thr, NULL); + break; + } +} + diff --git a/lib/libc/tests/stdlib/dynthr_test.c b/lib/libc/tests/stdlib/dynthr_test.c new file mode 100644 index 000000000000..d72ee280ccb2 --- /dev/null +++ b/lib/libc/tests/stdlib/dynthr_test.c @@ -0,0 +1,93 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2019 Andrew Gierth + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Though this file is initially distributed under the 2-clause BSD license, + * the author grants permission for its redistribution under alternative + * licenses as set forth at <https://rhodiumtoad.github.io/RELICENSE.txt>. + * This paragraph and the RELICENSE.txt file are not part of the license and + * may be omitted in redistributions. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <dlfcn.h> + +#include <atf-c.h> + +typedef void (modfunc_t)(int op); + +/* + * Minimal test case for PR 235158; mutual dependencies between jemalloc and + * libthr causing issues in thread creation. Specifically to this case, libthr + * uses calloc to initialize pthread mutexes, and jemalloc uses pthread mutexes. + * + * Deferred initialization provided by jemalloc proved to be fragile, causing + * issues like in the referenced PR where thread creation in a shared object + * loaded via dlopen(3) would stall unless the calling application also linked + * against pthread. + */ +ATF_TC(maintc); +ATF_TC_HEAD(maintc, tc) +{ + + atf_tc_set_md_var(tc, "timeout", "3"); +} + +ATF_TC_BODY(maintc, tc) +{ + char *libpath; + modfunc_t *func; + void *mod_handle; + const char *srcdir; + dlfunc_t rawfunc; + + srcdir = atf_tc_get_config_var(tc, "srcdir"); + if (asprintf(&libpath, "%s/dynthr_mod.so", srcdir) < 0) + atf_tc_fail("failed to construct path to libthr"); + mod_handle = dlopen(libpath, RTLD_LOCAL); + free(libpath); + if (mod_handle == NULL) + atf_tc_fail("failed to open dynthr_mod.so: %s", dlerror()); + rawfunc = dlfunc(mod_handle, "mod_main"); + if (rawfunc == NULL) + atf_tc_fail("failed to resolve function mod_main"); + func = (modfunc_t *)rawfunc; + func(1); + func(0); +} + +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, maintc); + return (atf_no_error()); +} diff --git a/lib/libsdp/sdp.h b/lib/libsdp/sdp.h index b6d533898582..f120fc50f774 100644 --- a/lib/libsdp/sdp.h +++ b/lib/libsdp/sdp.h @@ -586,6 +586,24 @@ void sdp_print (uint32_t level, uint8_t const *start, #define SDP_PDU_SERVICE_UNREGISTER_REQUEST 0x82 #define SDP_PDU_SERVICE_CHANGE_REQUEST 0x83 +struct sdp_audio_sink_profile +{ + uint16_t psm; + uint16_t protover; + uint16_t features; +}; +typedef struct sdp_audio_sink_profile sdp_audio_sink_profile_t; +typedef struct sdp_audio_sink_profile *sdp_audio_sink_profile_p; + +struct sdp_audio_source_profile +{ + uint16_t psm; + uint16_t protover; + uint16_t features; +}; +typedef struct sdp_audio_source_profile sdp_audio_source_profile_t; +typedef struct sdp_audio_source_profile *sdp_audio_source_profile_p; + struct sdp_dun_profile { uint8_t server_channel; diff --git a/lib/libthr/Makefile b/lib/libthr/Makefile index 56905d36e8c5..4a11cdb6c969 100644 --- a/lib/libthr/Makefile +++ b/lib/libthr/Makefile @@ -27,7 +27,7 @@ CFLAGS+=-I${SRCTOP}/lib/libthread_db CFLAGS+=-Winline CFLAGS.thr_stack.c+= -Wno-cast-align -CFLAGS.malloc.c+= -Wno-cast-align +CFLAGS.rtld_malloc.c+= -Wno-cast-align .include <bsd.compiler.mk> .if !(${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} < 40300) CFLAGS.thr_symbols.c+= -Wno-missing-variable-declarations @@ -58,7 +58,7 @@ PRECIOUSLIB= .endif .include "${.CURDIR}/sys/Makefile.inc" .include "${.CURDIR}/thread/Makefile.inc" -SRCS+= malloc.c +SRCS+= rtld_malloc.c .if ${MK_INSTALLLIB} != "no" SYMLINKS+=lib${LIB}.a ${LIBDIR}/libpthread.a diff --git a/lib/msun/Makefile b/lib/msun/Makefile index 0cba3fc8b53b..6c9af4c018c8 100644 --- a/lib/msun/Makefile +++ b/lib/msun/Makefile @@ -108,6 +108,15 @@ COMMON_SRCS+= catrigl.c \ s_nextafterl.c s_nexttoward.c s_remquol.c s_rintl.c s_roundl.c \ s_scalbnl.c s_sinl.c s_sincosl.c \ s_tanhl.c s_tanl.c s_truncl.c w_cabsl.c +# Work around this warning from gcc 6: +# lib/msun/ld80/e_powl.c:275:1: error: floating constant exceeds range of +# 'long double' [-Werror=overflow] 
+# if( y >= LDBL_MAX ) +# See also: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=130067 +.include <bsd.compiler.mk> +.if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 60000 +CFLAGS.e_powl.c+= -Wno-error=overflow +.endif .endif # C99 complex functions diff --git a/libexec/rtld-elf/Makefile b/libexec/rtld-elf/Makefile index b0f02a7205e8..71b75b9273b3 100644 --- a/libexec/rtld-elf/Makefile +++ b/libexec/rtld-elf/Makefile @@ -14,9 +14,17 @@ PROG?= ld-elf.so.1 .if (${PROG:M*ld-elf32*} != "") TAGS+= lib32 .endif -SRCS= rtld_start.S \ - reloc.c rtld.c rtld_lock.c rtld_printf.c map_object.c \ - malloc.c xmalloc.c debug.c libmap.c +SRCS= \ + rtld_start.S \ + reloc.c \ + rtld.c \ + rtld_lock.c \ + rtld_malloc.c \ + rtld_printf.c \ + map_object.c \ + xmalloc.c \ + debug.c \ + libmap.c MAN= rtld.1 CSTD?= gnu99 CFLAGS+= -Wall -DFREEBSD_ELF -DIN_RTLD -ffreestanding diff --git a/libexec/rtld-elf/i386/reloc.c b/libexec/rtld-elf/i386/reloc.c index 84bbaf5f2f89..ef6e805aeeb4 100644 --- a/libexec/rtld-elf/i386/reloc.c +++ b/libexec/rtld-elf/i386/reloc.c @@ -146,6 +146,10 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags, } else cache = NULL; + /* Appease some compilers. */ + symval = 0; + def = NULL; + rellim = (const Elf_Rel *)((const char *)obj->rel + obj->relsize); for (rel = obj->rel; rel < rellim; rel++) { switch (ELF_R_TYPE(rel->r_info)) { diff --git a/libexec/rtld-elf/malloc.c b/libexec/rtld-elf/rtld_malloc.c index 020d2fe112da..020d2fe112da 100644 --- a/libexec/rtld-elf/malloc.c +++ b/libexec/rtld-elf/rtld_malloc.c diff --git a/sbin/Makefile b/sbin/Makefile index 4f08a82fe572..2c4b042d91ec 100644 --- a/sbin/Makefile +++ b/sbin/Makefile @@ -52,6 +52,7 @@ SUBDIR=adjkerntz \ newfs_msdos \ nfsiod \ nos-tun \ + pfilctl \ ping \ rcorder \ reboot \ diff --git a/sbin/ifconfig/ifieee80211.c b/sbin/ifconfig/ifieee80211.c index ce1950e65b89..c0649e33e783 100644 --- a/sbin/ifconfig/ifieee80211.c +++ b/sbin/ifconfig/ifieee80211.c @@ -4301,7 +4301,10 @@ list_roam(int s) rp = &roamparams.params[mode]; if (rp->rssi == 0 && rp->rate == 0) continue; - if (mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG) { + if (mode == IEEE80211_MODE_11NA || + mode == IEEE80211_MODE_11NG || + mode == IEEE80211_MODE_VHT_2GHZ || + mode == IEEE80211_MODE_VHT_5GHZ) { if (rp->rssi & 1) LINE_CHECK("roam:%-7.7s rssi %2u.5dBm MCS %2u ", modename[mode], rp->rssi/2, @@ -4321,6 +4324,21 @@ list_roam(int s) } } +/* XXX TODO: rate-to-string method... */ +static const char* +get_mcs_mbs_rate_str(uint8_t rate) +{ + return (rate & IEEE80211_RATE_MCS) ? 
"MCS " : "Mb/s"; +} + +static uint8_t +get_rate_value(uint8_t rate) +{ + if (rate & IEEE80211_RATE_MCS) + return (rate &~ IEEE80211_RATE_MCS); + return (rate / 2); +} + static void list_txparams(int s) { @@ -4332,21 +4350,28 @@ list_txparams(int s) tp = &txparams.params[mode]; if (tp->mgmtrate == 0 && tp->mcastrate == 0) continue; - if (mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG) { + if (mode == IEEE80211_MODE_11NA || + mode == IEEE80211_MODE_11NG || + mode == IEEE80211_MODE_VHT_2GHZ || + mode == IEEE80211_MODE_VHT_5GHZ) { if (tp->ucastrate == IEEE80211_FIXED_RATE_NONE) - LINE_CHECK("%-7.7s ucast NONE mgmt %2u MCS " - "mcast %2u MCS maxretry %u", + LINE_CHECK("%-7.7s ucast NONE mgmt %2u %s " + "mcast %2u %s maxretry %u", modename[mode], - tp->mgmtrate &~ IEEE80211_RATE_MCS, - tp->mcastrate &~ IEEE80211_RATE_MCS, + get_rate_value(tp->mgmtrate), + get_mcs_mbs_rate_str(tp->mgmtrate), + get_rate_value(tp->mcastrate), + get_mcs_mbs_rate_str(tp->mcastrate), tp->maxretry); else - LINE_CHECK("%-7.7s ucast %2u MCS mgmt %2u MCS " - "mcast %2u MCS maxretry %u", + LINE_CHECK("%-7.7s ucast %2u MCS mgmt %2u %s " + "mcast %2u %s maxretry %u", modename[mode], tp->ucastrate &~ IEEE80211_RATE_MCS, - tp->mgmtrate &~ IEEE80211_RATE_MCS, - tp->mcastrate &~ IEEE80211_RATE_MCS, + get_rate_value(tp->mgmtrate), + get_mcs_mbs_rate_str(tp->mgmtrate), + get_rate_value(tp->mcastrate), + get_mcs_mbs_rate_str(tp->mcastrate), tp->maxretry); } else { if (tp->ucastrate == IEEE80211_FIXED_RATE_NONE) diff --git a/sbin/pfilctl/Makefile b/sbin/pfilctl/Makefile new file mode 100644 index 000000000000..04f0a622ce14 --- /dev/null +++ b/sbin/pfilctl/Makefile @@ -0,0 +1,9 @@ +# $FreeBSD$ + +PROG= pfilctl +SRCS= pfilctl.c +WARNS?= 6 + +MAN= pfilctl.8 + +.include <bsd.prog.mk> diff --git a/sbin/pfilctl/pfilctl.8 b/sbin/pfilctl/pfilctl.8 new file mode 100644 index 000000000000..d0a50e489a03 --- /dev/null +++ b/sbin/pfilctl/pfilctl.8 @@ -0,0 +1,117 @@ +.\" Copyright (c) 2019 Gleb Smirnoff <glebius@FreeBSD.org> +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dd January 28, 2019 +.Dt PFILCTL 8 +.Os +.Sh NAME +.Nm pfilctl +.Nd pfil(9) control utility +.Sh SYNOPSIS +.Nm +.Cm heads +.Nm +.Cm hooks +.Nm +.Cm link +.Aq Fl i | Fl o +.Op Fl a +.Ar hook Ar head +.Nm +.Cm unlink +.Aq Fl i | Fl o +.Ar hook Ar head +.Sh DESCRIPTION +The +.Nm +utility is intended to view and change configuration of the +.Xr pfil 9 +packet filtering hooks and filters on them. +.Sh COMMANDS +.Bl -tag -width "unlink" +.It Cm heads +List available packet filtering points. +.It Cm hooks +List available packet filters. +.It Xo +.Cm link +.Aq Fl i | Fl o +.Op Fl a +.Ar hook Ar head +.Xc +Link +.Ar hook +to +.Ar head . +With the +.Fl i +flag the hook will be connected as input and with +.Fl o +as output hook. +At least one of +.Fl i +or +.Fl o +is required. +By default +.Nm +will prepend the hook in front of other hooks if any present: +new hook will be as close to the wire as possible, so that on input +it will be the first filter and on output it will be the last. +Adding the +.Fl a +flag switches to appending new hook instead of prepending. +.It Xo +.Cm unlink +.Aq Fl i | Fl o +.Ar hook Ar head +.Xc +Unlink +.Ar hook +on +.Ar head . +At least one of +.Fl i +or +.Fl o +is required. +With the +.Fl i +flag the hook will be removed from the input list of hooks +and with +.Fl o +on output list. +.El +.Sh SEE ALSO +.Xr ipfilter 4 , +.Xr ipfw 4 , +.Xr pf 4 , +.Xr pfil 9 +.Sh AUTHORS +.An -nosplit +The +.Nm +utility was written by +.An Gleb Smirnoff Aq Mt glebius@FreeBSD.org . diff --git a/sbin/pfilctl/pfilctl.c b/sbin/pfilctl/pfilctl.c new file mode 100644 index 000000000000..e360c73cb279 --- /dev/null +++ b/sbin/pfilctl/pfilctl.c @@ -0,0 +1,229 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Gleb Smirnoff <glebius@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/ioctl.h> +#include <net/if.h> +#include <net/pfil.h> + +#include <err.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +static int dev; + +static const char * const typenames[] = { + [PFIL_TYPE_IP4] = "IPv4", + [PFIL_TYPE_IP6] = "IPv6", + [PFIL_TYPE_ETHERNET] = "Ethernet", +}; + +static void listheads(int argc, char *argv[]); +static void listhooks(int argc, char *argv[]); +static void hook(int argc, char *argv[]); +static void help(void); + +static const struct cmd { + const char *cmd_name; + void (*cmd_func)(int argc, char *argv[]); +} cmds[] = { + { "heads", listheads }, + { "hooks", listhooks }, + { "link", hook }, + { "unlink", hook }, + { NULL, NULL }, +}; + +int +main(int argc __unused, char *argv[] __unused) +{ + int cmd = -1; + + if (--argc == 0) + help(); + argv++; + + for (int i = 0; cmds[i].cmd_name != NULL; i++) + if (!strncmp(argv[0], cmds[i].cmd_name, strlen(argv[0]))) { + if (cmd != -1) + errx(1, "ambiguous command: %s", argv[0]); + cmd = i; + } + if (cmd == -1) + errx(1, "unknown command: %s", argv[0]); + + dev = open("/dev/" PFILDEV, O_RDWR); + if (dev == -1) + err(1, "open(%s)", "/dev/" PFILDEV); + + (*cmds[cmd].cmd_func)(argc, argv); + + return (0); +} + +static void +help(void) +{ + + fprintf(stderr, "usage: %s (heads|hooks|link|unlink)\n", getprogname()); + exit(0); +} + +static void +listheads(int argc __unused, char *argv[] __unused) +{ + struct pfilioc_list plh; + u_int nheads, nhooks, i; + int j, h; + + plh.pio_nheads = 0; + plh.pio_nhooks = 0; + if (ioctl(dev, PFILIOC_LISTHEADS, &plh) != 0) + err(1, "ioctl(PFILIOC_LISTHEADS)"); + +retry: + plh.pio_heads = calloc(plh.pio_nheads, sizeof(struct pfilioc_head)); + if (plh.pio_heads == NULL) + err(1, "malloc"); + plh.pio_hooks = calloc(plh.pio_nhooks, sizeof(struct pfilioc_hook)); + if (plh.pio_hooks == NULL) + err(1, "malloc"); + + nheads = plh.pio_nheads; + nhooks = plh.pio_nhooks; + + if (ioctl(dev, PFILIOC_LISTHEADS, &plh) != 0) + err(1, "ioctl(PFILIOC_LISTHEADS)"); + + if (plh.pio_nheads > nheads || plh.pio_nhooks > nhooks) { + free(plh.pio_heads); + free(plh.pio_hooks); + goto retry; + } + +#define FMTHD "%16s %8s\n" +#define FMTHK "%29s %16s %16s\n" + printf(FMTHD, "Intercept point", "Type"); + for (i = 0, h = 0; i < plh.pio_nheads; i++) { + printf(FMTHD, plh.pio_heads[i].pio_name, + typenames[plh.pio_heads[i].pio_type]); + for (j = 0; j < plh.pio_heads[i].pio_nhooksin; j++, h++) + printf(FMTHK, "In", plh.pio_hooks[h].pio_module, + plh.pio_hooks[h].pio_ruleset); + for (j = 0; j < plh.pio_heads[i].pio_nhooksout; j++, h++) + printf(FMTHK, "Out", plh.pio_hooks[h].pio_module, + plh.pio_hooks[h].pio_ruleset); + } +} + +static void +listhooks(int argc __unused, char *argv[] __unused) +{ + struct pfilioc_list plh; + u_int nhooks, i; + + plh.pio_nhooks = 0; + if (ioctl(dev, PFILIOC_LISTHEADS, &plh) != 0) + err(1, "ioctl(PFILIOC_LISTHEADS)"); +retry: + plh.pio_hooks = calloc(plh.pio_nhooks, sizeof(struct pfilioc_hook)); + if (plh.pio_hooks == NULL) + err(1, "malloc"); + + nhooks = plh.pio_nhooks; + + if (ioctl(dev, PFILIOC_LISTHOOKS, &plh) != 0) + err(1, "ioctl(PFILIOC_LISTHOOKS)"); + + if (plh.pio_nhooks > nhooks) { + free(plh.pio_hooks); + goto retry; + } + + printf("Available hooks:\n"); + for (i = 0; i < plh.pio_nhooks; i++) { + printf("\t%s:%s %s\n", plh.pio_hooks[i].pio_module, + plh.pio_hooks[i].pio_ruleset, + typenames[plh.pio_hooks[i].pio_type]); + } +} + 
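+/* + * The link/unlink handler below sets PFIL_UNLINK when the program is invoked as "unlink", + * maps the -i/-o/-a options to PFIL_IN/PFIL_OUT/PFIL_APPEND, splits the "module:ruleset" + * argument at the colon and submits the resulting pfilioc_link request with the + * PFILIOC_LINK ioctl. + */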
+static void +hook(int argc, char *argv[]) +{ + struct pfilioc_link req; + int c; + char *ruleset; + + if (argv[0][0] == 'u') + req.pio_flags = PFIL_UNLINK; + else + req.pio_flags = 0; + + while ((c = getopt(argc, argv, "ioa")) != -1) + switch (c) { + case 'i': + req.pio_flags |= PFIL_IN; + break; + case 'o': + req.pio_flags |= PFIL_OUT; + break; + case 'a': + req.pio_flags |= PFIL_APPEND; + break; + default: + help(); + } + + if (!PFIL_DIR(req.pio_flags)) + help(); + + argc -= optind; + argv += optind; + + if (argc != 2) + help(); + + /* link mod:ruleset head */ + if ((ruleset = strchr(argv[0], ':')) == NULL) + help(); + *ruleset = '\0'; + ruleset++; + + strlcpy(req.pio_name, argv[1], sizeof(req.pio_name)); + strlcpy(req.pio_module, argv[0], sizeof(req.pio_module)); + strlcpy(req.pio_ruleset, ruleset, sizeof(req.pio_ruleset)); + + if (ioctl(dev, PFILIOC_LINK, &req) != 0) + err(1, "ioctl(PFILIOC_LINK)"); +} diff --git a/share/man/man4/bnxt.4 b/share/man/man4/bnxt.4 index 12c2f9a0626a..915e35cc55d8 100644 --- a/share/man/man4/bnxt.4 +++ b/share/man/man4/bnxt.4 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 20, 2018 +.Dd January 30, 2019 .Dt BNXT 4 .Os .Sh NAME @@ -36,6 +36,7 @@ To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device bnxt" .Ed .Pp diff --git a/share/man/man4/em.4 b/share/man/man4/em.4 index 7f84be1fbbf4..c79e88cc9ed2 100644 --- a/share/man/man4/em.4 +++ b/share/man/man4/em.4 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 20, 2018 +.Dd January 30, 2019 .Dt EM 4 .Os .Sh NAME @@ -39,9 +39,10 @@ .Nd "Intel(R) PRO/1000 Gigabit Ethernet adapter driver" .Sh SYNOPSIS To compile this driver into the kernel, -place the following line in your +place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device em" .Ed .Pp diff --git a/share/man/man4/iavf.4 b/share/man/man4/iavf.4 index 0531c89ed8b3..c870284f2836 100644 --- a/share/man/man4/iavf.4 +++ b/share/man/man4/iavf.4 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 5, 2018 +.Dd January 30, 2019 .Dt IAVF 4 .Os .Sh NAME @@ -41,6 +41,7 @@ To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device iavf" .Ed .Pp diff --git a/share/man/man4/imcsmb.4 b/share/man/man4/imcsmb.4 index 221f9362ed52..b1acb8bca00d 100644 --- a/share/man/man4/imcsmb.4 +++ b/share/man/man4/imcsmb.4 @@ -2,7 +2,6 @@ .\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD .\" .\" Copyright (c) 2018 Panasas -.\" All rights reserved. 
.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions diff --git a/share/man/man4/ixgbe.4 b/share/man/man4/ixgbe.4 index 99c1cc7f8e27..f51ac8dbf262 100644 --- a/share/man/man4/ixgbe.4 +++ b/share/man/man4/ixgbe.4 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 19, 2010 +.Dd January 30, 2019 .Dt IXGBE 4 .Os .Sh NAME @@ -39,9 +39,10 @@ .Nd "Intel(R) 10Gb Ethernet driver for the FreeBSD operating system" .Sh SYNOPSIS To compile this driver into the kernel, -place the following line in your +place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device ixgbe" .Ed .Pp diff --git a/share/man/man4/ixl.4 b/share/man/man4/ixl.4 index d98ffd365c96..c8e674332f42 100644 --- a/share/man/man4/ixl.4 +++ b/share/man/man4/ixl.4 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 5, 2018 +.Dd January 30, 2019 .Dt IXL 4 .Os .Sh NAME @@ -41,6 +41,7 @@ To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device ixl" .Ed .Pp diff --git a/share/man/man4/jedec_dimm.4 b/share/man/man4/jedec_dimm.4 index c7c57b64c028..ea4183fafc1a 100644 --- a/share/man/man4/jedec_dimm.4 +++ b/share/man/man4/jedec_dimm.4 @@ -3,7 +3,6 @@ .\" .\" Copyright (c) 2016 Andriy Gapon <avg@FreeBSD.org> .\" Copyright (c) 2018 Ravi Pokala <rpokala@freebsd.org> -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions diff --git a/share/man/man4/vmx.4 b/share/man/man4/vmx.4 index 974c3f840a90..09795c3e7fbc 100644 --- a/share/man/man4/vmx.4 +++ b/share/man/man4/vmx.4 @@ -17,7 +17,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 17, 2014 +.Dd January 30, 2019 .Dt VMX 4 .Os .Sh NAME @@ -25,9 +25,10 @@ .Nd VMware VMXNET3 Virtual Interface Controller device .Sh SYNOPSIS To compile this driver into the kernel, -place the following line in your +place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device vmx" .Ed .Pp diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index 59374785d793..a4cb42b41e68 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,6 +1,6 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. .\" $FreeBSD$ -.Dd December 15, 2018 +.Dd January 31, 2019 .Dt SRC.CONF 5 .Os .Sh NAME @@ -335,6 +335,8 @@ When set, it enforces these options: .It .Va WITHOUT_CTF .It +.Va WITHOUT_LOADER_ZFS +.It .Va WITHOUT_ZFS .El .It Va WITHOUT_CLANG @@ -1045,12 +1047,12 @@ amd64/amd64, arm/armv7, arm64/aarch64 and i386/i386. Set to use GNU binutils ld as the system linker, instead of LLVM's LLD. .Pp This is a default setting on -arm/arm, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. +arm/arm, arm/armv6, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLD_IS_LD Set to use LLVM's LLD as the system linker, instead of GNU binutils ld. .Pp This is a default setting on -amd64/amd64, arm/armv7 and arm64/aarch64. 
+amd64/amd64, arm/armv7, arm64/aarch64 and i386/i386. .It Va WITHOUT_LLVM_COV Set to not build the .Xr llvm-cov 1 @@ -1082,7 +1084,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -riscv/riscv64 and sparc64/sparc64. +arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_AARCH64 Set to build LLVM target support for AArch64. The @@ -1090,7 +1092,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_ALL Set to only build the required LLVM target support. This option is preferred to specific target support options. @@ -1156,7 +1158,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -riscv/riscv64 and sparc64/sparc64. +arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_MIPS Set to build LLVM target support for MIPS. The @@ -1164,7 +1166,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_POWERPC Set to not build LLVM target support for PowerPC. The @@ -1172,7 +1174,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -riscv/riscv64 and sparc64/sparc64. +arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_POWERPC Set to build LLVM target support for PowerPC. The @@ -1180,7 +1182,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_SPARC Set to not build LLVM target support for SPARC. The @@ -1188,7 +1190,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -riscv/riscv64 and sparc64/sparc64. +arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_SPARC Set to build LLVM target support for SPARC. The @@ -1196,7 +1198,7 @@ The option should be used rather than this in most cases. 
.Pp This is a default setting on -amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_X86 Set to not build LLVM target support for X86. The @@ -1204,7 +1206,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -riscv/riscv64 and sparc64/sparc64. +arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_X86 Set to build LLVM target support for X86. The @@ -1212,7 +1214,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITH_LOADER_FIREWIRE Enable firewire support in /boot/loader on x86. This option is a nop on all other platforms. @@ -1259,6 +1261,13 @@ Set to build ubldr. .Pp This is a default setting on arm/arm, arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +.It Va WITH_LOADER_VERBOSE +Set to build with extra verbose debugging in the loader. +May explode already nearly too large loader over the limit. +Use with care. + +.It Va WITHOUT_LOADER_ZFS +Set to not build ZFS file system boot loader support. .It Va WITHOUT_LOCALES Set to not build localization files; see .Xr locale 1 . @@ -1890,7 +1899,7 @@ without support for the IEEE 802.1X protocol and without support for EAP-PEAP, EAP-TLS, EAP-LEAP, and EAP-TTLS protocols (usable only via 802.1X). .It Va WITHOUT_ZFS -Set to not build ZFS file system. +Set to not build ZFS file system kernel module, libraries, and user commands. .It Va WITHOUT_ZONEINFO Set to not build the timezone database. 
When set, it enforces these options: diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index 3034c24c6adb..bd816bf519fa 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -1635,13 +1635,9 @@ MLINKS+=pci_iov_schema.9 pci_iov_schema_alloc_node.9 \ MLINKS+=pfil.9 pfil_add_hook.9 \ pfil.9 pfil_head_register.9 \ pfil.9 pfil_head_unregister.9 \ - pfil.9 pfil_hook_get.9 \ pfil.9 pfil_remove_hook.9 \ - pfil.9 pfil_rlock.9 \ pfil.9 pfil_run_hooks.9 \ - pfil.9 pfil_runlock.9 \ - pfil.9 pfil_wlock.9 \ - pfil.9 pfil_wunlock.9 + pfil.9 pfil_link.9 MLINKS+=pfind.9 zpfind.9 MLINKS+=PHOLD.9 PRELE.9 \ PHOLD.9 _PHOLD.9 \ diff --git a/share/man/man9/pfil.9 b/share/man/man9/pfil.9 index 843191e0b4ab..c2186cf1b540 100644 --- a/share/man/man9/pfil.9 +++ b/share/man/man9/pfil.9 @@ -1,5 +1,6 @@ .\" $NetBSD: pfil.9,v 1.22 2003/07/01 13:04:06 wiz Exp $ .\" +.\" Copyright (c) 2019 Gleb Smirnoff <glebius@FreeBSD.org> .\" Copyright (c) 1996 Matthew R. Green .\" All rights reserved. .\" @@ -28,194 +29,127 @@ .\" .\" $FreeBSD$ .\" -.Dd March 10, 2018 +.Dd January 28, 2019 .Dt PFIL 9 .Os .Sh NAME .Nm pfil , .Nm pfil_head_register , .Nm pfil_head_unregister , -.Nm pfil_head_get , -.Nm pfil_add_hook , -.Nm pfil_add_hook_flags , -.Nm pfil_remove_hook , -.Nm pfil_remove_hook_flags , -.Nm pfil_run_hooks , -.Nm pfil_rlock , -.Nm pfil_runlock , -.Nm pfil_wlock , -.Nm pfil_wunlock +.Nm pfil_link , +.Nm pfil_run_hooks .Nd packet filter interface .Sh SYNOPSIS .In sys/param.h .In sys/mbuf.h -.In net/if.h .In net/pfil.h -.Bd -literal -typedef int (*pfil_func_t)(void *arg, struct mbuf **mp, struct ifnet *, int dir, struct inpcb); -.Bd -literal -typedef int (*pfil_func_flags_t)(void *arg, struct mbuf **mp, struct ifnet *, int dir, int flags, struct inpcb); -.Ft int -.Fn pfil_head_register "struct pfil_head *head" -.Ft int -.Fn pfil_head_unregister "struct pfil_head *head" -.Ft "struct pfil_head *" -.Fn pfil_head_get "int af" "u_long dlt" -.Ft int -.Fn pfil_add_hook "pfil_func_t" "void *arg" "struct pfil_head *" -.Ft int -.Fn pfil_add_hook_flags "pfil_func_flags_t" "void *arg" "int flags" "struct pfil_head *" -.Ft int -.Fn pfil_remove_hook "pfil_func_t" "void *arg" "struct pfil_head *" -.Ft int -.Fn pfil_remove_hook_flags "pfil_func_flags_t" "void *arg" "int flags" "struct pfil_head *" -.Ft int -.Fn pfil_run_hooks "struct pfil_head *head" "struct mbuf **mp" "struct ifnet *" "int dir" "int flags" "struct inpcb *" -.Ft void -.Fn pfil_rlock "struct pfil_head *" "struct rm_priotracker *" -.Ft void -.Fn pfil_runlock "struct pfil_head *" "struct rm_priotracker *" +.Ft pfil_head_t +.Fn pfil_head_register "struct pfil_head_args *args" .Ft void -.Fn pfil_wlock "struct pfil_head *" +.Fn pfil_head_unregister "struct pfil_head_t *head" +.Ft pfil_hook_t +.Fn pfil_add_hook "struct pfil_hook_args *" .Ft void -.Fn pfil_wunlock "struct pfil_head *" -.Ed +.Fn pfil_remove_hook "pfil_hook_t" +.Ft int +.Fn pfil_link "struct pfil_link_args *args" +.Ft int +.Fn pfil_run_hooks "pfil_head_t *" "pfil_packet_t" "struct ifnet *" "int" "struct inpcb *" .Sh DESCRIPTION The .Nm -framework allows for a specified function to be invoked for every -incoming or outgoing packet for a particular network I/O stream. +framework allows for a specified function or a list of functions +to be invoked for every incoming or outgoing packet for a particular +network I/O stream. These hooks may be used to implement a firewall or perform packet transformations.
.Pp -Packet filtering points are registered with +Packet filtering points, for historical reasons named +.Em heads , +are registered with .Fn pfil_head_register . -Filtering points are identified by a key -.Pq Vt "void *" -and a data link type -.Pq Vt int -in the -.Vt pfil_head -structure. -Packet filters use the key and data link type to look up the filtering -point with which they register themselves. -The key is unique to the filtering point. -The data link type is a -.Xr bpf 4 -DLT constant indicating what kind of header is present on the packet -at the filtering point. -Each filtering point uses common per-VNET rmlock by default. -This can be changed by specifying -.Vt PFIL_FLAG_PRIVATE_LOCK -as -.Vt "flags" -field in the -.Vt pfil_head -structure. -Note that specifying private lock can break filters sharing the same -ruleset and/or state between different data link types. -Filtering points may be unregistered with the -.Fn pfil_head_unregister -function. +The function is supplied with a versioned +.Vt struct pfil_head_args +structure that specifies the type and features of the head as well as +a human-readable name. +If the filtering point is ever to be destroyed, the subsystem that +created it must unregister it with a call to +.Fn pfil_head_unregister . .Pp -Packet filters register/unregister themselves with a filtering point -with the +Packet filtering systems may register an arbitrary number of filters, +for historical reasons named +.Em hooks . +To register a new hook, .Fn pfil_add_hook -and +is called with a versioned +.Vt struct pfil_hook_args +structure. +The structure specifies the type and features of the hook, a pointer to +the actual filtering function, and the user-readable names of the filtering +module and ruleset. +Hooks can later be removed with the .Fn pfil_remove_hook -functions, respectively. -.I -The head is looked up using the -.Fn pfil_head_get -function, which takes the key and data link type that the packet filter -expects. -Filters may provide an argument to be passed to the filter when -invoked on a packet. -.Pp -When a filter is invoked, the packet appears just as if it -.Dq came off the wire . -That is, all protocol fields are in network byte order. -The filter is called with its specified argument, the pointer to the -pointer to the -.Vt mbuf -containing the packet, the pointer to the network -interface that the packet is traversing, and the direction -.Dv ( PFIL_IN -or -.Dv PFIL_OUT ) -that the packet is traveling. -The -.Vt flags -argument will indicate if an outgoing packet is simply being forwarded with the -value PFIL_FWD. -The filter may change which mbuf the -.Vt "mbuf\ **" -argument references. -The filter returns an error (errno) if the packet processing is to stop, or 0 -if the processing is to continue. -If the packet processing is to stop, it is the responsibility of the -filter to free the packet. +function. .Pp -Every filter hook is called with -.Nm -read lock held. -All heads uses the same lock within the same VNET instance. -Packet filter can use this lock instead of own locking model to -improve performance. -Since +To connect an existing +.Em hook +to an existing +.Em head , +the +.Fn pfil_link +function is used. +The function is supplied with a versioned +.Vt struct pfil_link_args +structure that specifies either the literal names of the hook and head or +pointers to them. +Typically +.Fn pfil_link +is called by filtering modules to automatically link their default ruleset +to the default filtering points.
+It is also called on the kernel side of +.Xr ioctl 2 +when the user changes the .Nm configuration with the help of the +.Xr pfilctl 8 +utility. .Pp -uses -.Xr rmlock 9 -.Fn pfil_rlock -and -.Fn pfil_runlock -require -.Va struct rm_priotracker -to be passed as argument. -Filter can acquire and release writer lock via -.Fn pfil_wlock -and -.Fn pfil_wunlock -functions. -See -.Xr rmlock 9 -for more details. -.Sh FILTERING POINTS -Currently, filtering points are implemented for the following link types: +For every packet traveling through a +.Em head , +the latter shall invoke +.Fn pfil_run_hooks . +The function can accept either a +.Vt struct mbuf * +pointer or a +.Vt void * +pointer and length. +If a hooked filtering module cannot understand a +.Vt void * +pointer, +.Nm +will provide it with a fake one. +All calls to +.Fn pfil_run_hooks +are performed in network +.Xr epoch 9 . +.Sh HEADS (filtering points) +By default the kernel creates the following heads: +.Bl -tag -width "ethernet" +.It inet IPv4 packets. -.It AF_INET6 +.It inet6 IPv6 packets. -.It AF_LINK +.It ethernet Link-layer packets. .El -.Sh RETURN VALUES -If successful, -.Fn pfil_head_get -returns the -.Vt pfil_head -structure for the given key/dlt. -The -.Fn pfil_add_hook -and -.Fn pfil_remove_hook -functions -return 0 if successful. -If called with flag -.Dv PFIL_WAITOK , -.Fn pfil_remove_hook -is expected to always succeed. .Pp -The -.Fn pfil_head_unregister -function -might sleep! +Default rulesets are automatically linked to these heads to preserve +historical behavior. .Sh SEE ALSO -.Xr bpf 4 , -.Xr if_bridge 4 , -.Xr rmlock 9 +.Xr ipfilter 4 , +.Xr ipfw 4 , +.Xr pf 4 , +.Xr pfilctl 8 .Sh HISTORY The .Nm @@ -223,45 +157,8 @@ interface first appeared in .Nx 1.3 . The .Nm -input and output lists were originally implemented as -.In sys/queue.h -.Dv LIST -structures; -however this was changed in -.Nx 1.4 -to -.Dv TAILQ -structures. -This change was to allow the input and output filters to be processed in -reverse order, to allow the same path to be taken, in or out of the kernel. -.Pp -The -.Nm -interface was changed in 1.4T to accept a 3rd parameter to both -.Fn pfil_add_hook -and -.Fn pfil_remove_hook , -introducing the capability of per-protocol filtering. -This was done primarily in order to support filtering of IPv6. -.Pp -In 1.5K, the -.Nm -framework was changed to work with an arbitrary number of filtering points, -as well as be less IP-centric. -.Pp -Fine-grained locking was added in +interface was imported into .Fx 5.2 . -.Nm -lock export was added in -.Fx 10.0 . -.Sh BUGS -When a -.Vt pfil_head -is being modified, no traffic is diverted -(to avoid deadlock). -This means that traffic may be dropped unconditionally for a short period -of time. -.Fn pfil_run_hooks -will return -.Er ENOBUFS -to indicate this. +In +.Fx 13.0 +the interface was significantly rewritten.
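The registration and linking flow that the rewritten pfil.9 describes above can be illustrated with a short sketch. The fragment below is not part of this change; it only outlines how a hypothetical in-kernel filtering module might register one IPv4 hook and link it to the default inet head through the versioned argument structures. The member names (pa_version, pa_func, pa_modname, pa_headname and so on), the PFIL_VERSION and PFIL_HOOKPTR constants, and the pfil_return_t callback signature are assumptions modelled on the manual page text and on pfilctl.c, and should be verified against sys/net/pfil.h.

/*
 * Hypothetical sketch of the rewritten pfil(9) KPI; member and constant
 * names are assumptions, check net/pfil.h before relying on them.
 */
#include <sys/param.h>
#include <sys/mbuf.h>
#include <net/pfil.h>

static pfil_return_t
example_chk(pfil_packet_t p, struct ifnet *ifp, int flags, void *ruleset,
    struct inpcb *inp)
{

	/* Inspect or modify the packet here; pass everything through. */
	return (PFIL_PASS);
}

static pfil_hook_t example_hook;

static void
example_attach(void)
{
	struct pfil_hook_args pha = {
		.pa_version = PFIL_VERSION,	/* assumed versioning constant */
		.pa_type = PFIL_TYPE_IP4,
		.pa_func = example_chk,
		.pa_modname = "example",
		.pa_rulname = "default",
	};
	struct pfil_link_args pla = {
		.pa_version = PFIL_VERSION,
		/* attach to both input and output of the default IPv4 head */
		.pa_flags = PFIL_IN | PFIL_OUT | PFIL_HOOKPTR,
		.pa_headname = "inet",
	};

	example_hook = pfil_add_hook(&pha);
	pla.pa_hook = example_hook;
	if (pfil_link(&pla) != 0)
		printf("example: pfil_link failed\n");
}

From userland, pfilctl(8) drives the same linking step through the PFILIOC_LINK ioctl implemented in pfilctl.c earlier in this change.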
diff --git a/share/mk/src.opts.mk b/share/mk/src.opts.mk index 7aa53f35e5f4..be1d4efb8dde 100644 --- a/share/mk/src.opts.mk +++ b/share/mk/src.opts.mk @@ -321,11 +321,9 @@ __DEFAULT_YES_OPTIONS+=LLVM_LIBUNWIND .else __DEFAULT_NO_OPTIONS+=LLVM_LIBUNWIND .endif -.if ${__T} == "aarch64" || ${__T} == "amd64" || ${__T} == "armv7" +.if ${__T} == "aarch64" || ${__T} == "amd64" || ${__T} == "armv7" || \ + ${__T} == "i386" __DEFAULT_YES_OPTIONS+=LLD_BOOTSTRAP LLD_IS_LD -.elif ${__T} == "i386" -__DEFAULT_YES_OPTIONS+=LLD_BOOTSTRAP -__DEFAULT_NO_OPTIONS+=LLD_IS_LD .else __DEFAULT_NO_OPTIONS+=LLD_BOOTSTRAP LLD_IS_LD .endif diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 2a6d980d4ffe..9a8532ba6f03 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -234,14 +234,18 @@ device ppi # Parallel port interface device device puc # Multi I/O cards and multi-channel UARTs -# PCI Ethernet NICs. -device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE -device de # DEC/Intel DC21x4x (``Tulip'') +# PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure +device iflib device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel PRO/10GbE PCIE PF Ethernet device ixv # Intel PRO/10GbE PCIE VF Ethernet device ixl # Intel 700 Series Physical Function device iavf # Intel Adaptive Virtual Function +device vmx # VMware VMXNET3 Ethernet + +# PCI Ethernet NICs. +device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE +device de # DEC/Intel DC21x4x (``Tulip'') device le # AMD Am7900 LANCE and Am79C9xx PCnet device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') @@ -369,9 +373,6 @@ device hyperv # HyperV drivers options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver -# VMware support -device vmx # VMware VMXNET3 Ethernet - # Netmap provides direct access to TX/RX rings on supported NICs device netmap # netmap(4) support diff --git a/sys/arm64/conf/GENERIC b/sys/arm64/conf/GENERIC index 5178455c364c..641ad1cac97e 100644 --- a/sys/arm64/conf/GENERIC +++ b/sys/arm64/conf/GENERIC @@ -145,14 +145,17 @@ device al_pci # Annapurna Alpine PCI-E options PCI_HP # PCI-Express native HotPlug options PCI_IOV # PCI SR-IOV support +# PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure +device iflib +device em # Intel PRO/1000 Gigabit Ethernet Family +device ix # Intel 10Gb Ethernet Family + # Ethernet NICs device mdio device mii device miibus # MII bus support device awg # Allwinner EMAC Gigabit Ethernet device axgbe # AMD Opteron A1100 integrated NIC -device em # Intel PRO/1000 Gigabit Ethernet Family -device ix # Intel 10Gb Ethernet Family device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device neta # Marvell Armada 370/38x/XP/3700 NIC device smc # SMSC LAN91C111 diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index a3160d613979..c8be3a01f851 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -3314,14 +3314,12 @@ more: /* * BIO_FLUSH doesn't currently communicate * range data, so we synchronize the cache - * over the whole disk. We also force - * ordered tag semantics the flush applies - * to all previously queued I/O. + * over the whole disk. 
*/ scsi_synchronize_cache(&start_ccb->csio, /*retries*/1, /*cbfcnp*/dadone, - MSG_ORDERED_Q_TAG, + /*tag_action*/tag_code, /*begin_lba*/0, /*lb_count*/0, SSD_FULL_SIZE, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c index 7794bd505525..639f48906aca 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c @@ -1097,7 +1097,6 @@ sendreq: break; case ZIO_TYPE_IOCTL: bp->bio_cmd = BIO_FLUSH; - bp->bio_flags |= BIO_ORDERED; bp->bio_data = NULL; bp->bio_offset = cp->provider->mediasize; bp->bio_length = 0; diff --git a/sys/compat/ndis/ndis_var.h b/sys/compat/ndis/ndis_var.h index 0782853cf661..3ca050c0ada4 100644 --- a/sys/compat/ndis/ndis_var.h +++ b/sys/compat/ndis/ndis_var.h @@ -1736,8 +1736,6 @@ extern int ndis_get_supported_oids(void *, ndis_oid **, int *); extern int ndis_send_packets(void *, ndis_packet **, int); extern int ndis_send_packet(void *, ndis_packet *); extern int ndis_convert_res(void *); -extern int ndis_alloc_amem(void *); -extern void ndis_free_amem(void *); extern void ndis_free_packet(ndis_packet *); extern void ndis_free_bufs(ndis_buffer *); extern int ndis_reset_nic(void *); diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 9a43bd26164d..97ec484ae0d9 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2092,15 +2092,18 @@ device vte # DM&P Vortex86 RDC R6040 Fast Ethernet device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') +# PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure +device iflib +device em # Intel Pro/1000 Gigabit Ethernet +device ix # Intel Pro/10Gbe PCIE Ethernet +device ixv # Intel Pro/10Gbe PCIE Ethernet VF + # PCI Ethernet NICs. 
device cxgb # Chelsio T3 10 Gigabit Ethernet device cxgb_t3fw # Chelsio T3 10 Gigabit Ethernet firmware device cxgbe # Chelsio T4-T6 1/10/25/40/100 Gigabit Ethernet device cxgbev # Chelsio T4-T6 Virtual Functions device de # DEC/Intel DC21x4x (``Tulip'') -device em # Intel Pro/1000 Gigabit Ethernet -device ix # Intel Pro/10Gbe PCIE Ethernet -device ixv # Intel Pro/10Gbe PCIE Ethernet VF device le # AMD Am7900 LANCE and Am79C9xx PCnet device mxge # Myricom Myri-10G 10GbE NIC device oce # Emulex 10 GbE (OneConnect Ethernet) diff --git a/sys/conf/files b/sys/conf/files index 92a3068664a7..c7c15cd3738c 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4132,10 +4132,10 @@ net/if_tun.c optional tun net/if_tap.c optional tap net/if_vlan.c optional vlan net/if_vxlan.c optional vxlan inet | vxlan inet6 -net/ifdi_if.m optional ether pci -net/iflib.c optional ether pci -net/iflib_clone.c optional ether pci -net/mp_ring.c optional ether +net/ifdi_if.m optional ether pci iflib +net/iflib.c optional ether pci iflib +net/iflib_clone.c optional ether pci iflib +net/mp_ring.c optional ether iflib net/mppcc.c optional netgraph_mppc_compression net/mppcd.c optional netgraph_mppc_compression net/netisr.c standard diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index 60c6f2304bdd..47287515b5b2 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -201,6 +201,7 @@ powerpc/powernv/opalcall.S optional powernv powerpc/powernv/platform_powernv.c optional powernv powerpc/powernv/powernv_centaur.c optional powernv powerpc/powernv/powernv_xscom.c optional powernv +powerpc/powernv/xive.c optional powernv powerpc/powerpc/altivec.c optional powerpc | powerpc64 powerpc/powerpc/autoconf.c standard powerpc/powerpc/bus_machdep.c standard diff --git a/sys/contrib/ipfilter/netinet/fil.c b/sys/contrib/ipfilter/netinet/fil.c index 2370d9479322..0036c3a55917 100644 --- a/sys/contrib/ipfilter/netinet/fil.c +++ b/sys/contrib/ipfilter/netinet/fil.c @@ -20,26 +20,18 @@ #include <sys/types.h> #include <sys/param.h> #include <sys/time.h> -#if defined(_KERNEL) && defined(__FreeBSD_version) && \ - (__FreeBSD_version >= 220000) -# if (__FreeBSD_version >= 400000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # if !defined(IPFILTER_LKM) # include "opt_inet6.h" # endif -# if (__FreeBSD_version == 400019) -# define CSUM_DELAY_DATA -# endif -# endif # include <sys/filio.h> #else # include <sys/ioctl.h> #endif -#if (defined(__SVR4) || defined(__svr4__)) && defined(sun) +#if defined(__SVR4) || defined(sun) /* SOLARIS */ # include <sys/filio.h> #endif -#if !defined(_AIX51) # include <sys/fcntl.h> -#endif #if defined(_KERNEL) # include <sys/systm.h> # include <sys/file.h> @@ -50,29 +42,18 @@ # include <stddef.h> # include <sys/file.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #endif -#if !defined(__SVR4) && !defined(__svr4__) && !defined(__hpux) && \ - !defined(linux) +#if !defined(__SVR4) # include <sys/mbuf.h> #else -# if !defined(linux) # include <sys/byteorder.h> -# endif # if (SOLARIS2 < 5) && defined(sun) # include <sys/dditypes.h> # endif #endif -#ifdef __hpux -# define _NET_ROUTE_INCLUDED -#endif -#if !defined(linux) # include <sys/protosw.h> -#endif #include <sys/socket.h> #include <net/if.h> #ifdef sun @@ -81,25 +62,13 @@ struct file; #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> -#if defined(__sgi) && defined(IFF_DRVRLOCK) /* IRIX 6 */ -# include <sys/hashing.h> -# include <netinet/in_var.h> -#endif #include 
<netinet/tcp.h> -#if (!defined(__sgi) && !defined(AIX)) || defined(_KERNEL) # include <netinet/udp.h> # include <netinet/ip_icmp.h> -#endif -#ifdef __hpux -# undef _NET_ROUTE_INCLUDED -#endif -#ifdef __osf__ -# undef _RADIX_H_ -#endif #include "netinet/ip_compat.h" #ifdef USE_INET6 # include <netinet/icmp6.h> -# if !SOLARIS && defined(_KERNEL) && !defined(__osf__) && !defined(__hpux) +# if !SOLARIS && defined(_KERNEL) # include <netinet6/in6_var.h> # endif #endif @@ -122,7 +91,7 @@ struct file; #if defined(IPFILTER_BPF) && defined(_KERNEL) # include <net/bpf.h> #endif -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include <sys/malloc.h> #endif #include "netinet/ipl.h" @@ -131,10 +100,6 @@ struct file; # include <sys/callout.h> extern struct callout ipf_slowtimer_ch; #endif -#if defined(__OpenBSD__) -# include <sys/timeout.h> -extern struct timeout ipf_slowtimer_ch; -#endif /* END OF INCLUDES */ #if !defined(lint) @@ -214,10 +179,7 @@ static int ipf_updateipid __P((fr_info_t *)); static int ipf_settimeout __P((struct ipf_main_softc_s *, struct ipftuneable *, ipftuneval_t *)); -#if !defined(_KERNEL) || (!defined(__NetBSD__) && !defined(__OpenBSD__) && \ - !defined(__FreeBSD__)) || \ - FREEBSD_LT_REV(501000) || NETBSD_LT_REV(105000000) || \ - OPENBSD_LT_REV(200006) +#if !defined(_KERNEL) || SOLARIS static int ppsratecheck(struct timeval *, int *, int); #endif @@ -2815,13 +2777,14 @@ ipf_firewall(fin, passp) /* -2 == requires authentication */ /* Kernel: */ /* > 0 == filter error # for packet */ -/* Parameters: ip(I) - pointer to start of IPv4/6 packet */ +/* Parameters: ctx(I) - pointer to the instance context */ +/* ip(I) - pointer to start of IPv4/6 packet */ /* hlen(I) - length of header */ /* ifp(I) - pointer to interface this packet is on */ /* out(I) - 0 == packet going in, 1 == packet going out */ /* mp(IO) - pointer to caller's buffer pointer that holds this */ /* IP packet. */ -/* Solaris & HP-UX ONLY : */ +/* Solaris: */ /* qpi(I) - pointer to STREAMS queue information for this */ /* interface & direction. */ /* */ @@ -3266,12 +3229,6 @@ finished: } } else { LBUMP(ipf_stats[out].fr_pass); -#if defined(_KERNEL) && defined(__sgi) - if ((fin->fin_hbuf != NULL) && - (mtod(fin->fin_m, struct ip *) != fin->fin_ip)) { - COPYBACK(fin->fin_m, 0, fin->fin_plen, fin->fin_hbuf); - } -#endif } SPL_X(s); @@ -5482,10 +5439,7 @@ ipf_resolvefunc(softc, data) } -#if !defined(_KERNEL) || (!defined(__NetBSD__) && !defined(__OpenBSD__) && \ - !defined(__FreeBSD__)) || \ - FREEBSD_LT_REV(501000) || NETBSD_LT_REV(105000000) || \ - OPENBSD_LT_REV(200006) +#if !defined(_KERNEL) || SOLARIS /* * From: NetBSD * ppsratecheck(): packets (or events) per second limitation. 
@@ -10110,9 +10064,6 @@ ipf_slowtimer(softc) ipf_rule_expire(softc); ipf_sync_expire(softc); softc->ipf_ticks++; -# if defined(__OpenBSD__) - timeout_add(&ipf_slowtimer_ch, hz/2); -# endif } diff --git a/sys/contrib/ipfilter/netinet/ip_auth.c b/sys/contrib/ipfilter/netinet/ip_auth.c index 8624c3ba064c..f08d0b2fd1d9 100644 --- a/sys/contrib/ipfilter/netinet/ip_auth.c +++ b/sys/contrib/ipfilter/netinet/ip_auth.c @@ -24,29 +24,24 @@ # endif # include <string.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #endif -#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include <sys/filio.h> # include <sys/fcntl.h> #else # include <sys/ioctl.h> #endif -#if !defined(linux) # include <sys/protosw.h> -#endif #include <sys/socket.h> #if defined(_KERNEL) # include <sys/systm.h> -# if !defined(__SVR4) && !defined(__svr4__) && !defined(linux) +# if !defined(__SVR4) # include <sys/mbuf.h> # endif #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include <sys/filio.h> # include <sys/byteorder.h> # ifdef _KERNEL @@ -55,11 +50,10 @@ struct file; # include <sys/stream.h> # include <sys/kmem.h> #endif -#if (defined(_BSDI_VERSION) && (_BSDI_VERSION >= 199802)) || \ - (defined(__FreeBSD_version) &&(__FreeBSD_version >= 400000)) +#if defined(__FreeBSD_version) # include <sys/queue.h> #endif -#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(bsdi) +#if defined(__NetBSD__) # include <machine/cpu.h> #endif #if defined(_KERNEL) && defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000) @@ -76,10 +70,8 @@ struct file; #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> -#if !defined(linux) # include <netinet/ip_var.h> -#endif -#if !defined(_KERNEL) && !defined(__osf__) && !defined(__sgi) +#if !defined(_KERNEL) # define KERNEL # define _KERNEL # define NOT_KERNEL @@ -89,34 +81,26 @@ struct file; # undef KERNEL #endif #include <netinet/tcp.h> -#if defined(IRIX) && (IRIX < 60516) /* IRIX < 6 */ -extern struct ifqueue ipintrq; /* ip packet input queue */ -#else -# if !defined(__hpux) && !defined(linux) -# if __FreeBSD_version >= 300000 +# if defined(__FreeBSD_version) # include <net/if_var.h> -# if __FreeBSD_version >= 500042 # define IF_QFULL _IF_QFULL # define IF_DROP _IF_DROP -# endif /* __FreeBSD_version >= 500042 */ # endif # include <netinet/in_var.h> # include <netinet/tcp_fsm.h> -# endif -#endif #include <netinet/udp.h> #include <netinet/ip_icmp.h> #include "netinet/ip_compat.h" #include <netinet/tcpip.h> #include "netinet/ip_fil.h" #include "netinet/ip_auth.h" -#if !defined(MENTAT) && !defined(linux) +#if !defined(MENTAT) # include <net/netisr.h> # ifdef __FreeBSD__ # include <machine/cpufunc.h> # endif #endif -#if (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include <sys/malloc.h> # if defined(_KERNEL) && !defined(IPFILTER_LKM) # include <sys/libkern.h> @@ -232,9 +216,6 @@ ipf_auth_soft_init(softc, arg) bzero((char *)softa->ipf_auth_pkts, softa->ipf_auth_size * sizeof(*softa->ipf_auth_pkts)); -#if defined(linux) && defined(_KERNEL) - init_waitqueue_head(&softa->ipf_auth_next_linux); -#endif return 0; } @@ -1106,22 +1087,7 @@ ipf_auth_ioctlloop: error = EINTR; } # else /* SOLARIS */ -# ifdef __hpux - { - lock_t *l; - - l = get_sleep_lock(&softa->ipf_auth_next); - error = sleep(&softa->ipf_auth_next, PZERO+1); - spinunlock(l); - } -# else -# ifdef __osf__ - error = mpsleep(&softa->ipf_auth_next, PSUSP|PCATCH, "ipf_auth_next", - 0, 
&softa->ipf_auth_mx, MS_LOCK_SIMPLE); -# else error = SLEEP(&softa->ipf_auth_next, "ipf_auth_next"); -# endif /* __osf__ */ -# endif /* __hpux */ # endif /* SOLARIS */ #endif MUTEX_EXIT(&softa->ipf_auth_mx); diff --git a/sys/contrib/ipfilter/netinet/ip_compat.h b/sys/contrib/ipfilter/netinet/ip_compat.h index 061e4113cc73..777495b39aed 100644 --- a/sys/contrib/ipfilter/netinet/ip_compat.h +++ b/sys/contrib/ipfilter/netinet/ip_compat.h @@ -33,7 +33,7 @@ #endif #ifndef SOLARIS -# if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +# if defined(sun) && defined(__SVR4) # define SOLARIS 1 # else # define SOLARIS 0 @@ -41,7 +41,7 @@ #endif -#if defined(__SVR4) || defined(__svr4__) || defined(__sgi) +#if defined(__SVR4) # define index strchr # if !defined(_KERNEL) # define bzero(a,b) memset(a,0,b) @@ -62,11 +62,6 @@ # endif #endif -#if defined(__sgi) || defined(bsdi) || defined(__hpux) || defined(hpux) -struct ether_addr { - u_char ether_addr_octet[6]; -}; -#endif # ifdef __STDC__ # define IPL_EXTERN(ep) ipl##ep @@ -100,15 +95,6 @@ struct ether_addr { (__FreeBSD_version > (x))) #define FREEBSD_LT_REV(x) (defined(__FreeBSD_version) && \ (__FreeBSD_version < (x))) -#define BSDOS_GE_REV(x) (defined(_BSDI_VERSION) && \ - (_BSDI_VERSION >= (x))) -#define BSDOS_GT_REV(x) (defined(_BSDI_VERSION) && \ - (_BSDI_VERSION > (x))) -#define BSDOS_LT_REV(x) (defined(_BSDI_VERSION) && \ - (_BSDI_VERSION < (x))) -#define OPENBSD_GE_REV(x) (defined(OpenBSD) && (OpenBSD >= (x))) -#define OPENBSD_GT_REV(x) (defined(OpenBSD) && (OpenBSD > (x))) -#define OPENBSD_LT_REV(x) (defined(OpenBSD) && (OpenBSD < (x))) #define BSD_GE_YEAR(x) (defined(BSD) && (BSD >= (x))) #define BSD_GT_YEAR(x) (defined(BSD) && (BSD > (x))) #define BSD_LT_YEAR(x) (defined(BSD) && (BSD < (x))) @@ -321,8 +307,7 @@ typedef union { #define ipf_isw ipf_lkun_s.ipf_sw #define ipf_magic ipf_lkun_s.ipf_magic -#if !defined(__GNUC__) || \ - (defined(__FreeBSD_version) && (__FreeBSD_version >= 503000)) +#if !defined(__GNUC__) || defined(__FreeBSD_version) # ifndef INLINE # define INLINE # endif @@ -473,11 +458,10 @@ extern mb_t *allocmbt(size_t); #ifdef USE_INET6 -# if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \ - defined(__osf__) || defined(linux) +# if defined(__NetBSD__) || defined(__FreeBSD__) # include <netinet/ip6.h> # include <netinet/icmp6.h> -# if defined(_KERNEL) && !defined(__osf__) +# if defined(_KERNEL) # include <netinet6/ip6_var.h> # endif typedef struct ip6_hdr ip6_t; @@ -497,21 +481,16 @@ typedef struct ip6_hdr ip6_t; # define COPYBACK m_copyback # endif # if (defined(__NetBSD_Version__) && (__NetBSD_Version__ < 105180000)) || \ - defined(__FreeBSD__) || (defined(OpenBSD) && (OpenBSD < 200206)) || \ - defined(_BSDI_VERSION) + defined(__FreeBSD__) # include <vm/vm.h> # endif -# if !defined(__FreeBSD__) || FREEBSD_GE_REV(300000) -# if NETBSD_GE_REV(105180000) || OPENBSD_GE_REV(200111) +# if NETBSD_GE_REV(105180000) # include <uvm/uvm_extern.h> # else # include <vm/vm_extern.h> extern vm_map_t kmem_map; # endif # include <sys/proc.h> -# else /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ -# include <vm/vm_kern.h> -# endif /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ # ifdef IPFILTER_M_IPFILTER # include <sys/malloc.h> @@ -611,7 +590,7 @@ MALLOC_DECLARE(M_IPFILTER); # define COPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) # endif -# ifndef KMALLOC +# if SOLARIS && !defined(KMALLOC) # define KMALLOC(a,b) (a) = (b)new_kmem_alloc(sizeof(*(a)), \ KMEM_NOSLEEP) 
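The ip_auth.c hunk above keeps only the generic SLEEP()/MUTEX_EXIT() pairing on the ipf_auth_next wait channel once the HP-UX and OSF/1 variants are gone. For reference, here is a minimal sketch of the sleep/wakeup pattern those macros wrap, written against FreeBSD's native mtx(9) and msleep(9); every name in it (ex_mtx, ex_pending, and so on) is illustrative, not ipfilter's.

/*
 * Illustrative only: block until another thread posts work, using
 * msleep(9)/wakeup(9).  ex_mtx is assumed to have been set up with
 * mtx_init(&ex_mtx, "exmtx", NULL, MTX_DEF) during attach.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>

static struct mtx ex_mtx;
static int ex_pending;

static int
ex_wait_for_work(void)
{
	int error = 0;

	mtx_lock(&ex_mtx);
	while (ex_pending == 0) {
		/* Drops ex_mtx while asleep; PCATCH lets signals interrupt. */
		error = msleep(&ex_pending, &ex_mtx, PWAIT | PCATCH,
		    "exwait", 0);
		if (error != 0)
			break;
	}
	if (error == 0)
		ex_pending--;
	mtx_unlock(&ex_mtx);
	return (error);
}

static void
ex_post_work(void)
{
	mtx_lock(&ex_mtx);
	ex_pending++;
	wakeup(&ex_pending);
	mtx_unlock(&ex_mtx);
}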
# define KMALLOCS(a,b,c) (a) = (b)new_kmem_alloc((c), KMEM_NOSLEEP) diff --git a/sys/contrib/ipfilter/netinet/ip_dstlist.c b/sys/contrib/ipfilter/netinet/ip_dstlist.c index d97dad2c8f35..4f2e3bb05a18 100644 --- a/sys/contrib/ipfilter/netinet/ip_dstlist.c +++ b/sys/contrib/ipfilter/netinet/ip_dstlist.c @@ -9,9 +9,6 @@ # define KERNEL 1 # define _KERNEL 1 #endif -#if defined(__osf__) -# define _PROTO_NET_H_ -#endif #include <sys/errno.h> #include <sys/types.h> #include <sys/param.h> @@ -21,9 +18,6 @@ # include <stdlib.h> # include <string.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #else @@ -33,14 +27,12 @@ struct file; # endif #endif #include <sys/time.h> -#if !defined(linux) # include <sys/protosw.h> -#endif #include <sys/socket.h> -#if defined(_KERNEL) && (!defined(__SVR4) && !defined(__svr4__)) +#if defined(_KERNEL) && !defined(__SVR4) # include <sys/mbuf.h> #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include <sys/filio.h> # include <sys/byteorder.h> # ifdef _KERNEL @@ -49,7 +41,7 @@ struct file; # include <sys/stream.h> # include <sys/kmem.h> #endif -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include <sys/malloc.h> #endif diff --git a/sys/contrib/ipfilter/netinet/ip_fil.h b/sys/contrib/ipfilter/netinet/ip_fil.h index 55415a9591b7..daaaa6a0776f 100644 --- a/sys/contrib/ipfilter/netinet/ip_fil.h +++ b/sys/contrib/ipfilter/netinet/ip_fil.h @@ -11,9 +11,7 @@ #ifndef __IP_FIL_H__ #define __IP_FIL_H__ -#if !defined(linux) || !defined(_KERNEL) # include <netinet/in.h> -#endif #include "netinet/ip_compat.h" #include "netinet/ipf_rb.h" @@ -21,15 +19,11 @@ # include <sys/callout.h> #endif #if defined(BSD) && defined(_KERNEL) -# if NETBSD_LT_REV(399000000) || defined(__osf__) || FREEBSD_LT_REV(500043) -# include <sys/select.h> -# else # include <sys/selinfo.h> -# endif #endif #ifndef SOLARIS -# if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +# if defined(sun) && defined(__SVR4) # define SOLARIS 1 # else # define SOLARIS 0 @@ -44,7 +38,7 @@ # endif #endif -#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51) +#if defined(__STDC__) || defined(__GNUC__) # define SIOCADAFR _IOW('r', 60, struct ipfobj) # define SIOCRMAFR _IOW('r', 61, struct ipfobj) # define SIOCSETFF _IOW('r', 62, u_int) @@ -459,9 +453,6 @@ typedef struct fr_info { void *fin_qpi; char fin_ifname[LIFNAMSIZ]; #endif -#ifdef __sgi - void *fin_hbuf; -#endif void *fin_fraghdr; /* pointer to start of ipv6 frag hdr */ } fr_info_t; @@ -1425,10 +1416,6 @@ typedef struct ipftune { /* ** HPUX Port */ -#ifdef __hpux -/* HP-UX locking sequence deadlock detection module lock MAJOR ID */ -# define IPF_SMAJ 0 /* temp assignment XXX, not critical */ -#endif #if !defined(CDEV_MAJOR) && defined (__FreeBSD_version) && \ (__FreeBSD_version >= 220000) @@ -1624,22 +1611,14 @@ typedef struct ipf_main_softc_s { frentry_t *ipf_rule_explist[2]; ipftoken_t *ipf_token_head; ipftoken_t **ipf_token_tail; -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) && \ - defined(_KERNEL) +#if defined(__FreeBSD_version) && defined(_KERNEL) struct callout ipf_slow_ch; #endif -#if defined(linux) && defined(_KERNEL) - struct timer_list ipf_timer; -#endif #if NETBSD_GE_REV(104040000) struct callout ipf_slow_ch; #endif #if SOLARIS -# if SOLARIS2 >= 7 timeout_id_t ipf_slow_ch; -# else - int ipf_slow_ch; -# endif #endif #if defined(_KERNEL) # if SOLARIS @@ -1662,12 +1641,7 @@ typedef struct ipf_main_softc_s { 
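In the ipf_main_softc_s hunk above only the FreeBSD 'struct callout ipf_slow_ch' (and its NetBSD twin) survives; the Linux timer_list and the pre-Solaris 7 fallback are removed, and the earlier ipf_slowtimer() hunk stopped re-arming an OpenBSD timeout by hand. For context, a self-rearming half-second callout on FreeBSD looks roughly like the sketch below; the names are hypothetical.

/*
 * Illustrative sketch of a periodic callout that re-arms itself every
 * hz/2 ticks, the way ipfilter's slow timer runs on FreeBSD.
 */
#include <sys/param.h>
#include <sys/kernel.h>		/* hz */
#include <sys/systm.h>
#include <sys/callout.h>

static struct callout ex_tick_ch;

static void
ex_tick(void *arg)
{
	/* ... expire state, fragments, etc. ... */

	/* Re-arm for another half second. */
	callout_reset(&ex_tick_ch, hz / 2, ex_tick, arg);
}

static void
ex_tick_start(void *sc)
{
	callout_init(&ex_tick_ch, 1);	/* MP-safe callout */
	callout_reset(&ex_tick_ch, hz / 2, ex_tick, sc);
}

static void
ex_tick_stop(void)
{
	callout_drain(&ex_tick_ch);	/* wait out any running handler */
}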
hook_t *ipf_hk_loop_v6_out; # endif # else -# if defined(linux) && defined(_KERNEL) - struct poll_table_struct ipf_selwait[IPL_LOGSIZE]; - wait_queue_head_t iplh_linux[IPL_LOGSIZE]; -# else struct selinfo ipf_selwait[IPL_LOGSIZE]; -# endif # endif #endif void *ipf_slow; @@ -1697,67 +1671,27 @@ extern void ipfilterattach __P((int)); extern int ipl_enable __P((void)); extern int ipl_disable __P((void)); # ifdef MENTAT +/* XXX MENTAT is always defined for Solaris */ extern int ipf_check __P((void *, struct ip *, int, void *, int, void *, mblk_t **)); # if SOLARIS extern void ipf_prependmbt(fr_info_t *, mblk_t *); -# if SOLARIS2 >= 7 extern int ipfioctl __P((dev_t, int, intptr_t, int, cred_t *, int *)); -# else -extern int ipfioctl __P((dev_t, int, int *, int, cred_t *, int *)); -# endif -# endif -# ifdef __hpux -extern int ipfioctl __P((dev_t, int, caddr_t, int)); -extern int ipf_select __P((dev_t, int)); # endif extern int ipf_qout __P((queue_t *, mblk_t *)); # else /* MENTAT */ +/* XXX MENTAT is never defined for FreeBSD & NetBSD */ extern int ipf_check __P((void *, struct ip *, int, void *, int, mb_t **)); extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **)); extern size_t mbufchainlen __P((mb_t *)); -# ifdef __sgi -# include <sys/cred.h> -extern int ipfioctl __P((dev_t, int, caddr_t, int, cred_t *, int *)); -extern int ipfilter_sgi_attach __P((void)); -extern void ipfilter_sgi_detach __P((void)); -extern void ipfilter_sgi_intfsync __P((void)); -# else # ifdef IPFILTER_LKM extern int ipf_identify __P((char *)); # endif -# if BSDOS_GE_REV(199510) || FREEBSD_GE_REV(220000) || \ - (defined(NetBSD) && (NetBSD >= 199511)) || defined(__OpenBSD__) -# if defined(__NetBSD__) || BSDOS_GE_REV(199701) || \ - defined(__OpenBSD__) || FREEBSD_GE_REV(300000) -# if (__FreeBSD_version >= 500024) -# if (__FreeBSD_version >= 502116) +# if defined(__FreeBSD_version) extern int ipfioctl __P((struct cdev*, u_long, caddr_t, int, struct thread *)); -# else -extern int ipfioctl __P((dev_t, u_long, caddr_t, int, struct thread *)); -# endif /* __FreeBSD_version >= 502116 */ -# else -# if NETBSD_GE_REV(499001000) +# elif defined(__NetBSD__) extern int ipfioctl __P((dev_t, u_long, void *, int, struct lwp *)); -# else -# if NETBSD_GE_REV(399001400) -extern int ipfioctl __P((dev_t, u_long, caddr_t, int, struct lwp *)); -# else -extern int ipfioctl __P((dev_t, u_long, caddr_t, int, struct proc *)); -# endif -# endif -# endif /* __FreeBSD_version >= 500024 */ -# else -extern int ipfioctl __P((dev_t, int, caddr_t, int, struct proc *)); -# endif -# else -# ifdef linux -extern int ipfioctl __P((struct inode *, struct file *, u_int, u_long)); -# else -extern int ipfioctl __P((dev_t, int, caddr_t, int)); -# endif -# endif /* (_BSDI_VERSION >= 199510) */ -# endif /* __ sgi */ +# endif # endif /* MENTAT */ # if defined(__FreeBSD_version) diff --git a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c index 309027b500cc..dce75517dd63 100644 --- a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c @@ -25,6 +25,7 @@ static const char rcsid[] = "@(#)$Id$"; # include "opt_random_ip_id.h" #endif #include <sys/param.h> +#include <sys/conf.h> #include <sys/errno.h> #include <sys/types.h> #include <sys/file.h> @@ -126,32 +127,33 @@ static void ipf_ifevent(arg, ifp) -static int -ipf_check_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) +static pfil_return_t +ipf_check_wrapper(struct mbuf **mp, struct ifnet *ifp, int flags, 
+ void *ruleset __unused, struct inpcb *inp) { struct ip *ip = mtod(*mp, struct ip *); - int rv; + pfil_return_t rv; CURVNET_SET(ifp->if_vnet); - rv = ipf_check(&V_ipfmain, ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT), - mp); + rv = ipf_check(&V_ipfmain, ip, ip->ip_hl << 2, ifp, + !!(flags & PFIL_OUT), mp); CURVNET_RESTORE(); - return rv; + return (rv == 0 ? PFIL_PASS : PFIL_DROPPED); } -# ifdef USE_INET6 -# include <netinet/ip6.h> - -static int -ipf_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) +#ifdef USE_INET6 +static pfil_return_t +ipf_check_wrapper6(struct mbuf **mp, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { - int error; + pfil_return_t rv; CURVNET_SET(ifp->if_vnet); - error = ipf_check(&V_ipfmain, mtod(*mp, struct ip *), - sizeof(struct ip6_hdr), ifp, (dir == PFIL_OUT), mp); + rv = ipf_check(&V_ipfmain, mtod(*mp, struct ip *), + sizeof(struct ip6_hdr), ifp, !!(flags & PFIL_OUT), mp); CURVNET_RESTORE(); - return (error); + + return (rv == 0 ? PFIL_PASS : PFIL_DROPPED); } # endif #if defined(IPFILTER_LKM) @@ -1318,53 +1320,62 @@ ipf_inject(fin, m) return error; } +VNET_DEFINE_STATIC(pfil_hook_t, ipf_inet_hook); +VNET_DEFINE_STATIC(pfil_hook_t, ipf_inet6_hook); +#define V_ipf_inet_hook VNET(ipf_inet_hook) +#define V_ipf_inet6_hook VNET(ipf_inet6_hook) + int ipf_pfil_unhook(void) { - struct pfil_head *ph_inet; + + pfil_remove_hook(V_ipf_inet_hook); + #ifdef USE_INET6 - struct pfil_head *ph_inet6; + pfil_remove_hook(V_ipf_inet6_hook); #endif - ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); - if (ph_inet != NULL) - pfil_remove_hook((void *)ipf_check_wrapper, NULL, - PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); -# ifdef USE_INET6 - ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); - if (ph_inet6 != NULL) - pfil_remove_hook((void *)ipf_check_wrapper6, NULL, - PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); -# endif - return (0); } int ipf_pfil_hook(void) { - struct pfil_head *ph_inet; + struct pfil_hook_args pha; + struct pfil_link_args pla; + int error, error6; + + pha.pa_version = PFIL_VERSION; + pha.pa_flags = PFIL_IN | PFIL_OUT; + pha.pa_modname = "ipfilter"; + pha.pa_rulname = "default"; + pha.pa_func = ipf_check_wrapper; + pha.pa_ruleset = NULL; + pha.pa_type = PFIL_TYPE_IP4; + V_ipf_inet_hook = pfil_add_hook(&pha); + #ifdef USE_INET6 - struct pfil_head *ph_inet6; + pha.pa_func = ipf_check_wrapper6; + pha.pa_type = PFIL_TYPE_IP6; + V_ipf_inet6_hook = pfil_add_hook(&pha); #endif - ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); -# ifdef USE_INET6 - ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); -# endif - if (ph_inet == NULL -# ifdef USE_INET6 - && ph_inet6 == NULL -# endif - ) { - return ENODEV; - } + pla.pa_version = PFIL_VERSION; + pla.pa_flags = PFIL_IN | PFIL_OUT | + PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet_pfil_head; + pla.pa_hook = V_ipf_inet_hook; + error = pfil_link(&pla); - if (ph_inet != NULL) - pfil_add_hook((void *)ipf_check_wrapper, NULL, - PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); -# ifdef USE_INET6 - if (ph_inet6 != NULL) - pfil_add_hook((void *)ipf_check_wrapper6, NULL, - PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); -# endif - return (0); + error6 = 0; +#ifdef USE_INET6 + pla.pa_head = V_inet6_pfil_head; + pla.pa_hook = V_ipf_inet6_hook; + error6 = pfil_link(&pla); +#endif + + if (error || error6) + error = ENODEV; + else + error = 0; + + return (error); } void diff --git a/sys/contrib/ipfilter/netinet/ip_frag.c b/sys/contrib/ipfilter/netinet/ip_frag.c index 14b75e2d6a90..80cd5eccaa79 100644 --- 
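The ip_fil_freebsd.c hunk above is the heart of the pfil(9) conversion: the wrappers now use the pfil_return_t calling convention, and hook registration is split into pfil_add_hook() (create the hook) plus pfil_link() (attach it to a head such as V_inet_pfil_head). Below is a condensed, hypothetical consumer of the same KPI, with placeholder names ("example", example_check); exact header requirements may vary by kernel configuration.

/* Hypothetical pfil(9) consumer following the pattern adopted above. */
#include <sys/param.h>
#include <sys/mbuf.h>
#include <net/if.h>
#include <net/vnet.h>
#include <net/pfil.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>	/* V_inet_pfil_head */

static pfil_return_t
example_check(struct mbuf **mp, struct ifnet *ifp, int flags,
    void *ruleset __unused, struct inpcb *inp __unused)
{
	/* Inspect or modify *mp; PFIL_OUT in flags marks outbound packets. */
	return (PFIL_PASS);
}

static pfil_hook_t example_hook;

static int
example_attach(void)
{
	struct pfil_hook_args pha;
	struct pfil_link_args pla;

	/* Step 1: create the hook object. */
	pha.pa_version = PFIL_VERSION;
	pha.pa_flags = PFIL_IN | PFIL_OUT;
	pha.pa_modname = "example";
	pha.pa_rulname = "default";
	pha.pa_func = example_check;
	pha.pa_ruleset = NULL;
	pha.pa_type = PFIL_TYPE_IP4;
	example_hook = pfil_add_hook(&pha);

	/* Step 2: link it to the IPv4 filtering head, in and out. */
	pla.pa_version = PFIL_VERSION;
	pla.pa_flags = PFIL_IN | PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet_pfil_head;
	pla.pa_hook = example_hook;
	return (pfil_link(&pla));
}

static void
example_detach(void)
{
	pfil_remove_hook(example_hook);	/* tear the hook down again */
}

This mirrors ipf_pfil_hook()/ipf_pfil_unhook() above; note that the wrapper's return value must be translated to PFIL_PASS or PFIL_DROPPED rather than the 0/non-0 convention the old KPI used.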
a/sys/contrib/ipfilter/netinet/ip_frag.c +++ b/sys/contrib/ipfilter/netinet/ip_frag.c @@ -16,30 +16,21 @@ #include <sys/param.h> #include <sys/time.h> #include <sys/file.h> -#ifdef __hpux -# include <sys/timeout.h> -#endif #if !defined(_KERNEL) # include <stdio.h> # include <string.h> # include <stdlib.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #endif -#if defined(_KERNEL) && \ - defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include <sys/filio.h> # include <sys/fcntl.h> #else # include <sys/ioctl.h> #endif -#if !defined(linux) # include <sys/protosw.h> -#endif #include <sys/socket.h> #if defined(_KERNEL) # include <sys/systm.h> @@ -48,7 +39,7 @@ struct file; # endif #endif #if !defined(__SVR4) && !defined(__svr4__) -# if defined(_KERNEL) && !defined(__sgi) && !defined(AIX) +# if defined(_KERNEL) # include <sys/kernel.h> # endif #else @@ -66,9 +57,7 @@ struct file; #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> -#if !defined(linux) # include <netinet/ip_var.h> -#endif #include <netinet/tcp.h> #include <netinet/udp.h> #include <netinet/ip_icmp.h> diff --git a/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c b/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c index 0fc008d6d897..c2cd6fc31e40 100644 --- a/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c +++ b/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c @@ -1308,11 +1308,7 @@ ipf_p_ftp_process(softf, fin, nat, ftp, rv) t = &ftp->ftp_side[1 - rv]; thseq = ntohl(tcp->th_seq); thack = ntohl(tcp->th_ack); -#ifdef __sgi - mlen = fin->fin_plen - off; -#else mlen = MSGDSIZE(m) - off; -#endif DT3(process_debug, tcphdr_t *, tcp, int, off, int, mlen); if (softf->ipf_p_ftp_debug & DEBUG_INFO) @@ -1609,11 +1605,7 @@ whilemore: if (tcp->th_flags & TH_FIN) f->ftps_seq[1]++; if (softf->ipf_p_ftp_debug & DEBUG_PARSE_INFO) { -#ifdef __sgi - mlen = fin->fin_plen; -#else mlen = MSGDSIZE(m); -#endif mlen -= off; printf("ftps_seq[1] = %x inc %d len %d\n", f->ftps_seq[1], inc, mlen); diff --git a/sys/contrib/ipfilter/netinet/ip_htable.c b/sys/contrib/ipfilter/netinet/ip_htable.c index 62707f40edd2..0786355cd87a 100644 --- a/sys/contrib/ipfilter/netinet/ip_htable.c +++ b/sys/contrib/ipfilter/netinet/ip_htable.c @@ -20,22 +20,18 @@ # include <stdlib.h> # include <string.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #endif #include <sys/socket.h> -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include <sys/malloc.h> #endif #if defined(__FreeBSD__) # include <sys/cdefs.h> # include <sys/proc.h> #endif -#if !defined(__svr4__) && !defined(__SVR4) && !defined(__hpux) && \ - !defined(linux) +#if !defined(__SVR4) # include <sys/mbuf.h> #endif #if defined(_KERNEL) diff --git a/sys/contrib/ipfilter/netinet/ip_irc_pxy.c b/sys/contrib/ipfilter/netinet/ip_irc_pxy.c index b9954b4c067a..1b788720f3f7 100644 --- a/sys/contrib/ipfilter/netinet/ip_irc_pxy.c +++ b/sys/contrib/ipfilter/netinet/ip_irc_pxy.c @@ -278,11 +278,7 @@ ipf_p_irc_send(fin, nat) bzero(ctcpbuf, sizeof(ctcpbuf)); off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff; -#ifdef __sgi - dlen = fin->fin_plen - off; -#else dlen = MSGDSIZE(m) - off; -#endif if (dlen <= 0) return 0; COPYDATA(m, off, MIN(sizeof(ctcpbuf), dlen), ctcpbuf); @@ -361,7 +357,7 @@ ipf_p_irc_send(fin, nat) fin->fin_flx |= FI_DOCKSUM; if (inc != 0) { -#if defined(MENTAT) || defined(__sgi) +#if 
defined(MENTAT) register u_32_t sum1, sum2; sum1 = fin->fin_plen; diff --git a/sys/contrib/ipfilter/netinet/ip_log.c b/sys/contrib/ipfilter/netinet/ip_log.c index 2d600840a971..76d2e3def1fa 100644 --- a/sys/contrib/ipfilter/netinet/ip_log.c +++ b/sys/contrib/ipfilter/netinet/ip_log.c @@ -19,7 +19,7 @@ # include <osreldate.h> #endif #ifndef SOLARIS -# if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +# if defined(sun) && defined(__SVR4) # define SOLARIS 1 # else # define SOLARIS 0 @@ -35,15 +35,11 @@ # include <ctype.h> # define _KERNEL # define KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL # undef KERNEL #endif -#if (defined(__FreeBSD_version) && (__FreeBSD_version >= 220000)) && \ - defined(_KERNEL) +#if defined(__FreeBSD_version) && defined(_KERNEL) # include <sys/fcntl.h> # include <sys/filio.h> #else @@ -56,21 +52,15 @@ struct file; # include <sys/proc.h> # endif #endif /* _KERNEL */ -#if !SOLARIS && !defined(__hpux) && !defined(linux) -# if (defined(NetBSD) && (NetBSD > 199609)) || \ - (defined(OpenBSD) && (OpenBSD > 199603)) || \ - (defined(__FreeBSD_version) && (__FreeBSD_version >= 300000)) +# if defined(NetBSD) || defined(__FreeBSD_version) # include <sys/dirent.h> -# else -# include <sys/dir.h> -# endif # include <sys/mbuf.h> # include <sys/select.h> -# if __FreeBSD_version >= 500000 +# endif +# if defined(__FreeBSD_version) # include <sys/selinfo.h> # endif -#else -# if !defined(__hpux) && defined(_KERNEL) +#if SOLARIS && defined(_KERNEL) # include <sys/filio.h> # include <sys/cred.h> # include <sys/ddi.h> @@ -80,24 +70,18 @@ struct file; # include <sys/mkdev.h> # include <sys/dditypes.h> # include <sys/cmn_err.h> -# endif /* !__hpux */ -#endif /* !SOLARIS && !__hpux */ -#if !defined(linux) +#endif /* SOLARIS && _KERNEL */ # include <sys/protosw.h> -#endif #include <sys/socket.h> #include <net/if.h> #ifdef sun # include <net/af.h> #endif -#if __FreeBSD_version >= 300000 +#if defined(__FreeBSD_version) # include <net/if_var.h> #endif #include <netinet/in.h> -#ifdef __sgi -# include <sys/ddi.h> -#endif # include <netinet/in_var.h> #include <netinet/in_systm.h> #include <netinet/ip.h> @@ -107,9 +91,7 @@ struct file; #ifdef USE_INET6 # include <netinet/icmp6.h> #endif -#if !defined(linux) # include <netinet/ip_var.h> -#endif #ifndef _KERNEL # include <syslog.h> #endif @@ -120,7 +102,7 @@ struct file; #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_auth.h" -#if (__FreeBSD_version >= 300000) || defined(__NetBSD__) +#if defined(__FreeBSD_version) || defined(__NetBSD__) # include <sys/malloc.h> #endif /* END OF INCLUDES */ @@ -139,12 +121,6 @@ typedef struct ipf_log_softc_s { # if SOLARIS && defined(_KERNEL) kcondvar_t ipl_wait[IPL_LOGSIZE]; # endif -# if defined(linux) && defined(_KERNEL) - wait_queue_head_t iplh_linux[IPL_LOGSIZE]; -# endif -# if defined(__hpux) && defined(_KERNEL) - iplog_select_t ipl_ss[IPL_LOGSIZE]; -# endif iplog_t **iplh[IPL_LOGSIZE]; iplog_t *iplt[IPL_LOGSIZE]; iplog_t *ipll[IPL_LOGSIZE]; @@ -386,11 +362,11 @@ ipf_log_pkt(fin, flags) ipflog_t ipfl; u_char p; mb_t *m; -# if (SOLARIS || defined(__hpux)) && defined(_KERNEL) && !defined(FW_HOOKS) +# if SOLARIS && defined(_KERNEL) && !defined(FW_HOOKS) qif_t *ifp; # else struct ifnet *ifp; -# endif /* SOLARIS || __hpux */ +# endif /* SOLARIS */ m = fin->fin_m; if (m == NULL) @@ -460,14 +436,14 @@ ipf_log_pkt(fin, flags) * Get the interface number and name to which this packet is * currently associated. 
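The proxy hunks in this commit (ip_ftp_pxy.c and ip_irc_pxy.c above, ip_raudio_pxy.c further down) drop the IRIX-only fin_plen arithmetic and always take the payload length from the mbuf chain with MSGDSIZE(m) - off. On FreeBSD the same quantity can be computed with m_length(9); the helper below only illustrates that idea and is not ipfilter's macro definition.

/*
 * Illustrative: bytes of payload left in an mbuf chain after 'off'
 * header bytes, clamped at zero.  Hypothetical helper.
 */
#include <sys/param.h>
#include <sys/mbuf.h>

static int
ex_payload_len(struct mbuf *m, int off)
{
	int dlen;

	dlen = (int)m_length(m, NULL) - off;
	return (dlen > 0 ? dlen : 0);
}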
*/ -# if (SOLARIS || defined(__hpux)) && defined(_KERNEL) +# if SOLARIS && defined(_KERNEL) # if !defined(FW_HOOKS) ipfl.fl_unit = (u_int)ifp->qf_ppa; # endif COPYIFNAME(fin->fin_v, ifp, ipfl.fl_ifname); # else # if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \ - OPENBSD_GE_REV(199603) || defined(linux) || FREEBSD_GE_REV(501113) + defined(__FreeBSD_version) COPYIFNAME(fin->fin_v, ifp, ipfl.fl_ifname); # else ipfl.fl_unit = (u_int)ifp->if_unit; @@ -738,32 +714,9 @@ ipf_log_read(softc, unit, uio) return EINTR; } # else -# if defined(__hpux) && defined(_KERNEL) - lock_t *l; - -# ifdef IPL_SELECT - if (uio->uio_fpflags & (FNBLOCK|FNDELAY)) { - /* this is no blocking system call */ - softl->ipl_readers[unit]--; - MUTEX_EXIT(&softl->ipl_mutex[unit]); - return 0; - } -# endif - - MUTEX_EXIT(&softl->ipl_mutex[unit]); - l = get_sleep_lock(&softl->iplh[unit]); - error = sleep(&softl->iplh[unit], PZERO+1); - spinunlock(l); -# else -# if defined(__osf__) && defined(_KERNEL) - error = mpsleep(&softl->iplh[unit], PSUSP|PCATCH, "ipfread", 0, - &softl->ipl_mutex, MS_LOCK_SIMPLE); -# else MUTEX_EXIT(&softl->ipl_mutex[unit]); SPL_X(s); error = SLEEP(unit + softl->iplh, "ipl sleep"); -# endif /* __osf__ */ -# endif /* __hpux */ SPL_NET(s); MUTEX_ENTER(&softl->ipl_mutex[unit]); if (error) { @@ -781,8 +734,7 @@ ipf_log_read(softc, unit, uio) return EIO; } -# if (defined(BSD) && (BSD >= 199101)) || defined(__FreeBSD__) || \ - defined(__osf__) +# if (defined(BSD) && (BSD >= 199101)) || defined(__FreeBSD__) uio->uio_rw = UIO_READ; # endif diff --git a/sys/contrib/ipfilter/netinet/ip_lookup.c b/sys/contrib/ipfilter/netinet/ip_lookup.c index 45999e0447ff..046939146075 100644 --- a/sys/contrib/ipfilter/netinet/ip_lookup.c +++ b/sys/contrib/ipfilter/netinet/ip_lookup.c @@ -10,15 +10,12 @@ # define KERNEL 1 # define _KERNEL 1 #endif -#if defined(__osf__) -# define _PROTO_NET_H_ -#endif #include <sys/param.h> #include <sys/errno.h> #include <sys/types.h> #include <sys/time.h> #include <sys/file.h> -#if __FreeBSD_version >= 220000 && defined(_KERNEL) +#if defined(__FreeBSD_version) && defined(_KERNEL) # include <sys/fcntl.h> # include <sys/filio.h> #else @@ -29,9 +26,6 @@ # include <string.h> # include <stdlib.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #endif @@ -43,7 +37,7 @@ struct file; #endif #if defined(_KERNEL) # include <sys/systm.h> -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include <sys/mbuf.h> # endif #else diff --git a/sys/contrib/ipfilter/netinet/ip_nat.c b/sys/contrib/ipfilter/netinet/ip_nat.c index 7c3e0c9fcee4..9139ff495f44 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat.c +++ b/sys/contrib/ipfilter/netinet/ip_nat.c @@ -31,27 +31,22 @@ struct file; # include <sys/uio.h> # undef KERNEL #endif -#if defined(_KERNEL) && \ - defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include <sys/filio.h> # include <sys/fcntl.h> #else # include <sys/ioctl.h> #endif -#if !defined(AIX) # include <sys/fcntl.h> -#endif -#if !defined(linux) # include <sys/protosw.h> -#endif #include <sys/socket.h> #if defined(_KERNEL) # include <sys/systm.h> -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include <sys/mbuf.h> # endif #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include <sys/filio.h> # include <sys/byteorder.h> # ifdef KERNEL @@ -60,11 +55,11 @@ struct file; # include <sys/stream.h> # include <sys/kmem.h> 
#endif -#if __FreeBSD_version >= 300000 +#if defined(__FreeBSD_version) # include <sys/queue.h> #endif #include <net/if.h> -#if __FreeBSD_version >= 300000 +#if defined(__FreeBSD_version) # include <net/if_var.h> #endif #ifdef sun @@ -80,9 +75,7 @@ struct file; extern struct ifnet vpnif; #endif -#if !defined(linux) # include <netinet/ip_var.h> -#endif #include <netinet/tcp.h> #include <netinet/udp.h> #include <netinet/ip_icmp.h> @@ -97,7 +90,7 @@ extern struct ifnet vpnif; #include "netinet/ip_lookup.h" #include "netinet/ip_dstlist.h" #include "netinet/ip_sync.h" -#if FREEBSD_GE_REV(300000) +#if defined(__FreeBSD_version) # include <sys/malloc.h> #endif #ifdef HAS_SYS_MD5_H @@ -1024,7 +1017,7 @@ ipf_nat_ioctl(softc, data, cmd, mode, uid, ctx) KAUTH_REQ_NETWORK_FIREWALL_FW, NULL, NULL, NULL)) # else -# if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034) +# if defined(__FreeBSD_version) if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE)) # else if ((securelevel >= 3) && (mode & FWRITE)) @@ -1036,11 +1029,7 @@ ipf_nat_ioctl(softc, data, cmd, mode, uid, ctx) } #endif -#if defined(__osf__) && defined(_KERNEL) - getlock = 0; -#else getlock = (mode & NAT_LOCKHELD) ? 0 : 1; -#endif n = NULL; nt = NULL; @@ -1866,7 +1855,7 @@ ipf_nat_getent(softc, data, getlock) */ if (nat->nat_ptr != NULL) bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, - ipn->ipn_ipnat.in_size); + sizeof(nat->nat_ptr)); /* * If we also know the NAT entry has an associated filter rule, @@ -1904,21 +1893,17 @@ ipf_nat_getent(softc, data, getlock) } } if (error == 0) { - if (getlock) { - READ_ENTER(&softc->ipf_nat); - getlock = 0; - } error = ipf_outobjsz(softc, data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); } finished: - if (getlock) { - READ_ENTER(&softc->ipf_nat); - } if (ipn != NULL) { KFREES(ipn, ipns.ipn_dsize); } + if (getlock) { + RWLOCK_EXIT(&softc->ipf_nat); + } return error; } @@ -3306,7 +3291,7 @@ ipf_nat_finalise(fin, nat) u_32_t sum1, sum2, sumd; frentry_t *fr; u_32_t flags; -#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) +#if SOLARIS && defined(_KERNEL) && defined(ICK_M_CTL_MAGIC) qpktinfo_t *qpi = fin->fin_qpi; #endif @@ -5238,8 +5223,8 @@ ipf_nat_out(fin, nat, natadd, nflags) uh = (udphdr_t *)(ip + 1); uh->uh_ulen += fin->fin_plen; uh->uh_ulen = htons(uh->uh_ulen); -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ - defined(linux) || defined(BRIDGE_IPF) || defined(__FreeBSD__) +#if !defined(_KERNEL) || defined(MENTAT) || \ + defined(BRIDGE_IPF) || defined(__FreeBSD__) ipf_fix_outcksum(0, &ip->ip_sum, sumd, 0); #endif @@ -5659,8 +5644,7 @@ ipf_nat_in(fin, nat, natadd, nflags) } fin->fin_ip->ip_dst = nat->nat_osrcip; fin->fin_daddr = nat->nat_osrcaddr; -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ - defined(__osf__) || defined(linux) +#if !defined(_KERNEL) || defined(MENTAT) ipf_fix_incksum(0, &fin->fin_ip->ip_sum, ipsumd, 0); #endif break; @@ -5692,8 +5676,7 @@ ipf_nat_in(fin, nat, natadd, nflags) sum2 += ntohs(ip->ip_off) & IP_DF; CALC_SUMD(sum1, sum2, sumd); -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ - defined(__osf__) || defined(linux) +#if !defined(_KERNEL) || defined(MENTAT) ipf_fix_outcksum(0, &ip->ip_sum, sumd, 0); #endif PREP_MB_T(fin, m); @@ -6212,27 +6195,6 @@ ipf_nat_log(softc, softn, nat, action) } -#if defined(__OpenBSD__) -/* ------------------------------------------------------------------------ */ -/* Function: ipf_nat_ifdetach */ -/* Returns: Nil */ -/* Parameters: ifp(I) - pointer to 
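The ipf_nat_getent() hunk above consolidates the unlock: when the function took the ipf_nat read lock itself (getlock != 0), the lock is now released once at the common 'finished:' label instead of being re-entered on the way out. A generic sketch of that single-exit shape, using FreeBSD's rwlock(9) and hypothetical names purely for illustration:

/*
 * Illustrative pattern: a conditionally acquired read lock is dropped
 * exactly once at the shared exit label, whichever path is taken.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/malloc.h>

static struct rwlock ex_lock;	/* assumed rw_init()ed during attach */

static int
ex_get_entry(void *out __unused, size_t len, int already_locked)
{
	void *buf;
	int error = 0;

	if (!already_locked)
		rw_rlock(&ex_lock);

	buf = malloc(len, M_TEMP, M_NOWAIT | M_ZERO);
	if (buf == NULL) {
		error = ENOMEM;
		goto finished;
	}

	/* ... copy the entry into buf under the read lock ... */

finished:
	if (buf != NULL)
		free(buf, M_TEMP);
	if (!already_locked)
		rw_runlock(&ex_lock);
	return (error);
}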
network interface */ -/* */ -/* Compatibility interface for OpenBSD to trigger the correct updating of */ -/* interface references within IPFilter. */ -/* ------------------------------------------------------------------------ */ -void -ipf_nat_ifdetach(ifp) - void *ifp; -{ - ipf_main_softc_t *softc; - - softc = ipf_get_softc(0); - - ipf_sync(ifp); - return; -} -#endif /* ------------------------------------------------------------------------ */ @@ -7463,8 +7425,7 @@ ipf_nat_decap(fin, nat) CALC_SUMD(sum1, sum2, sumd); fin->fin_ip->ip_dst = nat->nat_osrcip; fin->fin_daddr = nat->nat_osrcaddr; -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ - defined(__osf__) || defined(linux) +#if !defined(_KERNEL) || defined(MENTAT) ipf_fix_outcksum(0, &fin->fin_ip->ip_sum, sumd, 0); #endif } diff --git a/sys/contrib/ipfilter/netinet/ip_nat.h b/sys/contrib/ipfilter/netinet/ip_nat.h index 6e245f81e727..e65b1681a5f5 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat.h +++ b/sys/contrib/ipfilter/netinet/ip_nat.h @@ -14,7 +14,7 @@ #define __IP_NAT_H__ #ifndef SOLARIS -# if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +# if defined(sun) && defined(__SVR4) # define SOLARIS 1 # else # define SOLARIS 0 @@ -694,9 +694,6 @@ extern int ipf_nat_hostmap_rehash __P((ipf_main_softc_t *, ipftuneable_t *, ipftuneval_t *)); extern nat_t *ipf_nat_icmperrorlookup __P((fr_info_t *, int)); extern nat_t *ipf_nat_icmperror __P((fr_info_t *, u_int *, int)); -#if defined(__OpenBSD__) -extern void ipf_nat_ifdetach __P((void *)); -#endif extern int ipf_nat_init __P((void)); extern nat_t *ipf_nat_inlookup __P((fr_info_t *, u_int, u_int, struct in_addr, struct in_addr)); diff --git a/sys/contrib/ipfilter/netinet/ip_nat6.c b/sys/contrib/ipfilter/netinet/ip_nat6.c index 5985d6f6566c..19f57868db43 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat6.c +++ b/sys/contrib/ipfilter/netinet/ip_nat6.c @@ -29,26 +29,22 @@ struct file; # include <sys/uio.h> # undef _KERNEL #endif -#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include <sys/filio.h> # include <sys/fcntl.h> #else # include <sys/ioctl.h> #endif -#if !defined(AIX) # include <sys/fcntl.h> -#endif -#if !defined(linux) # include <sys/protosw.h> -#endif #include <sys/socket.h> #if defined(_KERNEL) # include <sys/systm.h> -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include <sys/mbuf.h> # endif #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include <sys/filio.h> # include <sys/byteorder.h> # ifdef _KERNEL @@ -57,11 +53,11 @@ struct file; # include <sys/stream.h> # include <sys/kmem.h> #endif -#if __FreeBSD_version >= 300000 +#if defined(__FreeBSD_version) # include <sys/queue.h> #endif #include <net/if.h> -#if __FreeBSD_version >= 300000 +#if defined(__FreeBSD_version) # include <net/if_var.h> #endif #ifdef sun @@ -78,9 +74,7 @@ struct file; extern struct ifnet vpnif; #endif -#if !defined(linux) # include <netinet/ip_var.h> -#endif #include <netinet/tcp.h> #include <netinet/udp.h> #include <netinet/ip_icmp.h> @@ -94,7 +88,7 @@ extern struct ifnet vpnif; #include "netinet/ip_lookup.h" #include "netinet/ip_dstlist.h" #include "netinet/ip_sync.h" -#if (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include <sys/malloc.h> #endif #ifdef HAS_SYS_MD5_H @@ -970,7 +964,7 @@ ipf_nat6_add(fin, np, natsave, flags, direction) u_int nflags; natinfo_t ni; int move; -#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) +#if 
SOLARIS && defined(_KERNEL) && defined(ICK_M_CTL_MAGIC) qpktinfo_t *qpi = fin->fin_qpi; #endif diff --git a/sys/contrib/ipfilter/netinet/ip_pool.c b/sys/contrib/ipfilter/netinet/ip_pool.c index 2a43cdb00bfa..fd511fcd2d89 100644 --- a/sys/contrib/ipfilter/netinet/ip_pool.c +++ b/sys/contrib/ipfilter/netinet/ip_pool.c @@ -9,9 +9,6 @@ # define KERNEL 1 # define _KERNEL 1 #endif -#if defined(__osf__) -# define _PROTO_NET_H_ -#endif #include <sys/errno.h> #include <sys/types.h> #include <sys/param.h> @@ -21,9 +18,6 @@ # include <stdlib.h> # include <string.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #else @@ -36,7 +30,7 @@ struct file; #if defined(_KERNEL) && !defined(SOLARIS2) # include <sys/mbuf.h> #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include <sys/byteorder.h> # ifdef _KERNEL # include <sys/dditypes.h> @@ -44,7 +38,7 @@ struct file; # include <sys/stream.h> # include <sys/kmem.h> #endif -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include <sys/malloc.h> #endif diff --git a/sys/contrib/ipfilter/netinet/ip_proxy.c b/sys/contrib/ipfilter/netinet/ip_proxy.c index 359c29bdfd3e..29ecdd4201a4 100644 --- a/sys/contrib/ipfilter/netinet/ip_proxy.c +++ b/sys/contrib/ipfilter/netinet/ip_proxy.c @@ -16,43 +16,34 @@ #include <sys/param.h> #include <sys/time.h> #include <sys/file.h> -#if !defined(AIX) # include <sys/fcntl.h> -#endif #if !defined(_KERNEL) && !defined(__KERNEL__) # include <stdio.h> # include <string.h> # include <stdlib.h> # include <ctype.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #endif -#if !defined(linux) # include <sys/protosw.h> -#endif #include <sys/socket.h> #if defined(_KERNEL) -# if !defined(__NetBSD__) && !defined(sun) && !defined(__osf__) && \ - !defined(__OpenBSD__) && !defined(__hpux) && !defined(__sgi) && \ - !defined(AIX) +#ifdef __FreeBSD_version # include <sys/ctype.h> # endif # include <sys/systm.h> -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include <sys/mbuf.h> # endif #endif -#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include <sys/filio.h> # include <sys/fcntl.h> #else # include <sys/ioctl.h> #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include <sys/byteorder.h> # ifdef _KERNEL # include <sys/dditypes.h> @@ -64,7 +55,7 @@ struct file; # include <sys/queue.h> #endif #include <net/if.h> -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) && defined(_KERNEL) +#if defined(__FreeBSD_version) && defined(_KERNEL) #include <net/vnet.h> #else #define CURVNET_SET(arg) @@ -79,9 +70,7 @@ struct file; #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> -#ifndef linux # include <netinet/ip_var.h> -#endif #include <netinet/tcp.h> #include <netinet/udp.h> #include <netinet/ip_icmp.h> @@ -91,7 +80,7 @@ struct file; #include "netinet/ip_nat.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" -#if (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include <sys/malloc.h> #endif @@ -925,7 +914,7 @@ ipf_proxy_check(fin, nat) ip_t *ip; short rv; int err; -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) +#if !defined(_KERNEL) || defined(MENTAT) u_32_t s1, s2, sd; #endif @@ -1017,7 +1006,7 @@ ipf_proxy_check(fin, nat) * packet. 
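ip_proxy.c above keeps <net/vnet.h> for FreeBSD kernels and defines CURVNET_SET()/CURVNET_RESTORE() away elsewhere, and the new pfil wrappers earlier in the commit enter the interface's vnet before calling ipf_check(). A minimal, hypothetical sketch of that VIMAGE idiom (the counter and function names are placeholders):

/*
 * Hypothetical per-vnet state plus a helper that switches to the
 * receiving interface's vnet before touching it, mirroring the
 * CURVNET_SET(ifp->if_vnet)/CURVNET_RESTORE() pairing used above.
 */
#include <sys/param.h>
#include <sys/mbuf.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/vnet.h>

VNET_DEFINE_STATIC(unsigned long, ex_pkt_count);
#define	V_ex_pkt_count	VNET(ex_pkt_count)

static void
ex_count_packet(struct mbuf *m __unused, struct ifnet *ifp)
{
	CURVNET_SET(ifp->if_vnet);
	V_ex_pkt_count++;		/* resolves in that interface's vnet */
	CURVNET_RESTORE();
}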
*/ adjlen = APR_INC(err); -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) +#if !defined(_KERNEL) || defined(MENTAT) s1 = LONG_SUM(fin->fin_plen - adjlen); s2 = LONG_SUM(fin->fin_plen); CALC_SUMD(s1, s2, sd); diff --git a/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c b/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c index 031363793cea..62202ffc4488 100644 --- a/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c +++ b/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c @@ -105,11 +105,7 @@ ipf_p_raudio_out(arg, fin, aps, nat) off = (char *)tcp - (char *)fin->fin_ip; off += (TCP_OFF(tcp) << 2) + fin->fin_ipoff; -#ifdef __sgi - dlen = fin->fin_plen - off; -#else dlen = MSGDSIZE(m) - off; -#endif if (dlen <= 0) return 0; @@ -222,11 +218,7 @@ ipf_p_raudio_in(arg, fin, aps, nat) off = (char *)tcp - (char *)fin->fin_ip; off += (TCP_OFF(tcp) << 2) + fin->fin_ipoff; -#ifdef __sgi - dlen = fin->fin_plen - off; -#else dlen = MSGDSIZE(m) - off; -#endif if (dlen <= 0) return 0; diff --git a/sys/contrib/ipfilter/netinet/ip_scan.c b/sys/contrib/ipfilter/netinet/ip_scan.c index 5b7c77e4b102..34bc844eb354 100644 --- a/sys/contrib/ipfilter/netinet/ip_scan.c +++ b/sys/contrib/ipfilter/netinet/ip_scan.c @@ -10,9 +10,6 @@ # define _KERNEL 1 #endif #include <sys/param.h> -#if defined(__hpux) && (HPUXREV >= 1111) && !defined(_KERNEL) -# include <sys/kern_svcs.h> -#endif #include <sys/types.h> #include <sys/time.h> #include <sys/errno.h> @@ -20,21 +17,16 @@ # include <stdlib.h> # include <string.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #else # include <sys/systm.h> -# if !defined(__svr4__) && !defined(__SVR4) +# if !defined(__SVR4) # include <sys/mbuf.h> # endif #endif #include <sys/socket.h> -#if !defined(__hpux) && !defined(__osf__) && !defined(linux) && !defined(AIX) # include <sys/ioccom.h> -#endif #ifdef __FreeBSD__ # include <sys/filio.h> # include <sys/malloc.h> diff --git a/sys/contrib/ipfilter/netinet/ip_state.c b/sys/contrib/ipfilter/netinet/ip_state.c index 1bf190715340..f364c295e4c0 100644 --- a/sys/contrib/ipfilter/netinet/ip_state.c +++ b/sys/contrib/ipfilter/netinet/ip_state.c @@ -20,7 +20,7 @@ #include <sys/param.h> #include <sys/file.h> #if defined(_KERNEL) && defined(__FreeBSD_version) && \ - (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) + !defined(KLD_MODULE) #include "opt_inet6.h" #endif #if !defined(_KERNEL) && !defined(__KERNEL__) @@ -28,30 +28,25 @@ # include <stdlib.h> # include <string.h> # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL #endif -#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include <sys/filio.h> # include <sys/fcntl.h> #else # include <sys/ioctl.h> #endif #include <sys/time.h> -#if !defined(linux) # include <sys/protosw.h> -#endif #include <sys/socket.h> #if defined(_KERNEL) # include <sys/systm.h> -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include <sys/mbuf.h> # endif #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include <sys/filio.h> # include <sys/byteorder.h> # ifdef _KERNEL @@ -69,9 +64,7 @@ struct file; #include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/tcp.h> -#if !defined(__hpux) && !defined(linux) # include <netinet/tcp_fsm.h> -#endif #include <netinet/udp.h> #include <netinet/ip_icmp.h> #if !defined(_KERNEL) diff --git a/sys/contrib/ipfilter/netinet/ip_sync.c b/sys/contrib/ipfilter/netinet/ip_sync.c index 
59094097864b..04018ed7b621 100644 --- a/sys/contrib/ipfilter/netinet/ip_sync.c +++ b/sys/contrib/ipfilter/netinet/ip_sync.c @@ -21,15 +21,12 @@ # include <string.h> # define _KERNEL # define KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include <sys/uio.h> # undef _KERNEL # undef KERNEL #else # include <sys/systm.h> -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include <sys/mbuf.h> # endif # include <sys/select.h> @@ -40,18 +37,16 @@ struct file; #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000) # include <sys/proc.h> #endif -#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include <sys/filio.h> # include <sys/fcntl.h> #else # include <sys/ioctl.h> #endif #include <sys/time.h> -#if !defined(linux) # include <sys/protosw.h> -#endif #include <sys/socket.h> -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include <sys/filio.h> # include <sys/byteorder.h> # ifdef _KERNEL @@ -69,12 +64,8 @@ struct file; #include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/tcp.h> -#if !defined(linux) # include <netinet/ip_var.h> -#endif -#if !defined(__hpux) && !defined(linux) # include <netinet/tcp_fsm.h> -#endif #include <netinet/udp.h> #include <netinet/ip_icmp.h> #include "netinet/ip_compat.h" @@ -88,7 +79,7 @@ struct file; #ifdef USE_INET6 #include <netinet/icmp6.h> #endif -#if (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include <sys/malloc.h> # if defined(_KERNEL) && !defined(IPFILTER_LKM) # include <sys/libkern.h> @@ -112,9 +103,6 @@ typedef struct ipf_sync_softc_s { #if SOLARIS && defined(_KERNEL) kcondvar_t ipslwait; #endif -#if defined(linux) && defined(_KERNEL) - wait_queue_head_t sl_tail_linux; -#endif synclist_t **syncstatetab; synclist_t **syncnattab; synclogent_t *synclog; @@ -308,7 +296,7 @@ ipf_sync_soft_destroy(softc, arg) } -# if !defined(sparc) && !defined(__hppa) +# if !defined(sparc) /* ------------------------------------------------------------------------ */ /* Function: ipf_sync_tcporder */ /* Returns: Nil */ @@ -418,11 +406,11 @@ ipf_sync_storder(way, ips) ips->is_smsk[1] = ntohl(ips->is_smsk[1]); } } -# else /* !defined(sparc) && !defined(__hppa) */ +# else /* !defined(sparc) */ # define ipf_sync_tcporder(x,y) # define ipf_sync_natorder(x,y) # define ipf_sync_storder(x,y) -# endif /* !defined(sparc) && !defined(__hppa) */ +# endif /* !defined(sparc) */ /* ------------------------------------------------------------------------ */ @@ -449,7 +437,7 @@ ipf_sync_write(softc, uio) int err = 0; -# if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__) +# if BSD_GE_YEAR(199306) || defined(__FreeBSD__) uio->uio_rw = UIO_WRITE; # endif @@ -597,7 +585,7 @@ ipf_sync_read(softc, uio) return EINVAL; } -# if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__) +# if BSD_GE_YEAR(199306) || defined(__FreeBSD__) uio->uio_rw = UIO_READ; # endif @@ -612,28 +600,6 @@ ipf_sync_read(softc, uio) return EINTR; } # else -# ifdef __hpux - { - lock_t *l; - - l = get_sleep_lock(&softs->sl_tail); - err = sleep(&softs->sl_tail, PZERO+1); - if (err) { - MUTEX_EXIT(&softs->ipsl_mutex); - IPFERROR(110010); - return EINTR; - } - spinunlock(l); - } -# else /* __hpux */ -# ifdef __osf__ - err = mpsleep(&softs->sl_tail, PSUSP|PCATCH, "ipl sleep", 0, - &softs->ipsl_mutex, MS_LOCK_SIMPLE); - if (err) { - IPFERROR(110011); - return EINTR; - } -# else MUTEX_EXIT(&softs->ipsl_mutex); err = SLEEP(&softs->sl_tail, "ipl sleep"); if (err) { @@ -641,8 
+607,6 @@ ipf_sync_read(softc, uio) return EINTR; } MUTEX_ENTER(&softs->ipsl_mutex); -# endif /* __osf__ */ -# endif /* __hpux */ # endif /* SOLARIS */ # endif /* _KERNEL */ } diff --git a/sys/dev/bwn/if_bwn.c b/sys/dev/bwn/if_bwn.c index 6f8bac67bc72..d6a1926282ef 100644 --- a/sys/dev/bwn/if_bwn.c +++ b/sys/dev/bwn/if_bwn.c @@ -6211,20 +6211,15 @@ bwn_pio_handle_txeof(struct bwn_mac *mac, tq->tq_used -= roundup(tp->tp_m->m_pkthdr.len + BWN_HDRSIZE(mac), 4); tq->tq_free++; - /* XXX ieee80211_tx_complete()? */ if (tp->tp_ni != NULL) { /* * Do any tx complete callback. Note this must * be done before releasing the node reference. */ - bwn_ratectl_tx_complete(tp->tp_ni, status); - if (tp->tp_m->m_flags & M_TXCB) - ieee80211_process_callback(tp->tp_ni, tp->tp_m, 0); - ieee80211_free_node(tp->tp_ni); - tp->tp_ni = NULL; } - m_freem(tp->tp_m); + ieee80211_tx_complete(tp->tp_ni, tp->tp_m, 0); + tp->tp_ni = NULL; tp->tp_m = NULL; TAILQ_INSERT_TAIL(&tq->tq_pktlist, tp, tp_list); diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 6f9fc82ab9be..e7a890af69d7 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -155,7 +155,7 @@ enum { CHK_MBOX_ACCESS = (1 << 2), MASTER_PF = (1 << 3), ADAP_SYSCTL_CTX = (1 << 4), - /* TOM_INIT_DONE= (1 << 5), No longer used */ + ADAP_ERR = (1 << 5), BUF_PACKING_OK = (1 << 6), IS_VF = (1 << 7), @@ -175,6 +175,7 @@ enum { DF_LOAD_FW_ANYTIME = (1 << 1), /* Allow LOAD_FW after init */ DF_DISABLE_TCB_CACHE = (1 << 2), /* Disable TCB cache (T6+) */ DF_DISABLE_CFG_RETRY = (1 << 3), /* Disable fallback config */ + DF_VERBOSE_SLOWINTR = (1 << 4), /* Chatty slow intr handler */ }; #define IS_DOOMED(vi) ((vi)->flags & DOOMED) @@ -932,24 +933,6 @@ struct adapter { #define TXQ_LOCK_ASSERT_OWNED(txq) EQ_LOCK_ASSERT_OWNED(&(txq)->eq) #define TXQ_LOCK_ASSERT_NOTOWNED(txq) EQ_LOCK_ASSERT_NOTOWNED(&(txq)->eq) -#define CH_DUMP_MBOX(sc, mbox, data_reg) \ - do { \ - if (sc->debug_flags & DF_DUMP_MBOX) { \ - log(LOG_NOTICE, \ - "%s mbox %u: %016llx %016llx %016llx %016llx " \ - "%016llx %016llx %016llx %016llx\n", \ - device_get_nameunit(sc->dev), mbox, \ - (unsigned long long)t4_read_reg64(sc, data_reg), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 8), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 16), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 24), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 32), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 40), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 48), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 56)); \ - } \ - } while (0) - #define for_each_txq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.txq[vi->first_txq], iter = 0; \ iter < vi->ntxq; ++iter, ++q) @@ -1105,6 +1088,38 @@ t4_use_ldst(struct adapter *sc) #endif } +static inline void +CH_DUMP_MBOX(struct adapter *sc, int mbox, const int reg, + const char *msg, const __be64 *const p, const bool err) +{ + + if (!(sc->debug_flags & DF_DUMP_MBOX) && !err) + return; + if (p != NULL) { + log(err ? LOG_ERR : LOG_DEBUG, + "%s: mbox %u %s %016llx %016llx %016llx %016llx " + "%016llx %016llx %016llx %016llx\n", + device_get_nameunit(sc->dev), mbox, msg, + (long long)be64_to_cpu(p[0]), (long long)be64_to_cpu(p[1]), + (long long)be64_to_cpu(p[2]), (long long)be64_to_cpu(p[3]), + (long long)be64_to_cpu(p[4]), (long long)be64_to_cpu(p[5]), + (long long)be64_to_cpu(p[6]), (long long)be64_to_cpu(p[7])); + } else { + log(err ? 
LOG_ERR : LOG_DEBUG, + "%s: mbox %u %s %016llx %016llx %016llx %016llx " + "%016llx %016llx %016llx %016llx\n", + device_get_nameunit(sc->dev), mbox, msg, + (long long)t4_read_reg64(sc, reg), + (long long)t4_read_reg64(sc, reg + 8), + (long long)t4_read_reg64(sc, reg + 16), + (long long)t4_read_reg64(sc, reg + 24), + (long long)t4_read_reg64(sc, reg + 32), + (long long)t4_read_reg64(sc, reg + 40), + (long long)t4_read_reg64(sc, reg + 48), + (long long)t4_read_reg64(sc, reg + 56)); + } +} + /* t4_main.c */ extern int t4_ntxq; extern int t4_nrxq; diff --git a/sys/dev/cxgbe/common/common.h b/sys/dev/cxgbe/common/common.h index e072a6759a69..62694993237c 100644 --- a/sys/dev/cxgbe/common/common.h +++ b/sys/dev/cxgbe/common/common.h @@ -34,10 +34,6 @@ #include "t4_hw.h" -#define GLBL_INTR_MASK (F_CIM | F_MPS | F_PL | F_PCIE | F_MC0 | F_EDC0 | \ - F_EDC1 | F_LE | F_TP | F_MA | F_PM_TX | F_PM_RX | F_ULP_RX | \ - F_CPL_SWITCH | F_SGE | F_ULP_TX) - enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ @@ -581,7 +577,7 @@ struct fw_filter_wr; void t4_intr_enable(struct adapter *adapter); void t4_intr_disable(struct adapter *adapter); void t4_intr_clear(struct adapter *adapter); -int t4_slow_intr_handler(struct adapter *adapter); +int t4_slow_intr_handler(struct adapter *adapter, bool verbose); int t4_hash_mac_addr(const u8 *addr); int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port, @@ -621,9 +617,7 @@ int t4_init_sge_params(struct adapter *adapter); int t4_init_tp_params(struct adapter *adap, bool sleep_ok); int t4_filter_field_shift(const struct adapter *adap, int filter_sel); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf, int port_id); -void t4_fatal_err(struct adapter *adapter); -void t4_db_full(struct adapter *adapter); -void t4_db_dropped(struct adapter *adapter); +void t4_fatal_err(struct adapter *adapter, bool fw_error); int t4_set_trace_filter(struct adapter *adapter, const struct trace_params *tp, int filter_index, int enable); void t4_get_trace_filter(struct adapter *adapter, struct trace_params *tp, diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c index 7116b38b1639..15b04194cc20 100644 --- a/sys/dev/cxgbe/common/t4_hw.c +++ b/sys/dev/cxgbe/common/t4_hw.c @@ -212,8 +212,8 @@ static void t4_report_fw_error(struct adapter *adap) pcie_fw = t4_read_reg(adap, A_PCIE_FW); if (pcie_fw & F_PCIE_FW_ERR) { - CH_ERR(adap, "Firmware reports adapter error: %s\n", - reason[G_PCIE_FW_EVAL(pcie_fw)]); + CH_ERR(adap, "firmware reports adapter error: %s (0x%08x)\n", + reason[G_PCIE_FW_EVAL(pcie_fw)], pcie_fw); adap->flags &= ~FW_OK; } } @@ -340,7 +340,6 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, u32 v; u64 res; int i, ms, delay_idx, ret, next_tx_check; - const __be64 *p = cmd; u32 data_reg = PF_REG(mbox, A_CIM_PF_MAILBOX_DATA); u32 ctl_reg = PF_REG(mbox, A_CIM_PF_MAILBOX_CTRL); u32 ctl; @@ -351,7 +350,7 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, if (adap->flags & CHK_MBOX_ACCESS) ASSERT_SYNCHRONIZED_OP(adap); - if ((size & 15) || size > MBOX_LEN) + if (size <= 0 || (size & 15) || size > MBOX_LEN) return -EINVAL; if (adap->flags & IS_VF) { @@ -381,8 +380,7 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, } /* - * If we were unable to gain access, dequeue ourselves from the - * mailbox atomic access list and report the error to our caller. + * If we were unable to gain access, report the error to our caller. 
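To recap the new CH_DUMP_MBOX() semantics from the adapter.h hunk above: a non-NULL buffer logs an in-core copy of the command or reply, a NULL buffer reads the eight 64-bit mailbox data registers instead, and err selects unconditional LOG_ERR over LOG_DEBUG gated on DF_DUMP_MBOX. The call sites in t4_wr_mbox_meat_timeout() below exercise both modes; they are summarized here with annotations (cmd_rpl is the function's eight-entry __be64 scratch buffer).

/* Annotated summary of the CH_DUMP_MBOX() calls in the hunk below. */
CH_DUMP_MBOX(adap, mbox, data_reg, "VLD", NULL, true);     /* stale valid cmd found: dump registers, always log */
CH_DUMP_MBOX(adap, mbox, 0, "cmd", cmd_rpl, false);        /* command about to be sent: dump copy if DF_DUMP_MBOX */
CH_DUMP_MBOX(adap, mbox, 0, "rpl", cmd_rpl, false);        /* reply read back from the mailbox */
CH_DUMP_MBOX(adap, mbox, 0, "cmdsent", cmd_rpl, true);     /* timeout: always log the command that was sent */
CH_DUMP_MBOX(adap, mbox, data_reg, "current", NULL, true); /* timeout: and the registers' current contents */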
*/ if (v != X_MBOWNER_PL) { t4_report_fw_error(adap); @@ -398,23 +396,17 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, * presaged the firmware crashing ... */ if (ctl & F_MBMSGVALID) { - CH_ERR(adap, "found VALID command in mbox %u: %016llx %016llx " - "%016llx %016llx %016llx %016llx %016llx %016llx\n", - mbox, (unsigned long long)t4_read_reg64(adap, data_reg), - (unsigned long long)t4_read_reg64(adap, data_reg + 8), - (unsigned long long)t4_read_reg64(adap, data_reg + 16), - (unsigned long long)t4_read_reg64(adap, data_reg + 24), - (unsigned long long)t4_read_reg64(adap, data_reg + 32), - (unsigned long long)t4_read_reg64(adap, data_reg + 40), - (unsigned long long)t4_read_reg64(adap, data_reg + 48), - (unsigned long long)t4_read_reg64(adap, data_reg + 56)); + CH_DUMP_MBOX(adap, mbox, data_reg, "VLD", NULL, true); } /* * Copy in the new mailbox command and send it on its way ... */ - for (i = 0; i < size; i += 8, p++) - t4_write_reg64(adap, data_reg + i, be64_to_cpu(*p)); + memset(cmd_rpl, 0, sizeof(cmd_rpl)); + memcpy(cmd_rpl, cmd, size); + CH_DUMP_MBOX(adap, mbox, 0, "cmd", cmd_rpl, false); + for (i = 0; i < ARRAY_SIZE(cmd_rpl); i++) + t4_write_reg64(adap, data_reg + i * 8, be64_to_cpu(cmd_rpl[i])); if (adap->flags & IS_VF) { /* @@ -432,8 +424,6 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, t4_read_reg(adap, data_reg); } - CH_DUMP_MBOX(adap, mbox, data_reg); - t4_write_reg(adap, ctl_reg, F_MBMSGVALID | V_MBOWNER(X_MBOWNER_FW)); read_tx_state(adap, &tx_state[0]); /* also flushes the write_reg */ next_tx_check = 1000; @@ -480,10 +470,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, * Retrieve the command reply and release the mailbox. */ get_mbox_rpl(adap, cmd_rpl, MBOX_LEN/8, data_reg); + CH_DUMP_MBOX(adap, mbox, 0, "rpl", cmd_rpl, false); t4_write_reg(adap, ctl_reg, V_MBOWNER(X_MBOWNER_NONE)); - CH_DUMP_MBOX(adap, mbox, data_reg); - res = be64_to_cpu(cmd_rpl[0]); if (G_FW_CMD_OP(res >> 32) == FW_DEBUG_CMD) { fw_asrt(adap, (struct fw_debug_cmd *)cmd_rpl); @@ -500,26 +489,13 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, * errors ... */ ret = (pcie_fw & F_PCIE_FW_ERR) ? 
-ENXIO : -ETIMEDOUT; - CH_ERR(adap, "command %#x in mailbox %d timed out\n", - *(const u8 *)cmd, mbox); - - /* If DUMP_MBOX is set the mbox has already been dumped */ - if ((adap->debug_flags & DF_DUMP_MBOX) == 0) { - p = cmd; - CH_ERR(adap, "mbox: %016llx %016llx %016llx %016llx " - "%016llx %016llx %016llx %016llx\n", - (unsigned long long)be64_to_cpu(p[0]), - (unsigned long long)be64_to_cpu(p[1]), - (unsigned long long)be64_to_cpu(p[2]), - (unsigned long long)be64_to_cpu(p[3]), - (unsigned long long)be64_to_cpu(p[4]), - (unsigned long long)be64_to_cpu(p[5]), - (unsigned long long)be64_to_cpu(p[6]), - (unsigned long long)be64_to_cpu(p[7])); - } + CH_ERR(adap, "command %#x in mbox %d timed out (0x%08x).\n", + *(const u8 *)cmd, mbox, pcie_fw); + CH_DUMP_MBOX(adap, mbox, 0, "cmdsent", cmd_rpl, true); + CH_DUMP_MBOX(adap, mbox, data_reg, "current", NULL, true); t4_report_fw_error(adap); - t4_fatal_err(adap); + t4_fatal_err(adap, true); return ret; } @@ -3965,785 +3941,1330 @@ int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port) return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } -typedef void (*int_handler_t)(struct adapter *adap); +struct intr_details { + u32 mask; + const char *msg; +}; + +struct intr_action { + u32 mask; + int arg; + bool (*action)(struct adapter *, int, bool); +}; struct intr_info { - unsigned int mask; /* bits to check in interrupt status */ - const char *msg; /* message to print or NULL */ - short stat_idx; /* stat counter to increment or -1 */ - unsigned short fatal; /* whether the condition reported is fatal */ - int_handler_t int_handler; /* platform-specific int handler */ + const char *name; /* name of the INT_CAUSE register */ + int cause_reg; /* INT_CAUSE register */ + int enable_reg; /* INT_ENABLE register */ + u32 fatal; /* bits that are fatal */ + const struct intr_details *details; + const struct intr_action *actions; }; -/** - * t4_handle_intr_status - table driven interrupt handler - * @adapter: the adapter that generated the interrupt - * @reg: the interrupt status register to process - * @acts: table of interrupt actions - * - * A table driven interrupt handler that applies a set of masks to an - * interrupt status word and performs the corresponding actions if the - * interrupts described by the mask have occurred. The actions include - * optionally emitting a warning or alert message. The table is terminated - * by an entry specifying mask 0. Returns the number of fatal interrupt - * conditions. 
- */ -static int t4_handle_intr_status(struct adapter *adapter, unsigned int reg, - const struct intr_info *acts) +static inline char +intr_alert_char(u32 cause, u32 enable, u32 fatal) +{ + + if (cause & fatal) + return ('!'); + if (cause & enable) + return ('*'); + return ('-'); +} + +static void +t4_show_intr_info(struct adapter *adap, const struct intr_info *ii, u32 cause) { - int fatal = 0; - unsigned int mask = 0; - unsigned int status = t4_read_reg(adapter, reg); + u32 enable, leftover; + const struct intr_details *details; + char alert; + + enable = t4_read_reg(adap, ii->enable_reg); + alert = intr_alert_char(cause, enable, ii->fatal); + CH_ALERT(adap, "%c %s 0x%x = 0x%08x, E 0x%08x, F 0x%08x\n", + alert, ii->name, ii->cause_reg, cause, enable, ii->fatal); - for ( ; acts->mask; ++acts) { - if (!(status & acts->mask)) + leftover = cause; + for (details = ii->details; details && details->mask != 0; details++) { + u32 msgbits = details->mask & cause; + if (msgbits == 0) continue; - if (acts->fatal) { - fatal++; - CH_ALERT(adapter, "%s (0x%x)\n", acts->msg, - status & acts->mask); - } else if (acts->msg) - CH_WARN_RATELIMIT(adapter, "%s (0x%x)\n", acts->msg, - status & acts->mask); - if (acts->int_handler) - acts->int_handler(adapter); - mask |= acts->mask; - } - status &= mask; - if (status) /* clear processed interrupts */ - t4_write_reg(adapter, reg, status); - return fatal; + alert = intr_alert_char(msgbits, enable, ii->fatal); + CH_ALERT(adap, " %c [0x%08x] %s\n", alert, msgbits, + details->msg); + leftover &= ~msgbits; + } + if (leftover != 0 && leftover != cause) + CH_ALERT(adap, " ? [0x%08x]\n", leftover); +} + +/* + * Returns true for fatal error. + */ +static bool +t4_handle_intr(struct adapter *adap, const struct intr_info *ii, + u32 additional_cause, bool verbose) +{ + u32 cause; + bool fatal; + const struct intr_action *action; + + /* read and display cause. */ + cause = t4_read_reg(adap, ii->cause_reg); + if (verbose || cause != 0) + t4_show_intr_info(adap, ii, cause); + fatal = (cause & ii->fatal) != 0; + cause |= additional_cause; + if (cause == 0) + return (false); + + for (action = ii->actions; action && action->mask != 0; action++) { + if (!(action->mask & cause)) + continue; + fatal |= (action->action)(adap, action->arg, verbose); + } + + /* clear */ + t4_write_reg(adap, ii->cause_reg, cause); + (void)t4_read_reg(adap, ii->cause_reg); + + return (fatal); } /* * Interrupt handler for the PCIE module. 
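t4_handle_intr() above replaces the old per-bit message table with a per-register descriptor: struct intr_info names the CAUSE/ENABLE registers and the fatal mask, struct intr_details decodes individual bits for logging, and struct intr_action attaches optional callbacks. A hypothetical leaf handler written against this scheme would look roughly as follows; A_EX_INT_CAUSE, A_EX_INT_ENABLE and the F_EX_* bits are placeholders, not real Terminator register definitions.

/*
 * Hypothetical handler using the new table-driven scheme (it would sit
 * in t4_hw.c alongside pcie_intr_handler()).  Register and bit names
 * prefixed EX_ are placeholders.
 */
static bool
ex_fifo_full_action(struct adapter *adap, int arg, bool verbose)
{
	/* Non-fatal corrective action, e.g. drain the hypothetical FIFO. */
	return (false);
}

static bool
ex_intr_handler(struct adapter *adap, int arg, bool verbose)
{
	static const struct intr_details ex_intr_details[] = {
		{ F_EX_PARERR, "example block parity error" },
		{ F_EX_FIFO_FULL, "example block FIFO full" },
		{ 0 }
	};
	static const struct intr_action ex_intr_actions[] = {
		{ F_EX_FIFO_FULL, 0, ex_fifo_full_action },
		{ 0 }
	};
	static const struct intr_info ex_intr_info = {
		.name = "EX_INT_CAUSE",
		.cause_reg = A_EX_INT_CAUSE,
		.enable_reg = A_EX_INT_ENABLE,
		.fatal = F_EX_PARERR,
		.details = ex_intr_details,
		.actions = ex_intr_actions,
	};

	/*
	 * Logs the decoded cause, runs any matching actions, clears the
	 * register and reports whether a fatal bit was set.
	 */
	return (t4_handle_intr(adap, &ex_intr_info, 0, verbose));
}

Per the t4_handle_intr() body above, the verbose flag forces the decoded dump even when the cause register reads zero, which is what the new DF_VERBOSE_SLOWINTR debug flag is for.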
*/ -static void pcie_intr_handler(struct adapter *adapter) +static bool pcie_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info sysbus_intr_info[] = { - { F_RNPP, "RXNP array parity error", -1, 1 }, - { F_RPCP, "RXPC array parity error", -1, 1 }, - { F_RCIP, "RXCIF array parity error", -1, 1 }, - { F_RCCP, "Rx completions control array parity error", -1, 1 }, - { F_RFTP, "RXFT array parity error", -1, 1 }, + static const struct intr_details sysbus_intr_details[] = { + { F_RNPP, "RXNP array parity error" }, + { F_RPCP, "RXPC array parity error" }, + { F_RCIP, "RXCIF array parity error" }, + { F_RCCP, "Rx completions control array parity error" }, + { F_RFTP, "RXFT array parity error" }, { 0 } }; - static const struct intr_info pcie_port_intr_info[] = { - { F_TPCP, "TXPC array parity error", -1, 1 }, - { F_TNPP, "TXNP array parity error", -1, 1 }, - { F_TFTP, "TXFT array parity error", -1, 1 }, - { F_TCAP, "TXCA array parity error", -1, 1 }, - { F_TCIP, "TXCIF array parity error", -1, 1 }, - { F_RCAP, "RXCA array parity error", -1, 1 }, - { F_OTDD, "outbound request TLP discarded", -1, 1 }, - { F_RDPE, "Rx data parity error", -1, 1 }, - { F_TDUE, "Tx uncorrectable data error", -1, 1 }, + static const struct intr_info sysbus_intr_info = { + .name = "PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS", + .cause_reg = A_PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, + .enable_reg = A_PCIE_CORE_UTL_SYSTEM_BUS_AGENT_INTERRUPT_ENABLE, + .fatal = F_RFTP | F_RCCP | F_RCIP | F_RPCP | F_RNPP, + .details = sysbus_intr_details, + .actions = NULL, + }; + static const struct intr_details pcie_port_intr_details[] = { + { F_TPCP, "TXPC array parity error" }, + { F_TNPP, "TXNP array parity error" }, + { F_TFTP, "TXFT array parity error" }, + { F_TCAP, "TXCA array parity error" }, + { F_TCIP, "TXCIF array parity error" }, + { F_RCAP, "RXCA array parity error" }, + { F_OTDD, "outbound request TLP discarded" }, + { F_RDPE, "Rx data parity error" }, + { F_TDUE, "Tx uncorrectable data error" }, { 0 } }; - static const struct intr_info pcie_intr_info[] = { - { F_MSIADDRLPERR, "MSI AddrL parity error", -1, 1 }, - { F_MSIADDRHPERR, "MSI AddrH parity error", -1, 1 }, - { F_MSIDATAPERR, "MSI data parity error", -1, 1 }, - { F_MSIXADDRLPERR, "MSI-X AddrL parity error", -1, 1 }, - { F_MSIXADDRHPERR, "MSI-X AddrH parity error", -1, 1 }, - { F_MSIXDATAPERR, "MSI-X data parity error", -1, 1 }, - { F_MSIXDIPERR, "MSI-X DI parity error", -1, 1 }, - { F_PIOCPLPERR, "PCI PIO completion FIFO parity error", -1, 1 }, - { F_PIOREQPERR, "PCI PIO request FIFO parity error", -1, 1 }, - { F_TARTAGPERR, "PCI PCI target tag FIFO parity error", -1, 1 }, - { F_CCNTPERR, "PCI CMD channel count parity error", -1, 1 }, - { F_CREQPERR, "PCI CMD channel request parity error", -1, 1 }, - { F_CRSPPERR, "PCI CMD channel response parity error", -1, 1 }, - { F_DCNTPERR, "PCI DMA channel count parity error", -1, 1 }, - { F_DREQPERR, "PCI DMA channel request parity error", -1, 1 }, - { F_DRSPPERR, "PCI DMA channel response parity error", -1, 1 }, - { F_HCNTPERR, "PCI HMA channel count parity error", -1, 1 }, - { F_HREQPERR, "PCI HMA channel request parity error", -1, 1 }, - { F_HRSPPERR, "PCI HMA channel response parity error", -1, 1 }, - { F_CFGSNPPERR, "PCI config snoop FIFO parity error", -1, 1 }, - { F_FIDPERR, "PCI FID parity error", -1, 1 }, - { F_INTXCLRPERR, "PCI INTx clear parity error", -1, 1 }, - { F_MATAGPERR, "PCI MA tag parity error", -1, 1 }, - { F_PIOTAGPERR, "PCI PIO tag parity error", -1, 1 }, - { F_RXCPLPERR, 
"PCI Rx completion parity error", -1, 1 }, - { F_RXWRPERR, "PCI Rx write parity error", -1, 1 }, - { F_RPLPERR, "PCI replay buffer parity error", -1, 1 }, - { F_PCIESINT, "PCI core secondary fault", -1, 1 }, - { F_PCIEPINT, "PCI core primary fault", -1, 1 }, - { F_UNXSPLCPLERR, "PCI unexpected split completion error", -1, - 0 }, + static const struct intr_info pcie_port_intr_info = { + .name = "PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS", + .cause_reg = A_PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, + .enable_reg = A_PCIE_CORE_UTL_PCI_EXPRESS_PORT_INTERRUPT_ENABLE, + .fatal = F_TPCP | F_TNPP | F_TFTP | F_TCAP | F_TCIP | F_RCAP | + F_OTDD | F_RDPE | F_TDUE, + .details = pcie_port_intr_details, + .actions = NULL, + }; + static const struct intr_details pcie_intr_details[] = { + { F_MSIADDRLPERR, "MSI AddrL parity error" }, + { F_MSIADDRHPERR, "MSI AddrH parity error" }, + { F_MSIDATAPERR, "MSI data parity error" }, + { F_MSIXADDRLPERR, "MSI-X AddrL parity error" }, + { F_MSIXADDRHPERR, "MSI-X AddrH parity error" }, + { F_MSIXDATAPERR, "MSI-X data parity error" }, + { F_MSIXDIPERR, "MSI-X DI parity error" }, + { F_PIOCPLPERR, "PCIe PIO completion FIFO parity error" }, + { F_PIOREQPERR, "PCIe PIO request FIFO parity error" }, + { F_TARTAGPERR, "PCIe target tag FIFO parity error" }, + { F_CCNTPERR, "PCIe CMD channel count parity error" }, + { F_CREQPERR, "PCIe CMD channel request parity error" }, + { F_CRSPPERR, "PCIe CMD channel response parity error" }, + { F_DCNTPERR, "PCIe DMA channel count parity error" }, + { F_DREQPERR, "PCIe DMA channel request parity error" }, + { F_DRSPPERR, "PCIe DMA channel response parity error" }, + { F_HCNTPERR, "PCIe HMA channel count parity error" }, + { F_HREQPERR, "PCIe HMA channel request parity error" }, + { F_HRSPPERR, "PCIe HMA channel response parity error" }, + { F_CFGSNPPERR, "PCIe config snoop FIFO parity error" }, + { F_FIDPERR, "PCIe FID parity error" }, + { F_INTXCLRPERR, "PCIe INTx clear parity error" }, + { F_MATAGPERR, "PCIe MA tag parity error" }, + { F_PIOTAGPERR, "PCIe PIO tag parity error" }, + { F_RXCPLPERR, "PCIe Rx completion parity error" }, + { F_RXWRPERR, "PCIe Rx write parity error" }, + { F_RPLPERR, "PCIe replay buffer parity error" }, + { F_PCIESINT, "PCIe core secondary fault" }, + { F_PCIEPINT, "PCIe core primary fault" }, + { F_UNXSPLCPLERR, "PCIe unexpected split completion error" }, { 0 } }; - - static const struct intr_info t5_pcie_intr_info[] = { - { F_MSTGRPPERR, "Master Response Read Queue parity error", - -1, 1 }, - { F_MSTTIMEOUTPERR, "Master Timeout FIFO parity error", -1, 1 }, - { F_MSIXSTIPERR, "MSI-X STI SRAM parity error", -1, 1 }, - { F_MSIXADDRLPERR, "MSI-X AddrL parity error", -1, 1 }, - { F_MSIXADDRHPERR, "MSI-X AddrH parity error", -1, 1 }, - { F_MSIXDATAPERR, "MSI-X data parity error", -1, 1 }, - { F_MSIXDIPERR, "MSI-X DI parity error", -1, 1 }, - { F_PIOCPLGRPPERR, "PCI PIO completion Group FIFO parity error", - -1, 1 }, - { F_PIOREQGRPPERR, "PCI PIO request Group FIFO parity error", - -1, 1 }, - { F_TARTAGPERR, "PCI PCI target tag FIFO parity error", -1, 1 }, - { F_MSTTAGQPERR, "PCI master tag queue parity error", -1, 1 }, - { F_CREQPERR, "PCI CMD channel request parity error", -1, 1 }, - { F_CRSPPERR, "PCI CMD channel response parity error", -1, 1 }, - { F_DREQWRPERR, "PCI DMA channel write request parity error", - -1, 1 }, - { F_DREQPERR, "PCI DMA channel request parity error", -1, 1 }, - { F_DRSPPERR, "PCI DMA channel response parity error", -1, 1 }, - { F_HREQWRPERR, "PCI HMA channel count parity error", -1, 1 }, - 
{ F_HREQPERR, "PCI HMA channel request parity error", -1, 1 }, - { F_HRSPPERR, "PCI HMA channel response parity error", -1, 1 }, - { F_CFGSNPPERR, "PCI config snoop FIFO parity error", -1, 1 }, - { F_FIDPERR, "PCI FID parity error", -1, 1 }, - { F_VFIDPERR, "PCI INTx clear parity error", -1, 1 }, - { F_MAGRPPERR, "PCI MA group FIFO parity error", -1, 1 }, - { F_PIOTAGPERR, "PCI PIO tag parity error", -1, 1 }, - { F_IPRXHDRGRPPERR, "PCI IP Rx header group parity error", - -1, 1 }, - { F_IPRXDATAGRPPERR, "PCI IP Rx data group parity error", - -1, 1 }, - { F_RPLPERR, "PCI IP replay buffer parity error", -1, 1 }, - { F_IPSOTPERR, "PCI IP SOT buffer parity error", -1, 1 }, - { F_TRGT1GRPPERR, "PCI TRGT1 group FIFOs parity error", -1, 1 }, - { F_READRSPERR, "Outbound read error", -1, - 0 }, + static const struct intr_details t5_pcie_intr_details[] = { + { F_IPGRPPERR, "Parity errors observed by IP" }, + { F_NONFATALERR, "PCIe non-fatal error" }, + { F_READRSPERR, "Outbound read error" }, + { F_TRGT1GRPPERR, "PCIe TRGT1 group FIFOs parity error" }, + { F_IPSOTPERR, "PCIe IP SOT buffer SRAM parity error" }, + { F_IPRETRYPERR, "PCIe IP replay buffer parity error" }, + { F_IPRXDATAGRPPERR, "PCIe IP Rx data group SRAMs parity error" }, + { F_IPRXHDRGRPPERR, "PCIe IP Rx header group SRAMs parity error" }, + { F_PIOTAGQPERR, "PIO tag queue FIFO parity error" }, + { F_MAGRPPERR, "MA group FIFO parity error" }, + { F_VFIDPERR, "VFID SRAM parity error" }, + { F_FIDPERR, "FID SRAM parity error" }, + { F_CFGSNPPERR, "config snoop FIFO parity error" }, + { F_HRSPPERR, "HMA channel response data SRAM parity error" }, + { F_HREQRDPERR, "HMA channel read request SRAM parity error" }, + { F_HREQWRPERR, "HMA channel write request SRAM parity error" }, + { F_DRSPPERR, "DMA channel response data SRAM parity error" }, + { F_DREQRDPERR, "DMA channel write request SRAM parity error" }, + { F_CRSPPERR, "CMD channel response data SRAM parity error" }, + { F_CREQRDPERR, "CMD channel read request SRAM parity error" }, + { F_MSTTAGQPERR, "PCIe master tag queue SRAM parity error" }, + { F_TGTTAGQPERR, "PCIe target tag queue FIFO parity error" }, + { F_PIOREQGRPPERR, "PIO request group FIFOs parity error" }, + { F_PIOCPLGRPPERR, "PIO completion group FIFOs parity error" }, + { F_MSIXDIPERR, "MSI-X DI SRAM parity error" }, + { F_MSIXDATAPERR, "MSI-X data SRAM parity error" }, + { F_MSIXADDRHPERR, "MSI-X AddrH SRAM parity error" }, + { F_MSIXADDRLPERR, "MSI-X AddrL SRAM parity error" }, + { F_MSIXSTIPERR, "MSI-X STI SRAM parity error" }, + { F_MSTTIMEOUTPERR, "Master timeout FIFO parity error" }, + { F_MSTGRPPERR, "Master response read queue SRAM parity error" }, { 0 } }; + struct intr_info pcie_intr_info = { + .name = "PCIE_INT_CAUSE", + .cause_reg = A_PCIE_INT_CAUSE, + .enable_reg = A_PCIE_INT_ENABLE, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; + bool fatal = false; - int fat; + if (is_t4(adap)) { + fatal |= t4_handle_intr(adap, &sysbus_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &pcie_port_intr_info, 0, verbose); - if (is_t4(adapter)) - fat = t4_handle_intr_status(adapter, - A_PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, - sysbus_intr_info) + - t4_handle_intr_status(adapter, - A_PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, - pcie_port_intr_info) + - t4_handle_intr_status(adapter, A_PCIE_INT_CAUSE, - pcie_intr_info); - else - fat = t4_handle_intr_status(adapter, A_PCIE_INT_CAUSE, - t5_pcie_intr_info); - if (fat) - t4_fatal_err(adapter); + pcie_intr_info.fatal = 0x3fffffc0; + pcie_intr_info.details = 
pcie_intr_details; + } else { + pcie_intr_info.fatal = is_t5(adap) ? 0xbfffff40 : 0x9fffff40; + pcie_intr_info.details = t5_pcie_intr_details; + } + fatal |= t4_handle_intr(adap, &pcie_intr_info, 0, verbose); + + return (fatal); } /* * TP interrupt handler. */ -static void tp_intr_handler(struct adapter *adapter) +static bool tp_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info tp_intr_info[] = { - { 0x3fffffff, "TP parity error", -1, 1 }, - { F_FLMTXFLSTEMPTY, "TP out of Tx pages", -1, 1 }, + static const struct intr_details tp_intr_details[] = { + { 0x3fffffff, "TP parity error" }, + { F_FLMTXFLSTEMPTY, "TP out of Tx pages" }, { 0 } }; + static const struct intr_info tp_intr_info = { + .name = "TP_INT_CAUSE", + .cause_reg = A_TP_INT_CAUSE, + .enable_reg = A_TP_INT_ENABLE, + .fatal = 0x7fffffff, + .details = tp_intr_details, + .actions = NULL, + }; - if (t4_handle_intr_status(adapter, A_TP_INT_CAUSE, tp_intr_info)) - t4_fatal_err(adapter); + return (t4_handle_intr(adap, &tp_intr_info, 0, verbose)); } /* * SGE interrupt handler. */ -static void sge_intr_handler(struct adapter *adapter) +static bool sge_intr_handler(struct adapter *adap, int arg, bool verbose) { - u64 v; - u32 err; - - static const struct intr_info sge_intr_info[] = { + static const struct intr_info sge_int1_info = { + .name = "SGE_INT_CAUSE1", + .cause_reg = A_SGE_INT_CAUSE1, + .enable_reg = A_SGE_INT_ENABLE1, + .fatal = 0xffffffff, + .details = NULL, + .actions = NULL, + }; + static const struct intr_info sge_int2_info = { + .name = "SGE_INT_CAUSE2", + .cause_reg = A_SGE_INT_CAUSE2, + .enable_reg = A_SGE_INT_ENABLE2, + .fatal = 0xffffffff, + .details = NULL, + .actions = NULL, + }; + static const struct intr_details sge_int3_details[] = { + { F_ERR_FLM_DBP, + "DBP pointer delivery for invalid context or QID" }, + { F_ERR_FLM_IDMA1 | F_ERR_FLM_IDMA0, + "Invalid QID or header request by IDMA" }, + { F_ERR_FLM_HINT, "FLM hint is for invalid context or QID" }, + { F_ERR_PCIE_ERROR3, "SGE PCIe error for DBP thread 3" }, + { F_ERR_PCIE_ERROR2, "SGE PCIe error for DBP thread 2" }, + { F_ERR_PCIE_ERROR1, "SGE PCIe error for DBP thread 1" }, + { F_ERR_PCIE_ERROR0, "SGE PCIe error for DBP thread 0" }, + { F_ERR_TIMER_ABOVE_MAX_QID, + "SGE GTS with timer 0-5 for IQID > 1023" }, { F_ERR_CPL_EXCEED_IQE_SIZE, - "SGE received CPL exceeding IQE size", -1, 1 }, - { F_ERR_INVALID_CIDX_INC, - "SGE GTS CIDX increment too large", -1, 0 }, - { F_ERR_CPL_OPCODE_0, "SGE received 0-length CPL", -1, 0 }, - { F_DBFIFO_LP_INT, NULL, -1, 0, t4_db_full }, + "SGE received CPL exceeding IQE size" }, + { F_ERR_INVALID_CIDX_INC, "SGE GTS CIDX increment too large" }, + { F_ERR_ITP_TIME_PAUSED, "SGE ITP error" }, + { F_ERR_CPL_OPCODE_0, "SGE received 0-length CPL" }, + { F_ERR_DROPPED_DB, "SGE DB dropped" }, { F_ERR_DATA_CPL_ON_HIGH_QID1 | F_ERR_DATA_CPL_ON_HIGH_QID0, - "SGE IQID > 1023 received CPL for FL", -1, 0 }, - { F_ERR_BAD_DB_PIDX3, "SGE DBP 3 pidx increment too large", -1, - 0 }, - { F_ERR_BAD_DB_PIDX2, "SGE DBP 2 pidx increment too large", -1, - 0 }, - { F_ERR_BAD_DB_PIDX1, "SGE DBP 1 pidx increment too large", -1, - 0 }, - { F_ERR_BAD_DB_PIDX0, "SGE DBP 0 pidx increment too large", -1, - 0 }, + "SGE IQID > 1023 received CPL for FL" }, + { F_ERR_BAD_DB_PIDX3 | F_ERR_BAD_DB_PIDX2 | F_ERR_BAD_DB_PIDX1 | + F_ERR_BAD_DB_PIDX0, "SGE DBP pidx increment too large" }, + { F_ERR_ING_PCIE_CHAN, "SGE Ingress PCIe channel mismatch" }, { F_ERR_ING_CTXT_PRIO, - "SGE too many priority ingress contexts", -1, 0 }, - { 
F_INGRESS_SIZE_ERR, "SGE illegal ingress QID", -1, 0 }, - { F_EGRESS_SIZE_ERR, "SGE illegal egress QID", -1, 0 }, - { F_ERR_PCIE_ERROR0 | F_ERR_PCIE_ERROR1 | - F_ERR_PCIE_ERROR2 | F_ERR_PCIE_ERROR3, - "SGE PCIe error for a DBP thread", -1, 0 }, - { 0 } - }; - - static const struct intr_info t4t5_sge_intr_info[] = { - { F_ERR_DROPPED_DB, NULL, -1, 0, t4_db_dropped }, - { F_DBFIFO_HP_INT, NULL, -1, 0, t4_db_full }, + "Ingress context manager priority user error" }, { F_ERR_EGR_CTXT_PRIO, - "SGE too many priority egress contexts", -1, 0 }, + "Egress context manager priority user error" }, + { F_DBFIFO_HP_INT, "High priority DB FIFO threshold reached" }, + { F_DBFIFO_LP_INT, "Low priority DB FIFO threshold reached" }, + { F_REG_ADDRESS_ERR, "Undefined SGE register accessed" }, + { F_INGRESS_SIZE_ERR, "SGE illegal ingress QID" }, + { F_EGRESS_SIZE_ERR, "SGE illegal egress QID" }, + { 0x0000000f, "SGE context access for invalid queue" }, { 0 } }; - - /* - * For now, treat below interrupts as fatal so that we disable SGE and - * get better debug */ - static const struct intr_info t6_sge_intr_info[] = { + static const struct intr_details t6_sge_int3_details[] = { + { F_ERR_FLM_DBP, + "DBP pointer delivery for invalid context or QID" }, + { F_ERR_FLM_IDMA1 | F_ERR_FLM_IDMA0, + "Invalid QID or header request by IDMA" }, + { F_ERR_FLM_HINT, "FLM hint is for invalid context or QID" }, + { F_ERR_PCIE_ERROR3, "SGE PCIe error for DBP thread 3" }, + { F_ERR_PCIE_ERROR2, "SGE PCIe error for DBP thread 2" }, + { F_ERR_PCIE_ERROR1, "SGE PCIe error for DBP thread 1" }, + { F_ERR_PCIE_ERROR0, "SGE PCIe error for DBP thread 0" }, + { F_ERR_TIMER_ABOVE_MAX_QID, + "SGE GTS with timer 0-5 for IQID > 1023" }, + { F_ERR_CPL_EXCEED_IQE_SIZE, + "SGE received CPL exceeding IQE size" }, + { F_ERR_INVALID_CIDX_INC, "SGE GTS CIDX increment too large" }, + { F_ERR_ITP_TIME_PAUSED, "SGE ITP error" }, + { F_ERR_CPL_OPCODE_0, "SGE received 0-length CPL" }, + { F_ERR_DROPPED_DB, "SGE DB dropped" }, + { F_ERR_DATA_CPL_ON_HIGH_QID1 | F_ERR_DATA_CPL_ON_HIGH_QID0, + "SGE IQID > 1023 received CPL for FL" }, + { F_ERR_BAD_DB_PIDX3 | F_ERR_BAD_DB_PIDX2 | F_ERR_BAD_DB_PIDX1 | + F_ERR_BAD_DB_PIDX0, "SGE DBP pidx increment too large" }, + { F_ERR_ING_PCIE_CHAN, "SGE Ingress PCIe channel mismatch" }, + { F_ERR_ING_CTXT_PRIO, + "Ingress context manager priority user error" }, + { F_ERR_EGR_CTXT_PRIO, + "Egress context manager priority user error" }, + { F_DBP_TBUF_FULL, "SGE DBP tbuf full" }, { F_FATAL_WRE_LEN, - "SGE Actual WRE packet is less than advertized length", - -1, 1 }, + "SGE WRE packet less than advertized length" }, + { F_REG_ADDRESS_ERR, "Undefined SGE register accessed" }, + { F_INGRESS_SIZE_ERR, "SGE illegal ingress QID" }, + { F_EGRESS_SIZE_ERR, "SGE illegal egress QID" }, + { 0x0000000f, "SGE context access for invalid queue" }, { 0 } }; + struct intr_info sge_int3_info = { + .name = "SGE_INT_CAUSE3", + .cause_reg = A_SGE_INT_CAUSE3, + .enable_reg = A_SGE_INT_ENABLE3, + .fatal = F_ERR_CPL_EXCEED_IQE_SIZE, + .details = NULL, + .actions = NULL, + }; + static const struct intr_info sge_int4_info = { + .name = "SGE_INT_CAUSE4", + .cause_reg = A_SGE_INT_CAUSE4, + .enable_reg = A_SGE_INT_ENABLE4, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; + static const struct intr_info sge_int5_info = { + .name = "SGE_INT_CAUSE5", + .cause_reg = A_SGE_INT_CAUSE5, + .enable_reg = A_SGE_INT_ENABLE5, + .fatal = 0xffffffff, + .details = NULL, + .actions = NULL, + }; + static const struct intr_info sge_int6_info = { + .name = 
"SGE_INT_CAUSE6", + .cause_reg = A_SGE_INT_CAUSE6, + .enable_reg = A_SGE_INT_ENABLE6, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; + + bool fatal; + u32 v; - v = (u64)t4_read_reg(adapter, A_SGE_INT_CAUSE1) | - ((u64)t4_read_reg(adapter, A_SGE_INT_CAUSE2) << 32); - if (v) { - CH_ALERT(adapter, "SGE parity error (%#llx)\n", - (unsigned long long)v); - t4_write_reg(adapter, A_SGE_INT_CAUSE1, v); - t4_write_reg(adapter, A_SGE_INT_CAUSE2, v >> 32); + if (chip_id(adap) <= CHELSIO_T5) { + sge_int3_info.details = sge_int3_details; + } else { + sge_int3_info.details = t6_sge_int3_details; } - v |= t4_handle_intr_status(adapter, A_SGE_INT_CAUSE3, sge_intr_info); - if (chip_id(adapter) <= CHELSIO_T5) - v |= t4_handle_intr_status(adapter, A_SGE_INT_CAUSE3, - t4t5_sge_intr_info); - else - v |= t4_handle_intr_status(adapter, A_SGE_INT_CAUSE3, - t6_sge_intr_info); + fatal = false; + fatal |= t4_handle_intr(adap, &sge_int1_info, 0, verbose); + fatal |= t4_handle_intr(adap, &sge_int2_info, 0, verbose); + fatal |= t4_handle_intr(adap, &sge_int3_info, 0, verbose); + fatal |= t4_handle_intr(adap, &sge_int4_info, 0, verbose); + if (chip_id(adap) >= CHELSIO_T5) + fatal |= t4_handle_intr(adap, &sge_int5_info, 0, verbose); + if (chip_id(adap) >= CHELSIO_T6) + fatal |= t4_handle_intr(adap, &sge_int6_info, 0, verbose); - err = t4_read_reg(adapter, A_SGE_ERROR_STATS); - if (err & F_ERROR_QID_VALID) { - CH_ERR(adapter, "SGE error for queue %u\n", G_ERROR_QID(err)); - if (err & F_UNCAPTURED_ERROR) - CH_ERR(adapter, "SGE UNCAPTURED_ERROR set (clearing)\n"); - t4_write_reg(adapter, A_SGE_ERROR_STATS, F_ERROR_QID_VALID | - F_UNCAPTURED_ERROR); + v = t4_read_reg(adap, A_SGE_ERROR_STATS); + if (v & F_ERROR_QID_VALID) { + CH_ERR(adap, "SGE error for QID %u\n", G_ERROR_QID(v)); + if (v & F_UNCAPTURED_ERROR) + CH_ERR(adap, "SGE UNCAPTURED_ERROR set (clearing)\n"); + t4_write_reg(adap, A_SGE_ERROR_STATS, + F_ERROR_QID_VALID | F_UNCAPTURED_ERROR); } - if (v != 0) - t4_fatal_err(adapter); + return (fatal); } -#define CIM_OBQ_INTR (F_OBQULP0PARERR | F_OBQULP1PARERR | F_OBQULP2PARERR |\ - F_OBQULP3PARERR | F_OBQSGEPARERR | F_OBQNCSIPARERR) -#define CIM_IBQ_INTR (F_IBQTP0PARERR | F_IBQTP1PARERR | F_IBQULPPARERR |\ - F_IBQSGEHIPARERR | F_IBQSGELOPARERR | F_IBQNCSIPARERR) - /* * CIM interrupt handler. 
*/ -static void cim_intr_handler(struct adapter *adapter) -{ - static const struct intr_info cim_intr_info[] = { - { F_PREFDROPINT, "CIM control register prefetch drop", -1, 1 }, - { CIM_OBQ_INTR, "CIM OBQ parity error", -1, 1 }, - { CIM_IBQ_INTR, "CIM IBQ parity error", -1, 1 }, - { F_MBUPPARERR, "CIM mailbox uP parity error", -1, 1 }, - { F_MBHOSTPARERR, "CIM mailbox host parity error", -1, 1 }, - { F_TIEQINPARERRINT, "CIM TIEQ outgoing parity error", -1, 1 }, - { F_TIEQOUTPARERRINT, "CIM TIEQ incoming parity error", -1, 1 }, - { F_TIMER0INT, "CIM TIMER0 interrupt", -1, 1 }, - { 0 } +static bool cim_intr_handler(struct adapter *adap, int arg, bool verbose) +{ + static const struct intr_details cim_host_intr_details[] = { + /* T6+ */ + { F_PCIE2CIMINTFPARERR, "CIM IBQ PCIe interface parity error" }, + + /* T5+ */ + { F_MA_CIM_INTFPERR, "MA2CIM interface parity error" }, + { F_PLCIM_MSTRSPDATAPARERR, + "PL2CIM master response data parity error" }, + { F_NCSI2CIMINTFPARERR, "CIM IBQ NC-SI interface parity error" }, + { F_SGE2CIMINTFPARERR, "CIM IBQ SGE interface parity error" }, + { F_ULP2CIMINTFPARERR, "CIM IBQ ULP_TX interface parity error" }, + { F_TP2CIMINTFPARERR, "CIM IBQ TP interface parity error" }, + { F_OBQSGERX1PARERR, "CIM OBQ SGE1_RX parity error" }, + { F_OBQSGERX0PARERR, "CIM OBQ SGE0_RX parity error" }, + + /* T4+ */ + { F_TIEQOUTPARERRINT, "CIM TIEQ outgoing FIFO parity error" }, + { F_TIEQINPARERRINT, "CIM TIEQ incoming FIFO parity error" }, + { F_MBHOSTPARERR, "CIM mailbox host read parity error" }, + { F_MBUPPARERR, "CIM mailbox uP parity error" }, + { F_IBQTP0PARERR, "CIM IBQ TP0 parity error" }, + { F_IBQTP1PARERR, "CIM IBQ TP1 parity error" }, + { F_IBQULPPARERR, "CIM IBQ ULP parity error" }, + { F_IBQSGELOPARERR, "CIM IBQ SGE_LO parity error" }, + { F_IBQSGEHIPARERR | F_IBQPCIEPARERR, /* same bit */ + "CIM IBQ PCIe/SGE_HI parity error" }, + { F_IBQNCSIPARERR, "CIM IBQ NC-SI parity error" }, + { F_OBQULP0PARERR, "CIM OBQ ULP0 parity error" }, + { F_OBQULP1PARERR, "CIM OBQ ULP1 parity error" }, + { F_OBQULP2PARERR, "CIM OBQ ULP2 parity error" }, + { F_OBQULP3PARERR, "CIM OBQ ULP3 parity error" }, + { F_OBQSGEPARERR, "CIM OBQ SGE parity error" }, + { F_OBQNCSIPARERR, "CIM OBQ NC-SI parity error" }, + { F_TIMER1INT, "CIM TIMER1 interrupt" }, + { F_TIMER0INT, "CIM TIMER0 interrupt" }, + { F_PREFDROPINT, "CIM control register prefetch drop" }, + { 0} }; - static const struct intr_info cim_upintr_info[] = { - { F_RSVDSPACEINT, "CIM reserved space access", -1, 1 }, - { F_ILLTRANSINT, "CIM illegal transaction", -1, 1 }, - { F_ILLWRINT, "CIM illegal write", -1, 1 }, - { F_ILLRDINT, "CIM illegal read", -1, 1 }, - { F_ILLRDBEINT, "CIM illegal read BE", -1, 1 }, - { F_ILLWRBEINT, "CIM illegal write BE", -1, 1 }, - { F_SGLRDBOOTINT, "CIM single read from boot space", -1, 1 }, - { F_SGLWRBOOTINT, "CIM single write to boot space", -1, 1 }, - { F_BLKWRBOOTINT, "CIM block write to boot space", -1, 1 }, - { F_SGLRDFLASHINT, "CIM single read from flash space", -1, 1 }, - { F_SGLWRFLASHINT, "CIM single write to flash space", -1, 1 }, - { F_BLKWRFLASHINT, "CIM block write to flash space", -1, 1 }, - { F_SGLRDEEPROMINT, "CIM single EEPROM read", -1, 1 }, - { F_SGLWREEPROMINT, "CIM single EEPROM write", -1, 1 }, - { F_BLKRDEEPROMINT, "CIM block EEPROM read", -1, 1 }, - { F_BLKWREEPROMINT, "CIM block EEPROM write", -1, 1 }, - { F_SGLRDCTLINT , "CIM single read from CTL space", -1, 1 }, - { F_SGLWRCTLINT , "CIM single write to CTL space", -1, 1 }, - { F_BLKRDCTLINT , "CIM block read from CTL 
space", -1, 1 }, - { F_BLKWRCTLINT , "CIM block write to CTL space", -1, 1 }, - { F_SGLRDPLINT , "CIM single read from PL space", -1, 1 }, - { F_SGLWRPLINT , "CIM single write to PL space", -1, 1 }, - { F_BLKRDPLINT , "CIM block read from PL space", -1, 1 }, - { F_BLKWRPLINT , "CIM block write to PL space", -1, 1 }, - { F_REQOVRLOOKUPINT , "CIM request FIFO overwrite", -1, 1 }, - { F_RSPOVRLOOKUPINT , "CIM response FIFO overwrite", -1, 1 }, - { F_TIMEOUTINT , "CIM PIF timeout", -1, 1 }, - { F_TIMEOUTMAINT , "CIM PIF MA timeout", -1, 1 }, - { 0 } + struct intr_info cim_host_intr_info = { + .name = "CIM_HOST_INT_CAUSE", + .cause_reg = A_CIM_HOST_INT_CAUSE, + .enable_reg = A_CIM_HOST_INT_ENABLE, + .fatal = 0, + .details = cim_host_intr_details, + .actions = NULL, + }; + static const struct intr_details cim_host_upacc_intr_details[] = { + { F_EEPROMWRINT, "CIM EEPROM came out of busy state" }, + { F_TIMEOUTMAINT, "CIM PIF MA timeout" }, + { F_TIMEOUTINT, "CIM PIF timeout" }, + { F_RSPOVRLOOKUPINT, "CIM response FIFO overwrite" }, + { F_REQOVRLOOKUPINT, "CIM request FIFO overwrite" }, + { F_BLKWRPLINT, "CIM block write to PL space" }, + { F_BLKRDPLINT, "CIM block read from PL space" }, + { F_SGLWRPLINT, + "CIM single write to PL space with illegal BEs" }, + { F_SGLRDPLINT, + "CIM single read from PL space with illegal BEs" }, + { F_BLKWRCTLINT, "CIM block write to CTL space" }, + { F_BLKRDCTLINT, "CIM block read from CTL space" }, + { F_SGLWRCTLINT, + "CIM single write to CTL space with illegal BEs" }, + { F_SGLRDCTLINT, + "CIM single read from CTL space with illegal BEs" }, + { F_BLKWREEPROMINT, "CIM block write to EEPROM space" }, + { F_BLKRDEEPROMINT, "CIM block read from EEPROM space" }, + { F_SGLWREEPROMINT, + "CIM single write to EEPROM space with illegal BEs" }, + { F_SGLRDEEPROMINT, + "CIM single read from EEPROM space with illegal BEs" }, + { F_BLKWRFLASHINT, "CIM block write to flash space" }, + { F_BLKRDFLASHINT, "CIM block read from flash space" }, + { F_SGLWRFLASHINT, "CIM single write to flash space" }, + { F_SGLRDFLASHINT, + "CIM single read from flash space with illegal BEs" }, + { F_BLKWRBOOTINT, "CIM block write to boot space" }, + { F_BLKRDBOOTINT, "CIM block read from boot space" }, + { F_SGLWRBOOTINT, "CIM single write to boot space" }, + { F_SGLRDBOOTINT, + "CIM single read from boot space with illegal BEs" }, + { F_ILLWRBEINT, "CIM illegal write BEs" }, + { F_ILLRDBEINT, "CIM illegal read BEs" }, + { F_ILLRDINT, "CIM illegal read" }, + { F_ILLWRINT, "CIM illegal write" }, + { F_ILLTRANSINT, "CIM illegal transaction" }, + { F_RSVDSPACEINT, "CIM reserved space access" }, + {0} + }; + static const struct intr_info cim_host_upacc_intr_info = { + .name = "CIM_HOST_UPACC_INT_CAUSE", + .cause_reg = A_CIM_HOST_UPACC_INT_CAUSE, + .enable_reg = A_CIM_HOST_UPACC_INT_ENABLE, + .fatal = 0x3fffeeff, + .details = cim_host_upacc_intr_details, + .actions = NULL, + }; + static const struct intr_info cim_pf_host_intr_info = { + .name = "CIM_PF_HOST_INT_CAUSE", + .cause_reg = MYPF_REG(A_CIM_PF_HOST_INT_CAUSE), + .enable_reg = MYPF_REG(A_CIM_PF_HOST_INT_ENABLE), + .fatal = 0, + .details = NULL, + .actions = NULL, }; u32 val, fw_err; - int fat; + bool fatal; - fw_err = t4_read_reg(adapter, A_PCIE_FW); + fw_err = t4_read_reg(adap, A_PCIE_FW); if (fw_err & F_PCIE_FW_ERR) - t4_report_fw_error(adapter); + t4_report_fw_error(adap); - /* When the Firmware detects an internal error which normally wouldn't + /* + * When the Firmware detects an internal error which normally wouldn't * raise a Host 
Interrupt, it forces a CIM Timer0 interrupt in order * to make sure the Host sees the Firmware Crash. So if we have a * Timer0 interrupt and don't see a Firmware Crash, ignore the Timer0 * interrupt. */ - val = t4_read_reg(adapter, A_CIM_HOST_INT_CAUSE); - if (val & F_TIMER0INT) - if (!(fw_err & F_PCIE_FW_ERR) || - (G_PCIE_FW_EVAL(fw_err) != PCIE_FW_EVAL_CRASH)) - t4_write_reg(adapter, A_CIM_HOST_INT_CAUSE, - F_TIMER0INT); + val = t4_read_reg(adap, A_CIM_HOST_INT_CAUSE); + if (val & F_TIMER0INT && (!(fw_err & F_PCIE_FW_ERR) || + G_PCIE_FW_EVAL(fw_err) != PCIE_FW_EVAL_CRASH)) { + t4_write_reg(adap, A_CIM_HOST_INT_CAUSE, F_TIMER0INT); + } + + fatal = false; + if (is_t4(adap)) + cim_host_intr_info.fatal = 0x001fffe2; + else if (is_t5(adap)) + cim_host_intr_info.fatal = 0x007dffe2; + else + cim_host_intr_info.fatal = 0x007dffe6; + fatal |= t4_handle_intr(adap, &cim_host_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &cim_host_upacc_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &cim_pf_host_intr_info, 0, verbose); - fat = t4_handle_intr_status(adapter, A_CIM_HOST_INT_CAUSE, - cim_intr_info) + - t4_handle_intr_status(adapter, A_CIM_HOST_UPACC_INT_CAUSE, - cim_upintr_info); - if (fat) - t4_fatal_err(adapter); + return (fatal); } /* * ULP RX interrupt handler. */ -static void ulprx_intr_handler(struct adapter *adapter) +static bool ulprx_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info ulprx_intr_info[] = { - { F_CAUSE_CTX_1, "ULPRX channel 1 context error", -1, 1 }, - { F_CAUSE_CTX_0, "ULPRX channel 0 context error", -1, 1 }, - { 0x7fffff, "ULPRX parity error", -1, 1 }, + static const struct intr_details ulprx_intr_details[] = { + /* T5+ */ + { F_SE_CNT_MISMATCH_1, "ULPRX SE count mismatch in channel 1" }, + { F_SE_CNT_MISMATCH_0, "ULPRX SE count mismatch in channel 0" }, + + /* T4+ */ + { F_CAUSE_CTX_1, "ULPRX channel 1 context error" }, + { F_CAUSE_CTX_0, "ULPRX channel 0 context error" }, + { 0x007fffff, "ULPRX parity error" }, { 0 } }; + static const struct intr_info ulprx_intr_info = { + .name = "ULP_RX_INT_CAUSE", + .cause_reg = A_ULP_RX_INT_CAUSE, + .enable_reg = A_ULP_RX_INT_ENABLE, + .fatal = 0x07ffffff, + .details = ulprx_intr_details, + .actions = NULL, + }; + static const struct intr_info ulprx_intr2_info = { + .name = "ULP_RX_INT_CAUSE_2", + .cause_reg = A_ULP_RX_INT_CAUSE_2, + .enable_reg = A_ULP_RX_INT_ENABLE_2, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; + bool fatal = false; + + fatal |= t4_handle_intr(adap, &ulprx_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &ulprx_intr2_info, 0, verbose); - if (t4_handle_intr_status(adapter, A_ULP_RX_INT_CAUSE, ulprx_intr_info)) - t4_fatal_err(adapter); + return (fatal); } /* * ULP TX interrupt handler. 
*/ -static void ulptx_intr_handler(struct adapter *adapter) -{ - static const struct intr_info ulptx_intr_info[] = { - { F_PBL_BOUND_ERR_CH3, "ULPTX channel 3 PBL out of bounds", -1, - 0 }, - { F_PBL_BOUND_ERR_CH2, "ULPTX channel 2 PBL out of bounds", -1, - 0 }, - { F_PBL_BOUND_ERR_CH1, "ULPTX channel 1 PBL out of bounds", -1, - 0 }, - { F_PBL_BOUND_ERR_CH0, "ULPTX channel 0 PBL out of bounds", -1, - 0 }, - { 0xfffffff, "ULPTX parity error", -1, 1 }, +static bool ulptx_intr_handler(struct adapter *adap, int arg, bool verbose) +{ + static const struct intr_details ulptx_intr_details[] = { + { F_PBL_BOUND_ERR_CH3, "ULPTX channel 3 PBL out of bounds" }, + { F_PBL_BOUND_ERR_CH2, "ULPTX channel 2 PBL out of bounds" }, + { F_PBL_BOUND_ERR_CH1, "ULPTX channel 1 PBL out of bounds" }, + { F_PBL_BOUND_ERR_CH0, "ULPTX channel 0 PBL out of bounds" }, + { 0x0fffffff, "ULPTX parity error" }, { 0 } }; + static const struct intr_info ulptx_intr_info = { + .name = "ULP_TX_INT_CAUSE", + .cause_reg = A_ULP_TX_INT_CAUSE, + .enable_reg = A_ULP_TX_INT_ENABLE, + .fatal = 0x0fffffff, + .details = ulptx_intr_details, + .actions = NULL, + }; + static const struct intr_info ulptx_intr2_info = { + .name = "ULP_TX_INT_CAUSE_2", + .cause_reg = A_ULP_TX_INT_CAUSE_2, + .enable_reg = A_ULP_TX_INT_ENABLE_2, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; + bool fatal = false; + + fatal |= t4_handle_intr(adap, &ulptx_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &ulptx_intr2_info, 0, verbose); - if (t4_handle_intr_status(adapter, A_ULP_TX_INT_CAUSE, ulptx_intr_info)) - t4_fatal_err(adapter); + return (fatal); +} + +static bool pmtx_dump_dbg_stats(struct adapter *adap, int arg, bool verbose) +{ + int i; + u32 data[17]; + + t4_read_indirect(adap, A_PM_TX_DBG_CTRL, A_PM_TX_DBG_DATA, &data[0], + ARRAY_SIZE(data), A_PM_TX_DBG_STAT0); + for (i = 0; i < ARRAY_SIZE(data); i++) { + CH_ALERT(adap, " - PM_TX_DBG_STAT%u (0x%x) = 0x%08x\n", i, + A_PM_TX_DBG_STAT0 + i, data[i]); + } + + return (false); } /* * PM TX interrupt handler. 
*/ -static void pmtx_intr_handler(struct adapter *adapter) -{ - static const struct intr_info pmtx_intr_info[] = { - { F_PCMD_LEN_OVFL0, "PMTX channel 0 pcmd too large", -1, 1 }, - { F_PCMD_LEN_OVFL1, "PMTX channel 1 pcmd too large", -1, 1 }, - { F_PCMD_LEN_OVFL2, "PMTX channel 2 pcmd too large", -1, 1 }, - { F_ZERO_C_CMD_ERROR, "PMTX 0-length pcmd", -1, 1 }, - { 0xffffff0, "PMTX framing error", -1, 1 }, - { F_OESPI_PAR_ERROR, "PMTX oespi parity error", -1, 1 }, - { F_DB_OPTIONS_PAR_ERROR, "PMTX db_options parity error", -1, - 1 }, - { F_ICSPI_PAR_ERROR, "PMTX icspi parity error", -1, 1 }, - { F_C_PCMD_PAR_ERROR, "PMTX c_pcmd parity error", -1, 1}, +static bool pmtx_intr_handler(struct adapter *adap, int arg, bool verbose) +{ + static const struct intr_action pmtx_intr_actions[] = { + { 0xffffffff, 0, pmtx_dump_dbg_stats }, + { 0 }, + }; + static const struct intr_details pmtx_intr_details[] = { + { F_PCMD_LEN_OVFL0, "PMTX channel 0 pcmd too large" }, + { F_PCMD_LEN_OVFL1, "PMTX channel 1 pcmd too large" }, + { F_PCMD_LEN_OVFL2, "PMTX channel 2 pcmd too large" }, + { F_ZERO_C_CMD_ERROR, "PMTX 0-length pcmd" }, + { 0x0f000000, "PMTX icspi FIFO2X Rx framing error" }, + { 0x00f00000, "PMTX icspi FIFO Rx framing error" }, + { 0x000f0000, "PMTX icspi FIFO Tx framing error" }, + { 0x0000f000, "PMTX oespi FIFO Rx framing error" }, + { 0x00000f00, "PMTX oespi FIFO Tx framing error" }, + { 0x000000f0, "PMTX oespi FIFO2X Tx framing error" }, + { F_OESPI_PAR_ERROR, "PMTX oespi parity error" }, + { F_DB_OPTIONS_PAR_ERROR, "PMTX db_options parity error" }, + { F_ICSPI_PAR_ERROR, "PMTX icspi parity error" }, + { F_C_PCMD_PAR_ERROR, "PMTX c_pcmd parity error" }, { 0 } }; + static const struct intr_info pmtx_intr_info = { + .name = "PM_TX_INT_CAUSE", + .cause_reg = A_PM_TX_INT_CAUSE, + .enable_reg = A_PM_TX_INT_ENABLE, + .fatal = 0xffffffff, + .details = pmtx_intr_details, + .actions = pmtx_intr_actions, + }; - if (t4_handle_intr_status(adapter, A_PM_TX_INT_CAUSE, pmtx_intr_info)) - t4_fatal_err(adapter); + return (t4_handle_intr(adap, &pmtx_intr_info, 0, verbose)); } /* * PM RX interrupt handler. 
*/ -static void pmrx_intr_handler(struct adapter *adapter) -{ - static const struct intr_info pmrx_intr_info[] = { - { F_ZERO_E_CMD_ERROR, "PMRX 0-length pcmd", -1, 1 }, - { 0x3ffff0, "PMRX framing error", -1, 1 }, - { F_OCSPI_PAR_ERROR, "PMRX ocspi parity error", -1, 1 }, - { F_DB_OPTIONS_PAR_ERROR, "PMRX db_options parity error", -1, - 1 }, - { F_IESPI_PAR_ERROR, "PMRX iespi parity error", -1, 1 }, - { F_E_PCMD_PAR_ERROR, "PMRX e_pcmd parity error", -1, 1}, +static bool pmrx_intr_handler(struct adapter *adap, int arg, bool verbose) +{ + static const struct intr_details pmrx_intr_details[] = { + /* T6+ */ + { 0x18000000, "PMRX ospi overflow" }, + { F_MA_INTF_SDC_ERR, "PMRX MA interface SDC parity error" }, + { F_BUNDLE_LEN_PARERR, "PMRX bundle len FIFO parity error" }, + { F_BUNDLE_LEN_OVFL, "PMRX bundle len FIFO overflow" }, + { F_SDC_ERR, "PMRX SDC error" }, + + /* T4+ */ + { F_ZERO_E_CMD_ERROR, "PMRX 0-length pcmd" }, + { 0x003c0000, "PMRX iespi FIFO2X Rx framing error" }, + { 0x0003c000, "PMRX iespi Rx framing error" }, + { 0x00003c00, "PMRX iespi Tx framing error" }, + { 0x00000300, "PMRX ocspi Rx framing error" }, + { 0x000000c0, "PMRX ocspi Tx framing error" }, + { 0x00000030, "PMRX ocspi FIFO2X Tx framing error" }, + { F_OCSPI_PAR_ERROR, "PMRX ocspi parity error" }, + { F_DB_OPTIONS_PAR_ERROR, "PMRX db_options parity error" }, + { F_IESPI_PAR_ERROR, "PMRX iespi parity error" }, + { F_E_PCMD_PAR_ERROR, "PMRX e_pcmd parity error"}, { 0 } }; + static const struct intr_info pmrx_intr_info = { + .name = "PM_RX_INT_CAUSE", + .cause_reg = A_PM_RX_INT_CAUSE, + .enable_reg = A_PM_RX_INT_ENABLE, + .fatal = 0x1fffffff, + .details = pmrx_intr_details, + .actions = NULL, + }; - if (t4_handle_intr_status(adapter, A_PM_RX_INT_CAUSE, pmrx_intr_info)) - t4_fatal_err(adapter); + return (t4_handle_intr(adap, &pmrx_intr_info, 0, verbose)); } /* * CPL switch interrupt handler. 
*/ -static void cplsw_intr_handler(struct adapter *adapter) +static bool cplsw_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info cplsw_intr_info[] = { - { F_CIM_OP_MAP_PERR, "CPLSW CIM op_map parity error", -1, 1 }, - { F_CIM_OVFL_ERROR, "CPLSW CIM overflow", -1, 1 }, - { F_TP_FRAMING_ERROR, "CPLSW TP framing error", -1, 1 }, - { F_SGE_FRAMING_ERROR, "CPLSW SGE framing error", -1, 1 }, - { F_CIM_FRAMING_ERROR, "CPLSW CIM framing error", -1, 1 }, - { F_ZERO_SWITCH_ERROR, "CPLSW no-switch error", -1, 1 }, + static const struct intr_details cplsw_intr_details[] = { + /* T5+ */ + { F_PERR_CPL_128TO128_1, "CPLSW 128TO128 FIFO1 parity error" }, + { F_PERR_CPL_128TO128_0, "CPLSW 128TO128 FIFO0 parity error" }, + + /* T4+ */ + { F_CIM_OP_MAP_PERR, "CPLSW CIM op_map parity error" }, + { F_CIM_OVFL_ERROR, "CPLSW CIM overflow" }, + { F_TP_FRAMING_ERROR, "CPLSW TP framing error" }, + { F_SGE_FRAMING_ERROR, "CPLSW SGE framing error" }, + { F_CIM_FRAMING_ERROR, "CPLSW CIM framing error" }, + { F_ZERO_SWITCH_ERROR, "CPLSW no-switch error" }, { 0 } }; + struct intr_info cplsw_intr_info = { + .name = "CPL_INTR_CAUSE", + .cause_reg = A_CPL_INTR_CAUSE, + .enable_reg = A_CPL_INTR_ENABLE, + .fatal = 0, + .details = cplsw_intr_details, + .actions = NULL, + }; + + if (is_t4(adap)) + cplsw_intr_info.fatal = 0x2f; + else if (is_t5(adap)) + cplsw_intr_info.fatal = 0xef; + else + cplsw_intr_info.fatal = 0xff; - if (t4_handle_intr_status(adapter, A_CPL_INTR_CAUSE, cplsw_intr_info)) - t4_fatal_err(adapter); + return (t4_handle_intr(adap, &cplsw_intr_info, 0, verbose)); } +#define T4_LE_FATAL_MASK (F_PARITYERR | F_UNKNOWNCMD | F_REQQPARERR) +#define T6_LE_PERRCRC_MASK (F_PIPELINEERR | F_CLIPTCAMACCFAIL | \ + F_SRVSRAMACCFAIL | F_CLCAMCRCPARERR | F_CLCAMINTPERR | F_SSRAMINTPERR | \ + F_SRVSRAMPERR | F_VFSRAMPERR | F_TCAMINTPERR | F_TCAMCRCERR | \ + F_HASHTBLMEMACCERR | F_MAIFWRINTPERR | F_HASHTBLMEMCRCERR) +#define T6_LE_FATAL_MASK (T6_LE_PERRCRC_MASK | F_T6_UNKNOWNCMD | \ + F_TCAMACCFAIL | F_HASHTBLACCFAIL | F_CMDTIDERR | F_CMDPRSRINTERR | \ + F_TOTCNTERR | F_CLCAMFIFOERR | F_CLIPSUBERR) + /* * LE interrupt handler. 
*/ -static void le_intr_handler(struct adapter *adap) +static bool le_intr_handler(struct adapter *adap, int arg, bool verbose) { - unsigned int chip_ver = chip_id(adap); - static const struct intr_info le_intr_info[] = { - { F_LIPMISS, "LE LIP miss", -1, 0 }, - { F_LIP0, "LE 0 LIP error", -1, 0 }, - { F_PARITYERR, "LE parity error", -1, 1 }, - { F_UNKNOWNCMD, "LE unknown command", -1, 1 }, - { F_REQQPARERR, "LE request queue parity error", -1, 1 }, + static const struct intr_details le_intr_details[] = { + { F_REQQPARERR, "LE request queue parity error" }, + { F_UNKNOWNCMD, "LE unknown command" }, + { F_ACTRGNFULL, "LE active region full" }, + { F_PARITYERR, "LE parity error" }, + { F_LIPMISS, "LE LIP miss" }, + { F_LIP0, "LE 0 LIP error" }, { 0 } }; - - static const struct intr_info t6_le_intr_info[] = { - { F_T6_LIPMISS, "LE LIP miss", -1, 0 }, - { F_T6_LIP0, "LE 0 LIP error", -1, 0 }, - { F_TCAMINTPERR, "LE parity error", -1, 1 }, - { F_T6_UNKNOWNCMD, "LE unknown command", -1, 1 }, - { F_SSRAMINTPERR, "LE request queue parity error", -1, 1 }, + static const struct intr_details t6_le_intr_details[] = { + { F_CLIPSUBERR, "LE CLIP CAM reverse substitution error" }, + { F_CLCAMFIFOERR, "LE CLIP CAM internal FIFO error" }, + { F_CTCAMINVLDENT, "Invalid IPv6 CLIP TCAM entry" }, + { F_TCAMINVLDENT, "Invalid IPv6 TCAM entry" }, + { F_TOTCNTERR, "LE total active < TCAM count" }, + { F_CMDPRSRINTERR, "LE internal error in parser" }, + { F_CMDTIDERR, "Incorrect tid in LE command" }, + { F_T6_ACTRGNFULL, "LE active region full" }, + { F_T6_ACTCNTIPV6TZERO, "LE IPv6 active open TCAM counter -ve" }, + { F_T6_ACTCNTIPV4TZERO, "LE IPv4 active open TCAM counter -ve" }, + { F_T6_ACTCNTIPV6ZERO, "LE IPv6 active open counter -ve" }, + { F_T6_ACTCNTIPV4ZERO, "LE IPv4 active open counter -ve" }, + { F_HASHTBLACCFAIL, "Hash table read error (proto conflict)" }, + { F_TCAMACCFAIL, "LE TCAM access failure" }, + { F_T6_UNKNOWNCMD, "LE unknown command" }, + { F_T6_LIP0, "LE found 0 LIP during CLIP substitution" }, + { F_T6_LIPMISS, "LE CLIP lookup miss" }, + { T6_LE_PERRCRC_MASK, "LE parity/CRC error" }, { 0 } }; + struct intr_info le_intr_info = { + .name = "LE_DB_INT_CAUSE", + .cause_reg = A_LE_DB_INT_CAUSE, + .enable_reg = A_LE_DB_INT_ENABLE, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; + + if (chip_id(adap) <= CHELSIO_T5) { + le_intr_info.details = le_intr_details; + le_intr_info.fatal = T4_LE_FATAL_MASK; + if (is_t5(adap)) + le_intr_info.fatal |= F_VFPARERR; + } else { + le_intr_info.details = t6_le_intr_details; + le_intr_info.fatal = T6_LE_FATAL_MASK; + } - if (t4_handle_intr_status(adap, A_LE_DB_INT_CAUSE, - (chip_ver <= CHELSIO_T5) ? - le_intr_info : t6_le_intr_info)) - t4_fatal_err(adap); + return (t4_handle_intr(adap, &le_intr_info, 0, verbose)); } /* * MPS interrupt handler. 
*/ -static void mps_intr_handler(struct adapter *adapter) +static bool mps_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info mps_rx_intr_info[] = { - { 0xffffff, "MPS Rx parity error", -1, 1 }, + static const struct intr_details mps_rx_perr_intr_details[] = { + { 0xffffffff, "MPS Rx parity error" }, { 0 } }; - static const struct intr_info mps_tx_intr_info[] = { - { V_TPFIFO(M_TPFIFO), "MPS Tx TP FIFO parity error", -1, 1 }, - { F_NCSIFIFO, "MPS Tx NC-SI FIFO parity error", -1, 1 }, - { V_TXDATAFIFO(M_TXDATAFIFO), "MPS Tx data FIFO parity error", - -1, 1 }, - { V_TXDESCFIFO(M_TXDESCFIFO), "MPS Tx desc FIFO parity error", - -1, 1 }, - { F_BUBBLE, "MPS Tx underflow", -1, 1 }, - { F_SECNTERR, "MPS Tx SOP/EOP error", -1, 1 }, - { F_FRMERR, "MPS Tx framing error", -1, 1 }, + static const struct intr_info mps_rx_perr_intr_info = { + .name = "MPS_RX_PERR_INT_CAUSE", + .cause_reg = A_MPS_RX_PERR_INT_CAUSE, + .enable_reg = A_MPS_RX_PERR_INT_ENABLE, + .fatal = 0xffffffff, + .details = mps_rx_perr_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_tx_intr_details[] = { + { F_PORTERR, "MPS Tx destination port is disabled" }, + { F_FRMERR, "MPS Tx framing error" }, + { F_SECNTERR, "MPS Tx SOP/EOP error" }, + { F_BUBBLE, "MPS Tx underflow" }, + { V_TXDESCFIFO(M_TXDESCFIFO), "MPS Tx desc FIFO parity error" }, + { V_TXDATAFIFO(M_TXDATAFIFO), "MPS Tx data FIFO parity error" }, + { F_NCSIFIFO, "MPS Tx NC-SI FIFO parity error" }, + { V_TPFIFO(M_TPFIFO), "MPS Tx TP FIFO parity error" }, { 0 } }; - static const struct intr_info mps_trc_intr_info[] = { - { V_FILTMEM(M_FILTMEM), "MPS TRC filter parity error", -1, 1 }, - { V_PKTFIFO(M_PKTFIFO), "MPS TRC packet FIFO parity error", -1, - 1 }, - { F_MISCPERR, "MPS TRC misc parity error", -1, 1 }, + struct intr_info mps_tx_intr_info = { + .name = "MPS_TX_INT_CAUSE", + .cause_reg = A_MPS_TX_INT_CAUSE, + .enable_reg = A_MPS_TX_INT_ENABLE, + .fatal = 0x1ffff, + .details = mps_tx_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_trc_intr_details[] = { + { F_MISCPERR, "MPS TRC misc parity error" }, + { V_PKTFIFO(M_PKTFIFO), "MPS TRC packet FIFO parity error" }, + { V_FILTMEM(M_FILTMEM), "MPS TRC filter parity error" }, { 0 } }; - static const struct intr_info mps_stat_sram_intr_info[] = { - { 0x1fffff, "MPS statistics SRAM parity error", -1, 1 }, + static const struct intr_info mps_trc_intr_info = { + .name = "MPS_TRC_INT_CAUSE", + .cause_reg = A_MPS_TRC_INT_CAUSE, + .enable_reg = A_MPS_TRC_INT_ENABLE, + .fatal = F_MISCPERR | V_PKTFIFO(M_PKTFIFO) | V_FILTMEM(M_FILTMEM), + .details = mps_trc_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_stat_sram_intr_details[] = { + { 0xffffffff, "MPS statistics SRAM parity error" }, { 0 } }; - static const struct intr_info mps_stat_tx_intr_info[] = { - { 0xfffff, "MPS statistics Tx FIFO parity error", -1, 1 }, + static const struct intr_info mps_stat_sram_intr_info = { + .name = "MPS_STAT_PERR_INT_CAUSE_SRAM", + .cause_reg = A_MPS_STAT_PERR_INT_CAUSE_SRAM, + .enable_reg = A_MPS_STAT_PERR_INT_ENABLE_SRAM, + .fatal = 0x1fffffff, + .details = mps_stat_sram_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_stat_tx_intr_details[] = { + { 0xffffff, "MPS statistics Tx FIFO parity error" }, { 0 } }; - static const struct intr_info mps_stat_rx_intr_info[] = { - { 0xffffff, "MPS statistics Rx FIFO parity error", -1, 1 }, + static const struct intr_info mps_stat_tx_intr_info = { + .name = 
"MPS_STAT_PERR_INT_CAUSE_TX_FIFO", + .cause_reg = A_MPS_STAT_PERR_INT_CAUSE_TX_FIFO, + .enable_reg = A_MPS_STAT_PERR_INT_ENABLE_TX_FIFO, + .fatal = 0xffffff, + .details = mps_stat_tx_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_stat_rx_intr_details[] = { + { 0xffffff, "MPS statistics Rx FIFO parity error" }, { 0 } }; - static const struct intr_info mps_cls_intr_info[] = { - { F_MATCHSRAM, "MPS match SRAM parity error", -1, 1 }, - { F_MATCHTCAM, "MPS match TCAM parity error", -1, 1 }, - { F_HASHSRAM, "MPS hash SRAM parity error", -1, 1 }, + static const struct intr_info mps_stat_rx_intr_info = { + .name = "MPS_STAT_PERR_INT_CAUSE_RX_FIFO", + .cause_reg = A_MPS_STAT_PERR_INT_CAUSE_RX_FIFO, + .enable_reg = A_MPS_STAT_PERR_INT_ENABLE_RX_FIFO, + .fatal = 0xffffff, + .details = mps_stat_rx_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_cls_intr_details[] = { + { F_HASHSRAM, "MPS hash SRAM parity error" }, + { F_MATCHTCAM, "MPS match TCAM parity error" }, + { F_MATCHSRAM, "MPS match SRAM parity error" }, + { 0 } + }; + static const struct intr_info mps_cls_intr_info = { + .name = "MPS_CLS_INT_CAUSE", + .cause_reg = A_MPS_CLS_INT_CAUSE, + .enable_reg = A_MPS_CLS_INT_ENABLE, + .fatal = F_MATCHSRAM | F_MATCHTCAM | F_HASHSRAM, + .details = mps_cls_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_stat_sram1_intr_details[] = { + { 0xff, "MPS statistics SRAM1 parity error" }, { 0 } }; + static const struct intr_info mps_stat_sram1_intr_info = { + .name = "MPS_STAT_PERR_INT_CAUSE_SRAM1", + .cause_reg = A_MPS_STAT_PERR_INT_CAUSE_SRAM1, + .enable_reg = A_MPS_STAT_PERR_INT_ENABLE_SRAM1, + .fatal = 0xff, + .details = mps_stat_sram1_intr_details, + .actions = NULL, + }; - int fat; + bool fatal; - fat = t4_handle_intr_status(adapter, A_MPS_RX_PERR_INT_CAUSE, - mps_rx_intr_info) + - t4_handle_intr_status(adapter, A_MPS_TX_INT_CAUSE, - mps_tx_intr_info) + - t4_handle_intr_status(adapter, A_MPS_TRC_INT_CAUSE, - mps_trc_intr_info) + - t4_handle_intr_status(adapter, A_MPS_STAT_PERR_INT_CAUSE_SRAM, - mps_stat_sram_intr_info) + - t4_handle_intr_status(adapter, A_MPS_STAT_PERR_INT_CAUSE_TX_FIFO, - mps_stat_tx_intr_info) + - t4_handle_intr_status(adapter, A_MPS_STAT_PERR_INT_CAUSE_RX_FIFO, - mps_stat_rx_intr_info) + - t4_handle_intr_status(adapter, A_MPS_CLS_INT_CAUSE, - mps_cls_intr_info); + if (chip_id(adap) == CHELSIO_T6) + mps_tx_intr_info.fatal &= ~F_BUBBLE; - t4_write_reg(adapter, A_MPS_INT_CAUSE, 0); - t4_read_reg(adapter, A_MPS_INT_CAUSE); /* flush */ - if (fat) - t4_fatal_err(adapter); -} + fatal = false; + fatal |= t4_handle_intr(adap, &mps_rx_perr_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_tx_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_trc_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_stat_sram_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_stat_tx_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_stat_rx_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_cls_intr_info, 0, verbose); + if (chip_id(adap) > CHELSIO_T4) { + fatal |= t4_handle_intr(adap, &mps_stat_sram1_intr_info, 0, + verbose); + } + + t4_write_reg(adap, A_MPS_INT_CAUSE, is_t4(adap) ? 0 : 0xffffffff); + t4_read_reg(adap, A_MPS_INT_CAUSE); /* flush */ -#define MEM_INT_MASK (F_PERR_INT_CAUSE | F_ECC_CE_INT_CAUSE | \ - F_ECC_UE_INT_CAUSE) + return (fatal); + +} /* * EDC/MC interrupt handler. 
*/ -static void mem_intr_handler(struct adapter *adapter, int idx) +static bool mem_intr_handler(struct adapter *adap, int idx, bool verbose) { - static const char name[4][7] = { "EDC0", "EDC1", "MC/MC0", "MC1" }; + static const char name[4][5] = { "EDC0", "EDC1", "MC0", "MC1" }; + unsigned int count_reg, v; + static const struct intr_details mem_intr_details[] = { + { F_ECC_UE_INT_CAUSE, "Uncorrectable ECC data error(s)" }, + { F_ECC_CE_INT_CAUSE, "Correctable ECC data error(s)" }, + { F_PERR_INT_CAUSE, "FIFO parity error" }, + { 0 } + }; + struct intr_info ii = { + .fatal = F_PERR_INT_CAUSE | F_ECC_UE_INT_CAUSE, + .details = mem_intr_details, + .actions = NULL, + }; + bool fatal; + + switch (idx) { + case MEM_EDC0: + ii.name = "EDC0_INT_CAUSE"; + ii.cause_reg = EDC_REG(A_EDC_INT_CAUSE, 0); + ii.enable_reg = EDC_REG(A_EDC_INT_ENABLE, 0); + count_reg = EDC_REG(A_EDC_ECC_STATUS, 0); + break; + case MEM_EDC1: + ii.name = "EDC1_INT_CAUSE"; + ii.cause_reg = EDC_REG(A_EDC_INT_CAUSE, 1); + ii.enable_reg = EDC_REG(A_EDC_INT_ENABLE, 1); + count_reg = EDC_REG(A_EDC_ECC_STATUS, 1); + break; + case MEM_MC0: + ii.name = "MC0_INT_CAUSE"; + if (is_t4(adap)) { + ii.cause_reg = A_MC_INT_CAUSE; + ii.enable_reg = A_MC_INT_ENABLE; + count_reg = A_MC_ECC_STATUS; + } else { + ii.cause_reg = A_MC_P_INT_CAUSE; + ii.enable_reg = A_MC_P_INT_ENABLE; + count_reg = A_MC_P_ECC_STATUS; + } + break; + case MEM_MC1: + ii.name = "MC1_INT_CAUSE"; + ii.cause_reg = MC_REG(A_MC_P_INT_CAUSE, 1); + ii.enable_reg = MC_REG(A_MC_P_INT_ENABLE, 1); + count_reg = MC_REG(A_MC_P_ECC_STATUS, 1); + break; + } - unsigned int addr, cnt_addr, v; + fatal = t4_handle_intr(adap, &ii, 0, verbose); - if (idx <= MEM_EDC1) { - addr = EDC_REG(A_EDC_INT_CAUSE, idx); - cnt_addr = EDC_REG(A_EDC_ECC_STATUS, idx); - } else if (idx == MEM_MC) { - if (is_t4(adapter)) { - addr = A_MC_INT_CAUSE; - cnt_addr = A_MC_ECC_STATUS; - } else { - addr = A_MC_P_INT_CAUSE; - cnt_addr = A_MC_P_ECC_STATUS; + v = t4_read_reg(adap, count_reg); + if (v != 0) { + if (G_ECC_UECNT(v) != 0) { + CH_ALERT(adap, + "%s: %u uncorrectable ECC data error(s)\n", + name[idx], G_ECC_UECNT(v)); } - } else { - addr = MC_REG(A_MC_P_INT_CAUSE, 1); - cnt_addr = MC_REG(A_MC_P_ECC_STATUS, 1); + if (G_ECC_CECNT(v) != 0) { + if (idx <= MEM_EDC1) + t4_edc_err_read(adap, idx); + CH_WARN_RATELIMIT(adap, + "%s: %u correctable ECC data error(s)\n", + name[idx], G_ECC_CECNT(v)); + } + t4_write_reg(adap, count_reg, 0xffffffff); } - v = t4_read_reg(adapter, addr) & MEM_INT_MASK; - if (v & F_PERR_INT_CAUSE) - CH_ALERT(adapter, "%s FIFO parity error\n", - name[idx]); - if (v & F_ECC_CE_INT_CAUSE) { - u32 cnt = G_ECC_CECNT(t4_read_reg(adapter, cnt_addr)); + return (fatal); +} - if (idx <= MEM_EDC1) - t4_edc_err_read(adapter, idx); +static bool ma_wrap_status(struct adapter *adap, int arg, bool verbose) +{ + u32 v; - t4_write_reg(adapter, cnt_addr, V_ECC_CECNT(M_ECC_CECNT)); - CH_WARN_RATELIMIT(adapter, - "%u %s correctable ECC data error%s\n", - cnt, name[idx], cnt > 1 ? "s" : ""); - } - if (v & F_ECC_UE_INT_CAUSE) - CH_ALERT(adapter, - "%s uncorrectable ECC data error\n", name[idx]); + v = t4_read_reg(adap, A_MA_INT_WRAP_STATUS); + CH_ALERT(adap, + "MA address wrap-around error by client %u to address %#x\n", + G_MEM_WRAP_CLIENT_NUM(v), G_MEM_WRAP_ADDRESS(v) << 4); + t4_write_reg(adap, A_MA_INT_WRAP_STATUS, v); - t4_write_reg(adapter, addr, v); - if (v & (F_PERR_INT_CAUSE | F_ECC_UE_INT_CAUSE)) - t4_fatal_err(adapter); + return (false); } + /* * MA interrupt handler. 
*/ -static void ma_intr_handler(struct adapter *adapter) +static bool ma_intr_handler(struct adapter *adap, int arg, bool verbose) { - u32 v, status = t4_read_reg(adapter, A_MA_INT_CAUSE); + static const struct intr_action ma_intr_actions[] = { + { F_MEM_WRAP_INT_CAUSE, 0, ma_wrap_status }, + { 0 }, + }; + static const struct intr_info ma_intr_info = { + .name = "MA_INT_CAUSE", + .cause_reg = A_MA_INT_CAUSE, + .enable_reg = A_MA_INT_ENABLE, + .fatal = F_MEM_WRAP_INT_CAUSE | F_MEM_PERR_INT_CAUSE | + F_MEM_TO_INT_CAUSE, + .details = NULL, + .actions = ma_intr_actions, + }; + static const struct intr_info ma_perr_status1 = { + .name = "MA_PARITY_ERROR_STATUS1", + .cause_reg = A_MA_PARITY_ERROR_STATUS1, + .enable_reg = A_MA_PARITY_ERROR_ENABLE1, + .fatal = 0xffffffff, + .details = NULL, + .actions = NULL, + }; + static const struct intr_info ma_perr_status2 = { + .name = "MA_PARITY_ERROR_STATUS2", + .cause_reg = A_MA_PARITY_ERROR_STATUS2, + .enable_reg = A_MA_PARITY_ERROR_ENABLE2, + .fatal = 0xffffffff, + .details = NULL, + .actions = NULL, + }; + bool fatal; - if (status & F_MEM_PERR_INT_CAUSE) { - CH_ALERT(adapter, - "MA parity error, parity status %#x\n", - t4_read_reg(adapter, A_MA_PARITY_ERROR_STATUS1)); - if (is_t5(adapter)) - CH_ALERT(adapter, - "MA parity error, parity status %#x\n", - t4_read_reg(adapter, - A_MA_PARITY_ERROR_STATUS2)); - } - if (status & F_MEM_WRAP_INT_CAUSE) { - v = t4_read_reg(adapter, A_MA_INT_WRAP_STATUS); - CH_ALERT(adapter, "MA address wrap-around error by " - "client %u to address %#x\n", - G_MEM_WRAP_CLIENT_NUM(v), - G_MEM_WRAP_ADDRESS(v) << 4); - } - t4_write_reg(adapter, A_MA_INT_CAUSE, status); - t4_fatal_err(adapter); + fatal = false; + fatal |= t4_handle_intr(adap, &ma_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &ma_perr_status1, 0, verbose); + if (chip_id(adap) > CHELSIO_T4) + fatal |= t4_handle_intr(adap, &ma_perr_status2, 0, verbose); + + return (fatal); } /* * SMB interrupt handler. */ -static void smb_intr_handler(struct adapter *adap) +static bool smb_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info smb_intr_info[] = { - { F_MSTTXFIFOPARINT, "SMB master Tx FIFO parity error", -1, 1 }, - { F_MSTRXFIFOPARINT, "SMB master Rx FIFO parity error", -1, 1 }, - { F_SLVFIFOPARINT, "SMB slave FIFO parity error", -1, 1 }, + static const struct intr_details smb_intr_details[] = { + { F_MSTTXFIFOPARINT, "SMB master Tx FIFO parity error" }, + { F_MSTRXFIFOPARINT, "SMB master Rx FIFO parity error" }, + { F_SLVFIFOPARINT, "SMB slave FIFO parity error" }, { 0 } }; + static const struct intr_info smb_intr_info = { + .name = "SMB_INT_CAUSE", + .cause_reg = A_SMB_INT_CAUSE, + .enable_reg = A_SMB_INT_ENABLE, + .fatal = F_SLVFIFOPARINT | F_MSTRXFIFOPARINT | F_MSTTXFIFOPARINT, + .details = smb_intr_details, + .actions = NULL, + }; - if (t4_handle_intr_status(adap, A_SMB_INT_CAUSE, smb_intr_info)) - t4_fatal_err(adap); + return (t4_handle_intr(adap, &smb_intr_info, 0, verbose)); } /* * NC-SI interrupt handler. 
*/ -static void ncsi_intr_handler(struct adapter *adap) +static bool ncsi_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info ncsi_intr_info[] = { - { F_CIM_DM_PRTY_ERR, "NC-SI CIM parity error", -1, 1 }, - { F_MPS_DM_PRTY_ERR, "NC-SI MPS parity error", -1, 1 }, - { F_TXFIFO_PRTY_ERR, "NC-SI Tx FIFO parity error", -1, 1 }, - { F_RXFIFO_PRTY_ERR, "NC-SI Rx FIFO parity error", -1, 1 }, + static const struct intr_details ncsi_intr_details[] = { + { F_CIM_DM_PRTY_ERR, "NC-SI CIM parity error" }, + { F_MPS_DM_PRTY_ERR, "NC-SI MPS parity error" }, + { F_TXFIFO_PRTY_ERR, "NC-SI Tx FIFO parity error" }, + { F_RXFIFO_PRTY_ERR, "NC-SI Rx FIFO parity error" }, { 0 } }; + static const struct intr_info ncsi_intr_info = { + .name = "NCSI_INT_CAUSE", + .cause_reg = A_NCSI_INT_CAUSE, + .enable_reg = A_NCSI_INT_ENABLE, + .fatal = F_RXFIFO_PRTY_ERR | F_TXFIFO_PRTY_ERR | + F_MPS_DM_PRTY_ERR | F_CIM_DM_PRTY_ERR, + .details = ncsi_intr_details, + .actions = NULL, + }; - if (t4_handle_intr_status(adap, A_NCSI_INT_CAUSE, ncsi_intr_info)) - t4_fatal_err(adap); + return (t4_handle_intr(adap, &ncsi_intr_info, 0, verbose)); } /* - * XGMAC interrupt handler. + * MAC interrupt handler. */ -static void xgmac_intr_handler(struct adapter *adap, int port) +static bool mac_intr_handler(struct adapter *adap, int port, bool verbose) { - u32 v, int_cause_reg; + static const struct intr_details mac_intr_details[] = { + { F_TXFIFO_PRTY_ERR, "MAC Tx FIFO parity error" }, + { F_RXFIFO_PRTY_ERR, "MAC Rx FIFO parity error" }, + { 0 } + }; + char name[32]; + struct intr_info ii; + bool fatal = false; - if (is_t4(adap)) - int_cause_reg = PORT_REG(port, A_XGMAC_PORT_INT_CAUSE); - else - int_cause_reg = T5_PORT_REG(port, A_MAC_PORT_INT_CAUSE); + if (is_t4(adap)) { + snprintf(name, sizeof(name), "XGMAC_PORT%u_INT_CAUSE", port); + ii.name = &name[0]; + ii.cause_reg = PORT_REG(port, A_XGMAC_PORT_INT_CAUSE); + ii.enable_reg = PORT_REG(port, A_XGMAC_PORT_INT_EN); + ii.fatal = F_TXFIFO_PRTY_ERR | F_RXFIFO_PRTY_ERR, + ii.details = mac_intr_details, + ii.actions = NULL; + } else { + snprintf(name, sizeof(name), "MAC_PORT%u_INT_CAUSE", port); + ii.name = &name[0]; + ii.cause_reg = T5_PORT_REG(port, A_MAC_PORT_INT_CAUSE); + ii.enable_reg = T5_PORT_REG(port, A_MAC_PORT_INT_EN); + ii.fatal = F_TXFIFO_PRTY_ERR | F_RXFIFO_PRTY_ERR, + ii.details = mac_intr_details, + ii.actions = NULL; + } + fatal |= t4_handle_intr(adap, &ii, 0, verbose); - v = t4_read_reg(adap, int_cause_reg); + if (chip_id(adap) >= CHELSIO_T5) { + snprintf(name, sizeof(name), "MAC_PORT%u_PERR_INT_CAUSE", port); + ii.name = &name[0]; + ii.cause_reg = T5_PORT_REG(port, A_MAC_PORT_PERR_INT_CAUSE); + ii.enable_reg = T5_PORT_REG(port, A_MAC_PORT_PERR_INT_EN); + ii.fatal = 0; + ii.details = NULL; + ii.actions = NULL; + fatal |= t4_handle_intr(adap, &ii, 0, verbose); + } - v &= (F_TXFIFO_PRTY_ERR | F_RXFIFO_PRTY_ERR); - if (!v) - return; + if (chip_id(adap) >= CHELSIO_T6) { + snprintf(name, sizeof(name), "MAC_PORT%u_PERR_INT_CAUSE_100G", port); + ii.name = &name[0]; + ii.cause_reg = T5_PORT_REG(port, A_MAC_PORT_PERR_INT_CAUSE_100G); + ii.enable_reg = T5_PORT_REG(port, A_MAC_PORT_PERR_INT_EN_100G); + ii.fatal = 0; + ii.details = NULL; + ii.actions = NULL; + fatal |= t4_handle_intr(adap, &ii, 0, verbose); + } - if (v & F_TXFIFO_PRTY_ERR) - CH_ALERT(adap, "XGMAC %d Tx FIFO parity error\n", - port); - if (v & F_RXFIFO_PRTY_ERR) - CH_ALERT(adap, "XGMAC %d Rx FIFO parity error\n", - port); - t4_write_reg(adap, int_cause_reg, v); - t4_fatal_err(adap); + 
return (fatal); } -/* - * PL interrupt handler. - */ -static void pl_intr_handler(struct adapter *adap) +static bool plpl_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info pl_intr_info[] = { - { F_FATALPERR, "Fatal parity error", -1, 1 }, - { F_PERRVFID, "PL VFID_MAP parity error", -1, 1 }, + static const struct intr_details plpl_intr_details[] = { + { F_FATALPERR, "Fatal parity error" }, + { F_PERRVFID, "VFID_MAP parity error" }, { 0 } }; - - static const struct intr_info t5_pl_intr_info[] = { - { F_FATALPERR, "Fatal parity error", -1, 1 }, - { 0 } + struct intr_info plpl_intr_info = { + .name = "PL_PL_INT_CAUSE", + .cause_reg = A_PL_PL_INT_CAUSE, + .enable_reg = A_PL_PL_INT_ENABLE, + .fatal = F_FATALPERR, + .details = plpl_intr_details, + .actions = NULL, }; - if (t4_handle_intr_status(adap, A_PL_PL_INT_CAUSE, - is_t4(adap) ? - pl_intr_info : t5_pl_intr_info)) - t4_fatal_err(adap); -} + if (is_t4(adap)) + plpl_intr_info.fatal |= F_PERRVFID; -#define PF_INTR_MASK (F_PFSW | F_PFCIM) + return (t4_handle_intr(adap, &plpl_intr_info, 0, verbose)); +} /** * t4_slow_intr_handler - control path interrupt handler - * @adapter: the adapter + * @adap: the adapter + * @verbose: increased verbosity, for debug * * T4 interrupt handler for non-data global interrupt events, e.g., errors. * The designation 'slow' is because it involves register reads, while * data interrupts typically don't involve any MMIOs. */ -int t4_slow_intr_handler(struct adapter *adapter) -{ - u32 cause = t4_read_reg(adapter, A_PL_INT_CAUSE); +int t4_slow_intr_handler(struct adapter *adap, bool verbose) +{ + static const struct intr_details pl_intr_details[] = { + { F_MC1, "MC1" }, + { F_UART, "UART" }, + { F_ULP_TX, "ULP TX" }, + { F_SGE, "SGE" }, + { F_HMA, "HMA" }, + { F_CPL_SWITCH, "CPL Switch" }, + { F_ULP_RX, "ULP RX" }, + { F_PM_RX, "PM RX" }, + { F_PM_TX, "PM TX" }, + { F_MA, "MA" }, + { F_TP, "TP" }, + { F_LE, "LE" }, + { F_EDC1, "EDC1" }, + { F_EDC0, "EDC0" }, + { F_MC, "MC0" }, + { F_PCIE, "PCIE" }, + { F_PMU, "PMU" }, + { F_MAC3, "MAC3" }, + { F_MAC2, "MAC2" }, + { F_MAC1, "MAC1" }, + { F_MAC0, "MAC0" }, + { F_SMB, "SMB" }, + { F_SF, "SF" }, + { F_PL, "PL" }, + { F_NCSI, "NC-SI" }, + { F_MPS, "MPS" }, + { F_MI, "MI" }, + { F_DBG, "DBG" }, + { F_I2CM, "I2CM" }, + { F_CIM, "CIM" }, + { 0 } + }; + static const struct intr_info pl_perr_cause = { + .name = "PL_PERR_CAUSE", + .cause_reg = A_PL_PERR_CAUSE, + .enable_reg = A_PL_PERR_ENABLE, + .fatal = 0xffffffff, + .details = pl_intr_details, + .actions = NULL, + }; + static const struct intr_action pl_intr_action[] = { + { F_MC1, MEM_MC1, mem_intr_handler }, + { F_ULP_TX, -1, ulptx_intr_handler }, + { F_SGE, -1, sge_intr_handler }, + { F_CPL_SWITCH, -1, cplsw_intr_handler }, + { F_ULP_RX, -1, ulprx_intr_handler }, + { F_PM_RX, -1, pmrx_intr_handler}, + { F_PM_TX, -1, pmtx_intr_handler}, + { F_MA, -1, ma_intr_handler }, + { F_TP, -1, tp_intr_handler }, + { F_LE, -1, le_intr_handler }, + { F_EDC1, MEM_EDC1, mem_intr_handler }, + { F_EDC0, MEM_EDC0, mem_intr_handler }, + { F_MC0, MEM_MC0, mem_intr_handler }, + { F_PCIE, -1, pcie_intr_handler }, + { F_MAC3, 3, mac_intr_handler}, + { F_MAC2, 2, mac_intr_handler}, + { F_MAC1, 1, mac_intr_handler}, + { F_MAC0, 0, mac_intr_handler}, + { F_SMB, -1, smb_intr_handler}, + { F_PL, -1, plpl_intr_handler }, + { F_NCSI, -1, ncsi_intr_handler}, + { F_MPS, -1, mps_intr_handler }, + { F_CIM, -1, cim_intr_handler }, + { 0 } + }; + static const struct intr_info pl_intr_info = { + .name = "PL_INT_CAUSE", + 
.cause_reg = A_PL_INT_CAUSE, + .enable_reg = A_PL_INT_ENABLE, + .fatal = 0, + .details = pl_intr_details, + .actions = pl_intr_action, + }; + bool fatal; + u32 perr; - if (!(cause & GLBL_INTR_MASK)) - return 0; - if (cause & F_CIM) - cim_intr_handler(adapter); - if (cause & F_MPS) - mps_intr_handler(adapter); - if (cause & F_NCSI) - ncsi_intr_handler(adapter); - if (cause & F_PL) - pl_intr_handler(adapter); - if (cause & F_SMB) - smb_intr_handler(adapter); - if (cause & F_MAC0) - xgmac_intr_handler(adapter, 0); - if (cause & F_MAC1) - xgmac_intr_handler(adapter, 1); - if (cause & F_MAC2) - xgmac_intr_handler(adapter, 2); - if (cause & F_MAC3) - xgmac_intr_handler(adapter, 3); - if (cause & F_PCIE) - pcie_intr_handler(adapter); - if (cause & F_MC0) - mem_intr_handler(adapter, MEM_MC); - if (is_t5(adapter) && (cause & F_MC1)) - mem_intr_handler(adapter, MEM_MC1); - if (cause & F_EDC0) - mem_intr_handler(adapter, MEM_EDC0); - if (cause & F_EDC1) - mem_intr_handler(adapter, MEM_EDC1); - if (cause & F_LE) - le_intr_handler(adapter); - if (cause & F_TP) - tp_intr_handler(adapter); - if (cause & F_MA) - ma_intr_handler(adapter); - if (cause & F_PM_TX) - pmtx_intr_handler(adapter); - if (cause & F_PM_RX) - pmrx_intr_handler(adapter); - if (cause & F_ULP_RX) - ulprx_intr_handler(adapter); - if (cause & F_CPL_SWITCH) - cplsw_intr_handler(adapter); - if (cause & F_SGE) - sge_intr_handler(adapter); - if (cause & F_ULP_TX) - ulptx_intr_handler(adapter); - - /* Clear the interrupts just processed for which we are the master. */ - t4_write_reg(adapter, A_PL_INT_CAUSE, cause & GLBL_INTR_MASK); - (void)t4_read_reg(adapter, A_PL_INT_CAUSE); /* flush */ - return 1; + perr = t4_read_reg(adap, pl_perr_cause.cause_reg); + if (verbose || perr != 0) { + t4_show_intr_info(adap, &pl_perr_cause, perr); + if (perr != 0) + t4_write_reg(adap, pl_perr_cause.cause_reg, perr); + if (verbose) + perr |= t4_read_reg(adap, pl_intr_info.enable_reg); + } + fatal = t4_handle_intr(adap, &pl_intr_info, perr, verbose); + if (fatal) + t4_fatal_err(adap, false); + + return (0); } +#define PF_INTR_MASK (F_PFSW | F_PFCIM) + /** * t4_intr_enable - enable interrupts * @adapter: the adapter whose interrupts should be enabled @@ -4757,92 +5278,131 @@ int t4_slow_intr_handler(struct adapter *adapter) * non PF-specific interrupts from the various HW modules. Only one PCI * function at a time should be doing this. */ -void t4_intr_enable(struct adapter *adapter) +void t4_intr_enable(struct adapter *adap) { u32 val = 0; - u32 whoami = t4_read_reg(adapter, A_PL_WHOAMI); - u32 pf = (chip_id(adapter) <= CHELSIO_T5 - ? 
G_SOURCEPF(whoami) - : G_T6_SOURCEPF(whoami)); - if (chip_id(adapter) <= CHELSIO_T5) + if (chip_id(adap) <= CHELSIO_T5) val = F_ERR_DROPPED_DB | F_ERR_EGR_CTXT_PRIO | F_DBFIFO_HP_INT; else val = F_ERR_PCIE_ERROR0 | F_ERR_PCIE_ERROR1 | F_FATAL_WRE_LEN; - t4_write_reg(adapter, A_SGE_INT_ENABLE3, F_ERR_CPL_EXCEED_IQE_SIZE | - F_ERR_INVALID_CIDX_INC | F_ERR_CPL_OPCODE_0 | - F_ERR_DATA_CPL_ON_HIGH_QID1 | F_INGRESS_SIZE_ERR | - F_ERR_DATA_CPL_ON_HIGH_QID0 | F_ERR_BAD_DB_PIDX3 | - F_ERR_BAD_DB_PIDX2 | F_ERR_BAD_DB_PIDX1 | - F_ERR_BAD_DB_PIDX0 | F_ERR_ING_CTXT_PRIO | - F_DBFIFO_LP_INT | F_EGRESS_SIZE_ERR | val); - t4_write_reg(adapter, MYPF_REG(A_PL_PF_INT_ENABLE), PF_INTR_MASK); - t4_set_reg_field(adapter, A_PL_INT_MAP0, 0, 1 << pf); + val |= F_ERR_CPL_EXCEED_IQE_SIZE | F_ERR_INVALID_CIDX_INC | + F_ERR_CPL_OPCODE_0 | F_ERR_DATA_CPL_ON_HIGH_QID1 | + F_INGRESS_SIZE_ERR | F_ERR_DATA_CPL_ON_HIGH_QID0 | + F_ERR_BAD_DB_PIDX3 | F_ERR_BAD_DB_PIDX2 | F_ERR_BAD_DB_PIDX1 | + F_ERR_BAD_DB_PIDX0 | F_ERR_ING_CTXT_PRIO | F_DBFIFO_LP_INT | + F_EGRESS_SIZE_ERR; + t4_set_reg_field(adap, A_SGE_INT_ENABLE3, val, val); + t4_write_reg(adap, MYPF_REG(A_PL_PF_INT_ENABLE), PF_INTR_MASK); + t4_set_reg_field(adap, A_PL_INT_MAP0, 0, 1 << adap->pf); } /** * t4_intr_disable - disable interrupts - * @adapter: the adapter whose interrupts should be disabled + * @adap: the adapter whose interrupts should be disabled * * Disable interrupts. We only disable the top-level interrupt * concentrators. The caller must be a PCI function managing global * interrupts. */ -void t4_intr_disable(struct adapter *adapter) +void t4_intr_disable(struct adapter *adap) { - u32 whoami = t4_read_reg(adapter, A_PL_WHOAMI); - u32 pf = (chip_id(adapter) <= CHELSIO_T5 - ? G_SOURCEPF(whoami) - : G_T6_SOURCEPF(whoami)); - t4_write_reg(adapter, MYPF_REG(A_PL_PF_INT_ENABLE), 0); - t4_set_reg_field(adapter, A_PL_INT_MAP0, 1 << pf, 0); + t4_write_reg(adap, MYPF_REG(A_PL_PF_INT_ENABLE), 0); + t4_set_reg_field(adap, A_PL_INT_MAP0, 1 << adap->pf, 0); } /** * t4_intr_clear - clear all interrupts - * @adapter: the adapter whose interrupts should be cleared + * @adap: the adapter whose interrupts should be cleared * * Clears all interrupts. The caller must be a PCI function managing * global interrupts. 
*/ -void t4_intr_clear(struct adapter *adapter) +void t4_intr_clear(struct adapter *adap) { - static const unsigned int cause_reg[] = { - A_SGE_INT_CAUSE1, A_SGE_INT_CAUSE2, A_SGE_INT_CAUSE3, - A_PCIE_NONFAT_ERR, A_PCIE_INT_CAUSE, - A_MA_INT_WRAP_STATUS, A_MA_PARITY_ERROR_STATUS1, A_MA_INT_CAUSE, - A_EDC_INT_CAUSE, EDC_REG(A_EDC_INT_CAUSE, 1), - A_CIM_HOST_INT_CAUSE, A_CIM_HOST_UPACC_INT_CAUSE, + static const u32 cause_reg[] = { + A_CIM_HOST_INT_CAUSE, + A_CIM_HOST_UPACC_INT_CAUSE, MYPF_REG(A_CIM_PF_HOST_INT_CAUSE), - A_TP_INT_CAUSE, - A_ULP_RX_INT_CAUSE, A_ULP_TX_INT_CAUSE, - A_PM_RX_INT_CAUSE, A_PM_TX_INT_CAUSE, - A_MPS_RX_PERR_INT_CAUSE, A_CPL_INTR_CAUSE, - MYPF_REG(A_PL_PF_INT_CAUSE), - A_PL_PL_INT_CAUSE, + EDC_REG(A_EDC_INT_CAUSE, 0), EDC_REG(A_EDC_INT_CAUSE, 1), A_LE_DB_INT_CAUSE, - }; - - unsigned int i; + A_MA_INT_WRAP_STATUS, + A_MA_PARITY_ERROR_STATUS1, + A_MA_INT_CAUSE, + A_MPS_CLS_INT_CAUSE, + A_MPS_RX_PERR_INT_CAUSE, + A_MPS_STAT_PERR_INT_CAUSE_RX_FIFO, + A_MPS_STAT_PERR_INT_CAUSE_SRAM, + A_MPS_TRC_INT_CAUSE, + A_MPS_TX_INT_CAUSE, + A_MPS_STAT_PERR_INT_CAUSE_TX_FIFO, + A_NCSI_INT_CAUSE, + A_PCIE_INT_CAUSE, + A_PCIE_NONFAT_ERR, + A_PL_PL_INT_CAUSE, + A_PM_RX_INT_CAUSE, + A_PM_TX_INT_CAUSE, + A_SGE_INT_CAUSE1, + A_SGE_INT_CAUSE2, + A_SGE_INT_CAUSE3, + A_SGE_INT_CAUSE4, + A_SMB_INT_CAUSE, + A_TP_INT_CAUSE, + A_ULP_RX_INT_CAUSE, + A_ULP_RX_INT_CAUSE_2, + A_ULP_TX_INT_CAUSE, + A_ULP_TX_INT_CAUSE_2, - for (i = 0; i < ARRAY_SIZE(cause_reg); ++i) - t4_write_reg(adapter, cause_reg[i], 0xffffffff); + MYPF_REG(A_PL_PF_INT_CAUSE), + }; + int i; + const int nchan = adap->chip_params->nchan; - t4_write_reg(adapter, is_t4(adapter) ? A_MC_INT_CAUSE : - A_MC_P_INT_CAUSE, 0xffffffff); + for (i = 0; i < ARRAY_SIZE(cause_reg); i++) + t4_write_reg(adap, cause_reg[i], 0xffffffff); - if (is_t4(adapter)) { - t4_write_reg(adapter, A_PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, - 0xffffffff); - t4_write_reg(adapter, A_PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, - 0xffffffff); - } else - t4_write_reg(adapter, A_MA_PARITY_ERROR_STATUS2, 0xffffffff); + if (is_t4(adap)) { + t4_write_reg(adap, A_PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, + 0xffffffff); + t4_write_reg(adap, A_PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, + 0xffffffff); + t4_write_reg(adap, A_MC_INT_CAUSE, 0xffffffff); + for (i = 0; i < nchan; i++) { + t4_write_reg(adap, PORT_REG(i, A_XGMAC_PORT_INT_CAUSE), + 0xffffffff); + } + } + if (chip_id(adap) >= CHELSIO_T5) { + t4_write_reg(adap, A_MA_PARITY_ERROR_STATUS2, 0xffffffff); + t4_write_reg(adap, A_MPS_STAT_PERR_INT_CAUSE_SRAM1, 0xffffffff); + t4_write_reg(adap, A_SGE_INT_CAUSE5, 0xffffffff); + t4_write_reg(adap, A_MC_P_INT_CAUSE, 0xffffffff); + if (is_t5(adap)) { + t4_write_reg(adap, MC_REG(A_MC_P_INT_CAUSE, 1), + 0xffffffff); + } + for (i = 0; i < nchan; i++) { + t4_write_reg(adap, T5_PORT_REG(i, + A_MAC_PORT_PERR_INT_CAUSE), 0xffffffff); + if (chip_id(adap) > CHELSIO_T5) { + t4_write_reg(adap, T5_PORT_REG(i, + A_MAC_PORT_PERR_INT_CAUSE_100G), + 0xffffffff); + } + t4_write_reg(adap, T5_PORT_REG(i, A_MAC_PORT_INT_CAUSE), + 0xffffffff); + } + } + if (chip_id(adap) >= CHELSIO_T6) { + t4_write_reg(adap, A_SGE_INT_CAUSE6, 0xffffffff); + } - t4_write_reg(adapter, A_PL_INT_CAUSE, GLBL_INTR_MASK); - (void) t4_read_reg(adapter, A_PL_INT_CAUSE); /* flush */ + t4_write_reg(adap, A_MPS_INT_CAUSE, is_t4(adap) ? 
0 : 0xffffffff); + t4_write_reg(adap, A_PL_PERR_CAUSE, 0xffffffff); + t4_write_reg(adap, A_PL_INT_CAUSE, 0xffffffff); + (void) t4_read_reg(adap, A_PL_INT_CAUSE); /* flush */ } /** diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index fe2fb1951282..64f995bd842f 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -1077,6 +1077,7 @@ t4_attach(device_t dev) rc = partition_resources(sc); if (rc != 0) goto done; /* error message displayed already */ + t4_intr_clear(sc); } rc = get_params__post_init(sc); @@ -2563,14 +2564,23 @@ vcxgbe_detach(device_t dev) } void -t4_fatal_err(struct adapter *sc) +t4_fatal_err(struct adapter *sc, bool fw_error) { - t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0); - t4_intr_disable(sc); - log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n", + + t4_shutdown_adapter(sc); + log(LOG_ALERT, "%s: encountered fatal error, adapter stopped.\n", device_get_nameunit(sc->dev)); if (t4_panic_on_fatal_err) panic("panic requested on fatal error"); + + if (fw_error) { + ASSERT_SYNCHRONIZED_OP(sc); + sc->flags |= ADAP_ERR; + } else { + ADAPTER_LOCK(sc); + sc->flags |= ADAP_ERR; + ADAPTER_UNLOCK(sc); + } } void @@ -10069,20 +10079,6 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, return (rc); } -void -t4_db_full(struct adapter *sc) -{ - - CXGBE_UNIMPLEMENTED(__func__); -} - -void -t4_db_dropped(struct adapter *sc) -{ - - CXGBE_UNIMPLEMENTED(__func__); -} - #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *vi, int enable) diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index 7950d8cc89ab..b24e4d125ee0 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -1394,8 +1394,12 @@ void t4_intr_err(void *arg) { struct adapter *sc = arg; + const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0; - t4_slow_intr_handler(sc); + if (sc->flags & ADAP_ERR) + return; + + t4_slow_intr_handler(sc, verbose); } /* diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 388a0bac53b5..12c431c7ec14 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -293,7 +293,7 @@ static void em_disable_aspm(struct adapter *); int em_intr(void *arg); static void em_disable_promisc(if_ctx_t ctx); -/* MSIX handlers */ +/* MSI-X handlers */ static int em_if_msix_intr_assign(if_ctx_t, int); static int em_msix_link(void *); static void em_handle_link(void *context); @@ -780,7 +780,9 @@ em_if_attach_pre(if_ctx_t ctx) scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR); scctx->isc_tx_nsegments = EM_MAX_SCATTER; scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = em_set_num_queues(ctx); - device_printf(dev, "attach_pre capping queues at %d\n", scctx->isc_ntxqsets_max); + if (bootverbose) + device_printf(dev, "attach_pre capping queues at %d\n", + scctx->isc_ntxqsets_max); if (adapter->hw.mac.type >= igb_mac_min) { int try_second_bar; @@ -1301,7 +1303,7 @@ em_if_init(if_ctx_t ctx) em_if_set_promisc(ctx, IFF_PROMISC); e1000_clear_hw_cntrs_base_generic(&adapter->hw); - /* MSI/X configuration for 82574 */ + /* MSI-X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); @@ -1427,7 +1429,7 @@ em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) /********************************************************************* * - * MSIX RX Interrupt Service routine + * MSI-X RX Interrupt Service routine * **********************************************************************/ static int @@ -1442,7 +1444,7 @@ em_msix_que(void *arg) 
/********************************************************************* * - * MSIX Link Fast Interrupt Service routine + * MSI-X Link Fast Interrupt Service routine * **********************************************************************/ static int @@ -1912,7 +1914,6 @@ em_allocate_pci_resources(if_ctx_t ctx) for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { val = pci_read_config(dev, rid, 4); if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { - adapter->io_rid = rid; break; } rid += 4; @@ -1924,8 +1925,8 @@ em_allocate_pci_resources(if_ctx_t ctx) device_printf(dev, "Unable to locate IO BAR\n"); return (ENXIO); } - adapter->ioport = bus_alloc_resource_any(dev, - SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE); + adapter->ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT, + &rid, RF_ACTIVE); if (adapter->ioport == NULL) { device_printf(dev, "Unable to allocate bus resource: " "ioport\n"); @@ -1945,7 +1946,7 @@ em_allocate_pci_resources(if_ctx_t ctx) /********************************************************************* * - * Setup the MSIX Interrupt handlers + * Set up the MSI-X Interrupt handlers * **********************************************************************/ static int @@ -1974,7 +1975,7 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix) * Set the bit to enable interrupt * in E1000_IMS -- bits 20 and 21 * are for RX0 and RX1, note this has - * NOTHING to do with the MSIX vector + * NOTHING to do with the MSI-X vector */ if (adapter->hw.mac.type == e1000_82574) { rx_que->eims = 1 << (20 + i); @@ -2001,7 +2002,7 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix) * Set the bit to enable interrupt * in E1000_IMS -- bits 22 and 23 * are for TX0 and TX1, note this has - * NOTHING to do with the MSIX vector + * NOTHING to do with the MSI-X vector */ if (adapter->hw.mac.type == e1000_82574) { tx_que->eims = 1 << (22 + i); @@ -2050,7 +2051,7 @@ igb_configure_queues(struct adapter *adapter) E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); - /* Turn on MSIX */ + /* Turn on MSI-X */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_i350: @@ -2184,7 +2185,7 @@ em_free_pci_resources(if_ctx_t ctx) struct em_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); - /* Release all msix queue resources */ + /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); @@ -2192,24 +2193,26 @@ em_free_pci_resources(if_ctx_t ctx) iflib_irq_free(ctx, &que->que_irq); } - /* First release all the interrupt resources */ if (adapter->memory != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->memory); + rman_get_rid(adapter->memory), adapter->memory); adapter->memory = NULL; } if (adapter->flash != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, - EM_FLASH, adapter->flash); + rman_get_rid(adapter->flash), adapter->flash); adapter->flash = NULL; } - if (adapter->ioport != NULL) + + if (adapter->ioport != NULL) { bus_release_resource(dev, SYS_RES_IOPORT, - adapter->io_rid, adapter->ioport); + rman_get_rid(adapter->ioport), adapter->ioport); + adapter->ioport = NULL; + } } -/* Setup MSI or MSI/X */ +/* Set up MSI or MSI-X */ static int em_setup_msix(if_ctx_t ctx) { @@ -2851,7 +2854,9 @@ em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs txr->tx_paddr = paddrs[i*ntxqs]; } - device_printf(iflib_get_dev(ctx), "allocated for %d tx_queues\n", adapter->tx_num_queues); + if (bootverbose) + device_printf(iflib_get_dev(ctx), + "allocated for %d tx_queues\n", 
adapter->tx_num_queues); return (0); fail: em_if_queues_free(ctx); @@ -2889,8 +2894,10 @@ em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs rxr->rx_base = (union e1000_rx_desc_extended *)vaddrs[i*nrxqs]; rxr->rx_paddr = paddrs[i*nrxqs]; } - - device_printf(iflib_get_dev(ctx), "allocated for %d rx_queues\n", adapter->rx_num_queues); + + if (bootverbose) + device_printf(iflib_get_dev(ctx), + "allocated for %d rx_queues\n", adapter->rx_num_queues); return (0); fail: @@ -3133,7 +3140,7 @@ em_initialize_receive_unit(if_ctx_t ctx) rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* - * When using MSIX interrupts we need to throttle + * When using MSI-X interrupts we need to throttle * using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { @@ -4007,7 +4014,7 @@ em_add_hw_stats(struct adapter *adapter) "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, - "Link MSIX IRQ Handled"); + "Link MSI-X IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "Defragmenting mbuf chain failed"); @@ -4524,7 +4531,7 @@ em_print_debug_info(struct adapter *adapter) /* * 82574 only: - * Write a new value to the EEPROM increasing the number of MSIX + * Write a new value to the EEPROM increasing the number of MSI-X * vectors from 3 to 5, for proper multiqueue support. */ static void @@ -4539,7 +4546,7 @@ em_enable_vectors_82574(if_ctx_t ctx) printf("Current cap: %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " - "reported MSIX vectors from 3 to 5...\n"); + "reported MSI-X vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h index be172a78cbbe..f12fda8db759 100644 --- a/sys/dev/e1000/if_em.h +++ b/sys/dev/e1000/if_em.h @@ -352,8 +352,8 @@ /* * 82574 has a nonstandard address for EIAC - * and since its only used in MSIX, and in - * the em driver only 82574 uses MSIX we can + * and since its only used in MSI-X, and in + * the em driver only 82574 uses MSI-X we can * solve it just using this define. 
*/ #define EM_EIAC 0x000DC @@ -468,7 +468,6 @@ struct adapter { struct resource *memory; struct resource *flash; struct resource *ioport; - int io_rid; struct resource *res; void *tag; diff --git a/sys/dev/if_ndis/if_ndis.c b/sys/dev/if_ndis/if_ndis.c index 7a545dbc8f8f..404ae4d92d53 100644 --- a/sys/dev/if_ndis/if_ndis.c +++ b/sys/dev/if_ndis/if_ndis.c @@ -568,15 +568,6 @@ ndis_attach(device_t dev) callout_init(&sc->ndis_stat_callout, 1); mbufq_init(&sc->ndis_rxqueue, INT_MAX); /* XXXGL: sane maximum */ - if (sc->ndis_iftype == PCMCIABus) { - error = ndis_alloc_amem(sc); - if (error) { - device_printf(dev, "failed to allocate " - "attribute memory\n"); - goto fail; - } - } - /* Create sysctl registry nodes */ ndis_create_sysctls(sc); @@ -1098,9 +1089,6 @@ ndis_detach(device_t dev) if (ifp != NULL) if_free(ifp); - if (sc->ndis_iftype == PCMCIABus) - ndis_free_amem(sc); - if (sc->ndis_sc) ndis_destroy_dma(sc); diff --git a/sys/dev/if_ndis/if_ndis_pccard.c b/sys/dev/if_ndis/if_ndis_pccard.c index 36d52986691d..ee4e174ae3ef 100644 --- a/sys/dev/if_ndis/if_ndis_pccard.c +++ b/sys/dev/if_ndis/if_ndis_pccard.c @@ -74,6 +74,7 @@ MODULE_DEPEND(ndis, pccard, 1, 1, 1); static int ndis_probe_pccard (device_t); static int ndis_attach_pccard (device_t); +static int ndis_detach_pccard (device_t); static struct resource_list *ndis_get_resource_list (device_t, device_t); static int ndis_devcompare (interface_type, @@ -91,7 +92,7 @@ static device_method_t ndis_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ndis_probe_pccard), DEVMETHOD(device_attach, ndis_attach_pccard), - DEVMETHOD(device_detach, ndis_detach), + DEVMETHOD(device_detach, ndis_detach_pccard), DEVMETHOD(device_shutdown, ndis_shutdown), DEVMETHOD(device_suspend, ndis_suspend), DEVMETHOD(device_resume, ndis_resume), @@ -175,6 +176,50 @@ ndis_probe_pccard(dev) return(ENXIO); } +#define NDIS_AM_RID 3 + +static int +ndis_alloc_amem(struct ndis_softc *sc) +{ + int error, rid; + + rid = NDIS_AM_RID; + sc->ndis_res_am = bus_alloc_resource_anywhere(sc->ndis_dev, + SYS_RES_MEMORY, &rid, 0x1000, RF_ACTIVE); + + if (sc->ndis_res_am == NULL) { + device_printf(sc->ndis_dev, + "failed to allocate attribute memory\n"); + return(ENXIO); + } + sc->ndis_rescnt++; + resource_list_add(&sc->ndis_rl, SYS_RES_MEMORY, rid, + rman_get_start(sc->ndis_res_am), rman_get_end(sc->ndis_res_am), + rman_get_size(sc->ndis_res_am)); + + error = CARD_SET_MEMORY_OFFSET(device_get_parent(sc->ndis_dev), + sc->ndis_dev, rid, 0, NULL); + + if (error) { + device_printf(sc->ndis_dev, + "CARD_SET_MEMORY_OFFSET() returned 0x%x\n", error); + return(error); + } + + error = CARD_SET_RES_FLAGS(device_get_parent(sc->ndis_dev), + sc->ndis_dev, SYS_RES_MEMORY, rid, PCCARD_A_MEM_ATTR); + + if (error) { + device_printf(sc->ndis_dev, + "CARD_SET_RES_FLAGS() returned 0x%x\n", error); + return(error); + } + + sc->ndis_am_rid = rid; + + return(0); +} + /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. 
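A recurring pattern in the resource handling touched here: the bus-specific front end owns the resources it allocates (the pccard attachment now allocates the attribute memory and its new detach routine releases it), and releases are done with the RID recovered from the resource itself via rman_get_rid() rather than a RID cached in the softc, matching the removal of the io_rid field from em(4)'s softc just above. The sketch below is a minimal illustration of that attach/detach pairing under assumed names (foo_softc, foo_attach, foo_detach and the mem_res field are placeholders, not code from this patch):

/*
 * Assumed headers: <sys/param.h>, <sys/bus.h>, <sys/rman.h>,
 * <machine/resource.h>, <dev/pci/pcireg.h>.
 */
struct foo_softc {
	struct resource	*mem_res;	/* memory BAR; RID recovered at release */
};

static int
foo_attach(device_t dev)
{
	struct foo_softc *sc = device_get_softc(dev);
	int rid = PCIR_BAR(0);

	/* Allocate the BAR; the final RID lives in the resource itself. */
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
	    RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "unable to allocate memory BAR\n");
		return (ENXIO);
	}
	return (0);
}

static int
foo_detach(device_t dev)
{
	struct foo_softc *sc = device_get_softc(dev);

	if (sc->mem_res != NULL) {
		/* No cached RID needed; ask the resource for it. */
		bus_release_resource(dev, SYS_RES_MEMORY,
		    rman_get_rid(sc->mem_res), sc->mem_res);
		sc->mem_res = NULL;
	}
	return (0);
}

Releasing through rman_get_rid() keeps allocation and release in agreement even when the RID is chosen at probe time (as in the em(4) I/O BAR scan), so the softc does not need to carry it separately.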
@@ -251,88 +296,40 @@ ndis_attach_pccard(dev) sc->ndis_devidx = devidx; + error = ndis_alloc_amem(sc); + if (error) { + device_printf(dev, "failed to allocate attribute memory\n"); + goto fail; + } + error = ndis_attach(dev); fail: return(error); } -static struct resource_list * -ndis_get_resource_list(dev, child) - device_t dev; - device_t child; -{ - struct ndis_softc *sc; - - sc = device_get_softc(dev); - return (&sc->ndis_rl); -} - -#define NDIS_AM_RID 3 - -int -ndis_alloc_amem(arg) - void *arg; +static int +ndis_detach_pccard(device_t dev) { - struct ndis_softc *sc; - int error, rid; - - if (arg == NULL) - return(EINVAL); - - sc = arg; - rid = NDIS_AM_RID; - sc->ndis_res_am = bus_alloc_resource_anywhere(sc->ndis_dev, - SYS_RES_MEMORY, &rid, 0x1000, RF_ACTIVE); - - if (sc->ndis_res_am == NULL) { - device_printf(sc->ndis_dev, - "failed to allocate attribute memory\n"); - return(ENXIO); - } - sc->ndis_rescnt++; - resource_list_add(&sc->ndis_rl, SYS_RES_MEMORY, rid, - rman_get_start(sc->ndis_res_am), rman_get_end(sc->ndis_res_am), - rman_get_size(sc->ndis_res_am)); - - error = CARD_SET_MEMORY_OFFSET(device_get_parent(sc->ndis_dev), - sc->ndis_dev, rid, 0, NULL); - - if (error) { - device_printf(sc->ndis_dev, - "CARD_SET_MEMORY_OFFSET() returned 0x%x\n", error); - return(error); - } - - error = CARD_SET_RES_FLAGS(device_get_parent(sc->ndis_dev), - sc->ndis_dev, SYS_RES_MEMORY, rid, PCCARD_A_MEM_ATTR); + struct ndis_softc *sc = device_get_softc(dev); - if (error) { - device_printf(sc->ndis_dev, - "CARD_SET_RES_FLAGS() returned 0x%x\n", error); - return(error); - } + (void) ndis_detach(dev); - sc->ndis_am_rid = rid; + if (sc->ndis_res_am != NULL) + bus_release_resource(sc->ndis_dev, SYS_RES_MEMORY, + sc->ndis_am_rid, sc->ndis_res_am); + resource_list_free(&sc->ndis_rl); - return(0); + return (0); } -void -ndis_free_amem(arg) - void *arg; +static struct resource_list * +ndis_get_resource_list(dev, child) + device_t dev; + device_t child; { struct ndis_softc *sc; - if (arg == NULL) - return; - - sc = arg; - - if (sc->ndis_res_am != NULL) - bus_release_resource(sc->ndis_dev, SYS_RES_MEMORY, - sc->ndis_am_rid, sc->ndis_res_am); - resource_list_free(&sc->ndis_rl); - - return; + sc = device_get_softc(dev); + return (&sc->ndis_rl); } diff --git a/sys/dev/imcsmb/imcsmb.c b/sys/dev/imcsmb/imcsmb.c index 9cf1a51821bb..0c18fb9f68cb 100644 --- a/sys/dev/imcsmb/imcsmb.c +++ b/sys/dev/imcsmb/imcsmb.c @@ -4,7 +4,6 @@ * Authors: Joe Kloss; Ravi Pokala (rpokala@freebsd.org) * * Copyright (c) 2017-2018 Panasas - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/imcsmb/imcsmb_pci.c b/sys/dev/imcsmb/imcsmb_pci.c index 87d1489b4f3e..0843e61f88d1 100644 --- a/sys/dev/imcsmb/imcsmb_pci.c +++ b/sys/dev/imcsmb/imcsmb_pci.c @@ -4,7 +4,6 @@ * Authors: Joe Kloss; Ravi Pokala (rpokala@freebsd.org) * * Copyright (c) 2017-2018 Panasas - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/imcsmb/imcsmb_reg.h b/sys/dev/imcsmb/imcsmb_reg.h index 3c256ddfead6..4d4f56986664 100644 --- a/sys/dev/imcsmb/imcsmb_reg.h +++ b/sys/dev/imcsmb/imcsmb_reg.h @@ -4,7 +4,6 @@ * Authors: Joe Kloss; Ravi Pokala (rpokala@freebsd.org) * * Copyright (c) 2017-2018 Panasas - * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/imcsmb/imcsmb_var.h b/sys/dev/imcsmb/imcsmb_var.h index ea3753bb85cc..0bf81c417042 100644 --- a/sys/dev/imcsmb/imcsmb_var.h +++ b/sys/dev/imcsmb/imcsmb_var.h @@ -4,7 +4,6 @@ * Authors: Joe Kloss; Ravi Pokala (rpokala@freebsd.org) * * Copyright (c) 2017-2018 Panasas - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/ipw/if_ipw.c b/sys/dev/ipw/if_ipw.c index d6ab94c1b5d8..49150f9151bf 100644 --- a/sys/dev/ipw/if_ipw.c +++ b/sys/dev/ipw/if_ipw.c @@ -1326,10 +1326,7 @@ ipw_release_sbd(struct ipw_softc *sc, struct ipw_soft_bd *sbd) bus_dmamap_unload(sc->txbuf_dmat, sbuf->map); SLIST_INSERT_HEAD(&sc->free_sbuf, sbuf, next); - if (sbuf->m->m_flags & M_TXCB) - ieee80211_process_callback(sbuf->ni, sbuf->m, 0/*XXX*/); - m_freem(sbuf->m); - ieee80211_free_node(sbuf->ni); + ieee80211_tx_complete(sbuf->ni, sbuf->m, 0/*XXX*/); sc->sc_tx_timer = 0; break; diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index b73e0449673a..b4b13892957f 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -120,6 +120,7 @@ static int ixgbe_if_resume(if_ctx_t ctx); static void ixgbe_if_stop(if_ctx_t ctx); void ixgbe_if_enable_intr(if_ctx_t ctx); static void ixgbe_if_disable_intr(if_ctx_t ctx); +static void ixgbe_link_intr_enable(if_ctx_t ctx); static int ixgbe_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static void ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr); static int ixgbe_if_media_change(if_ctx_t ctx); @@ -173,7 +174,7 @@ static void ixgbe_init_device_features(struct adapter *adapter); static void ixgbe_check_fan_failure(struct adapter *, u32, bool); static void ixgbe_add_media_types(if_ctx_t ctx); static void ixgbe_update_stats_counters(struct adapter *adapter); -static void ixgbe_config_link(struct adapter *adapter); +static void ixgbe_config_link(if_ctx_t ctx); static void ixgbe_get_slot_info(struct adapter *); static void ixgbe_check_wol_support(struct adapter *adapter); static void ixgbe_enable_rx_drop(struct adapter *); @@ -254,6 +255,7 @@ static device_method_t ixgbe_if_methods[] = { DEVMETHOD(ifdi_msix_intr_assign, ixgbe_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, ixgbe_if_enable_intr), DEVMETHOD(ifdi_intr_disable, ixgbe_if_disable_intr), + DEVMETHOD(ifdi_link_intr_enable, ixgbe_link_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ixgbe_if_rx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, ixgbe_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, ixgbe_if_tx_queues_alloc), @@ -377,6 +379,7 @@ static struct if_shared_ctx ixgbe_sctx_init = { .isc_vendor_info = ixgbe_vendor_info_array, .isc_driver_version = ixgbe_driver_version, .isc_driver = &ixgbe_if_driver, + .isc_flags = IFLIB_TSO_INIT_IP, .isc_nrxd_min = {MIN_RXD}, .isc_ntxd_min = {MIN_TXD}, @@ -446,19 +449,6 @@ ixgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, } - iflib_config_gtask_init(ctx, &adapter->mod_task, ixgbe_handle_mod, - "mod_task"); - iflib_config_gtask_init(ctx, &adapter->msf_task, ixgbe_handle_msf, - "msf_task"); - iflib_config_gtask_init(ctx, &adapter->phy_task, ixgbe_handle_phy, - "phy_task"); - if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) - iflib_config_gtask_init(ctx, &adapter->mbx_task, - ixgbe_handle_mbx, "mbx_task"); - if (adapter->feat_en & 
IXGBE_FEATURE_FDIR) - iflib_config_gtask_init(ctx, &adapter->fdir_task, - ixgbe_reinit_fdir, "fdir_task"); - device_printf(iflib_get_dev(ctx), "allocated for %d queues\n", adapter->num_tx_queues); @@ -1362,8 +1352,9 @@ ixgbe_is_sfp(struct ixgbe_hw *hw) * ixgbe_config_link ************************************************************************/ static void -ixgbe_config_link(struct adapter *adapter) +ixgbe_config_link(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 autoneg, err = 0; bool sfp, negotiate; @@ -1371,7 +1362,8 @@ ixgbe_config_link(struct adapter *adapter) sfp = ixgbe_is_sfp(hw); if (sfp) { - GROUPTASK_ENQUEUE(&adapter->mod_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; + iflib_admin_intr_deferred(ctx); } else { if (hw->mac.ops.check_link) err = ixgbe_check_link(hw, &adapter->link_speed, @@ -1388,7 +1380,6 @@ ixgbe_config_link(struct adapter *adapter) err = hw->mac.ops.setup_link(hw, autoneg, adapter->link_up); } - } /* ixgbe_config_link */ /************************************************************************ @@ -2021,7 +2012,7 @@ ixgbe_if_msix_intr_assign(if_ctx_t ctx, int msix) cpu_id = rss_getcpu(i % rss_getnumbuckets()); } else { /* - * Bind the msix vector, and thus the + * Bind the MSI-X vector, and thus the * rings to the corresponding cpu. * * This just happens to match the default RSS @@ -2096,8 +2087,6 @@ ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) INIT_DEBUGOUT("ixgbe_if_media_status: begin"); - iflib_admin_intr_deferred(ctx); - ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; @@ -2386,7 +2375,7 @@ ixgbe_msix_link(void *arg) /* Link status change */ if (eicr & IXGBE_EICR_LSC) { IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); - iflib_admin_intr_deferred(adapter->ctx); + adapter->task_requests |= IXGBE_REQUEST_TASK_LSC; } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { @@ -2397,7 +2386,7 @@ ixgbe_msix_link(void *arg) return (FILTER_HANDLED); /* Disable the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR); - GROUPTASK_ENQUEUE(&adapter->fdir_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_FDIR; } else if (eicr & IXGBE_EICR_ECC) { device_printf(iflib_get_dev(adapter->ctx), @@ -2441,7 +2430,7 @@ ixgbe_msix_link(void *arg) /* Check for VF message */ if ((adapter->feat_en & IXGBE_FEATURE_SRIOV) && (eicr & IXGBE_EICR_MAILBOX)) - GROUPTASK_ENQUEUE(&adapter->mbx_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MBX; } if (ixgbe_is_sfp(hw)) { @@ -2453,16 +2442,14 @@ ixgbe_msix_link(void *arg) if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); - if (atomic_cmpset_acq_int(&adapter->sfp_reinit, 0, 1)) - GROUPTASK_ENQUEUE(&adapter->mod_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); - if (atomic_cmpset_acq_int(&adapter->sfp_reinit, 0, 1)) - GROUPTASK_ENQUEUE(&adapter->msf_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; } } @@ -2476,13 +2463,10 @@ ixgbe_msix_link(void *arg) if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540); - GROUPTASK_ENQUEUE(&adapter->phy_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_PHY; } - /* Re-enable other interrupts */ - IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_OTHER); - - return (FILTER_HANDLED); + return (adapter->task_requests != 0) ? 
FILTER_SCHEDULE_THREAD : FILTER_HANDLED; } /* ixgbe_msix_link */ /************************************************************************ @@ -2646,12 +2630,6 @@ ixgbe_if_detach(if_ctx_t ctx) return (EBUSY); } - iflib_config_gtask_deinit(&adapter->mod_task); - iflib_config_gtask_deinit(&adapter->msf_task); - iflib_config_gtask_deinit(&adapter->phy_task); - if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) - iflib_config_gtask_deinit(&adapter->mbx_task); - ixgbe_setup_low_power_mode(ctx); /* let hardware know driver is unloading */ @@ -2910,6 +2888,12 @@ ixgbe_if_init(if_ctx_t ctx) /* Configure RX settings */ ixgbe_initialize_receive_units(ctx); + /* + * Initialize variable holding task enqueue requests + * from MSI-X interrupts + */ + adapter->task_requests = 0; + /* Enable SDP & MSI-X interrupts based on adapter */ ixgbe_config_gpie(adapter); @@ -3011,7 +2995,7 @@ ixgbe_if_init(if_ctx_t ctx) ixgbe_set_phy_power(hw, TRUE); /* Config/Enable Link */ - ixgbe_config_link(adapter); + ixgbe_config_link(ctx); /* Hardware Packet Buffer & Flow Control setup */ ixgbe_config_delay_values(adapter); @@ -3374,7 +3358,6 @@ ixgbe_handle_mod(void *context) device_t dev = iflib_get_dev(ctx); u32 err, cage_full = 0; - adapter->sfp_reinit = 1; if (adapter->hw.need_crosstalk_fix) { switch (hw->mac.type) { case ixgbe_mac_82599EB: @@ -3411,11 +3394,11 @@ ixgbe_handle_mod(void *context) "Setup failure - unsupported SFP+ module type.\n"); goto handle_mod_out; } - GROUPTASK_ENQUEUE(&adapter->msf_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; return; handle_mod_out: - adapter->sfp_reinit = 0; + adapter->task_requests &= ~(IXGBE_REQUEST_TASK_MSF); } /* ixgbe_handle_mod */ @@ -3431,9 +3414,6 @@ ixgbe_handle_msf(void *context) u32 autoneg; bool negotiate; - if (adapter->sfp_reinit != 1) - return; - /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); @@ -3447,8 +3427,6 @@ ixgbe_handle_msf(void *context) ifmedia_removeall(adapter->media); ixgbe_add_media_types(adapter->ctx); ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); - - adapter->sfp_reinit = 0; } /* ixgbe_handle_msf */ /************************************************************************ @@ -3543,10 +3521,20 @@ ixgbe_if_update_admin_status(if_ctx_t ctx) } } - ixgbe_update_stats_counters(adapter); + /* Handle task requests from msix_link() */ + if (adapter->task_requests & IXGBE_REQUEST_TASK_MOD) + ixgbe_handle_mod(ctx); + if (adapter->task_requests & IXGBE_REQUEST_TASK_MSF) + ixgbe_handle_msf(ctx); + if (adapter->task_requests & IXGBE_REQUEST_TASK_MBX) + ixgbe_handle_mbx(ctx); + if (adapter->task_requests & IXGBE_REQUEST_TASK_FDIR) + ixgbe_reinit_fdir(ctx); + if (adapter->task_requests & IXGBE_REQUEST_TASK_PHY) + ixgbe_handle_phy(ctx); + adapter->task_requests = 0; - /* Re-enable link interrupts */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_LSC); + ixgbe_update_stats_counters(adapter); } /* ixgbe_if_update_admin_status */ /************************************************************************ @@ -3682,6 +3670,18 @@ ixgbe_if_disable_intr(if_ctx_t ctx) } /* ixgbe_if_disable_intr */ /************************************************************************ + * ixgbe_link_intr_enable + ************************************************************************/ +static void +ixgbe_link_intr_enable(if_ctx_t ctx) +{ + struct ixgbe_hw *hw = &((struct adapter *)iflib_get_softc(ctx))->hw; + + /* Re-enable other interrupts */ + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_OTHER 
| IXGBE_EIMS_LSC); +} /* ixgbe_link_intr_enable */ + +/************************************************************************ * ixgbe_if_rx_queue_intr_enable ************************************************************************/ static int @@ -3784,22 +3784,21 @@ ixgbe_intr(void *arg) if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); - GROUPTASK_ENQUEUE(&adapter->mod_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); - if (atomic_cmpset_acq_int(&adapter->sfp_reinit, 0, 1)) - GROUPTASK_ENQUEUE(&adapter->msf_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; } } /* External PHY interrupt */ if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) - GROUPTASK_ENQUEUE(&adapter->phy_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_PHY; return (FILTER_SCHEDULE_THREAD); } /* ixgbe_intr */ @@ -3814,7 +3813,7 @@ ixgbe_free_pci_resources(if_ctx_t ctx) struct ix_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); - /* Release all msix queue resources */ + /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); @@ -3824,13 +3823,9 @@ ixgbe_free_pci_resources(if_ctx_t ctx) } } - /* - * Free link/admin interrupt - */ if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->pci_mem); - + rman_get_rid(adapter->pci_mem), adapter->pci_mem); } /* ixgbe_free_pci_resources */ /************************************************************************ diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c index cd0fb5939e45..79b7d78d4c73 100644 --- a/sys/dev/ixgbe/if_ixv.c +++ b/sys/dev/ixgbe/if_ixv.c @@ -144,11 +144,9 @@ static driver_t ixv_driver = { devclass_t ixv_devclass; DRIVER_MODULE(ixv, pci, ixv_driver, ixv_devclass, 0, 0); IFLIB_PNP_INFO(pci, ixv_driver, ixv_vendor_info_array); +MODULE_DEPEND(ixv, iflib, 1, 1, 1); MODULE_DEPEND(ixv, pci, 1, 1, 1); MODULE_DEPEND(ixv, ether, 1, 1, 1); -#ifdef DEV_NETMAP -MODULE_DEPEND(ixv, netmap, 1, 1, 1); -#endif /* DEV_NETMAP */ static device_method_t ixv_if_methods[] = { DEVMETHOD(ifdi_attach_pre, ixv_if_attach_pre), @@ -222,6 +220,7 @@ static struct if_shared_ctx ixv_sctx_init = { .isc_vendor_info = ixv_vendor_info_array, .isc_driver_version = ixv_driver_version, .isc_driver = &ixv_if_driver, + .isc_flags = IFLIB_TSO_INIT_IP, .isc_nrxd_min = {MIN_RXD}, .isc_ntxd_min = {MIN_TXD}, @@ -1132,7 +1131,7 @@ ixv_free_pci_resources(if_ctx_t ctx) struct ix_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); - /* Release all msix queue resources */ + /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); @@ -1142,10 +1141,9 @@ ixv_free_pci_resources(if_ctx_t ctx) } } - /* Clean the Legacy or Link interrupt last */ if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->pci_mem); + rman_get_rid(adapter->pci_mem), adapter->pci_mem); } /* ixv_free_pci_resources */ /************************************************************************ diff --git a/sys/dev/ixgbe/ix_txrx.c b/sys/dev/ixgbe/ix_txrx.c index 2e021a120f20..78316b4c4659 100644 --- a/sys/dev/ixgbe/ix_txrx.c +++ b/sys/dev/ixgbe/ix_txrx.c @@ -131,7 +131,7 @@ ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *TXD, if_pkt_info_t pi) switch (pi->ipi_ipproto) { 
case IPPROTO_TCP: - if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) + if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP | CSUM_TSO)) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; else offload = FALSE; diff --git a/sys/dev/ixgbe/ixgbe.h b/sys/dev/ixgbe/ixgbe.h index 714c740a5ba5..afc55dccd4ef 100644 --- a/sys/dev/ixgbe/ixgbe.h +++ b/sys/dev/ixgbe/ixgbe.h @@ -428,16 +428,11 @@ struct adapter { /* Support for pluggable optics */ bool sfp_probe; - struct grouptask mod_task; /* SFP tasklet */ - struct grouptask msf_task; /* Multispeed Fiber */ - struct grouptask mbx_task; /* VF -> PF mailbox interrupt */ - int sfp_reinit; /* Flow Director */ int fdir_reinit; - struct grouptask fdir_task; - struct grouptask phy_task; /* PHY intr tasklet */ + u32 task_requests; /* * Queues: diff --git a/sys/dev/ixgbe/ixgbe_type.h b/sys/dev/ixgbe/ixgbe_type.h index 36101dac2961..fc5f191ee65e 100644 --- a/sys/dev/ixgbe/ixgbe_type.h +++ b/sys/dev/ixgbe/ixgbe_type.h @@ -4427,4 +4427,11 @@ struct ixgbe_bypass_eeprom { #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD \ (0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT) +#define IXGBE_REQUEST_TASK_MOD 0x01 +#define IXGBE_REQUEST_TASK_MSF 0x02 +#define IXGBE_REQUEST_TASK_MBX 0x04 +#define IXGBE_REQUEST_TASK_FDIR 0x08 +#define IXGBE_REQUEST_TASK_PHY 0x10 +#define IXGBE_REQUEST_TASK_LSC 0x20 + #endif /* _IXGBE_TYPE_H_ */ diff --git a/sys/dev/ixl/if_iavf.c b/sys/dev/ixl/if_iavf.c index 7272ab1ef31d..aa84e5776813 100644 --- a/sys/dev/ixl/if_iavf.c +++ b/sys/dev/ixl/if_iavf.c @@ -358,7 +358,7 @@ iavf_if_attach_pre(if_ctx_t ctx) goto err_early; } - iavf_dbg_init(sc, "Allocated PCI resources and MSIX vectors\n"); + iavf_dbg_init(sc, "Allocated PCI resources and MSI-X vectors\n"); /* * XXX: This is called by init_shared_code in the PF driver, @@ -407,7 +407,8 @@ iavf_if_attach_pre(if_ctx_t ctx) goto err_aq; } - device_printf(dev, "VSIs %d, QPs %d, MSIX %d, RSS sizes: key %d lut %d\n", + device_printf(dev, + "VSIs %d, QPs %d, MSI-X %d, RSS sizes: key %d lut %d\n", sc->vf_res->num_vsis, sc->vf_res->num_queue_pairs, sc->vf_res->max_vectors, @@ -1485,7 +1486,7 @@ iavf_free_pci_resources(struct iavf_sc *sc) struct ixl_rx_queue *rx_que = vsi->rx_queues; device_t dev = sc->dev; - /* We may get here before stations are setup */ + /* We may get here before stations are set up */ if (rx_que == NULL) goto early; @@ -1498,7 +1499,7 @@ iavf_free_pci_resources(struct iavf_sc *sc) early: if (sc->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), sc->pci_mem); + rman_get_rid(sc->pci_mem), sc->pci_mem); } diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index a03a14435482..fc9ad8e11d7c 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -299,7 +299,7 @@ int ixl_limit_iwarp_msix = IXL_IW_MAX_MSIX; #endif TUNABLE_INT("hw.ixl.limit_iwarp_msix", &ixl_limit_iwarp_msix); SYSCTL_INT(_hw_ixl, OID_AUTO, limit_iwarp_msix, CTLFLAG_RDTUN, - &ixl_limit_iwarp_msix, 0, "Limit MSIX vectors assigned to iWARP"); + &ixl_limit_iwarp_msix, 0, "Limit MSI-X vectors assigned to iWARP"); #endif extern struct if_txrx ixl_txrx_hwb; @@ -684,14 +684,14 @@ ixl_if_attach_post(if_ctx_t ctx) error = ixl_iw_pf_attach(pf); if (error) { device_printf(dev, - "interfacing to iwarp driver failed: %d\n", + "interfacing to iWARP driver failed: %d\n", error); goto err; } else device_printf(dev, "iWARP ready\n"); } else - device_printf(dev, - "iwarp disabled on this device (no msix vectors)\n"); + device_printf(dev, "iWARP disabled on this device " + "(no MSI-X vectors)\n"); } else { 
pf->iw_enabled = false; device_printf(dev, "The device is not iWARP enabled\n"); @@ -857,7 +857,7 @@ ixl_if_init(if_ctx_t ctx) /* Set up RSS */ ixl_config_rss(pf); - /* Set up MSI/X routing and the ITR settings */ + /* Set up MSI-X routing and the ITR settings */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_queue_intr_msix(pf); ixl_configure_itr(pf); diff --git a/sys/dev/ixl/ixl_iw.c b/sys/dev/ixl/ixl_iw.c index f597252e6def..6557e9dca4b3 100644 --- a/sys/dev/ixl/ixl_iw.c +++ b/sys/dev/ixl/ixl_iw.c @@ -333,7 +333,7 @@ ixl_iw_pf_msix_init(void *pf_handle, if ((msix_info->aeq_vector < IXL_IW_VEC_BASE(pf)) || (msix_info->aeq_vector >= IXL_IW_VEC_LIMIT(pf))) { - printf("%s: invalid MSIX vector (%i) for AEQ\n", + printf("%s: invalid MSI-X vector (%i) for AEQ\n", __func__, msix_info->aeq_vector); return (EINVAL); } diff --git a/sys/dev/ixl/ixl_pf_main.c b/sys/dev/ixl/ixl_pf_main.c index 6d393813e831..3a4a0dc31aff 100644 --- a/sys/dev/ixl/ixl_pf_main.c +++ b/sys/dev/ixl/ixl_pf_main.c @@ -278,7 +278,8 @@ retry: } /* Print a subset of the capability information. */ - device_printf(dev, "PF-ID[%d]: VFs %d, MSIX %d, VF MSIX %d, QPs %d, %s\n", + device_printf(dev, + "PF-ID[%d]: VFs %d, MSI-X %d, VF MSI-X %d, QPs %d, %s\n", hw->pf_id, hw->func_caps.num_vfs, hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.num_tx_qp, (hw->func_caps.mdio_port_mode == 2) ? "I2C" : @@ -505,7 +506,7 @@ ixl_intr(void *arg) /********************************************************************* * - * MSIX VSI Interrupt Service routine + * MSI-X VSI Interrupt Service routine * **********************************************************************/ int @@ -524,7 +525,7 @@ ixl_msix_que(void *arg) /********************************************************************* * - * MSIX Admin Queue Interrupt Service routine + * MSI-X Admin Queue Interrupt Service routine * **********************************************************************/ int @@ -791,7 +792,7 @@ ixl_configure_intr0_msix(struct ixl_pf *pf) /* * 0x7FF is the end of the queue list. * This means we won't use MSI-X vector 0 for a queue interrupt - * in MSIX mode. + * in MSI-X mode. */ wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); /* Value is in 2 usec units, so 0x3E is 62*2 = 124 usecs. */ @@ -909,12 +910,12 @@ ixl_free_pci_resources(struct ixl_pf *pf) device_t dev = iflib_get_dev(vsi->ctx); struct ixl_rx_queue *rx_que = vsi->rx_queues; - /* We may get here before stations are setup */ + /* We may get here before stations are set up */ if (rx_que == NULL) goto early; /* - ** Release all msix VSI resources: + ** Release all MSI-X VSI resources: */ iflib_irq_free(vsi->ctx, &vsi->irq); @@ -923,7 +924,7 @@ ixl_free_pci_resources(struct ixl_pf *pf) early: if (pf->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), pf->pci_mem); + rman_get_rid(pf->pci_mem), pf->pci_mem); } void diff --git a/sys/dev/jedec_dimm/jedec_dimm.c b/sys/dev/jedec_dimm/jedec_dimm.c index 66144cfa6678..10235dc39193 100644 --- a/sys/dev/jedec_dimm/jedec_dimm.c +++ b/sys/dev/jedec_dimm/jedec_dimm.c @@ -5,7 +5,6 @@ * * Copyright (c) 2016 Andriy Gapon <avg@FreeBSD.org> * Copyright (c) 2018 Panasas - * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/jedec_dimm/jedec_dimm.h b/sys/dev/jedec_dimm/jedec_dimm.h index f6c5485b6e9b..3b330251efc5 100644 --- a/sys/dev/jedec_dimm/jedec_dimm.h +++ b/sys/dev/jedec_dimm/jedec_dimm.h @@ -4,7 +4,6 @@ * Authors: Ravi Pokala (rpokala@freebsd.org) * * Copyright (c) 2018 Panasas - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 8b508737e328..61739bec59ee 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -830,6 +830,7 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) struct netmap_kring *kring; u_int n[NR_TXRX]; enum txrx t; + int err = 0; if (na->tx_rings != NULL) { if (netmap_debug & NM_DEBUG_ON) @@ -869,7 +870,6 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) for (i = 0; i < n[t]; i++) { kring = NMR(na, t)[i]; bzero(kring, sizeof(*kring)); - kring->na = na; kring->notify_na = na; kring->ring_id = i; kring->tx = t; @@ -895,13 +895,21 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) nm_txrx2str(t), i); ND("ktx %s h %d c %d t %d", kring->name, kring->rhead, kring->rcur, kring->rtail); + err = nm_os_selinfo_init(&kring->si, kring->name); + if (err) { + netmap_krings_delete(na); + return err; + } mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF); - nm_os_selinfo_init(&kring->si); + kring->na = na; /* setting this field marks the mutex as initialized */ + } + err = nm_os_selinfo_init(&na->si[t], na->name); + if (err) { + netmap_krings_delete(na); + return err; } - nm_os_selinfo_init(&na->si[t]); } - return 0; } @@ -925,7 +933,8 @@ netmap_krings_delete(struct netmap_adapter *na) /* we rely on the krings layout described above */ for ( ; kring != na->tailroom; kring++) { - mtx_destroy(&(*kring)->q_lock); + if ((*kring)->na != NULL) + mtx_destroy(&(*kring)->q_lock); nm_os_selinfo_uninit(&(*kring)->si); } nm_os_free(na->tx_rings); @@ -2962,6 +2971,9 @@ nmreq_opt_size_by_type(uint32_t nro_reqtype, uint64_t nro_size) case NETMAP_REQ_OPT_CSB: rv = sizeof(struct nmreq_opt_csb); break; + case NETMAP_REQ_OPT_SYNC_KLOOP_MODE: + rv = sizeof(struct nmreq_opt_sync_kloop_mode); + break; } /* subtract the common header */ return rv - sizeof(struct nmreq_option); @@ -3278,10 +3290,8 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) * there are pending packets to send. The latter can be disabled * passing NETMAP_NO_TX_POLL in the NIOCREG call. */ - si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] : - &na->rx_rings[priv->np_qfirst[NR_RX]]->si; - si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] : - &na->tx_rings[priv->np_qfirst[NR_TX]]->si; + si[NR_RX] = priv->np_si[NR_RX]; + si[NR_TX] = priv->np_si[NR_TX]; #ifdef __FreeBSD__ /* diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index 94bde267a279..f94083f7d044 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -58,6 +58,7 @@ #include <sys/unistd.h> /* RFNOWAIT */ #include <sys/sched.h> /* sched_bind() */ #include <sys/smp.h> /* mp_maxid */ +#include <sys/taskqueue.h> /* taskqueue_enqueue(), taskqueue_create(), ... 
*/ #include <net/if.h> #include <net/if_var.h> #include <net/if_types.h> /* IFT_ETHER */ @@ -75,16 +76,48 @@ /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ -void nm_os_selinfo_init(NM_SELINFO_T *si) { - struct mtx *m = &si->m; - mtx_init(m, "nm_kn_lock", NULL, MTX_DEF); - knlist_init_mtx(&si->si.si_note, m); +static void +nm_kqueue_notify(void *opaque, int pending) +{ + struct nm_selinfo *si = opaque; + + /* We use a non-zero hint to distinguish this notification call + * from the call done in kqueue_scan(), which uses hint=0. + */ + KNOTE_UNLOCKED(&si->si.si_note, /*hint=*/0x100); +} + +int nm_os_selinfo_init(NM_SELINFO_T *si, const char *name) { + int err; + + TASK_INIT(&si->ntfytask, 0, nm_kqueue_notify, si); + si->ntfytq = taskqueue_create(name, M_NOWAIT, + taskqueue_thread_enqueue, &si->ntfytq); + if (si->ntfytq == NULL) + return -ENOMEM; + err = taskqueue_start_threads(&si->ntfytq, 1, PI_NET, "tq %s", name); + if (err) { + taskqueue_free(si->ntfytq); + si->ntfytq = NULL; + return err; + } + + snprintf(si->mtxname, sizeof(si->mtxname), "nmkl%s", name); + mtx_init(&si->m, si->mtxname, NULL, MTX_DEF); + knlist_init_mtx(&si->si.si_note, &si->m); + + return (0); } void nm_os_selinfo_uninit(NM_SELINFO_T *si) { - /* XXX kqueue(9) needed; these will mirror knlist_init. */ + if (si->ntfytq == NULL) { + return; /* si was not initialized */ + } + taskqueue_drain(si->ntfytq, &si->ntfytask); + taskqueue_free(si->ntfytq); + si->ntfytq = NULL; knlist_delete(&si->si.si_note, curthread, /*islocked=*/0); knlist_destroy(&si->si.si_note); /* now we don't need the mutex anymore */ @@ -1292,13 +1325,18 @@ nm_os_kctx_destroy(struct nm_kctx *nmk) /* * In addition to calling selwakeuppri(), nm_os_selwakeup() also - * needs to call KNOTE to wake up kqueue listeners. - * We use a non-zero 'hint' argument to inform the netmap_knrw() - * function that it is being called from 'nm_os_selwakeup'; this - * is necessary because when netmap_knrw() is called by the kevent - * subsystem (i.e. kevent_scan()) we also need to call netmap_poll(). - * The knote uses a private mutex associated to the 'si' (see struct - * selinfo, struct nm_selinfo, and nm_os_selinfo_init). + * needs to call knote() to wake up kqueue listeners. + * This operation is deferred to a taskqueue in order to avoid possible + * lock order reversals; these may happen because knote() grabs a + * private lock associated to the 'si' (see struct selinfo, + * struct nm_selinfo, and nm_os_selinfo_init), and nm_os_selwakeup() + * can be called while holding the lock associated to a different + * 'si'. + * When calling knote() we use a non-zero 'hint' argument to inform + * the netmap_knrw() function that it is being called from + * 'nm_os_selwakeup'; this is necessary because when netmap_knrw() is + * called by the kevent subsystem (i.e. kevent_scan()) we also need to + * call netmap_poll(). * * The netmap_kqfilter() function registers one or another f_event * depending on read or write mode. A pointer to the struct @@ -1315,11 +1353,7 @@ nm_os_selwakeup(struct nm_selinfo *si) if (netmap_verbose) nm_prinf("on knote %p", &si->si.si_note); selwakeuppri(&si->si, PI_NET); - /* We use a non-zero hint to distinguish this notification call - * from the call done in kqueue_scan(), which uses hint=0. - */ - KNOTE(&si->si.si_note, /*hint=*/0x100, - mtx_owned(&si->m) ? 
KNF_LISTLOCKED : 0); + taskqueue_enqueue(si->ntfytq, &si->ntfytask); } void diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index e9b83a23532b..3f4b00d814f4 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -133,7 +133,10 @@ struct netmap_adapter *netmap_getna(if_t ifp); struct nm_selinfo { struct selinfo si; + struct taskqueue *ntfytq; + struct task ntfytask; struct mtx m; + char mtxname[32]; }; @@ -295,7 +298,7 @@ struct netmap_priv_d; struct nm_bdg_args; /* os-specific NM_SELINFO_T initialzation/destruction functions */ -void nm_os_selinfo_init(NM_SELINFO_T *); +int nm_os_selinfo_init(NM_SELINFO_T *, const char *name); void nm_os_selinfo_uninit(NM_SELINFO_T *); const char *nm_dump_buf(char *p, int len, int lim, char *dst); @@ -1166,6 +1169,15 @@ nm_kr_txempty(struct netmap_kring *kring) * rxsync_prologue */ #define nm_kr_rxempty(_k) nm_kr_txempty(_k) +/* True if the application needs to wait for more space on the ring + * (more received packets or more free tx slots). + * Only valid after *xsync_prologue. */ +static inline int +nm_kr_wouldblock(struct netmap_kring *kring) +{ + return kring->rcur == kring->nr_hwtail; +} + /* * protect against multiple threads using the same ring. * also check that the ring has not been stopped or locked diff --git a/sys/dev/netmap/netmap_kloop.c b/sys/dev/netmap/netmap_kloop.c index 98536cd03f57..2bd3685a2dff 100644 --- a/sys/dev/netmap/netmap_kloop.c +++ b/sys/dev/netmap/netmap_kloop.c @@ -141,6 +141,9 @@ sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring) kring->rcur, kring->rtail, kring->nr_hwtail); } +/* Arguments for netmap_sync_kloop_tx_ring() and + * netmap_sync_kloop_rx_ring(). + */ struct sync_kloop_ring_args { struct netmap_kring *kring; struct nm_csb_atok *csb_atok; @@ -148,6 +151,10 @@ struct sync_kloop_ring_args { #ifdef SYNC_KLOOP_POLL struct eventfd_ctx *irq_ctx; #endif /* SYNC_KLOOP_POLL */ + /* Are we busy waiting rather than using a schedule() loop ? */ + bool busy_wait; + /* Are we processing in the context of VM exit ? */ + bool direct; }; static void @@ -161,10 +168,16 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) uint32_t num_slots; int batch; + if (unlikely(nm_kr_tryget(kring, 1, NULL))) { + return; + } + num_slots = kring->nkr_num_slots; /* Disable application --> kernel notifications. */ - csb_ktoa_kick_enable(csb_ktoa, 0); + if (!a->direct) { + csb_ktoa_kick_enable(csb_ktoa, 0); + } /* Copy the application kring pointers from the CSB */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); @@ -197,7 +210,9 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) { /* Reinit ring and enable notifications. */ netmap_ring_reinit(kring); - csb_ktoa_kick_enable(csb_ktoa, 1); + if (!a->busy_wait) { + csb_ktoa_kick_enable(csb_ktoa, 1); + } break; } @@ -206,8 +221,10 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) } if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) { - /* Reenable notifications. */ - csb_ktoa_kick_enable(csb_ktoa, 1); + if (!a->busy_wait) { + /* Reenable notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 1); + } nm_prerr("txsync() failed"); break; } @@ -232,7 +249,8 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) /* Interrupt the application if needed. 
*/ #ifdef SYNC_KLOOP_POLL if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) { - /* Disable application kick to avoid sending unnecessary kicks */ + /* We could disable kernel --> application kicks here, + * to avoid spurious interrupts. */ eventfd_signal(a->irq_ctx, 1); more_txspace = false; } @@ -241,6 +259,9 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) /* Read CSB to see if there is more work to do. */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); if (shadow_ring.head == kring->rhead) { + if (a->busy_wait) { + break; + } /* * No more packets to transmit. We enable notifications and * go to sleep, waiting for a kick from the application when new @@ -268,6 +289,8 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) } } + nm_kr_put(kring); + #ifdef SYNC_KLOOP_POLL if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) { eventfd_signal(a->irq_ctx, 1); @@ -297,13 +320,19 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) bool some_recvd = false; uint32_t num_slots; + if (unlikely(nm_kr_tryget(kring, 1, NULL))) { + return; + } + num_slots = kring->nkr_num_slots; /* Get RX csb_atok and csb_ktoa pointers from the CSB. */ num_slots = kring->nkr_num_slots; /* Disable notifications. */ - csb_ktoa_kick_enable(csb_ktoa, 0); + if (!a->direct) { + csb_ktoa_kick_enable(csb_ktoa, 0); + } /* Copy the application kring pointers from the CSB */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); @@ -315,7 +344,9 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) { /* Reinit ring and enable notifications. */ netmap_ring_reinit(kring); - csb_ktoa_kick_enable(csb_ktoa, 1); + if (!a->busy_wait) { + csb_ktoa_kick_enable(csb_ktoa, 1); + } break; } @@ -324,8 +355,10 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) } if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) { - /* Reenable notifications. */ - csb_ktoa_kick_enable(csb_ktoa, 1); + if (!a->busy_wait) { + /* Reenable notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 1); + } nm_prerr("rxsync() failed"); break; } @@ -351,7 +384,8 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) #ifdef SYNC_KLOOP_POLL /* Interrupt the application if needed. */ if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) { - /* Disable application kick to avoid sending unnecessary kicks */ + /* We could disable kernel --> application kicks here, + * to avoid spurious interrupts. */ eventfd_signal(a->irq_ctx, 1); some_recvd = false; } @@ -360,6 +394,9 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) /* Read CSB to see if there is more work to do. */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); if (sync_kloop_norxslots(kring, shadow_ring.head)) { + if (a->busy_wait) { + break; + } /* * No more slots available for reception. We enable notification and * go to sleep, waiting for a kick from the application when new receive @@ -401,6 +438,7 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) } #ifdef SYNC_KLOOP_POLL +struct sync_kloop_poll_ctx; struct sync_kloop_poll_entry { /* Support for receiving notifications from * a netmap ring or from the application. */ @@ -411,12 +449,24 @@ struct sync_kloop_poll_entry { /* Support for sending notifications to the application. */ struct eventfd_ctx *irq_ctx; struct file *irq_filp; + + /* Arguments for the ring processing function. 
Useful + * in case of custom wake-up function. */ + struct sync_kloop_ring_args *args; + struct sync_kloop_poll_ctx *parent; + }; struct sync_kloop_poll_ctx { poll_table wait_table; unsigned int next_entry; + int (*next_wake_fun)(wait_queue_t *, unsigned, int, void *); unsigned int num_entries; + unsigned int num_tx_rings; + unsigned int num_rings; + /* First num_tx_rings entries are for the TX kicks. + * Then the RX kicks entries follow. The last two + * entries are for TX irq, and RX irq. */ struct sync_kloop_poll_entry entries[0]; }; @@ -433,9 +483,77 @@ sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh, entry->wqh = wqh; entry->filp = file; /* Use the default wake up function. */ - init_waitqueue_entry(&entry->wait, current); + if (poll_ctx->next_wake_fun == NULL) { + init_waitqueue_entry(&entry->wait, current); + } else { + init_waitqueue_func_entry(&entry->wait, + poll_ctx->next_wake_fun); + } add_wait_queue(wqh, &entry->wait); - poll_ctx->next_entry++; +} + +static int +sync_kloop_tx_kick_wake_fun(wait_queue_t *wait, unsigned mode, + int wake_flags, void *key) +{ + struct sync_kloop_poll_entry *entry = + container_of(wait, struct sync_kloop_poll_entry, wait); + + netmap_sync_kloop_tx_ring(entry->args); + + return 0; +} + +static int +sync_kloop_tx_irq_wake_fun(wait_queue_t *wait, unsigned mode, + int wake_flags, void *key) +{ + struct sync_kloop_poll_entry *entry = + container_of(wait, struct sync_kloop_poll_entry, wait); + struct sync_kloop_poll_ctx *poll_ctx = entry->parent; + int i; + + for (i = 0; i < poll_ctx->num_tx_rings; i++) { + struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx; + + if (irq_ctx) { + eventfd_signal(irq_ctx, 1); + } + } + + return 0; +} + +static int +sync_kloop_rx_kick_wake_fun(wait_queue_t *wait, unsigned mode, + int wake_flags, void *key) +{ + struct sync_kloop_poll_entry *entry = + container_of(wait, struct sync_kloop_poll_entry, wait); + + netmap_sync_kloop_rx_ring(entry->args); + + return 0; +} + +static int +sync_kloop_rx_irq_wake_fun(wait_queue_t *wait, unsigned mode, + int wake_flags, void *key) +{ + struct sync_kloop_poll_entry *entry = + container_of(wait, struct sync_kloop_poll_entry, wait); + struct sync_kloop_poll_ctx *poll_ctx = entry->parent; + int i; + + for (i = poll_ctx->num_tx_rings; i < poll_ctx->num_rings; i++) { + struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx; + + if (irq_ctx) { + eventfd_signal(irq_ctx, 1); + } + } + + return 0; } #endif /* SYNC_KLOOP_POLL */ @@ -455,6 +573,10 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) struct nm_csb_ktoa* csb_ktoa_base; struct netmap_adapter *na; struct nmreq_option *opt; + bool na_could_sleep = false; + bool busy_wait = true; + bool direct_tx = false; + bool direct_rx = false; int err = 0; int i; @@ -505,8 +627,44 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) goto out; } + /* Prepare the arguments for netmap_sync_kloop_tx_ring() + * and netmap_sync_kloop_rx_ring(). 
*/ + for (i = 0; i < num_tx_rings; i++) { + struct sync_kloop_ring_args *a = args + i; + + a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]]; + a->csb_atok = csb_atok_base + i; + a->csb_ktoa = csb_ktoa_base + i; + a->busy_wait = busy_wait; + a->direct = direct_tx; + } + for (i = 0; i < num_rx_rings; i++) { + struct sync_kloop_ring_args *a = args + num_tx_rings + i; + + a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]]; + a->csb_atok = csb_atok_base + num_tx_rings + i; + a->csb_ktoa = csb_ktoa_base + num_tx_rings + i; + a->busy_wait = busy_wait; + a->direct = direct_rx; + } + /* Validate notification options. */ opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options, + NETMAP_REQ_OPT_SYNC_KLOOP_MODE); + if (opt != NULL) { + struct nmreq_opt_sync_kloop_mode *mode_opt = + (struct nmreq_opt_sync_kloop_mode *)opt; + + direct_tx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_TX); + direct_rx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_RX); + if (mode_opt->mode & ~(NM_OPT_SYNC_KLOOP_DIRECT_TX | + NM_OPT_SYNC_KLOOP_DIRECT_RX)) { + opt->nro_status = err = EINVAL; + goto out; + } + opt->nro_status = 0; + } + opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options, NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS); if (opt != NULL) { err = nmreq_checkduplicate(opt); @@ -524,54 +682,132 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) #ifdef SYNC_KLOOP_POLL eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt; opt->nro_status = 0; + + /* Check if some ioeventfd entry is not defined, and force sleep + * synchronization in that case. */ + busy_wait = false; + for (i = 0; i < num_rings; i++) { + if (eventfds_opt->eventfds[i].ioeventfd < 0) { + busy_wait = true; + break; + } + } + + if (busy_wait && (direct_tx || direct_rx)) { + /* For direct processing we need all the + * ioeventfds to be valid. */ + opt->nro_status = err = EINVAL; + goto out; + } + /* We need 2 poll entries for TX and RX notifications coming * from the netmap adapter, plus one entries per ring for the * notifications coming from the application. */ poll_ctx = nm_os_malloc(sizeof(*poll_ctx) + - (2 + num_rings) * sizeof(poll_ctx->entries[0])); + (num_rings + 2) * sizeof(poll_ctx->entries[0])); init_poll_funcptr(&poll_ctx->wait_table, sync_kloop_poll_table_queue_proc); poll_ctx->num_entries = 2 + num_rings; + poll_ctx->num_tx_rings = num_tx_rings; + poll_ctx->num_rings = num_rings; poll_ctx->next_entry = 0; - /* Poll for notifications coming from the applications through - * eventfds . */ - for (i = 0; i < num_rings; i++) { - struct eventfd_ctx *irq; - struct file *filp; - unsigned long mask; + poll_ctx->next_wake_fun = NULL; - filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); - goto out; - } - mask = filp->f_op->poll(filp, &poll_ctx->wait_table); - if (mask & POLLERR) { - err = EINVAL; - goto out; - } + if (direct_tx && (na->na_flags & NAF_BDG_MAYSLEEP)) { + /* In direct mode, VALE txsync is called from + * wake-up context, where it is not possible + * to sleep. + */ + na->na_flags &= ~NAF_BDG_MAYSLEEP; + na_could_sleep = true; + } + + for (i = 0; i < num_rings + 2; i++) { + poll_ctx->entries[i].args = args + i; + poll_ctx->entries[i].parent = poll_ctx; + } - filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); - goto out; + /* Poll for notifications coming from the applications through + * eventfds. 
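The ioeventfd/irqfd descriptors handled here are plain Linux eventfds: the application writes to an ioeventfd to kick the kloop, and the kloop signals an irqfd (via eventfd_signal()) to raise an interrupt towards the guest. For reference, the counter semantics of an eventfd in user space look like this; a Linux-only sketch, unrelated to the netmap data structures themselves.

#include <sys/eventfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int efd = eventfd(0, 0);
	uint64_t val = 1, got;

	if (efd < 0)
		return (1);
	/* "Kick": one side signals the other by adding to the counter. */
	if (write(efd, &val, sizeof(val)) != (ssize_t)sizeof(val))
		return (1);
	/* The other side consumes (and clears) the pending count. */
	if (read(efd, &got, sizeof(got)) != (ssize_t)sizeof(got))
		return (1);
	printf("received %llu kick(s)\n", (unsigned long long)got);
	close(efd);
	return (0);
}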
*/ + for (i = 0; i < num_rings; i++, poll_ctx->next_entry++) { + struct eventfd_ctx *irq = NULL; + struct file *filp = NULL; + unsigned long mask; + bool tx_ring = (i < num_tx_rings); + + if (eventfds_opt->eventfds[i].irqfd >= 0) { + filp = eventfd_fget( + eventfds_opt->eventfds[i].irqfd); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + irq = eventfd_ctx_fileget(filp); + if (IS_ERR(irq)) { + err = PTR_ERR(irq); + goto out; + } } poll_ctx->entries[i].irq_filp = filp; - irq = eventfd_ctx_fileget(filp); - if (IS_ERR(irq)) { - err = PTR_ERR(irq); - goto out; - } poll_ctx->entries[i].irq_ctx = irq; + poll_ctx->entries[i].args->busy_wait = busy_wait; + /* Don't let netmap_sync_kloop_*x_ring() use + * IRQs in direct mode. */ + poll_ctx->entries[i].args->irq_ctx = + ((tx_ring && direct_tx) || + (!tx_ring && direct_rx)) ? NULL : + poll_ctx->entries[i].irq_ctx; + poll_ctx->entries[i].args->direct = + (tx_ring ? direct_tx : direct_rx); + + if (!busy_wait) { + filp = eventfd_fget( + eventfds_opt->eventfds[i].ioeventfd); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + if (tx_ring && direct_tx) { + /* Override the wake up function + * so that it can directly call + * netmap_sync_kloop_tx_ring(). + */ + poll_ctx->next_wake_fun = + sync_kloop_tx_kick_wake_fun; + } else if (!tx_ring && direct_rx) { + /* Same for direct RX. */ + poll_ctx->next_wake_fun = + sync_kloop_rx_kick_wake_fun; + } else { + poll_ctx->next_wake_fun = NULL; + } + mask = filp->f_op->poll(filp, + &poll_ctx->wait_table); + if (mask & POLLERR) { + err = EINVAL; + goto out; + } + } } + /* Poll for notifications coming from the netmap rings bound to * this file descriptor. */ - { + if (!busy_wait) { NMG_LOCK(); + /* In direct mode, override the wake up function so + * that it can forward the netmap_tx_irq() to the + * guest. */ + poll_ctx->next_wake_fun = direct_tx ? + sync_kloop_tx_irq_wake_fun : NULL; poll_wait(priv->np_filp, priv->np_si[NR_TX], &poll_ctx->wait_table); + poll_ctx->next_entry++; + + poll_ctx->next_wake_fun = direct_rx ? + sync_kloop_rx_irq_wake_fun : NULL; poll_wait(priv->np_filp, priv->np_si[NR_RX], &poll_ctx->wait_table); + poll_ctx->next_entry++; NMG_UNLOCK(); } #else /* SYNC_KLOOP_POLL */ @@ -580,30 +816,9 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) #endif /* SYNC_KLOOP_POLL */ } - /* Prepare the arguments for netmap_sync_kloop_tx_ring() - * and netmap_sync_kloop_rx_ring(). */ - for (i = 0; i < num_tx_rings; i++) { - struct sync_kloop_ring_args *a = args + i; - - a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]]; - a->csb_atok = csb_atok_base + i; - a->csb_ktoa = csb_ktoa_base + i; -#ifdef SYNC_KLOOP_POLL - if (poll_ctx) - a->irq_ctx = poll_ctx->entries[i].irq_ctx; -#endif /* SYNC_KLOOP_POLL */ - } - for (i = 0; i < num_rx_rings; i++) { - struct sync_kloop_ring_args *a = args + num_tx_rings + i; - - a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]]; - a->csb_atok = csb_atok_base + num_tx_rings + i; - a->csb_ktoa = csb_ktoa_base + num_tx_rings + i; -#ifdef SYNC_KLOOP_POLL - if (poll_ctx) - a->irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx; -#endif /* SYNC_KLOOP_POLL */ - } + nm_prinf("kloop busy_wait %u, direct_tx %u, direct_rx %u, " + "na_could_sleep %u", busy_wait, direct_tx, direct_rx, + na_could_sleep); /* Main loop. 
*/ for (;;) { @@ -612,7 +827,7 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) } #ifdef SYNC_KLOOP_POLL - if (poll_ctx) { + if (!busy_wait) { /* It is important to set the task state as * interruptible before processing any TX/RX ring, * so that if a notification on ring Y comes after @@ -627,46 +842,37 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) #endif /* SYNC_KLOOP_POLL */ /* Process all the TX rings bound to this file descriptor. */ - for (i = 0; i < num_tx_rings; i++) { + for (i = 0; !direct_tx && i < num_tx_rings; i++) { struct sync_kloop_ring_args *a = args + i; - - if (unlikely(nm_kr_tryget(a->kring, 1, NULL))) { - continue; - } netmap_sync_kloop_tx_ring(a); - nm_kr_put(a->kring); } /* Process all the RX rings bound to this file descriptor. */ - for (i = 0; i < num_rx_rings; i++) { + for (i = 0; !direct_rx && i < num_rx_rings; i++) { struct sync_kloop_ring_args *a = args + num_tx_rings + i; - - if (unlikely(nm_kr_tryget(a->kring, 1, NULL))) { - continue; - } netmap_sync_kloop_rx_ring(a); - nm_kr_put(a->kring); } -#ifdef SYNC_KLOOP_POLL - if (poll_ctx) { - /* If a poll context is present, yield to the scheduler - * waiting for a notification to come either from - * netmap or the application. */ - schedule_timeout(msecs_to_jiffies(3000)); - } else -#endif /* SYNC_KLOOP_POLL */ - { + if (busy_wait) { /* Default synchronization method: sleep for a while. */ usleep_range(sleep_us, sleep_us); } +#ifdef SYNC_KLOOP_POLL + else { + /* Yield to the scheduler waiting for a notification + * to come either from netmap or the application. */ + schedule_timeout(msecs_to_jiffies(3000)); + } +#endif /* SYNC_KLOOP_POLL */ } out: #ifdef SYNC_KLOOP_POLL if (poll_ctx) { /* Stop polling from netmap and the eventfds, and deallocate * the poll context. */ - __set_current_state(TASK_RUNNING); + if (!busy_wait) { + __set_current_state(TASK_RUNNING); + } for (i = 0; i < poll_ctx->next_entry; i++) { struct sync_kloop_poll_entry *entry = poll_ctx->entries + i; @@ -696,6 +902,9 @@ out: /* Reset the kloop state. */ NMG_LOCK(); priv->np_kloop_state = 0; + if (na_could_sleep) { + na->na_flags |= NAF_BDG_MAYSLEEP; + } NMG_UNLOCK(); return err; @@ -770,14 +979,14 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, atok->appl_need_kick = 0; /* - * First part: tell the host (updating the CSB) to process the new - * packets. + * First part: tell the host to process the new packets, + * updating the CSB. */ kring->nr_hwcur = ktoa->hwcur; nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead); /* Ask for a kick from a guest to the host if needed. */ - if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring)) + if (((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring)) && NM_ACCESS_ONCE(ktoa->kern_need_kick)) || (flags & NAF_FORCE_RECLAIM)) { atok->sync_flags = flags; @@ -787,7 +996,7 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, /* * Second part: reclaim buffers for completed transmissions. */ - if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) { + if (nm_kr_wouldblock(kring) || (flags & NAF_FORCE_RECLAIM)) { nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur); } @@ -797,7 +1006,7 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, * go to sleep and we need to be notified by the host when more free * space is available. 
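The reworked main loop chooses between two idle strategies: when not all eventfds are available it busy-waits with a short usleep_range(), otherwise it parks in schedule_timeout() and relies on the poll wake-ups registered earlier. A rough user-space analogue of that choice, using a condition variable for the notified case, might look as follows; the work_pending()/do_work()/stopping() callbacks are assumed to be supplied by the caller, and a notifier thread would signal loop_wakeup under loop_lock.

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t loop_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t loop_wakeup = PTHREAD_COND_INITIALIZER;

static void
service_loop(bool busy_wait, bool (*work_pending)(void),
    void (*do_work)(void), bool (*stopping)(void))
{
	struct timespec ts;

	while (!stopping()) {
		while (work_pending())
			do_work();

		if (busy_wait) {
			/* Busy-wait flavour: sleep briefly and poll again. */
			ts.tv_sec = 0;
			ts.tv_nsec = 50 * 1000;		/* 50 microseconds */
			nanosleep(&ts, NULL);
		} else {
			/* Notified flavour: block until signalled, with a
			 * coarse timeout as a safety net. */
			clock_gettime(CLOCK_REALTIME, &ts);
			ts.tv_sec += 3;
			pthread_mutex_lock(&loop_lock);
			if (!work_pending())
				pthread_cond_timedwait(&loop_wakeup,
				    &loop_lock, &ts);
			pthread_mutex_unlock(&loop_lock);
		}
	}
}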
*/ - if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { + if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) { /* Reenable notifications. */ atok->appl_need_kick = 1; /* Double check, with store-load memory barrier. */ @@ -805,7 +1014,7 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur); /* If there is new free space, disable notifications */ - if (unlikely(!nm_kr_txempty(kring))) { + if (unlikely(!nm_kr_wouldblock(kring))) { atok->appl_need_kick = 0; } } @@ -851,11 +1060,6 @@ netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, */ if (kring->rhead != kring->nr_hwcur) { nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead); - /* Ask for a kick from the guest to the host if needed. */ - if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) { - atok->sync_flags = flags; - notify = true; - } } /* @@ -863,7 +1067,7 @@ netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, * we need to be notified by the host when more RX slots have been * completed. */ - if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { + if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) { /* Reenable notifications. */ atok->appl_need_kick = 1; /* Double check, with store-load memory barrier. */ @@ -871,11 +1075,18 @@ netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur); /* If there are new slots, disable notifications. */ - if (!nm_kr_rxempty(kring)) { + if (!nm_kr_wouldblock(kring)) { atok->appl_need_kick = 0; } } + /* Ask for a kick from the guest to the host if needed. */ + if ((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring)) + && NM_ACCESS_ONCE(ktoa->kern_need_kick)) { + atok->sync_flags = flags; + notify = true; + } + nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)", kring->name, atok->head, atok->cur, ktoa->hwtail, kring->rhead, kring->rcur, kring->nr_hwtail); diff --git a/sys/dev/nvdimm/nvdimm.c b/sys/dev/nvdimm/nvdimm.c index bdc4f57ab2dd..d4c22dff2409 100644 --- a/sys/dev/nvdimm/nvdimm.c +++ b/sys/dev/nvdimm/nvdimm.c @@ -77,99 +77,6 @@ nvdimm_find_by_handle(nfit_handle_t nv_handle) } static int -nvdimm_parse_flush_addr(void *nfitsubtbl, void *arg) -{ - ACPI_NFIT_FLUSH_ADDRESS *nfitflshaddr; - struct nvdimm_dev *nv; - int i; - - nfitflshaddr = nfitsubtbl; - nv = arg; - if (nfitflshaddr->DeviceHandle != nv->nv_handle) - return (0); - - MPASS(nv->nv_flush_addr == NULL && nv->nv_flush_addr_cnt == 0); - nv->nv_flush_addr = mallocarray(nfitflshaddr->HintCount, - sizeof(uint64_t *), M_NVDIMM, M_WAITOK); - for (i = 0; i < nfitflshaddr->HintCount; i++) - nv->nv_flush_addr[i] = (uint64_t *)nfitflshaddr->HintAddress[i]; - nv->nv_flush_addr_cnt = nfitflshaddr->HintCount; - return (0); -} - -int -nvdimm_iterate_nfit(ACPI_TABLE_NFIT *nfitbl, enum AcpiNfitType type, - int (*cb)(void *, void *), void *arg) -{ - ACPI_NFIT_HEADER *nfithdr; - ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr; - ACPI_NFIT_MEMORY_MAP *nfitmap; - ACPI_NFIT_INTERLEAVE *nfitintrl; - ACPI_NFIT_SMBIOS *nfitsmbios; - ACPI_NFIT_CONTROL_REGION *nfitctlreg; - ACPI_NFIT_DATA_REGION *nfitdtreg; - ACPI_NFIT_FLUSH_ADDRESS *nfitflshaddr; - char *ptr; - int error; - - error = 0; - for (ptr = (char *)(nfitbl + 1); - ptr < (char *)nfitbl + nfitbl->Header.Length; - ptr += nfithdr->Length) { - nfithdr = (ACPI_NFIT_HEADER *)ptr; - if (nfithdr->Type != type) - continue; - switch 
(nfithdr->Type) { - case ACPI_NFIT_TYPE_SYSTEM_ADDRESS: - nfitaddr = __containerof(nfithdr, - ACPI_NFIT_SYSTEM_ADDRESS, Header); - error = cb(nfitaddr, arg); - break; - case ACPI_NFIT_TYPE_MEMORY_MAP: - nfitmap = __containerof(nfithdr, - ACPI_NFIT_MEMORY_MAP, Header); - error = cb(nfitmap, arg); - break; - case ACPI_NFIT_TYPE_INTERLEAVE: - nfitintrl = __containerof(nfithdr, - ACPI_NFIT_INTERLEAVE, Header); - error = cb(nfitintrl, arg); - break; - case ACPI_NFIT_TYPE_SMBIOS: - nfitsmbios = __containerof(nfithdr, - ACPI_NFIT_SMBIOS, Header); - error = cb(nfitsmbios, arg); - break; - case ACPI_NFIT_TYPE_CONTROL_REGION: - nfitctlreg = __containerof(nfithdr, - ACPI_NFIT_CONTROL_REGION, Header); - error = cb(nfitctlreg, arg); - break; - case ACPI_NFIT_TYPE_DATA_REGION: - nfitdtreg = __containerof(nfithdr, - ACPI_NFIT_DATA_REGION, Header); - error = cb(nfitdtreg, arg); - break; - case ACPI_NFIT_TYPE_FLUSH_ADDRESS: - nfitflshaddr = __containerof(nfithdr, - ACPI_NFIT_FLUSH_ADDRESS, Header); - error = cb(nfitflshaddr, arg); - break; - case ACPI_NFIT_TYPE_RESERVED: - default: - if (bootverbose) - printf("NFIT subtype %d unknown\n", - nfithdr->Type); - error = 0; - break; - } - if (error != 0) - break; - } - return (error); -} - -static int nvdimm_probe(device_t dev) { @@ -197,8 +104,8 @@ nvdimm_attach(device_t dev) device_printf(dev, "cannot get NFIT\n"); return (ENXIO); } - nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_FLUSH_ADDRESS, - nvdimm_parse_flush_addr, nv); + acpi_nfit_get_flush_addrs(nfitbl, nv->nv_handle, &nv->nv_flush_addr, + &nv->nv_flush_addr_cnt); AcpiPutTable(&nfitbl->Header); return (0); } @@ -228,33 +135,93 @@ nvdimm_resume(device_t dev) } static ACPI_STATUS -nvdimm_root_create_dev(ACPI_HANDLE handle, UINT32 nesting_level, void *context, +find_dimm(ACPI_HANDLE handle, UINT32 nesting_level, void *context, void **return_value) { - ACPI_STATUS status; ACPI_DEVICE_INFO *device_info; - device_t parent, child; - uintptr_t *ivars; + ACPI_STATUS status; - parent = context; - child = BUS_ADD_CHILD(parent, 100, "nvdimm", -1); - if (child == NULL) { - device_printf(parent, "failed to create nvdimm\n"); - return_ACPI_STATUS(AE_ERROR); - } status = AcpiGetObjectInfo(handle, &device_info); - if (ACPI_FAILURE(status)) { - device_printf(parent, "failed to get nvdimm device info\n"); + if (ACPI_FAILURE(status)) return_ACPI_STATUS(AE_ERROR); + if (device_info->Address == (uintptr_t)context) { + *(ACPI_HANDLE *)return_value = handle; + return_ACPI_STATUS(AE_CTRL_TERMINATE); } - ivars = mallocarray(NVDIMM_ROOT_IVAR_MAX - 1, sizeof(uintptr_t), - M_NVDIMM, M_ZERO | M_WAITOK); - device_set_ivars(child, ivars); - nvdimm_root_set_acpi_handle(child, handle); - nvdimm_root_set_device_handle(child, device_info->Address); return_ACPI_STATUS(AE_OK); } +static ACPI_HANDLE +get_dimm_acpi_handle(ACPI_HANDLE root_handle, nfit_handle_t adr) +{ + ACPI_HANDLE res; + ACPI_STATUS status; + + res = NULL; + status = AcpiWalkNamespace(ACPI_TYPE_DEVICE, root_handle, 1, find_dimm, + NULL, (void *)(uintptr_t)adr, &res); + if (ACPI_FAILURE(status)) + res = NULL; + return (res); +} + +static int +nvdimm_root_create_devs(device_t dev, ACPI_TABLE_NFIT *nfitbl) +{ + ACPI_HANDLE root_handle, dimm_handle; + device_t child; + nfit_handle_t *dimm_ids, *dimm; + uintptr_t *ivars; + int num_dimm_ids; + + root_handle = acpi_get_handle(dev); + acpi_nfit_get_dimm_ids(nfitbl, &dimm_ids, &num_dimm_ids); + for (dimm = dimm_ids; dimm < dimm_ids + num_dimm_ids; dimm++) { + dimm_handle = get_dimm_acpi_handle(root_handle, *dimm); + child = 
BUS_ADD_CHILD(dev, 100, "nvdimm", -1); + if (child == NULL) { + device_printf(dev, "failed to create nvdimm\n"); + return (ENXIO); + } + ivars = mallocarray(NVDIMM_ROOT_IVAR_MAX, sizeof(uintptr_t), + M_NVDIMM, M_ZERO | M_WAITOK); + device_set_ivars(child, ivars); + nvdimm_root_set_acpi_handle(child, dimm_handle); + nvdimm_root_set_device_handle(child, *dimm); + } + free(dimm_ids, M_NVDIMM); + return (0); +} + +static int +nvdimm_root_create_spas(struct nvdimm_root_dev *dev, ACPI_TABLE_NFIT *nfitbl) +{ + ACPI_NFIT_SYSTEM_ADDRESS **spas, **spa; + struct SPA_mapping *spa_mapping; + enum SPA_mapping_type spa_type; + int error, num_spas; + + error = 0; + acpi_nfit_get_spa_ranges(nfitbl, &spas, &num_spas); + for (spa = spas; spa < spas + num_spas; spa++) { + spa_type = nvdimm_spa_type_from_uuid( + (struct uuid *)(*spa)->RangeGuid); + if (spa_type == SPA_TYPE_UNKNOWN) + continue; + spa_mapping = malloc(sizeof(struct SPA_mapping), M_NVDIMM, + M_WAITOK | M_ZERO); + error = nvdimm_spa_init(spa_mapping, *spa, spa_type); + if (error != 0) { + nvdimm_spa_fini(spa_mapping); + free(spa, M_NVDIMM); + break; + } + SLIST_INSERT_HEAD(&dev->spas, spa_mapping, link); + } + free(spas, M_NVDIMM); + return (error); +} + static char *nvdimm_root_id[] = {"ACPI0012", NULL}; static int @@ -274,25 +241,42 @@ nvdimm_root_probe(device_t dev) static int nvdimm_root_attach(device_t dev) { - ACPI_HANDLE handle; + struct nvdimm_root_dev *root; + ACPI_TABLE_NFIT *nfitbl; ACPI_STATUS status; int error; - handle = acpi_get_handle(dev); - status = AcpiWalkNamespace(ACPI_TYPE_DEVICE, handle, 1, - nvdimm_root_create_dev, NULL, dev, NULL); - if (ACPI_FAILURE(status)) - device_printf(dev, "failed adding children\n"); + status = AcpiGetTable(ACPI_SIG_NFIT, 1, (ACPI_TABLE_HEADER **)&nfitbl); + if (ACPI_FAILURE(status)) { + device_printf(dev, "cannot get NFIT\n"); + return (ENXIO); + } + error = nvdimm_root_create_devs(dev, nfitbl); + if (error != 0) + return (error); error = bus_generic_attach(dev); + if (error != 0) + return (error); + root = device_get_softc(dev); + error = nvdimm_root_create_spas(root, nfitbl); + AcpiPutTable(&nfitbl->Header); return (error); } static int nvdimm_root_detach(device_t dev) { + struct nvdimm_root_dev *root; + struct SPA_mapping *spa, *next; device_t *children; int i, error, num_children; + root = device_get_softc(dev); + SLIST_FOREACH_SAFE(spa, &root->spas, link, next) { + nvdimm_spa_fini(spa); + SLIST_REMOVE_HEAD(&root->spas, link); + free(spa, M_NVDIMM); + } error = bus_generic_detach(dev); if (error != 0) return (error); @@ -356,6 +340,7 @@ static device_method_t nvdimm_root_methods[] = { static driver_t nvdimm_root_driver = { "nvdimm_root", nvdimm_root_methods, + sizeof(struct nvdimm_root_dev), }; DRIVER_MODULE(nvdimm_root, acpi, nvdimm_root_driver, nvdimm_root_devclass, NULL, diff --git a/sys/dev/nvdimm/nvdimm_nfit.c b/sys/dev/nvdimm/nvdimm_nfit.c new file mode 100644 index 000000000000..ae00b88f8018 --- /dev/null +++ b/sys/dev/nvdimm/nvdimm_nfit.c @@ -0,0 +1,203 @@ +/*- + * Copyright (c) 2018 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
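nvdimm_root_attach() now keeps its SPA mappings on a per-softc SLIST rather than in the old global spa_mappings array, and nvdimm_root_detach() tears the list down with SLIST_FOREACH_SAFE so each element can be freed while walking. A self-contained illustration of that FreeBSD <sys/queue.h> idiom, using a hypothetical struct item rather than the driver's types:

#include <sys/queue.h>
#include <stdlib.h>

struct item {
	SLIST_ENTRY(item) link;
	int value;
};

SLIST_HEAD(item_list, item);

static void
fill(struct item_list *head, int n)
{
	struct item *it;
	int i;

	SLIST_INIT(head);
	for (i = 0; i < n; i++) {
		it = calloc(1, sizeof(*it));
		if (it == NULL)
			break;
		it->value = i;
		SLIST_INSERT_HEAD(head, it, link);
	}
}

static void
drain(struct item_list *head)
{
	struct item *it, *next;

	/* Safe traversal: the current element may be unlinked and freed. */
	SLIST_FOREACH_SAFE(it, head, link, next) {
		SLIST_REMOVE_HEAD(head, link);
		free(it);
	}
}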
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bio.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/uuid.h> + +#include <contrib/dev/acpica/include/acpi.h> +#include <dev/acpica/acpivar.h> +#include <dev/nvdimm/nvdimm_var.h> + +static int +uint32_t_compare(const void *a, const void *b) +{ + + return (*(const uint32_t *)a - *(const uint32_t *)b); +} + +static int +find_matches(ACPI_TABLE_NFIT *nfitbl, uint16_t type, uint16_t offset, + uint64_t mask, uint64_t value, void **ptrs, int ptrs_len) +{ + ACPI_NFIT_HEADER *h, *end; + uint64_t val; + size_t load_size; + int count; + + h = (ACPI_NFIT_HEADER *)(nfitbl + 1); + end = (ACPI_NFIT_HEADER *)((char *)nfitbl + + nfitbl->Header.Length); + load_size = roundup2(flsl(mask), 8) / 8; + count = 0; + + while (h < end) { + if (h->Type == type) { + bcopy((char *)h + offset, &val, load_size); + val &= mask; + if (val == value) { + if (ptrs_len > 0) { + ptrs[count] = h; + ptrs_len--; + } + count++; + } + } + if (h->Length == 0) + break; + h = (ACPI_NFIT_HEADER *)((char *)h + h->Length); + } + return (count); +} + +static void +malloc_find_matches(ACPI_TABLE_NFIT *nfitbl, uint16_t type, uint16_t offset, + uint64_t mask, uint64_t value, void ***ptrs, int *ptrs_len) +{ + int count; + + count = find_matches(nfitbl, type, offset, mask, value, NULL, 0); + *ptrs_len = count; + if (count == 0) { + *ptrs = NULL; + return; + } + *ptrs = mallocarray(count, sizeof(void *), M_NVDIMM, M_WAITOK); + find_matches(nfitbl, type, offset, mask, value, *ptrs, *ptrs_len); +} + +void +acpi_nfit_get_dimm_ids(ACPI_TABLE_NFIT *nfitbl, nfit_handle_t **listp, + int *countp) +{ + ACPI_NFIT_SYSTEM_ADDRESS **spas; + ACPI_NFIT_MEMORY_MAP ***regions; + int i, j, k, maxids, num_spas, *region_counts; + + acpi_nfit_get_spa_ranges(nfitbl, &spas, &num_spas); + if (num_spas == 0) { + *listp = NULL; + *countp = 0; + return; + } + regions = mallocarray(num_spas, sizeof(uint16_t *), M_NVDIMM, + M_WAITOK); + region_counts = mallocarray(num_spas, sizeof(int), M_NVDIMM, M_WAITOK); + for (i = 0; i < num_spas; i++) { + acpi_nfit_get_region_mappings_by_spa_range(nfitbl, + spas[i]->RangeIndex, ®ions[i], ®ion_counts[i]); + } + maxids = 0; + for (i = 0; i < num_spas; i++) { + maxids += region_counts[i]; + } + *listp = mallocarray(maxids, sizeof(nfit_handle_t), M_NVDIMM, M_WAITOK); + k = 0; + for (i = 0; i < num_spas; i++) { + for (j = 0; j < region_counts[i]; j++) + (*listp)[k++] = regions[i][j]->DeviceHandle; + } + qsort((*listp), maxids, sizeof(uint32_t), 
uint32_t_compare); + i = 0; + for (j = 1; j < maxids; j++) { + if ((*listp)[i] != (*listp)[j]) + (*listp)[++i] = (*listp)[j]; + } + *countp = i + 1; + free(region_counts, M_NVDIMM); + for (i = 0; i < num_spas; i++) + free(regions[i], M_NVDIMM); + free(regions, M_NVDIMM); + free(spas, M_NVDIMM); +} + +void +acpi_nfit_get_spa_range(ACPI_TABLE_NFIT *nfitbl, uint16_t range_index, + ACPI_NFIT_SYSTEM_ADDRESS **spa) +{ + + *spa = NULL; + find_matches(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS, + offsetof(ACPI_NFIT_SYSTEM_ADDRESS, RangeIndex), UINT16_MAX, + range_index, (void **)spa, 1); +} + +void +acpi_nfit_get_spa_ranges(ACPI_TABLE_NFIT *nfitbl, + ACPI_NFIT_SYSTEM_ADDRESS ***listp, int *countp) +{ + + malloc_find_matches(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS, 0, 0, 0, + (void ***)listp, countp); +} + +void +acpi_nfit_get_region_mappings_by_spa_range(ACPI_TABLE_NFIT *nfitbl, + uint16_t spa_range_index, ACPI_NFIT_MEMORY_MAP ***listp, int *countp) +{ + + malloc_find_matches(nfitbl, ACPI_NFIT_TYPE_MEMORY_MAP, + offsetof(ACPI_NFIT_MEMORY_MAP, RangeIndex), UINT16_MAX, + spa_range_index, (void ***)listp, countp); +} + +void acpi_nfit_get_control_region(ACPI_TABLE_NFIT *nfitbl, + uint16_t control_region_index, ACPI_NFIT_CONTROL_REGION **out) +{ + + *out = NULL; + find_matches(nfitbl, ACPI_NFIT_TYPE_CONTROL_REGION, + offsetof(ACPI_NFIT_CONTROL_REGION, RegionIndex), UINT16_MAX, + control_region_index, (void **)out, 1); +} + +void +acpi_nfit_get_flush_addrs(ACPI_TABLE_NFIT *nfitbl, nfit_handle_t dimm, + uint64_t ***listp, int *countp) +{ + ACPI_NFIT_FLUSH_ADDRESS *subtable; + int i; + + subtable = NULL; + find_matches(nfitbl, ACPI_NFIT_TYPE_FLUSH_ADDRESS, + offsetof(ACPI_NFIT_FLUSH_ADDRESS, DeviceHandle), UINT32_MAX, + dimm, (void **)&subtable, 1); + if (subtable == NULL || subtable->HintCount == 0) { + *listp = NULL; + *countp = 0; + return; + } + *countp = subtable->HintCount; + *listp = mallocarray(subtable->HintCount, sizeof(uint64_t *), M_NVDIMM, + M_WAITOK); + for (i = 0; i < subtable->HintCount; i++) + (*listp)[i] = (uint64_t *)(intptr_t)subtable->HintAddress[i]; +} diff --git a/sys/dev/nvdimm/nvdimm_spa.c b/sys/dev/nvdimm/nvdimm_spa.c index b621193f951d..0bbb41746d85 100644 --- a/sys/dev/nvdimm/nvdimm_spa.c +++ b/sys/dev/nvdimm/nvdimm_spa.c @@ -82,19 +82,6 @@ __FBSDID("$FreeBSD$"); #define UUID_INITIALIZER_PERSISTENT_VIRTUAL_CD \ {0x08018188,0x42cd,0xbb48,0x10,0x0f,{0x53,0x87,0xd5,0x3d,0xed,0x3d}} -struct SPA_mapping *spa_mappings; -int spa_mappings_cnt; - -static int -nvdimm_spa_count(void *nfitsubtbl __unused, void *arg) -{ - int *cnt; - - cnt = arg; - (*cnt)++; - return (0); -} - static struct nvdimm_SPA_uuid_list_elm { const char *u_name; struct uuid u_id; @@ -419,22 +406,17 @@ nvdimm_spa_g_access(struct g_provider *pp, int r, int w, int e) return (0); } -static g_init_t nvdimm_spa_g_init; -static g_fini_t nvdimm_spa_g_fini; - struct g_class nvdimm_spa_g_class = { .name = "SPA", .version = G_VERSION, .start = nvdimm_spa_g_start, .access = nvdimm_spa_g_access, - .init = nvdimm_spa_g_init, - .fini = nvdimm_spa_g_fini, }; DECLARE_GEOM_CLASS(nvdimm_spa_g_class, g_spa); -static int -nvdimm_spa_init_one(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, - int spa_type) +int +nvdimm_spa_init(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, + enum SPA_mapping_type spa_type) { struct make_dev_args mda; struct sglist *spa_sg; @@ -512,7 +494,7 @@ nvdimm_spa_init_one(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, if (error1 == 0) error1 = error; } else { - 
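acpi_nfit_get_dimm_ids() combines two common idioms: find_matches() is called twice, first with a NULL buffer to count the matching NFIT subtables and then again to fill a freshly allocated array, and the resulting handle list is qsort()ed and uniqued in place to drop duplicate DeviceHandle values. The standalone sketch below applies the same count/allocate/fill and sort/unique steps to a plain uint32_t array; the comparator uses explicit comparisons, which stays correct even when two values differ by 2^31 or more.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int
u32_cmp(const void *a, const void *b)
{
	uint32_t x = *(const uint32_t *)a, y = *(const uint32_t *)b;

	return ((x > y) - (x < y));
}

/* Pass 1 (dst == NULL): count matching elements; pass 2: copy them out. */
static int
collect_even(const uint32_t *src, int n, uint32_t *dst)
{
	int i, count = 0;

	for (i = 0; i < n; i++) {
		if ((src[i] & 1) == 0) {
			if (dst != NULL)
				dst[count] = src[i];
			count++;
		}
	}
	return (count);
}

int
main(void)
{
	uint32_t src[] = { 8, 2, 8, 4, 2, 6 };
	int n = sizeof(src) / sizeof(src[0]);
	int count, i, j;
	uint32_t *out;

	count = collect_even(src, n, NULL);		/* count first ... */
	out = malloc(count * sizeof(uint32_t));
	if (out == NULL)
		return (1);
	collect_even(src, n, out);			/* ... then fill */

	qsort(out, count, sizeof(uint32_t), u32_cmp);
	if (count > 1) {				/* in-place unique */
		for (i = 0, j = 1; j < count; j++)
			if (out[i] != out[j])
				out[++i] = out[j];
		count = i + 1;
	}

	for (i = 0; i < count; i++)
		printf("%u\n", (unsigned)out[i]);
	free(out);
	return (0);
}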
g_topology_assert(); + g_topology_lock(); spa->spa_g = g_new_geomf(&nvdimm_spa_g_class, "spa%d", spa->spa_nfit_idx); spa->spa_g->softc = spa; @@ -526,12 +508,13 @@ nvdimm_spa_init_one(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, spa->spa_g_devstat = devstat_new_entry("spa", spa->spa_nfit_idx, DEV_BSIZE, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); + g_topology_unlock(); } return (error1); } -static void -nvdimm_spa_fini_one(struct SPA_mapping *spa) +void +nvdimm_spa_fini(struct SPA_mapping *spa) { mtx_lock(&spa->spa_g_mtx); @@ -563,87 +546,3 @@ nvdimm_spa_fini_one(struct SPA_mapping *spa) mtx_destroy(&spa->spa_g_mtx); mtx_destroy(&spa->spa_g_stat_mtx); } - -static int -nvdimm_spa_parse(void *nfitsubtbl, void *arg) -{ - ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr; - struct SPA_mapping *spa; - enum SPA_mapping_type spa_type; - int error, *i; - - i = arg; - spa = &spa_mappings[(*i)++]; - nfitaddr = nfitsubtbl; - spa_type = nvdimm_spa_type_from_uuid( - (struct uuid *)&nfitaddr->RangeGuid); - if (spa_type == SPA_TYPE_UNKNOWN) { - printf("Unknown SPA UUID %d ", nfitaddr->RangeIndex); - printf_uuid((struct uuid *)&nfitaddr->RangeGuid); - printf("\n"); - return (0); - } - error = nvdimm_spa_init_one(spa, nfitaddr, spa_type); - if (error != 0) - nvdimm_spa_fini_one(spa); - return (0); -} - -static int -nvdimm_spa_init1(ACPI_TABLE_NFIT *nfitbl) -{ - int error, i; - - error = nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS, - nvdimm_spa_count, &spa_mappings_cnt); - if (error != 0) - return (error); - spa_mappings = malloc(sizeof(struct SPA_mapping) * spa_mappings_cnt, - M_NVDIMM, M_WAITOK | M_ZERO); - i = 0; - error = nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS, - nvdimm_spa_parse, &i); - if (error != 0) { - free(spa_mappings, M_NVDIMM); - spa_mappings = NULL; - return (error); - } - return (0); -} - -static void -nvdimm_spa_g_init(struct g_class *mp __unused) -{ - ACPI_TABLE_NFIT *nfitbl; - ACPI_STATUS status; - int error; - - spa_mappings_cnt = 0; - spa_mappings = NULL; - if (acpi_disabled("nvdimm")) - return; - status = AcpiGetTable(ACPI_SIG_NFIT, 1, (ACPI_TABLE_HEADER **)&nfitbl); - if (ACPI_FAILURE(status)) { - if (bootverbose) - printf("nvdimm_spa_g_init: cannot find NFIT\n"); - return; - } - error = nvdimm_spa_init1(nfitbl); - if (error != 0) - printf("nvdimm_spa_g_init: error %d\n", error); - AcpiPutTable(&nfitbl->Header); -} - -static void -nvdimm_spa_g_fini(struct g_class *mp __unused) -{ - int i; - - if (spa_mappings == NULL) - return; - for (i = 0; i < spa_mappings_cnt; i++) - nvdimm_spa_fini_one(&spa_mappings[i]); - free(spa_mappings, M_NVDIMM); - spa_mappings = NULL; - spa_mappings_cnt = 0; -} diff --git a/sys/dev/nvdimm/nvdimm_var.h b/sys/dev/nvdimm/nvdimm_var.h index c70476e38b98..c4f61988cc39 100644 --- a/sys/dev/nvdimm/nvdimm_var.h +++ b/sys/dev/nvdimm/nvdimm_var.h @@ -44,6 +44,10 @@ __BUS_ACCESSOR(nvdimm_root, acpi_handle, NVDIMM_ROOT, ACPI_HANDLE, ACPI_HANDLE) __BUS_ACCESSOR(nvdimm_root, device_handle, NVDIMM_ROOT, DEVICE_HANDLE, nfit_handle_t) +struct nvdimm_root_dev { + SLIST_HEAD(, SPA_mapping) spas; +}; + struct nvdimm_dev { device_t nv_dev; nfit_handle_t nv_handle; @@ -64,6 +68,7 @@ enum SPA_mapping_type { }; struct SPA_mapping { + SLIST_ENTRY(SPA_mapping) link; enum SPA_mapping_type spa_type; int spa_domain; int spa_nfit_idx; @@ -84,14 +89,24 @@ struct SPA_mapping { bool spa_g_proc_exiting; }; -extern struct SPA_mapping *spa_mappings; -extern int spa_mappings_cnt; - MALLOC_DECLARE(M_NVDIMM); +void 
acpi_nfit_get_dimm_ids(ACPI_TABLE_NFIT *nfitbl, nfit_handle_t **listp, + int *countp); +void acpi_nfit_get_spa_range(ACPI_TABLE_NFIT *nfitbl, uint16_t range_index, + ACPI_NFIT_SYSTEM_ADDRESS **spa); +void acpi_nfit_get_spa_ranges(ACPI_TABLE_NFIT *nfitbl, + ACPI_NFIT_SYSTEM_ADDRESS ***listp, int *countp); +void acpi_nfit_get_region_mappings_by_spa_range(ACPI_TABLE_NFIT *nfitbl, + uint16_t spa_range_index, ACPI_NFIT_MEMORY_MAP ***listp, int *countp); +void acpi_nfit_get_control_region(ACPI_TABLE_NFIT *nfitbl, + uint16_t control_region_index, ACPI_NFIT_CONTROL_REGION **out); +void acpi_nfit_get_flush_addrs(ACPI_TABLE_NFIT *nfitbl, nfit_handle_t dimm, + uint64_t ***listp, int *countp); enum SPA_mapping_type nvdimm_spa_type_from_uuid(struct uuid *); struct nvdimm_dev *nvdimm_find_by_handle(nfit_handle_t nv_handle); -int nvdimm_iterate_nfit(ACPI_TABLE_NFIT *nfitbl, enum AcpiNfitType type, - int (*cb)(void *, void *), void *arg); +int nvdimm_spa_init(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, + enum SPA_mapping_type spa_type); +void nvdimm_spa_fini(struct SPA_mapping *spa); #endif /* __DEV_NVDIMM_VAR_H__ */ diff --git a/sys/dev/qlnx/qlnxe/ecore_iwarp.c b/sys/dev/qlnx/qlnxe/ecore_iwarp.c new file mode 100644 index 000000000000..eec3613499a9 --- /dev/null +++ b/sys/dev/qlnx/qlnxe/ecore_iwarp.c @@ -0,0 +1,3970 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * File : ecore_iwarp.c + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "bcm_osal.h" +#include "ecore.h" +#include "ecore_status.h" +#include "ecore_sp_commands.h" +#include "ecore_cxt.h" +#include "ecore_rdma.h" +#include "reg_addr.h" +#include "ecore_hw.h" +#include "ecore_hsi_iwarp.h" +#include "ecore_ll2.h" +#include "ecore_ooo.h" +#ifndef LINUX_REMOVE +#include "ecore_tcp_ip.h" +#endif + +#ifdef _NTDDK_ +#pragma warning(push) +#pragma warning(disable : 28123) +#pragma warning(disable : 28167) +#endif + +/* Default values used for MPA Rev 1 */ +#define ECORE_IWARP_ORD_DEFAULT 32 +#define ECORE_IWARP_IRD_DEFAULT 32 + +#define ECORE_IWARP_MAX_FW_MSS 4120 + +struct mpa_v2_hdr { + __be16 ird; + __be16 ord; +}; + +#define MPA_V2_PEER2PEER_MODEL 0x8000 +#define MPA_V2_SEND_RTR 0x4000 /* on ird */ +#define MPA_V2_READ_RTR 0x4000 /* on ord */ +#define MPA_V2_WRITE_RTR 0x8000 +#define MPA_V2_IRD_ORD_MASK 0x3FFF + +#define MPA_REV2(_mpa_rev) (_mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) + +#define ECORE_IWARP_INVALID_TCP_CID 0xffffffff +/* How many times fin will be sent before FW aborts and send RST */ +#define ECORE_IWARP_MAX_FIN_RT_DEFAULT 2 +#define ECORE_IWARP_RCV_WND_SIZE_MIN (0xffff) +/* INTERNAL: These numbers are derived from BRB buffer sizes to obtain optimal performance */ +#define ECORE_IWARP_RCV_WND_SIZE_BB_DEF_2_PORTS (200*1024) +#define ECORE_IWARP_RCV_WND_SIZE_BB_DEF_4_PORTS (100*1024) +#define ECORE_IWARP_RCV_WND_SIZE_AH_DEF_2_PORTS (150*1024) +#define ECORE_IWARP_RCV_WND_SIZE_AH_DEF_4_PORTS (90*1024) +#define ECORE_IWARP_MAX_WND_SCALE (14) +/* Timestamp header is the length of the timestamp option (10): + * kind:8 bit, length:8 bit, timestamp:32 bit, ack: 32bit + * rounded up to a multiple of 4 + */ +#define TIMESTAMP_HEADER_SIZE (12) + +static enum _ecore_status_t +ecore_iwarp_async_event(struct ecore_hwfn *p_hwfn, + u8 fw_event_code, + u16 OSAL_UNUSED echo, + union event_ring_data *data, + u8 fw_return_code); + +static enum _ecore_status_t +ecore_iwarp_empty_ramrod(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_listener *listener); + +static OSAL_INLINE struct ecore_iwarp_fpdu * +ecore_iwarp_get_curr_fpdu(struct ecore_hwfn *p_hwfn, u16 cid); + +/* Override devinfo with iWARP specific values */ +void +ecore_iwarp_init_devinfo(struct ecore_hwfn *p_hwfn) +{ + struct ecore_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + dev->max_inline = IWARP_REQ_MAX_INLINE_DATA_SIZE; + dev->max_qp = OSAL_MIN_T(u64, + IWARP_MAX_QPS, + p_hwfn->p_rdma_info->num_qps) - + ECORE_IWARP_PREALLOC_CNT; + + dev->max_cq = dev->max_qp; + + dev->max_qp_resp_rd_atomic_resc = ECORE_IWARP_IRD_DEFAULT; + dev->max_qp_req_rd_atomic_resc = ECORE_IWARP_ORD_DEFAULT; +} + +enum _ecore_status_t +ecore_iwarp_init_hw(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt) +{ + p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_TCP; + ecore_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1); + p_hwfn->b_rdma_enabled_in_prs = true; + + return 0; +} + +void +ecore_iwarp_init_fw_ramrod(struct ecore_hwfn *p_hwfn, + struct iwarp_init_func_ramrod_data *p_ramrod) +{ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "ooo handle = %d\n", + p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle); + + p_ramrod->iwarp.ll2_ooo_q_index = + p_hwfn->hw_info.resc_start[ECORE_LL2_QUEUE] + + p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle; + + p_ramrod->tcp.max_fin_rt = ECORE_IWARP_MAX_FIN_RT_DEFAULT; + return; +} + +static enum _ecore_status_t +ecore_iwarp_alloc_cid(struct ecore_hwfn *p_hwfn, u32 *cid) +{ + enum _ecore_status_t rc; + + 
OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + + rc = ecore_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->cid_map, + cid); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + *cid += ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "Failed in allocating iwarp cid\n"); + return rc; + } + + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, ECORE_ELEM_CXT, *cid); + + if (rc != ECORE_SUCCESS) { + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + *cid -= ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->cid_map, + *cid); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + } + + return rc; +} + +static void +ecore_iwarp_set_tcp_cid(struct ecore_hwfn *p_hwfn, u32 cid) +{ + cid -= ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_set_id(p_hwfn, + &p_hwfn->p_rdma_info->tcp_cid_map, + cid); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +/* This function allocates a cid for passive tcp ( called from syn receive) + * the reason it's separate from the regular cid allocation is because it + * is assured that these cids already have ilt alloacted. They are preallocated + * to ensure that we won't need to allocate memory during syn processing + */ +static enum _ecore_status_t +ecore_iwarp_alloc_tcp_cid(struct ecore_hwfn *p_hwfn, u32 *cid) +{ + enum _ecore_status_t rc; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + + rc = ecore_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->tcp_cid_map, + cid); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + *cid += ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "can't allocate iwarp tcp cid max-count=%d\n", + p_hwfn->p_rdma_info->tcp_cid_map.max_count); + + *cid = ECORE_IWARP_INVALID_TCP_CID; + } + + return rc; +} + +/* We have two cid maps, one for tcp which should be used only from passive + * syn processing and replacing a pre-allocated ep in the list. the second + * for active tcp and for QPs. 
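ecore_iwarp_alloc_cid() and ecore_iwarp_cid_cleaned() follow a common pattern: an ID is taken from a bitmap under a lock, biased by the protocol's CID start, and later released into whichever map it came from once the bias is removed (the allocation is rolled back the same way if a later step fails). A small user-space sketch of such a biased, locked bitmap allocator; the types below are hypothetical and not the ecore bitmap API.

#include <pthread.h>
#include <stdint.h>
#include <string.h>

#define MAP_BITS	128

struct id_map {
	pthread_mutex_t	lock;
	uint32_t	base;			/* added to every returned id */
	uint8_t		bits[MAP_BITS / 8];
};

static void
id_map_init(struct id_map *m, uint32_t base)
{
	pthread_mutex_init(&m->lock, NULL);
	m->base = base;
	memset(m->bits, 0, sizeof(m->bits));
}

static int
id_alloc(struct id_map *m, uint32_t *idp)
{
	uint32_t i;
	int error = -1;

	pthread_mutex_lock(&m->lock);
	for (i = 0; i < MAP_BITS; i++) {
		if ((m->bits[i / 8] & (1u << (i % 8))) == 0) {
			m->bits[i / 8] |= 1u << (i % 8);
			*idp = m->base + i;	/* bias by the protocol start */
			error = 0;
			break;
		}
	}
	pthread_mutex_unlock(&m->lock);
	return (error);
}

static void
id_free(struct id_map *m, uint32_t id)
{
	uint32_t i = id - m->base;	/* undo the bias before releasing */

	pthread_mutex_lock(&m->lock);
	m->bits[i / 8] &= ~(1u << (i % 8));
	pthread_mutex_unlock(&m->lock);
}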
+ */ +static void ecore_iwarp_cid_cleaned(struct ecore_hwfn *p_hwfn, u32 cid) +{ + cid -= ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + + if (cid < ECORE_IWARP_PREALLOC_CNT) { + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->tcp_cid_map, + cid); + } else { + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->cid_map, + cid); + } + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +enum _ecore_status_t +ecore_iwarp_create_qp(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + struct ecore_rdma_create_qp_out_params *out_params) +{ + struct iwarp_create_qp_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + u16 physical_queue; + u32 cid; + + qp->shared_queue = + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + &qp->shared_queue_phys_addr, + IWARP_SHARED_QUEUE_PAGE_SIZE); + if (!qp->shared_queue) { + DP_NOTICE(p_hwfn, false, + "ecore iwarp create qp failed: cannot allocate memory (shared queue).\n"); + return ECORE_NOMEM; + } else { + out_params->sq_pbl_virt = (u8 *)qp->shared_queue + + IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET; + out_params->sq_pbl_phys = qp->shared_queue_phys_addr + + IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET; + out_params->rq_pbl_virt = (u8 *)qp->shared_queue + + IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET; + out_params->rq_pbl_phys = qp->shared_queue_phys_addr + + IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET; + } + + rc = ecore_iwarp_alloc_cid(p_hwfn, &cid); + if (rc != ECORE_SUCCESS) + goto err1; + + qp->icid = (u16)cid; + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.cid = qp->icid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_CREATE_QP, + PROTOCOLID_IWARP, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.iwarp_create_qp; + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_FMR_AND_RESERVED_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_SIGNALED_COMP, + qp->signal_all); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_SRQ_FLG, + qp->use_srq); + + p_ramrod->pd = qp->pd; + p_ramrod->sq_num_pages = qp->sq_num_pages; + p_ramrod->rq_num_pages = qp->rq_num_pages; + + p_ramrod->qp_handle_for_cqe.hi = OSAL_CPU_TO_LE32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = OSAL_CPU_TO_LE32(qp->qp_handle.lo); + + p_ramrod->cq_cid_for_sq = + OSAL_CPU_TO_LE32((p_hwfn->hw_info.opaque_fid << 16) | + qp->sq_cq_id); + p_ramrod->cq_cid_for_rq = + OSAL_CPU_TO_LE32((p_hwfn->hw_info.opaque_fid << 16) | + qp->rq_cq_id); + + p_ramrod->dpi = OSAL_CPU_TO_LE16(qp->dpi); + + physical_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + p_ramrod->physical_q0 = OSAL_CPU_TO_LE16(physical_queue); + physical_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK); + p_ramrod->physical_q1 = OSAL_CPU_TO_LE16(physical_queue); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + if (rc != ECORE_SUCCESS) + goto err1; + + return rc; + +err1: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->shared_queue, + qp->shared_queue_phys_addr, + 
IWARP_SHARED_QUEUE_PAGE_SIZE); + + return rc; +} + +static enum _ecore_status_t +ecore_iwarp_modify_fw(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp) +{ + struct iwarp_modify_qp_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_MODIFY_QP, + p_hwfn->p_rdma_info->proto, + &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.iwarp_modify_qp; + SET_FIELD(p_ramrod->flags, IWARP_MODIFY_QP_RAMROD_DATA_STATE_TRANS_EN, + 0x1); + if (qp->iwarp_state == ECORE_IWARP_QP_STATE_CLOSING) + p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_CLOSING; + else + p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_ERROR; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x)rc=%d\n", + qp->icid, rc); + + return rc; +} + +enum ecore_iwarp_qp_state +ecore_roce2iwarp_state(enum ecore_roce_qp_state state) +{ + switch (state) { + case ECORE_ROCE_QP_STATE_RESET: + case ECORE_ROCE_QP_STATE_INIT: + case ECORE_ROCE_QP_STATE_RTR: + return ECORE_IWARP_QP_STATE_IDLE; + case ECORE_ROCE_QP_STATE_RTS: + return ECORE_IWARP_QP_STATE_RTS; + case ECORE_ROCE_QP_STATE_SQD: + return ECORE_IWARP_QP_STATE_CLOSING; + case ECORE_ROCE_QP_STATE_ERR: + return ECORE_IWARP_QP_STATE_ERROR; + case ECORE_ROCE_QP_STATE_SQE: + return ECORE_IWARP_QP_STATE_TERMINATE; + } + return ECORE_IWARP_QP_STATE_ERROR; +} + +static enum ecore_roce_qp_state +ecore_iwarp2roce_state(enum ecore_iwarp_qp_state state) +{ + switch (state) { + case ECORE_IWARP_QP_STATE_IDLE: + return ECORE_ROCE_QP_STATE_INIT; + case ECORE_IWARP_QP_STATE_RTS: + return ECORE_ROCE_QP_STATE_RTS; + case ECORE_IWARP_QP_STATE_TERMINATE: + return ECORE_ROCE_QP_STATE_SQE; + case ECORE_IWARP_QP_STATE_CLOSING: + return ECORE_ROCE_QP_STATE_SQD; + case ECORE_IWARP_QP_STATE_ERROR: + return ECORE_ROCE_QP_STATE_ERR; + } + return ECORE_ROCE_QP_STATE_ERR; +} + +const char *iwarp_state_names[] = { + "IDLE", + "RTS", + "TERMINATE", + "CLOSING", + "ERROR", +}; + +enum _ecore_status_t +ecore_iwarp_modify_qp(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + enum ecore_iwarp_qp_state new_state, + bool internal) +{ + enum ecore_iwarp_qp_state prev_iw_state; + enum _ecore_status_t rc = 0; + bool modify_fw = false; + + /* modify QP can be called from upper-layer or as a result of async + * RST/FIN... 
therefore need to protect + */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.qp_lock); + prev_iw_state = qp->iwarp_state; + + if (prev_iw_state == new_state) { + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.qp_lock); + return ECORE_SUCCESS; + } + + switch (prev_iw_state) { + case ECORE_IWARP_QP_STATE_IDLE: + switch (new_state) { + case ECORE_IWARP_QP_STATE_RTS: + qp->iwarp_state = ECORE_IWARP_QP_STATE_RTS; + break; + case ECORE_IWARP_QP_STATE_ERROR: + qp->iwarp_state = ECORE_IWARP_QP_STATE_ERROR; + if (!internal) + modify_fw = true; + break; + default: + break; + } + break; + case ECORE_IWARP_QP_STATE_RTS: + switch (new_state) { + case ECORE_IWARP_QP_STATE_CLOSING: + if (!internal) + modify_fw = true; + + qp->iwarp_state = ECORE_IWARP_QP_STATE_CLOSING; + break; + case ECORE_IWARP_QP_STATE_ERROR: + if (!internal) + modify_fw = true; + qp->iwarp_state = ECORE_IWARP_QP_STATE_ERROR; + break; + default: + break; + } + break; + case ECORE_IWARP_QP_STATE_ERROR: + switch (new_state) { + case ECORE_IWARP_QP_STATE_IDLE: + /* TODO: destroy flow -> need to destroy EP&QP */ + qp->iwarp_state = new_state; + break; + case ECORE_IWARP_QP_STATE_CLOSING: + /* could happen due to race... do nothing.... */ + break; + default: + rc = ECORE_INVAL; + } + break; + case ECORE_IWARP_QP_STATE_TERMINATE: + case ECORE_IWARP_QP_STATE_CLOSING: + qp->iwarp_state = new_state; + break; + default: + break; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x) %s --> %s %s\n", + qp->icid, + iwarp_state_names[prev_iw_state], + iwarp_state_names[qp->iwarp_state], + internal ? "internal" : " "); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.qp_lock); + + if (modify_fw) + ecore_iwarp_modify_fw(p_hwfn, qp); + + return rc; +} + +enum _ecore_status_t +ecore_iwarp_fw_destroy(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp) +{ + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_DESTROY_QP, + p_hwfn->p_rdma_info->proto, + &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x) rc = %d\n", qp->icid, rc); + + return rc; +} + +static void ecore_iwarp_destroy_ep(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + bool remove_from_active_list) +{ + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + ep->ep_buffer_virt, + ep->ep_buffer_phys, + sizeof(*ep->ep_buffer_virt)); + + if (remove_from_active_list) { + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + OSAL_LIST_REMOVE_ENTRY(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_list); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + } + + if (ep->qp) + ep->qp->ep = OSAL_NULL; + + OSAL_FREE(p_hwfn->p_dev, ep); +} + +enum _ecore_status_t +ecore_iwarp_destroy_qp(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp) +{ + enum _ecore_status_t rc = ECORE_SUCCESS; + struct ecore_iwarp_ep *ep = qp->ep; + struct ecore_iwarp_fpdu *fpdu; + int wait_count = 0; + + fpdu = ecore_iwarp_get_curr_fpdu(p_hwfn, qp->icid); + if (fpdu && fpdu->incomplete_bytes) + DP_NOTICE(p_hwfn, false, + "Pending Partial fpdu with incomplete bytes=%d\n", + fpdu->incomplete_bytes); + + if (qp->iwarp_state != ECORE_IWARP_QP_STATE_ERROR) { + + rc = ecore_iwarp_modify_qp(p_hwfn, qp, + 
ECORE_IWARP_QP_STATE_ERROR, + false); + + if (rc != ECORE_SUCCESS) + return rc; + } + + /* Make sure ep is closed before returning and freeing memory. */ + if (ep) { + while (ep->state != ECORE_IWARP_EP_CLOSED) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Waiting for ep->state to be closed...state=%x\n", + ep->state); + + OSAL_MSLEEP(100); + if (wait_count++ > 200) { + DP_NOTICE(p_hwfn, false, "ep state close timeout state=%x\n", + ep->state); + break; + } + } + + ecore_iwarp_destroy_ep(p_hwfn, ep, false); + } + + rc = ecore_iwarp_fw_destroy(p_hwfn, qp); + + if (qp->shared_queue) + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->shared_queue, + qp->shared_queue_phys_addr, + IWARP_SHARED_QUEUE_PAGE_SIZE); + + return rc; +} + +static enum _ecore_status_t +ecore_iwarp_create_ep(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep **ep_out) +{ + struct ecore_iwarp_ep *ep; + enum _ecore_status_t rc; + + ep = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*ep)); + if (!ep) { + DP_NOTICE(p_hwfn, false, + "ecore create ep failed: cannot allocate memory (ep). rc = %d\n", + ECORE_NOMEM); + return ECORE_NOMEM; + } + + ep->state = ECORE_IWARP_EP_INIT; + + /* ep_buffer is allocated once and is structured as follows: + * [MAX_PRIV_DATA_LEN][MAX_PRIV_DATA_LEN][union async_output] + * We could have allocated this in three calls but since all together + * it is less than a page, we do one allocation and initialize pointers + * accordingly + */ + ep->ep_buffer_virt = OSAL_DMA_ALLOC_COHERENT( + p_hwfn->p_dev, + &ep->ep_buffer_phys, + sizeof(*ep->ep_buffer_virt)); + + if (!ep->ep_buffer_virt) { + DP_NOTICE(p_hwfn, false, + "ecore create ep failed: cannot allocate memory (ulp buffer). rc = %d\n", + ECORE_NOMEM); + rc = ECORE_NOMEM; + goto err; + } + + ep->sig = 0xdeadbeef; + + *ep_out = ep; + + return ECORE_SUCCESS; + +err: + OSAL_FREE(p_hwfn->p_dev, ep); + return rc; +} + +static void +ecore_iwarp_print_tcp_ramrod(struct ecore_hwfn *p_hwfn, + struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod) +{ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, ">>> PRINT TCP RAMROD\n"); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "local_mac=%x %x %x\n", + p_tcp_ramrod->tcp.local_mac_addr_lo, + p_tcp_ramrod->tcp.local_mac_addr_mid, + p_tcp_ramrod->tcp.local_mac_addr_hi); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "remote_mac=%x %x %x\n", + p_tcp_ramrod->tcp.remote_mac_addr_lo, + p_tcp_ramrod->tcp.remote_mac_addr_mid, + p_tcp_ramrod->tcp.remote_mac_addr_hi); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "vlan_id=%x\n", + p_tcp_ramrod->tcp.vlan_id); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "flags=%x\n", + p_tcp_ramrod->tcp.flags); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ip_version=%x\n", + p_tcp_ramrod->tcp.ip_version); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "local_ip=%x.%x.%x.%x\n", + p_tcp_ramrod->tcp.local_ip[0], + p_tcp_ramrod->tcp.local_ip[1], + p_tcp_ramrod->tcp.local_ip[2], + p_tcp_ramrod->tcp.local_ip[3]); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "remote_ip=%x.%x.%x.%x\n", + p_tcp_ramrod->tcp.remote_ip[0], + p_tcp_ramrod->tcp.remote_ip[1], + p_tcp_ramrod->tcp.remote_ip[2], + p_tcp_ramrod->tcp.remote_ip[3]); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "flow_label=%x\n", + p_tcp_ramrod->tcp.flow_label); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ttl=%x\n", + p_tcp_ramrod->tcp.ttl); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "tos_or_tc=%x\n", + p_tcp_ramrod->tcp.tos_or_tc); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "local_port=%x\n", + p_tcp_ramrod->tcp.local_port); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "remote_port=%x\n", + p_tcp_ramrod->tcp.remote_port); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, 
"mss=%x\n", + p_tcp_ramrod->tcp.mss); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rcv_wnd_scale=%x\n", + p_tcp_ramrod->tcp.rcv_wnd_scale); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "connect_mode=%x\n", + p_tcp_ramrod->tcp.connect_mode); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "syn_ip_payload_length=%x\n", + p_tcp_ramrod->tcp.syn_ip_payload_length); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "syn_phy_addr_lo=%x\n", + p_tcp_ramrod->tcp.syn_phy_addr_lo); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "syn_phy_addr_hi=%x\n", + p_tcp_ramrod->tcp.syn_phy_addr_hi); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "<<<f PRINT TCP RAMROD\n"); +} + +/* Default values for tcp option2 */ +#define ECORE_IWARP_DEF_MAX_RT_TIME (0) +#define ECORE_IWARP_DEF_CWND_FACTOR (4) +#define ECORE_IWARP_DEF_KA_MAX_PROBE_CNT (5) +#define ECORE_IWARP_DEF_KA_TIMEOUT (1200000) /* 20 min */ +#define ECORE_IWARP_DEF_KA_INTERVAL (1000) /* 1 sec */ + +static enum _ecore_status_t +ecore_iwarp_tcp_offload(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + dma_addr_t async_output_phys; + dma_addr_t in_pdata_phys; + enum _ecore_status_t rc; + u16 physical_q; + u8 tcp_flags; + int i; + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = ep->tcp_cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + init_data.comp_mode = ECORE_SPQ_MODE_CB; + } else { + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + } + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_TCP_OFFLOAD, + PROTOCOLID_IWARP, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_tcp_ramrod = &p_ent->ramrod.iwarp_tcp_offload; + + /* Point to the "second half" of the ulp buffer */ + in_pdata_phys = ep->ep_buffer_phys + + OFFSETOF(struct ecore_iwarp_ep_memory, in_pdata); + p_tcp_ramrod->iwarp.incoming_ulp_buffer.addr.hi = + DMA_HI_LE(in_pdata_phys); + p_tcp_ramrod->iwarp.incoming_ulp_buffer.addr.lo = + DMA_LO_LE(in_pdata_phys); + p_tcp_ramrod->iwarp.incoming_ulp_buffer.len = + OSAL_CPU_TO_LE16(sizeof(ep->ep_buffer_virt->in_pdata)); + + async_output_phys = ep->ep_buffer_phys + + OFFSETOF(struct ecore_iwarp_ep_memory, async_output); + + p_tcp_ramrod->iwarp.async_eqe_output_buf.hi = + DMA_HI_LE(async_output_phys); + p_tcp_ramrod->iwarp.async_eqe_output_buf.lo = + DMA_LO_LE(async_output_phys); + p_tcp_ramrod->iwarp.handle_for_async.hi = OSAL_CPU_TO_LE32(PTR_HI(ep)); + p_tcp_ramrod->iwarp.handle_for_async.lo = OSAL_CPU_TO_LE32(PTR_LO(ep)); + + physical_q = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + p_tcp_ramrod->iwarp.physical_q0 = OSAL_CPU_TO_LE16(physical_q); + physical_q = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK); + p_tcp_ramrod->iwarp.physical_q1 = OSAL_CPU_TO_LE16(physical_q); + p_tcp_ramrod->iwarp.mpa_mode = iwarp_info->mpa_rev; + + ecore_set_fw_mac_addr(&p_tcp_ramrod->tcp.remote_mac_addr_hi, + &p_tcp_ramrod->tcp.remote_mac_addr_mid, + &p_tcp_ramrod->tcp.remote_mac_addr_lo, + ep->remote_mac_addr); + ecore_set_fw_mac_addr(&p_tcp_ramrod->tcp.local_mac_addr_hi, + &p_tcp_ramrod->tcp.local_mac_addr_mid, + &p_tcp_ramrod->tcp.local_mac_addr_lo, + ep->local_mac_addr); + + p_tcp_ramrod->tcp.vlan_id = OSAL_CPU_TO_LE16(ep->cm_info.vlan); + + tcp_flags = p_hwfn->p_rdma_info->iwarp.tcp_flags; + p_tcp_ramrod->tcp.flags = 0; + SET_FIELD(p_tcp_ramrod->tcp.flags, + TCP_OFFLOAD_PARAMS_OPT2_TS_EN, + !!(tcp_flags & ECORE_IWARP_TS_EN)); 
+ + SET_FIELD(p_tcp_ramrod->tcp.flags, + TCP_OFFLOAD_PARAMS_OPT2_DA_EN, + !!(tcp_flags & ECORE_IWARP_DA_EN)); + + p_tcp_ramrod->tcp.ip_version = ep->cm_info.ip_version; + + for (i = 0; i < 4; i++) { + p_tcp_ramrod->tcp.remote_ip[i] = + OSAL_CPU_TO_LE32(ep->cm_info.remote_ip[i]); + p_tcp_ramrod->tcp.local_ip[i] = + OSAL_CPU_TO_LE32(ep->cm_info.local_ip[i]); + } + + p_tcp_ramrod->tcp.remote_port = + OSAL_CPU_TO_LE16(ep->cm_info.remote_port); + p_tcp_ramrod->tcp.local_port = OSAL_CPU_TO_LE16(ep->cm_info.local_port); + p_tcp_ramrod->tcp.mss = OSAL_CPU_TO_LE16(ep->mss); + p_tcp_ramrod->tcp.flow_label = 0; + p_tcp_ramrod->tcp.ttl = 0x40; + p_tcp_ramrod->tcp.tos_or_tc = 0; + + p_tcp_ramrod->tcp.max_rt_time = ECORE_IWARP_DEF_MAX_RT_TIME; + p_tcp_ramrod->tcp.cwnd = ECORE_IWARP_DEF_CWND_FACTOR * p_tcp_ramrod->tcp.mss; + p_tcp_ramrod->tcp.ka_max_probe_cnt = ECORE_IWARP_DEF_KA_MAX_PROBE_CNT; + p_tcp_ramrod->tcp.ka_timeout = ECORE_IWARP_DEF_KA_TIMEOUT; + p_tcp_ramrod->tcp.ka_interval = ECORE_IWARP_DEF_KA_INTERVAL; + + p_tcp_ramrod->tcp.rcv_wnd_scale = + (u8)p_hwfn->p_rdma_info->iwarp.rcv_wnd_scale; + p_tcp_ramrod->tcp.connect_mode = ep->connect_mode; + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + p_tcp_ramrod->tcp.syn_ip_payload_length = + OSAL_CPU_TO_LE16(ep->syn_ip_payload_length); + p_tcp_ramrod->tcp.syn_phy_addr_hi = + DMA_HI_LE(ep->syn_phy_addr); + p_tcp_ramrod->tcp.syn_phy_addr_lo = + DMA_LO_LE(ep->syn_phy_addr); + } + + ecore_iwarp_print_tcp_ramrod(p_hwfn, p_tcp_ramrod); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "EP(0x%x) Offload completed rc=%d\n" , ep->tcp_cid, rc); + + return rc; +} + +/* This function should be called after IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE + * is received. it will be called from the dpc context. + */ +static enum _ecore_status_t +ecore_iwarp_mpa_offload(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + struct iwarp_mpa_offload_ramrod_data *p_mpa_ramrod; + struct ecore_iwarp_info *iwarp_info; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + dma_addr_t async_output_phys; + dma_addr_t out_pdata_phys; + dma_addr_t in_pdata_phys; + struct ecore_rdma_qp *qp; + bool reject; + enum _ecore_status_t rc; + + if (!ep) + return ECORE_INVAL; + + qp = ep->qp; + reject = (qp == OSAL_NULL); + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = reject ? 
ep->tcp_cid : qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + + if (ep->connect_mode == TCP_CONNECT_ACTIVE || !ep->event_cb) + init_data.comp_mode = ECORE_SPQ_MODE_CB; + else + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_MPA_OFFLOAD, + PROTOCOLID_IWARP, &init_data); + + if (rc != ECORE_SUCCESS) + return rc; + + p_mpa_ramrod = &p_ent->ramrod.iwarp_mpa_offload; + out_pdata_phys = ep->ep_buffer_phys + + OFFSETOF(struct ecore_iwarp_ep_memory, out_pdata); + p_mpa_ramrod->common.outgoing_ulp_buffer.addr.hi = + DMA_HI_LE(out_pdata_phys); + p_mpa_ramrod->common.outgoing_ulp_buffer.addr.lo = + DMA_LO_LE(out_pdata_phys); + p_mpa_ramrod->common.outgoing_ulp_buffer.len = + ep->cm_info.private_data_len; + p_mpa_ramrod->common.crc_needed = p_hwfn->p_rdma_info->iwarp.crc_needed; + + p_mpa_ramrod->common.out_rq.ord = ep->cm_info.ord; + p_mpa_ramrod->common.out_rq.ird = ep->cm_info.ird; + + p_mpa_ramrod->tcp_cid = p_hwfn->hw_info.opaque_fid << 16 | ep->tcp_cid; + + in_pdata_phys = ep->ep_buffer_phys + + OFFSETOF(struct ecore_iwarp_ep_memory, in_pdata); + p_mpa_ramrod->tcp_connect_side = ep->connect_mode; + p_mpa_ramrod->incoming_ulp_buffer.addr.hi = + DMA_HI_LE(in_pdata_phys); + p_mpa_ramrod->incoming_ulp_buffer.addr.lo = + DMA_LO_LE(in_pdata_phys); + p_mpa_ramrod->incoming_ulp_buffer.len = + OSAL_CPU_TO_LE16(sizeof(ep->ep_buffer_virt->in_pdata)); + async_output_phys = ep->ep_buffer_phys + + OFFSETOF(struct ecore_iwarp_ep_memory, async_output); + p_mpa_ramrod->async_eqe_output_buf.hi = + DMA_HI_LE(async_output_phys); + p_mpa_ramrod->async_eqe_output_buf.lo = + DMA_LO_LE(async_output_phys); + p_mpa_ramrod->handle_for_async.hi = OSAL_CPU_TO_LE32(PTR_HI(ep)); + p_mpa_ramrod->handle_for_async.lo = OSAL_CPU_TO_LE32(PTR_LO(ep)); + + if (!reject) { + p_mpa_ramrod->shared_queue_addr.hi = + DMA_HI_LE(qp->shared_queue_phys_addr); + p_mpa_ramrod->shared_queue_addr.lo = + DMA_LO_LE(qp->shared_queue_phys_addr); + + p_mpa_ramrod->stats_counter_id = + RESC_START(p_hwfn, ECORE_RDMA_STATS_QUEUE) + + qp->stats_queue; + } else { + p_mpa_ramrod->common.reject = 1; + } + + iwarp_info = &p_hwfn->p_rdma_info->iwarp; + p_mpa_ramrod->rcv_wnd = iwarp_info->rcv_wnd_size; + p_mpa_ramrod->mode = ep->mpa_rev; + SET_FIELD(p_mpa_ramrod->rtr_pref, + IWARP_MPA_OFFLOAD_RAMROD_DATA_RTR_SUPPORTED, + ep->rtr_type); + + ep->state = ECORE_IWARP_EP_MPA_OFFLOADED; + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (!reject) + ep->cid = qp->icid; /* Now they're migrated. */ + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) EP(0x%x) MPA Offload rc = %d IRD=0x%x ORD=0x%x rtr_type=%d mpa_rev=%d reject=%d\n", + reject ? 
0xffff : qp->icid, ep->tcp_cid, rc, ep->cm_info.ird, + ep->cm_info.ord, ep->rtr_type, ep->mpa_rev, reject); + return rc; +} + +static void +ecore_iwarp_mpa_received(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct ecore_iwarp_cm_event_params params; + struct mpa_v2_hdr *mpa_v2_params; + union async_output *async_data; + u16 mpa_ord, mpa_ird; + u8 mpa_hdr_size = 0; + u8 mpa_rev; + + async_data = &ep->ep_buffer_virt->async_output; + + mpa_rev = async_data->mpa_request.mpa_handshake_mode; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "private_data_len=%x handshake_mode=%x private_data=(%x)\n", + async_data->mpa_request.ulp_data_len, + mpa_rev, + *((u32 *)((u8 *)ep->ep_buffer_virt->in_pdata))); + + if (ep->listener->state > ECORE_IWARP_LISTENER_STATE_UNPAUSE) { + /* MPA reject initiated by ecore */ + OSAL_MEMSET(&ep->cm_info, 0, sizeof(ep->cm_info)); + ep->event_cb = OSAL_NULL; + ecore_iwarp_mpa_offload(p_hwfn, ep); + return; + } + + if (mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) { + if (iwarp_info->mpa_rev == MPA_NEGOTIATION_TYPE_BASIC) { + DP_ERR(p_hwfn, "MPA_NEGOTIATE Received MPA rev 2 on driver supporting only MPA rev 1\n"); + /* MPA_REV2 ToDo: close the tcp connection. */ + return; + } + + /* Read ord/ird values from private data buffer */ + mpa_v2_params = + (struct mpa_v2_hdr *)(ep->ep_buffer_virt->in_pdata); + mpa_hdr_size = sizeof(*mpa_v2_params); + + mpa_ord = ntohs(mpa_v2_params->ord); + mpa_ird = ntohs(mpa_v2_params->ird); + + /* Temporarily store the requested incoming ord/ird in cm_info; they + * are replaced with the negotiated values during accept + */ + ep->cm_info.ord = (u8)OSAL_MIN_T(u16, + (mpa_ord & MPA_V2_IRD_ORD_MASK), + ECORE_IWARP_ORD_DEFAULT); + + ep->cm_info.ird = (u8)OSAL_MIN_T(u16, + (mpa_ird & MPA_V2_IRD_ORD_MASK), + ECORE_IWARP_IRD_DEFAULT); + + /* Peer2Peer negotiation */ + ep->rtr_type = MPA_RTR_TYPE_NONE; + if (mpa_ird & MPA_V2_PEER2PEER_MODEL) { + if (mpa_ord & MPA_V2_WRITE_RTR) + ep->rtr_type |= MPA_RTR_TYPE_ZERO_WRITE; + + if (mpa_ord & MPA_V2_READ_RTR) + ep->rtr_type |= MPA_RTR_TYPE_ZERO_READ; + + if (mpa_ird & MPA_V2_SEND_RTR) + ep->rtr_type |= MPA_RTR_TYPE_ZERO_SEND; + + ep->rtr_type &= iwarp_info->rtr_type; + /* if we're left with no match send our capabilities */ + if (ep->rtr_type == MPA_RTR_TYPE_NONE) + ep->rtr_type = iwarp_info->rtr_type; + + /* prioritize write over send and read */ + if (ep->rtr_type & MPA_RTR_TYPE_ZERO_WRITE) + ep->rtr_type = MPA_RTR_TYPE_ZERO_WRITE; + } + + ep->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED; + } else { + ep->cm_info.ord = ECORE_IWARP_ORD_DEFAULT; + ep->cm_info.ird = ECORE_IWARP_IRD_DEFAULT; + ep->mpa_rev = MPA_NEGOTIATION_TYPE_BASIC; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x rtr:0x%x ulp_data_len = %x mpa_hdr_size = %x\n", + mpa_rev, ep->cm_info.ord, ep->cm_info.ird, ep->rtr_type, + async_data->mpa_request.ulp_data_len, + mpa_hdr_size); + + /* Strip mpa v2 hdr from private data before sending to upper layer */ + ep->cm_info.private_data = + ep->ep_buffer_virt->in_pdata + mpa_hdr_size; + + ep->cm_info.private_data_len = + async_data->mpa_request.ulp_data_len - mpa_hdr_size; + + params.event = ECORE_IWARP_EVENT_MPA_REQUEST; + params.cm_info = &ep->cm_info; + params.ep_context = ep; + params.status = ECORE_SUCCESS; + + ep->state = ECORE_IWARP_EP_MPA_REQ_RCVD; + ep->event_cb(ep->cb_context, &params); +} + +static void +ecore_iwarp_move_to_ep_list(struct ecore_hwfn *p_hwfn, + osal_list_t *list, struct ecore_iwarp_ep 
*ep) +{ + OSAL_SPIN_LOCK(&ep->listener->lock); + OSAL_LIST_REMOVE_ENTRY(&ep->list_entry, &ep->listener->ep_list); + OSAL_SPIN_UNLOCK(&ep->listener->lock); + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_PUSH_TAIL(&ep->list_entry, list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); +} + +static void +ecore_iwarp_return_ep(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + ep->state = ECORE_IWARP_EP_INIT; + if (ep->qp) + ep->qp->ep = OSAL_NULL; + ep->qp = OSAL_NULL; + OSAL_MEMSET(&ep->cm_info, 0, sizeof(ep->cm_info)); + + if (ep->tcp_cid == ECORE_IWARP_INVALID_TCP_CID) { + /* We don't care about the return code, it's ok if tcp_cid + * remains invalid... in this case we'll defer allocation + */ + ecore_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid); + } + + ecore_iwarp_move_to_ep_list(p_hwfn, + &p_hwfn->p_rdma_info->iwarp.ep_free_list, + ep); +} + +static void +ecore_iwarp_parse_private_data(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + struct mpa_v2_hdr *mpa_v2_params; + union async_output *async_data; + u16 mpa_ird, mpa_ord; + u8 mpa_data_size = 0; + + if (MPA_REV2(p_hwfn->p_rdma_info->iwarp.mpa_rev)) { + mpa_v2_params = (struct mpa_v2_hdr *) + ((u8 *)ep->ep_buffer_virt->in_pdata); + mpa_data_size = sizeof(*mpa_v2_params); + mpa_ird = ntohs(mpa_v2_params->ird); + mpa_ord = ntohs(mpa_v2_params->ord); + + ep->cm_info.ird = (u8)(mpa_ord & MPA_V2_IRD_ORD_MASK); + ep->cm_info.ord = (u8)(mpa_ird & MPA_V2_IRD_ORD_MASK); + } /* else: Ord / Ird already configured */ + + async_data = &ep->ep_buffer_virt->async_output; + + ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_data_size; + ep->cm_info.private_data_len = + async_data->mpa_response.ulp_data_len - mpa_data_size; +} + +static void +ecore_iwarp_mpa_reply_arrived(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + struct ecore_iwarp_cm_event_params params; + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + DP_NOTICE(p_hwfn, true, "MPA reply event not expected on passive side!\n"); + return; + } + + params.event = ECORE_IWARP_EVENT_ACTIVE_MPA_REPLY; + + ecore_iwarp_parse_private_data(p_hwfn, ep); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n", + ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird); + + params.cm_info = &ep->cm_info; + params.ep_context = ep; + params.status = ECORE_SUCCESS; + + ep->mpa_reply_processed = true; + + ep->event_cb(ep->cb_context, &params); +} + +#define ECORE_IWARP_CONNECT_MODE_STRING(ep) \ + (ep->connect_mode == TCP_CONNECT_PASSIVE) ? "Passive" : "Active" + +/* Called as a result of the event: + * IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE + */ +static void +ecore_iwarp_mpa_complete(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + u8 fw_return_code) +{ + struct ecore_iwarp_cm_event_params params; + + if (ep->connect_mode == TCP_CONNECT_ACTIVE) + params.event = ECORE_IWARP_EVENT_ACTIVE_COMPLETE; + else + params.event = ECORE_IWARP_EVENT_PASSIVE_COMPLETE; + + if (ep->connect_mode == TCP_CONNECT_ACTIVE && + !ep->mpa_reply_processed) { + ecore_iwarp_parse_private_data(p_hwfn, ep); + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n", + ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird); + + params.cm_info = &ep->cm_info; + + params.ep_context = ep; + + if ((ep->connect_mode == TCP_CONNECT_PASSIVE) && + (ep->state != ECORE_IWARP_EP_MPA_OFFLOADED)) { + /* This is a FW bug. 
Shouldn't get complete without offload */ + DP_NOTICE(p_hwfn, false, "%s(0x%x) ERROR: Got MPA complete without MPA offload fw_return_code=%d ep->state=%d\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid, + fw_return_code, ep->state); + ep->state = ECORE_IWARP_EP_CLOSED; + return; + } + + if ((ep->connect_mode == TCP_CONNECT_PASSIVE) && + (ep->state == ECORE_IWARP_EP_ABORTING)) + return; + + ep->state = ECORE_IWARP_EP_CLOSED; + + switch (fw_return_code) { + case RDMA_RETURN_OK: + ep->qp->max_rd_atomic_req = ep->cm_info.ord; + ep->qp->max_rd_atomic_resp = ep->cm_info.ird; + ecore_iwarp_modify_qp(p_hwfn, ep->qp, + ECORE_IWARP_QP_STATE_RTS, + 1); + ep->state = ECORE_IWARP_EP_ESTABLISHED; + params.status = ECORE_SUCCESS; + break; + case IWARP_CONN_ERROR_MPA_TIMEOUT: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA timeout\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_TIMEOUT; + break; + case IWARP_CONN_ERROR_MPA_ERROR_REJECT: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA Reject\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_RST: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA reset(tcp cid: 0x%x)\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid, + ep->tcp_cid); + params.status = ECORE_CONN_RESET; + break; + case IWARP_CONN_ERROR_MPA_FIN: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA received FIN\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_INSUF_IRD: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA insufficient ird\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_RTR_MISMATCH: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA RTR MISMATCH\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_INVALID_PACKET: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA Invalid Packet\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_LOCAL_ERROR: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA Local Error\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_TERMINATE: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA TERMINATE\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + default: + params.status = ECORE_CONN_RESET; + break; + } + + if (ep->event_cb) + ep->event_cb(ep->cb_context, &params); + + /* On the passive side, if there is no associated QP (REJECT) we need to + * return the ep to the pool, otherwise we wait for the QP to release it, + * since we add a replacement element in accept. In any case we need to + * remove it from the ep_list (active connections)... 
+ */ + if (fw_return_code != RDMA_RETURN_OK) { + ep->tcp_cid = ECORE_IWARP_INVALID_TCP_CID; + if ((ep->connect_mode == TCP_CONNECT_PASSIVE) && + (ep->qp == OSAL_NULL)) { /* Rejected */ + ecore_iwarp_return_ep(p_hwfn, ep); + } else { + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_REMOVE_ENTRY( + &ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + } + } +} + +static void +ecore_iwarp_mpa_v2_set_private(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + u8 *mpa_data_size) +{ + struct mpa_v2_hdr *mpa_v2_params; + u16 mpa_ird, mpa_ord; + + *mpa_data_size = 0; + if (MPA_REV2(ep->mpa_rev)) { + mpa_v2_params = + (struct mpa_v2_hdr *)ep->ep_buffer_virt->out_pdata; + *mpa_data_size = sizeof(*mpa_v2_params); + + mpa_ird = (u16)ep->cm_info.ird; + mpa_ord = (u16)ep->cm_info.ord; + + if (ep->rtr_type != MPA_RTR_TYPE_NONE) { + mpa_ird |= MPA_V2_PEER2PEER_MODEL; + + if (ep->rtr_type & MPA_RTR_TYPE_ZERO_SEND) + mpa_ird |= MPA_V2_SEND_RTR; + + if (ep->rtr_type & MPA_RTR_TYPE_ZERO_WRITE) + mpa_ord |= MPA_V2_WRITE_RTR; + + if (ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) + mpa_ord |= MPA_V2_READ_RTR; + } + + mpa_v2_params->ird = htons(mpa_ird); + mpa_v2_params->ord = htons(mpa_ord); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_NEGOTIATE Header: [%x ord:%x ird] %x ord:%x ird:%x peer2peer:%x rtr_send:%x rtr_write:%x rtr_read:%x\n", + mpa_v2_params->ird, + mpa_v2_params->ord, + *((u32 *)mpa_v2_params), + mpa_ord & MPA_V2_IRD_ORD_MASK, + mpa_ird & MPA_V2_IRD_ORD_MASK, + !!(mpa_ird & MPA_V2_PEER2PEER_MODEL), + !!(mpa_ird & MPA_V2_SEND_RTR), + !!(mpa_ord & MPA_V2_WRITE_RTR), + !!(mpa_ord & MPA_V2_READ_RTR)); + } +} + +enum _ecore_status_t +ecore_iwarp_connect(void *rdma_cxt, + struct ecore_iwarp_connect_in *iparams, + struct ecore_iwarp_connect_out *oparams) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_iwarp_info *iwarp_info; + struct ecore_iwarp_ep *ep; + enum _ecore_status_t rc; + u8 mpa_data_size = 0; + u8 ts_hdr_size = 0; + u32 cid; + + if ((iparams->cm_info.ord > ECORE_IWARP_ORD_DEFAULT) || + (iparams->cm_info.ird > ECORE_IWARP_IRD_DEFAULT)) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n", + iparams->qp->icid, iparams->cm_info.ord, + iparams->cm_info.ird); + + return ECORE_INVAL; + } + + iwarp_info = &p_hwfn->p_rdma_info->iwarp; + + /* Allocate ep object */ + rc = ecore_iwarp_alloc_cid(p_hwfn, &cid); + if (rc != ECORE_SUCCESS) + return rc; + + if (iparams->qp->ep == OSAL_NULL) { + rc = ecore_iwarp_create_ep(p_hwfn, &ep); + if (rc != ECORE_SUCCESS) + return rc; + } else { + ep = iparams->qp->ep; + DP_ERR(p_hwfn, "Note re-use of QP for different connect\n"); + ep->state = ECORE_IWARP_EP_INIT; + } + + ep->tcp_cid = cid; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_PUSH_TAIL(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + ep->qp = iparams->qp; + ep->qp->ep = ep; + OSAL_MEMCPY(ep->remote_mac_addr, + iparams->remote_mac_addr, + ETH_ALEN); + OSAL_MEMCPY(ep->local_mac_addr, + iparams->local_mac_addr, + ETH_ALEN); + OSAL_MEMCPY(&ep->cm_info, &iparams->cm_info, sizeof(ep->cm_info)); + + ep->cm_info.ord = iparams->cm_info.ord; + ep->cm_info.ird = iparams->cm_info.ird; + + ep->rtr_type = iwarp_info->rtr_type; + if (iwarp_info->peer2peer == 0) + ep->rtr_type = MPA_RTR_TYPE_NONE; + + if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) && + (ep->cm_info.ord == 0)) + ep->cm_info.ord 
= 1; + + ep->mpa_rev = iwarp_info->mpa_rev; + + ecore_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size); + + ep->cm_info.private_data = (u8 *)ep->ep_buffer_virt->out_pdata; + ep->cm_info.private_data_len = + iparams->cm_info.private_data_len + mpa_data_size; + + OSAL_MEMCPY((u8 *)(u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size, + iparams->cm_info.private_data, + iparams->cm_info.private_data_len); + + if (p_hwfn->p_rdma_info->iwarp.tcp_flags & ECORE_IWARP_TS_EN) + ts_hdr_size = TIMESTAMP_HEADER_SIZE; + + ep->mss = iparams->mss - ts_hdr_size; + ep->mss = OSAL_MIN_T(u16, ECORE_IWARP_MAX_FW_MSS, ep->mss); + + ep->event_cb = iparams->event_cb; + ep->cb_context = iparams->cb_context; + ep->connect_mode = TCP_CONNECT_ACTIVE; + + oparams->ep_context = ep; + + rc = ecore_iwarp_tcp_offload(p_hwfn, ep); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x) EP(0x%x) rc = %d\n", + iparams->qp->icid, ep->tcp_cid, rc); + + if (rc != ECORE_SUCCESS) + ecore_iwarp_destroy_ep(p_hwfn, ep, true); + + return rc; +} + +static struct ecore_iwarp_ep * +ecore_iwarp_get_free_ep(struct ecore_hwfn *p_hwfn) +{ + struct ecore_iwarp_ep *ep = OSAL_NULL; + enum _ecore_status_t rc; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + if (OSAL_LIST_IS_EMPTY(&p_hwfn->p_rdma_info->iwarp.ep_free_list)) { + DP_ERR(p_hwfn, "Ep list is empty\n"); + goto out; + } + + ep = OSAL_LIST_FIRST_ENTRY(&p_hwfn->p_rdma_info->iwarp.ep_free_list, + struct ecore_iwarp_ep, + list_entry); + + /* in some cases we could have failed allocating a tcp cid when added + * from accept / failure... retry now..this is not the common case. + */ + if (ep->tcp_cid == ECORE_IWARP_INVALID_TCP_CID) { + rc = ecore_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid); + /* if we fail we could look for another entry with a valid + * tcp_cid, but since we don't expect to reach this anyway + * it's not worth the handling + */ + if (rc) { + ep->tcp_cid = ECORE_IWARP_INVALID_TCP_CID; + ep = OSAL_NULL; + goto out; + } + } + + OSAL_LIST_REMOVE_ENTRY(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_free_list); + +out: + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + return ep; +} + +/* takes into account timer scan ~20 ms and interrupt/dpc overhead */ +#define ECORE_IWARP_MAX_CID_CLEAN_TIME 100 +/* Technically we shouldn't reach this count with 100 ms iteration sleep */ +#define ECORE_IWARP_MAX_NO_PROGRESS_CNT 5 + +/* This function waits for all the bits of a bmap to be cleared, as long as + * there is progress ( i.e. the number of bits left to be cleared decreases ) + * the function continues. 
+ */ +static enum _ecore_status_t +ecore_iwarp_wait_cid_map_cleared(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap) +{ + int prev_weight = 0; + int wait_count = 0; + int weight = 0; + + weight = OSAL_BITMAP_WEIGHT(bmap->bitmap, bmap->max_count); + prev_weight = weight; + + while (weight) { + OSAL_MSLEEP(ECORE_IWARP_MAX_CID_CLEAN_TIME); + + weight = OSAL_BITMAP_WEIGHT(bmap->bitmap, bmap->max_count); + + if (prev_weight == weight) { + wait_count++; + } else { + prev_weight = weight; + wait_count = 0; + } + + if (wait_count > ECORE_IWARP_MAX_NO_PROGRESS_CNT) { + DP_NOTICE(p_hwfn, false, + "%s bitmap wait timed out (%d cids pending)\n", + bmap->name, weight); + return ECORE_TIMEOUT; + } + } + return ECORE_SUCCESS; +} + +static enum _ecore_status_t +ecore_iwarp_wait_for_all_cids(struct ecore_hwfn *p_hwfn) +{ + enum _ecore_status_t rc; + int i; + + rc = ecore_iwarp_wait_cid_map_cleared( + p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map); + if (rc) + return rc; + + /* Now free the tcp cids from the main cid map */ + for (i = 0; i < ECORE_IWARP_PREALLOC_CNT; i++) { + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->cid_map, + i); + } + + /* Now wait for all cids to be completed */ + rc = ecore_iwarp_wait_cid_map_cleared( + p_hwfn, &p_hwfn->p_rdma_info->cid_map); + + return rc; +} + +static void +ecore_iwarp_free_prealloc_ep(struct ecore_hwfn *p_hwfn) +{ + struct ecore_iwarp_ep *ep; + u32 cid; + + while (!OSAL_LIST_IS_EMPTY(&p_hwfn->p_rdma_info->iwarp.ep_free_list)) { + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + ep = OSAL_LIST_FIRST_ENTRY( + &p_hwfn->p_rdma_info->iwarp.ep_free_list, + struct ecore_iwarp_ep, list_entry); + + if (ep == OSAL_NULL) { + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + break; + } + +#ifdef _NTDDK_ +#pragma warning(suppress : 6011) +#endif + OSAL_LIST_REMOVE_ENTRY( + &ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_free_list); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + if (ep->tcp_cid != ECORE_IWARP_INVALID_TCP_CID) { + cid = ep->tcp_cid - ecore_cxt_get_proto_cid_start( + p_hwfn, p_hwfn->p_rdma_info->proto); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->tcp_cid_map, + cid); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + } + + ecore_iwarp_destroy_ep(p_hwfn, ep, false); + } +} + +static enum _ecore_status_t +ecore_iwarp_prealloc_ep(struct ecore_hwfn *p_hwfn, bool init) +{ + struct ecore_iwarp_ep *ep; + int rc = ECORE_SUCCESS; + u32 cid; + int count; + int i; + + if (init) + count = ECORE_IWARP_PREALLOC_CNT; + else + count = 1; + + for (i = 0; i < count; i++) { + rc = ecore_iwarp_create_ep(p_hwfn, &ep); + if (rc != ECORE_SUCCESS) + return rc; + + /* During initialization we allocate from the main pool, + * afterwards we allocate only from the tcp_cid. 
+ */ + if (init) { + rc = ecore_iwarp_alloc_cid(p_hwfn, &cid); + if (rc != ECORE_SUCCESS) + goto err; + ecore_iwarp_set_tcp_cid(p_hwfn, cid); + } else { + /* We don't care about the return code, it's ok if + * tcp_cid remains invalid...in this case we'll + * defer allocation + */ + ecore_iwarp_alloc_tcp_cid(p_hwfn, &cid); + } + + ep->tcp_cid = cid; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_PUSH_TAIL(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_free_list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + } + + return rc; + +err: + ecore_iwarp_destroy_ep(p_hwfn, ep, false); + + return rc; +} + +enum _ecore_status_t +ecore_iwarp_alloc(struct ecore_hwfn *p_hwfn) +{ + enum _ecore_status_t rc; + +#ifdef CONFIG_ECORE_LOCK_ALLOC + OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_hwfn->p_rdma_info->iwarp.qp_lock); +#endif + OSAL_SPIN_LOCK_INIT(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_SPIN_LOCK_INIT(&p_hwfn->p_rdma_info->iwarp.qp_lock); + + /* Allocate bitmap for tcp cid. These are used by passive side + * to ensure it can allocate a tcp cid during dpc that was + * pre-acquired and doesn't require dynamic allocation of ilt + */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, + ECORE_IWARP_PREALLOC_CNT, + "TCP_CID"); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate tcp cid, rc = %d\n", + rc); + return rc; + } + + OSAL_LIST_INIT(&p_hwfn->p_rdma_info->iwarp.ep_free_list); +//DAVIDS OSAL_SPIN_LOCK_INIT(&p_hwfn->p_rdma_info->iwarp.iw_lock); + rc = ecore_iwarp_prealloc_ep(p_hwfn, true); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "ecore_iwarp_prealloc_ep failed, rc = %d\n", + rc); + return rc; + } + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "ecore_iwarp_prealloc_ep success, rc = %d\n", + rc); + + return ecore_ooo_alloc(p_hwfn); +} + +void +ecore_iwarp_resc_free(struct ecore_hwfn *p_hwfn) +{ + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + +#ifdef CONFIG_ECORE_LOCK_ALLOC + OSAL_SPIN_LOCK_DEALLOC(iwarp_info->iw_lock); + OSAL_SPIN_LOCK_DEALLOC(iwarp_info->qp_lock); +#endif + ecore_ooo_free(p_hwfn); + if (iwarp_info->partial_fpdus) + OSAL_FREE(p_hwfn->p_dev, iwarp_info->partial_fpdus); + if (iwarp_info->mpa_bufs) + OSAL_FREE(p_hwfn->p_dev, iwarp_info->mpa_bufs); + if (iwarp_info->mpa_intermediate_buf) + OSAL_FREE(p_hwfn->p_dev, iwarp_info->mpa_intermediate_buf); + + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1); +} + + +enum _ecore_status_t +ecore_iwarp_accept(void *rdma_cxt, + struct ecore_iwarp_accept_in *iparams) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_iwarp_ep *ep; + u8 mpa_data_size = 0; + enum _ecore_status_t rc; + + ep = (struct ecore_iwarp_ep *)iparams->ep_context; + if (!ep) { + DP_ERR(p_hwfn, "Ep Context receive in accept is NULL\n"); + return ECORE_INVAL; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x) EP(0x%x)\n", + iparams->qp->icid, ep->tcp_cid); + + if ((iparams->ord > ECORE_IWARP_ORD_DEFAULT) || + (iparams->ird > ECORE_IWARP_IRD_DEFAULT)) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) EP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n", + iparams->qp->icid, ep->tcp_cid, + iparams->ord, iparams->ord); + return ECORE_INVAL; + } + + /* We could reach qp->ep != OSAL NULL if we do accept on the same qp */ + if (iparams->qp->ep == OSAL_NULL) { + /* We need to add a replacement for the ep to the free list */ + 
ecore_iwarp_prealloc_ep(p_hwfn, false); + } else { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Note re-use of QP for different connect\n"); + /* Return the old ep to the free_pool */ + ecore_iwarp_return_ep(p_hwfn, iparams->qp->ep); + } + + ecore_iwarp_move_to_ep_list(p_hwfn, + &p_hwfn->p_rdma_info->iwarp.ep_list, + ep); + ep->listener = OSAL_NULL; + ep->cb_context = iparams->cb_context; + ep->qp = iparams->qp; + ep->qp->ep = ep; + + if (ep->mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) { + /* Negotiate ord/ird: if upperlayer requested ord larger than + * ird advertised by remote, we need to decrease our ord + * to match remote ord + */ + if (iparams->ord > ep->cm_info.ird) { + iparams->ord = ep->cm_info.ird; + } + + /* For chelsio compatability, if rtr_zero read is requested + * we can't set ird to zero + */ + if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) && + (iparams->ird == 0)) + iparams->ird = 1; + } + + /* Update cm_info ord/ird to be negotiated values */ + ep->cm_info.ord = iparams->ord; + ep->cm_info.ird = iparams->ird; + + ecore_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size); + + ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata; + ep->cm_info.private_data_len = + iparams->private_data_len + mpa_data_size; + + OSAL_MEMCPY((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size, + iparams->private_data, + iparams->private_data_len); + + if (ep->state == ECORE_IWARP_EP_CLOSED) { + DP_NOTICE(p_hwfn, false, + "(0x%x) Accept called on EP in CLOSED state\n", + ep->tcp_cid); + ep->tcp_cid = ECORE_IWARP_INVALID_TCP_CID; + ecore_iwarp_return_ep(p_hwfn, ep); + return ECORE_CONN_RESET; + } + + rc = ecore_iwarp_mpa_offload(p_hwfn, ep); + if (rc) { + ecore_iwarp_modify_qp(p_hwfn, + iparams->qp, + ECORE_IWARP_QP_STATE_ERROR, + 1); + } + + return rc; +} + +enum _ecore_status_t +ecore_iwarp_reject(void *rdma_cxt, + struct ecore_iwarp_reject_in *iparams) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_iwarp_ep *ep; + u8 mpa_data_size = 0; + enum _ecore_status_t rc; + + ep = (struct ecore_iwarp_ep *)iparams->ep_context; + if (!ep) { + DP_ERR(p_hwfn, "Ep Context receive in reject is NULL\n"); + return ECORE_INVAL; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "EP(0x%x)\n", ep->tcp_cid); + + ep->cb_context = iparams->cb_context; + ep->qp = OSAL_NULL; + + ecore_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size); + + ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata; + ep->cm_info.private_data_len = + iparams->private_data_len + mpa_data_size; + + OSAL_MEMCPY((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size, + iparams->private_data, + iparams->private_data_len); + + if (ep->state == ECORE_IWARP_EP_CLOSED) { + DP_NOTICE(p_hwfn, false, + "(0x%x) Reject called on EP in CLOSED state\n", + ep->tcp_cid); + ep->tcp_cid = ECORE_IWARP_INVALID_TCP_CID; + ecore_iwarp_return_ep(p_hwfn, ep); + return ECORE_CONN_RESET; + } + + rc = ecore_iwarp_mpa_offload(p_hwfn, ep); + return rc; +} + +static void +ecore_iwarp_print_cm_info(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_cm_info *cm_info) +{ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ip_version = %d\n", + cm_info->ip_version); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "remote_ip %x.%x.%x.%x\n", + cm_info->remote_ip[0], + cm_info->remote_ip[1], + cm_info->remote_ip[2], + cm_info->remote_ip[3]); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "local_ip %x.%x.%x.%x\n", + cm_info->local_ip[0], + cm_info->local_ip[1], + cm_info->local_ip[2], + cm_info->local_ip[3]); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "remote_port = %x\n", + cm_info->remote_port); + 
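/*
 * Hedged usage sketch for the accept path above: ex_accept_mpa_request() and
 * its want_ord/want_ird arguments are hypothetical upper-layer code, but the
 * ecore_iwarp_accept_in fields and the clamp against the peer-advertised
 * values follow the negotiation done in ecore_iwarp_accept().
 */
#if 0	/* illustrative only */
static enum _ecore_status_t
ex_accept_mpa_request(void *rdma_cxt, struct ecore_iwarp_cm_event_params *ev,
    struct ecore_rdma_qp *qp, u8 want_ord, u8 want_ird)
{
	struct ecore_iwarp_accept_in acc;

	OSAL_MEMSET(&acc, 0, sizeof(acc));
	acc.ep_context = ev->ep_context;	/* ep from the MPA_REQUEST event */
	acc.cb_context = qp;			/* opaque cookie for later events */
	acc.qp = qp;
	/* never request more than the peer advertised in the MPA request */
	acc.ord = OSAL_MIN_T(u8, want_ord, ev->cm_info->ird);
	acc.ird = OSAL_MIN_T(u8, want_ird, ev->cm_info->ord);
	acc.private_data = OSAL_NULL;
	acc.private_data_len = 0;

	return (ecore_iwarp_accept(rdma_cxt, &acc));
}
#endif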
DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "local_port = %x\n", + cm_info->local_port); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "vlan = %x\n", + cm_info->vlan); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "private_data_len = %x\n", + cm_info->private_data_len); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ord = %d\n", + cm_info->ord); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ird = %d\n", + cm_info->ird); +} + +static int +ecore_iwarp_ll2_post_rx(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ll2_buff *buf, + u8 handle) +{ + enum _ecore_status_t rc; + + rc = ecore_ll2_post_rx_buffer( + p_hwfn, + handle, + buf->data_phys_addr, + (u16)buf->buff_size, + buf, 1); + + if (rc) { + DP_NOTICE(p_hwfn, false, + "Failed to repost rx buffer to ll2 rc = %d, handle=%d\n", + rc, handle); + OSAL_DMA_FREE_COHERENT( + p_hwfn->p_dev, + buf->data, + buf->data_phys_addr, + buf->buff_size); + OSAL_FREE(p_hwfn->p_dev, buf); + } + + return rc; +} + +static bool +ecore_iwarp_ep_exists(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_listener *listener, + struct ecore_iwarp_cm_info *cm_info) +{ + struct ecore_iwarp_ep *ep = OSAL_NULL; + bool found = false; + + OSAL_SPIN_LOCK(&listener->lock); + OSAL_LIST_FOR_EACH_ENTRY(ep, &listener->ep_list, + list_entry, struct ecore_iwarp_ep) { + if ((ep->cm_info.local_port == cm_info->local_port) && + (ep->cm_info.remote_port == cm_info->remote_port) && + (ep->cm_info.vlan == cm_info->vlan) && + !OSAL_MEMCMP(&(ep->cm_info.local_ip), cm_info->local_ip, + sizeof(cm_info->local_ip)) && + !OSAL_MEMCMP(&(ep->cm_info.remote_ip), cm_info->remote_ip, + sizeof(cm_info->remote_ip))) { + found = true; + break; + } + } + + OSAL_SPIN_UNLOCK(&listener->lock); + + if (found) { + DP_NOTICE(p_hwfn, false, "SYN received on active connection - dropping\n"); + ecore_iwarp_print_cm_info(p_hwfn, cm_info); + + return true; + } + + return false; +} + +static struct ecore_iwarp_listener * +ecore_iwarp_get_listener(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_cm_info *cm_info) +{ + struct ecore_iwarp_listener *listener = OSAL_NULL; + static const u32 ip_zero[4] = {0, 0, 0, 0}; + bool found = false; + + ecore_iwarp_print_cm_info(p_hwfn, cm_info); + + OSAL_LIST_FOR_EACH_ENTRY(listener, + &p_hwfn->p_rdma_info->iwarp.listen_list, + list_entry, struct ecore_iwarp_listener) { + + if (listener->port == cm_info->local_port) { + /* Any IP (i.e. 
0.0.0.0 ) will be treated as any vlan */ + if (!OSAL_MEMCMP(listener->ip_addr, + ip_zero, + sizeof(ip_zero))) { + found = true; + break; + } + + /* If not any IP -> check vlan as well */ + if (!OSAL_MEMCMP(listener->ip_addr, + cm_info->local_ip, + sizeof(cm_info->local_ip)) && + + (listener->vlan == cm_info->vlan)) { + found = true; + break; + } + } + } + + if (found && listener->state == ECORE_IWARP_LISTENER_STATE_ACTIVE) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "listener found = %p\n", + listener); + return listener; + } + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "listener not found\n"); + return OSAL_NULL; +} + +static enum _ecore_status_t +ecore_iwarp_parse_rx_pkt(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_cm_info *cm_info, + void *buf, + u8 *remote_mac_addr, + u8 *local_mac_addr, + int *payload_len, + int *tcp_start_offset) +{ + struct ecore_vlan_ethhdr *vethh; + struct ecore_ethhdr *ethh; + struct ecore_iphdr *iph; + struct ecore_ipv6hdr *ip6h; + struct ecore_tcphdr *tcph; + bool vlan_valid = false; + int eth_hlen, ip_hlen; + u16 eth_type; + int i; + + ethh = (struct ecore_ethhdr *)buf; + eth_type = ntohs(ethh->h_proto); + if (eth_type == ETH_P_8021Q) { + vlan_valid = true; + vethh = (struct ecore_vlan_ethhdr *)ethh; + cm_info->vlan = ntohs(vethh->h_vlan_TCI) & VLAN_VID_MASK; + eth_type = ntohs(vethh->h_vlan_encapsulated_proto); + } + + eth_hlen = ETH_HLEN + (vlan_valid ? sizeof(u32) : 0); + + OSAL_MEMCPY(remote_mac_addr, + ethh->h_source, + ETH_ALEN); + + OSAL_MEMCPY(local_mac_addr, + ethh->h_dest, + ETH_ALEN); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "eth_type =%d Source mac: [0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]\n", + eth_type, ethh->h_source[0], ethh->h_source[1], + ethh->h_source[2], ethh->h_source[3], + ethh->h_source[4], ethh->h_source[5]); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "eth_hlen=%d destination mac: [0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]\n", + eth_hlen, ethh->h_dest[0], ethh->h_dest[1], + ethh->h_dest[2], ethh->h_dest[3], + ethh->h_dest[4], ethh->h_dest[5]); + + iph = (struct ecore_iphdr *)((u8 *)(ethh) + eth_hlen); + + if (eth_type == ETH_P_IP) { + if (iph->protocol != IPPROTO_TCP) { + DP_NOTICE(p_hwfn, false, + "Unexpected ip protocol on ll2 %x\n", + iph->protocol); + return ECORE_INVAL; + } + + cm_info->local_ip[0] = ntohl(iph->daddr); + cm_info->remote_ip[0] = ntohl(iph->saddr); + cm_info->ip_version = (enum ecore_tcp_ip_version)TCP_IPV4; + + ip_hlen = (iph->ihl)*sizeof(u32); + *payload_len = ntohs(iph->tot_len) - ip_hlen; + + } else if (eth_type == ETH_P_IPV6) { + ip6h = (struct ecore_ipv6hdr *)iph; + + if (ip6h->nexthdr != IPPROTO_TCP) { + DP_NOTICE(p_hwfn, false, + "Unexpected ip protocol on ll2 %x\n", + iph->protocol); + return ECORE_INVAL; + } + + for (i = 0; i < 4; i++) { + cm_info->local_ip[i] = + ntohl(ip6h->daddr.in6_u.u6_addr32[i]); + cm_info->remote_ip[i] = + ntohl(ip6h->saddr.in6_u.u6_addr32[i]); + } + cm_info->ip_version = (enum ecore_tcp_ip_version)TCP_IPV6; + + ip_hlen = sizeof(*ip6h); + *payload_len = ntohs(ip6h->payload_len); + } else { + DP_NOTICE(p_hwfn, false, + "Unexpected ethertype on ll2 %x\n", eth_type); + return ECORE_INVAL; + } + + tcph = (struct ecore_tcphdr *)((u8 *)iph + ip_hlen); + + if (!tcph->syn) { + DP_NOTICE(p_hwfn, false, + "Only SYN type packet expected on this ll2 conn, iph->ihl=%d source=%d dest=%d\n", + iph->ihl, tcph->source, tcph->dest); + return ECORE_INVAL; + } + + cm_info->local_port = ntohs(tcph->dest); + cm_info->remote_port = ntohs(tcph->source); + + ecore_iwarp_print_cm_info(p_hwfn, cm_info); + + *tcp_start_offset 
= eth_hlen + ip_hlen; + + return ECORE_SUCCESS; +} + +static struct ecore_iwarp_fpdu * +ecore_iwarp_get_curr_fpdu(struct ecore_hwfn *p_hwfn, u16 cid) +{ + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct ecore_iwarp_fpdu *partial_fpdu; + u32 idx = cid - ecore_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_IWARP); + + if (idx >= iwarp_info->max_num_partial_fpdus) { + DP_ERR(p_hwfn, "Invalid cid %x max_num_partial_fpdus=%x\n", cid, + iwarp_info->max_num_partial_fpdus); + return OSAL_NULL; + } + + partial_fpdu = &iwarp_info->partial_fpdus[idx]; + + return partial_fpdu; +} + +enum ecore_iwarp_mpa_pkt_type { + ECORE_IWARP_MPA_PKT_PACKED, + ECORE_IWARP_MPA_PKT_PARTIAL, + ECORE_IWARP_MPA_PKT_UNALIGNED +}; + +#define ECORE_IWARP_INVALID_FPDU_LENGTH 0xffff +#define ECORE_IWARP_MPA_FPDU_LENGTH_SIZE (2) +#define ECORE_IWARP_MPA_CRC32_DIGEST_SIZE (4) + +/* Pad to multiple of 4 */ +#define ECORE_IWARP_PDU_DATA_LEN_WITH_PAD(data_len) (((data_len) + 3) & ~3) + +#define ECORE_IWARP_FPDU_LEN_WITH_PAD(_mpa_len) \ + (ECORE_IWARP_PDU_DATA_LEN_WITH_PAD(_mpa_len + \ + ECORE_IWARP_MPA_FPDU_LENGTH_SIZE) + \ + ECORE_IWARP_MPA_CRC32_DIGEST_SIZE) + +/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */ +#define ECORE_IWARP_MAX_BDS_PER_FPDU 3 + +char *pkt_type_str[] = { + "ECORE_IWARP_MPA_PKT_PACKED", + "ECORE_IWARP_MPA_PKT_PARTIAL", + "ECORE_IWARP_MPA_PKT_UNALIGNED" +}; + +static enum _ecore_status_t +ecore_iwarp_recycle_pkt(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu, + struct ecore_iwarp_ll2_buff *buf); + +static enum ecore_iwarp_mpa_pkt_type +ecore_iwarp_mpa_classify(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu, + u16 tcp_payload_len, + u8 *mpa_data) + +{ + enum ecore_iwarp_mpa_pkt_type pkt_type; + u16 mpa_len; + + if (fpdu->incomplete_bytes) { + pkt_type = ECORE_IWARP_MPA_PKT_UNALIGNED; + goto out; + } + + /* special case of one byte remaining... 
*/ + if (tcp_payload_len == 1) { + /* lower byte will be read next packet */ + fpdu->fpdu_length = *mpa_data << 8; + pkt_type = ECORE_IWARP_MPA_PKT_PARTIAL; + goto out; + } + + mpa_len = ntohs(*((u16 *)(mpa_data))); + fpdu->fpdu_length = ECORE_IWARP_FPDU_LEN_WITH_PAD(mpa_len); + + if (fpdu->fpdu_length <= tcp_payload_len) + pkt_type = ECORE_IWARP_MPA_PKT_PACKED; + else + pkt_type = ECORE_IWARP_MPA_PKT_PARTIAL; + +out: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_ALIGN: %s: fpdu_length=0x%x tcp_payload_len:0x%x\n", + pkt_type_str[pkt_type], fpdu->fpdu_length, tcp_payload_len); + + return pkt_type; +} + +static void +ecore_iwarp_init_fpdu(struct ecore_iwarp_ll2_buff *buf, + struct ecore_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *pkt_data, + u16 tcp_payload_size, u8 placement_offset) +{ + fpdu->mpa_buf = buf; + fpdu->pkt_hdr = buf->data_phys_addr + placement_offset; + fpdu->pkt_hdr_size = pkt_data->tcp_payload_offset; + + fpdu->mpa_frag = buf->data_phys_addr + pkt_data->first_mpa_offset; + fpdu->mpa_frag_virt = (u8 *)(buf->data) + pkt_data->first_mpa_offset; + + if (tcp_payload_size == 1) + fpdu->incomplete_bytes = ECORE_IWARP_INVALID_FPDU_LENGTH; + else if (tcp_payload_size < fpdu->fpdu_length) + fpdu->incomplete_bytes = fpdu->fpdu_length - tcp_payload_size; + else + fpdu->incomplete_bytes = 0; /* complete fpdu */ + + fpdu->mpa_frag_len = fpdu->fpdu_length - fpdu->incomplete_bytes; +} + +static enum _ecore_status_t +ecore_iwarp_copy_fpdu(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *pkt_data, + struct ecore_iwarp_ll2_buff *buf, + u16 tcp_payload_size) + +{ + u8 *tmp_buf = p_hwfn->p_rdma_info->iwarp.mpa_intermediate_buf; + enum _ecore_status_t rc; + + /* need to copy the data from the partial packet stored in fpdu + * to the new buf, for this we also need to move the data currently + * placed on the buf. The assumption is that the buffer is big enough + * since fpdu_length <= mss, we use an intermediate buffer since + * we may need to copy the new data to an overlapping location + */ + if ((fpdu->mpa_frag_len + tcp_payload_size) > (u16)buf->buff_size) { + DP_ERR(p_hwfn, + "MPA ALIGN: Unexpected: buffer is not large enough for split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n", + buf->buff_size, fpdu->mpa_frag_len, tcp_payload_size, + fpdu->incomplete_bytes); + return ECORE_INVAL; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA ALIGN Copying fpdu: [%p, %d] [%p, %d]\n", + fpdu->mpa_frag_virt, fpdu->mpa_frag_len, + (u8 *)(buf->data) + pkt_data->first_mpa_offset, + tcp_payload_size); + + OSAL_MEMCPY(tmp_buf, fpdu->mpa_frag_virt, fpdu->mpa_frag_len); + OSAL_MEMCPY(tmp_buf + fpdu->mpa_frag_len, + (u8 *)(buf->data) + pkt_data->first_mpa_offset, + tcp_payload_size); + + rc = ecore_iwarp_recycle_pkt(p_hwfn, fpdu, fpdu->mpa_buf); + if (rc) + return rc; + + /* If we managed to post the buffer copy the data to the new buffer + * o/w this will occur in the next round... 
+ */ + OSAL_MEMCPY((u8 *)(buf->data), tmp_buf, + fpdu->mpa_frag_len + tcp_payload_size); + + fpdu->mpa_buf = buf; + /* fpdu->pkt_hdr remains as is */ + /* fpdu->mpa_frag is overriden with new buf */ + fpdu->mpa_frag = buf->data_phys_addr; + fpdu->mpa_frag_virt = buf->data; + fpdu->mpa_frag_len += tcp_payload_size; + + fpdu->incomplete_bytes -= tcp_payload_size; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA ALIGN: split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n", + buf->buff_size, fpdu->mpa_frag_len, tcp_payload_size, + fpdu->incomplete_bytes); + + return 0; +} + +static void +ecore_iwarp_update_fpdu_length(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu, + u8 *mpa_data) +{ + u16 mpa_len; + + /* Update incomplete packets if needed */ + if (fpdu->incomplete_bytes == ECORE_IWARP_INVALID_FPDU_LENGTH) { + mpa_len = fpdu->fpdu_length | *mpa_data; + fpdu->fpdu_length = ECORE_IWARP_FPDU_LEN_WITH_PAD(mpa_len); + fpdu->mpa_frag_len = fpdu->fpdu_length; + /* one byte of hdr */ + fpdu->incomplete_bytes = fpdu->fpdu_length - 1; + DP_VERBOSE(p_hwfn, + ECORE_MSG_RDMA, + "MPA_ALIGN: Partial header mpa_len=%x fpdu_length=%x incomplete_bytes=%x\n", + mpa_len, fpdu->fpdu_length, fpdu->incomplete_bytes); + } +} + +#define ECORE_IWARP_IS_RIGHT_EDGE(_curr_pkt) \ + (GET_FIELD(_curr_pkt->flags, \ + UNALIGNED_OPAQUE_DATA_PKT_REACHED_WIN_RIGHT_EDGE)) + +/* This function is used to recycle a buffer using the ll2 drop option. It + * uses the mechanism to ensure that all buffers posted to tx before this one + * were completed. The buffer sent here will be sent as a cookie in the tx + * completion function and can then be reposted to rx chain when done. The flow + * that requires this is the flow where a FPDU splits over more than 3 tcp + * segments. In this case the driver needs to re-post a rx buffer instead of + * the one received, but driver can't simply repost a buffer it copied from + * as there is a case where the buffer was originally a packed FPDU, and is + * partially posted to FW. Driver needs to ensure FW is done with it. 
+ */ +static enum _ecore_status_t +ecore_iwarp_recycle_pkt(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu, + struct ecore_iwarp_ll2_buff *buf) +{ + struct ecore_ll2_tx_pkt_info tx_pkt; + enum _ecore_status_t rc; + u8 ll2_handle; + + OSAL_MEM_ZERO(&tx_pkt, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.tx_dest = ECORE_LL2_TX_DEST_DROP; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + buf->piggy_buf = OSAL_NULL; + tx_pkt.cookie = buf; + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + rc = ecore_ll2_prepare_tx_packet(p_hwfn, + ll2_handle, + &tx_pkt, true); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_ALIGN: send drop tx packet [%lx, 0x%x], buf=%p, rc=%d\n", + (long unsigned int)tx_pkt.first_frag, + tx_pkt.first_frag_len, buf, rc); + + if (rc) + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Can't drop packet rc=%d\n", rc); + + return rc; +} + +static enum _ecore_status_t +ecore_iwarp_win_right_edge(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu) +{ + struct ecore_ll2_tx_pkt_info tx_pkt; + enum _ecore_status_t rc; + u8 ll2_handle; + + OSAL_MEM_ZERO(&tx_pkt, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.tx_dest = ECORE_LL2_TX_DEST_LB; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; + + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + tx_pkt.enable_ip_cksum = true; + tx_pkt.enable_l4_cksum = true; + tx_pkt.calc_ip_len = true; + /* vlan overload with enum iwarp_ll2_tx_queues */ + tx_pkt.vlan = IWARP_LL2_ALIGNED_RIGHT_TRIMMED_TX_QUEUE; + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + rc = ecore_ll2_prepare_tx_packet(p_hwfn, + ll2_handle, + &tx_pkt, true); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_ALIGN: Sent right edge FPDU num_bds=%d [%lx, 0x%x], rc=%d\n", + tx_pkt.num_of_bds, (long unsigned int)tx_pkt.first_frag, + tx_pkt.first_frag_len, rc); + + if (rc) + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Can't send right edge rc=%d\n", rc); + + return rc; +} + +static enum _ecore_status_t +ecore_iwarp_send_fpdu(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *curr_pkt, + struct ecore_iwarp_ll2_buff *buf, + u16 tcp_payload_size, + enum ecore_iwarp_mpa_pkt_type pkt_type) +{ + struct ecore_ll2_tx_pkt_info tx_pkt; + enum _ecore_status_t rc; + u8 ll2_handle; + + OSAL_MEM_ZERO(&tx_pkt, sizeof(tx_pkt)); + + tx_pkt.num_of_bds = (pkt_type == ECORE_IWARP_MPA_PKT_UNALIGNED) ? 3 : 2; + tx_pkt.tx_dest = ECORE_LL2_TX_DEST_LB; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; + + /* Send the mpa_buf only with the last fpdu (in case of packed) */ + if ((pkt_type == ECORE_IWARP_MPA_PKT_UNALIGNED) || + (tcp_payload_size <= fpdu->fpdu_length)) + tx_pkt.cookie = fpdu->mpa_buf; + + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + tx_pkt.enable_ip_cksum = true; + tx_pkt.enable_l4_cksum = true; + tx_pkt.calc_ip_len = true; + /* vlan overload with enum iwarp_ll2_tx_queues */ + tx_pkt.vlan = IWARP_LL2_ALIGNED_TX_QUEUE; + + /* special case of unaligned packet and not packed, need to send + * both buffers as cookie to release. 
+ */ + if (tcp_payload_size == fpdu->incomplete_bytes) { + fpdu->mpa_buf->piggy_buf = buf; + } + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + rc = ecore_ll2_prepare_tx_packet(p_hwfn, + ll2_handle, + &tx_pkt, true); + if (rc) + goto err; + + rc = ecore_ll2_set_fragment_of_tx_packet(p_hwfn, ll2_handle, + fpdu->mpa_frag, + fpdu->mpa_frag_len); + if (rc) + goto err; + + if (fpdu->incomplete_bytes) { + rc = ecore_ll2_set_fragment_of_tx_packet( + p_hwfn, ll2_handle, + buf->data_phys_addr + curr_pkt->first_mpa_offset, + fpdu->incomplete_bytes); + + if (rc) + goto err; + } + +err: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_ALIGN: Sent FPDU num_bds=%d [%lx, 0x%x], [0x%lx, 0x%x], [0x%lx, 0x%x] (cookie %p) rc=%d\n", + tx_pkt.num_of_bds, (long unsigned int)tx_pkt.first_frag, + tx_pkt.first_frag_len, (long unsigned int)fpdu->mpa_frag, + fpdu->mpa_frag_len, (long unsigned int)buf->data_phys_addr + + curr_pkt->first_mpa_offset, fpdu->incomplete_bytes, + tx_pkt.cookie, rc); + + return rc; +} + +static void +ecore_iwarp_mpa_get_data(struct ecore_hwfn *p_hwfn, + struct unaligned_opaque_data *curr_pkt, + u32 opaque_data0, u32 opaque_data1) +{ + u64 opaque_data; + + opaque_data = HILO_64(opaque_data1, opaque_data0); + *curr_pkt = *((struct unaligned_opaque_data *)&opaque_data); + + /* fix endianity */ + curr_pkt->first_mpa_offset = curr_pkt->tcp_payload_offset + + OSAL_LE16_TO_CPU(curr_pkt->first_mpa_offset); + curr_pkt->cid = OSAL_LE32_TO_CPU(curr_pkt->cid); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "OPAQUE0=0x%x OPAQUE1=0x%x first_mpa_offset:0x%x\ttcp_payload_offset:0x%x\tflags:0x%x\tcid:0x%x\n", + opaque_data0, opaque_data1, curr_pkt->first_mpa_offset, + curr_pkt->tcp_payload_offset, curr_pkt->flags, + curr_pkt->cid); +} + +static void +ecore_iwarp_mpa_print_tcp_seq(struct ecore_hwfn *p_hwfn, + void *buf) +{ + struct ecore_vlan_ethhdr *vethh; + struct ecore_ethhdr *ethh; + struct ecore_iphdr *iph; + struct ecore_ipv6hdr *ip6h; + struct ecore_tcphdr *tcph; + bool vlan_valid = false; + int eth_hlen, ip_hlen; + u16 eth_type; + + if ((p_hwfn->dp_level > ECORE_LEVEL_VERBOSE) || + !(p_hwfn->dp_module & ECORE_MSG_RDMA)) + return; + + ethh = (struct ecore_ethhdr *)buf; + eth_type = ntohs(ethh->h_proto); + if (eth_type == ETH_P_8021Q) { + vlan_valid = true; + vethh = (struct ecore_vlan_ethhdr *)ethh; + eth_type = ntohs(vethh->h_vlan_encapsulated_proto); + } + + eth_hlen = ETH_HLEN + (vlan_valid ? 
sizeof(u32) : 0); + + iph = (struct ecore_iphdr *)((u8 *)(ethh) + eth_hlen); + + if (eth_type == ETH_P_IP) { + ip_hlen = (iph->ihl)*sizeof(u32); + } else if (eth_type == ETH_P_IPV6) { + ip6h = (struct ecore_ipv6hdr *)iph; + ip_hlen = sizeof(*ip6h); + } else { + DP_ERR(p_hwfn, "Unexpected ethertype on ll2 %x\n", eth_type); + return; + } + + tcph = (struct ecore_tcphdr *)((u8 *)iph + ip_hlen); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Processing MPA PKT: tcp_seq=0x%x tcp_ack_seq=0x%x\n", + ntohl(tcph->seq), ntohl(tcph->ack_seq)); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "eth_type =%d Source mac: [0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]\n", + eth_type, ethh->h_source[0], ethh->h_source[1], + ethh->h_source[2], ethh->h_source[3], + ethh->h_source[4], ethh->h_source[5]); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "eth_hlen=%d destination mac: [0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]\n", + eth_hlen, ethh->h_dest[0], ethh->h_dest[1], + ethh->h_dest[2], ethh->h_dest[3], + ethh->h_dest[4], ethh->h_dest[5]); + + return; +} + +/* This function is called when an unaligned or incomplete MPA packet arrives + * driver needs to align the packet, perhaps using previous data and send + * it down to FW once it is aligned. + */ +static enum _ecore_status_t +ecore_iwarp_process_mpa_pkt(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ll2_mpa_buf *mpa_buf) +{ + struct ecore_iwarp_ll2_buff *buf = mpa_buf->ll2_buf; + enum ecore_iwarp_mpa_pkt_type pkt_type; + struct unaligned_opaque_data *curr_pkt = &mpa_buf->data; + struct ecore_iwarp_fpdu *fpdu; + u8 *mpa_data; + enum _ecore_status_t rc = ECORE_SUCCESS; + + ecore_iwarp_mpa_print_tcp_seq( + p_hwfn, (u8 *)(buf->data) + mpa_buf->placement_offset); + + fpdu = ecore_iwarp_get_curr_fpdu(p_hwfn, curr_pkt->cid & 0xffff); + if (!fpdu) {/* something corrupt with cid, post rx back */ + DP_ERR(p_hwfn, "Invalid cid, drop and post back to rx cid=%x\n", + curr_pkt->cid); + rc = ecore_iwarp_ll2_post_rx( + p_hwfn, buf, p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle); + + if (rc) { /* not much we can do here except log and free */ + DP_ERR(p_hwfn, "Post rx buffer failed\n"); + + /* we don't expect any failures from rx, not even + * busy since we allocate #bufs=#descs + */ + rc = ECORE_UNKNOWN_ERROR; + } + return rc; + } + + do { + mpa_data = ((u8 *)(buf->data) + curr_pkt->first_mpa_offset); + + pkt_type = ecore_iwarp_mpa_classify(p_hwfn, fpdu, + mpa_buf->tcp_payload_len, + mpa_data); + + switch (pkt_type) { + case ECORE_IWARP_MPA_PKT_PARTIAL: + ecore_iwarp_init_fpdu(buf, fpdu, + curr_pkt, + mpa_buf->tcp_payload_len, + mpa_buf->placement_offset); + + if (!ECORE_IWARP_IS_RIGHT_EDGE(curr_pkt)) { + mpa_buf->tcp_payload_len = 0; + break; + } + + rc = ecore_iwarp_win_right_edge(p_hwfn, fpdu); + + if (rc) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Can't send FPDU:reset rc=%d\n", rc); + OSAL_MEM_ZERO(fpdu, sizeof(*fpdu)); + break; + } + + mpa_buf->tcp_payload_len = 0; + break; + case ECORE_IWARP_MPA_PKT_PACKED: + if (fpdu->fpdu_length == 8) { + DP_ERR(p_hwfn, "SUSPICIOUS fpdu_length = 0x%x: assuming bug...aborting this packet...\n", + fpdu->fpdu_length); + mpa_buf->tcp_payload_len = 0; + break; + } + + ecore_iwarp_init_fpdu(buf, fpdu, + curr_pkt, + mpa_buf->tcp_payload_len, + mpa_buf->placement_offset); + + rc = ecore_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf, + mpa_buf->tcp_payload_len, + pkt_type); + if (rc) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Can't send FPDU:reset rc=%d\n", rc); + OSAL_MEM_ZERO(fpdu, sizeof(*fpdu)); + break; + } + mpa_buf->tcp_payload_len -= fpdu->fpdu_length; + 
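/*
 * Worked example (illustrative only) of the fpdu_length consumed from the TCP
 * payload here: for an MPA frame whose 16-bit length field reads 0x1d (29
 * bytes of ULPDU), ECORE_IWARP_FPDU_LEN_WITH_PAD() gives
 *   29 + 2 (MPA length field)        = 31
 *   rounded up to a 4-byte boundary  = 32
 *   + 4 (CRC32 digest)               = 36 bytes on the wire.
 */
#if 0	/* illustrative only */
	u16 ex_mpa_len = 0x1d;
	u16 ex_fpdu_len = ECORE_IWARP_FPDU_LEN_WITH_PAD(ex_mpa_len);	/* == 36 */
#endif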
curr_pkt->first_mpa_offset += fpdu->fpdu_length; + break; + case ECORE_IWARP_MPA_PKT_UNALIGNED: + ecore_iwarp_update_fpdu_length(p_hwfn, fpdu, mpa_data); + if (mpa_buf->tcp_payload_len < fpdu->incomplete_bytes) { + /* special handling of fpdu split over more + * than 2 segments + */ + if (ECORE_IWARP_IS_RIGHT_EDGE(curr_pkt)) { + rc = ecore_iwarp_win_right_edge(p_hwfn, + fpdu); + /* packet will be re-processed later */ + if (rc) + return rc; + } + + rc = ecore_iwarp_copy_fpdu( + p_hwfn, fpdu, curr_pkt, + buf, mpa_buf->tcp_payload_len); + + /* packet will be re-processed later */ + if (rc) + return rc; + + mpa_buf->tcp_payload_len = 0; + + break; + } + + rc = ecore_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf, + mpa_buf->tcp_payload_len, + pkt_type); + if (rc) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Can't send FPDU:delay rc=%d\n", rc); + /* don't reset fpdu -> we need it for next + * classify + */ + break; + } + mpa_buf->tcp_payload_len -= fpdu->incomplete_bytes; + curr_pkt->first_mpa_offset += fpdu->incomplete_bytes; + /* The framed PDU was sent - no more incomplete bytes */ + fpdu->incomplete_bytes = 0; + break; + } + + } while (mpa_buf->tcp_payload_len && !rc); + + return rc; +} + +static void +ecore_iwarp_process_pending_pkts(struct ecore_hwfn *p_hwfn) +{ + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct ecore_iwarp_ll2_mpa_buf *mpa_buf = OSAL_NULL; + enum _ecore_status_t rc; + + while (!OSAL_LIST_IS_EMPTY(&iwarp_info->mpa_buf_pending_list)) { + mpa_buf = OSAL_LIST_FIRST_ENTRY( + &iwarp_info->mpa_buf_pending_list, + struct ecore_iwarp_ll2_mpa_buf, + list_entry); + + rc = ecore_iwarp_process_mpa_pkt(p_hwfn, mpa_buf); + + /* busy means break and continue processing later, don't + * remove the buf from the pending list. + */ + if (rc == ECORE_BUSY) + break; + +#ifdef _NTDDK_ +#pragma warning(suppress : 6011) +#pragma warning(suppress : 28182) +#endif + OSAL_LIST_REMOVE_ENTRY( + &mpa_buf->list_entry, + &iwarp_info->mpa_buf_pending_list); + + OSAL_LIST_PUSH_TAIL(&mpa_buf->list_entry, + &iwarp_info->mpa_buf_list); + + if (rc) { /* different error, don't continue */ + DP_NOTICE(p_hwfn, false, "process pkts failed rc=%d\n", + rc); + break; + } + } +} + +static void +ecore_iwarp_ll2_comp_mpa_pkt(void *cxt, + struct ecore_ll2_comp_rx_data *data) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct ecore_iwarp_ll2_mpa_buf *mpa_buf; + + iwarp_info->unalign_rx_comp++; + + mpa_buf = OSAL_LIST_FIRST_ENTRY(&iwarp_info->mpa_buf_list, + struct ecore_iwarp_ll2_mpa_buf, + list_entry); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "LL2 MPA CompRx buf=%p placement_offset=%d, payload_len=0x%x mpa_buf=%p\n", + data->cookie, data->u.placement_offset, + data->length.packet_length, mpa_buf); + + if (!mpa_buf) { + DP_ERR(p_hwfn, "no free mpa buf. 
this is a driver bug.\n"); + return; + } + OSAL_LIST_REMOVE_ENTRY(&mpa_buf->list_entry, &iwarp_info->mpa_buf_list); + + ecore_iwarp_mpa_get_data(p_hwfn, &mpa_buf->data, + data->opaque_data_0, data->opaque_data_1); + + mpa_buf->tcp_payload_len = data->length.packet_length - + mpa_buf->data.first_mpa_offset; + mpa_buf->ll2_buf = (struct ecore_iwarp_ll2_buff *)data->cookie; + mpa_buf->data.first_mpa_offset += data->u.placement_offset; + mpa_buf->placement_offset = data->u.placement_offset; + + OSAL_LIST_PUSH_TAIL(&mpa_buf->list_entry, + &iwarp_info->mpa_buf_pending_list); + + ecore_iwarp_process_pending_pkts(p_hwfn); +} + +static void +ecore_iwarp_ll2_comp_syn_pkt(void *cxt, struct ecore_ll2_comp_rx_data *data) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_iwarp_ll2_buff *buf = + (struct ecore_iwarp_ll2_buff *)data->cookie; + struct ecore_iwarp_listener *listener; + struct ecore_iwarp_cm_info cm_info; + struct ecore_ll2_tx_pkt_info tx_pkt; + u8 remote_mac_addr[ETH_ALEN]; + u8 local_mac_addr[ETH_ALEN]; + struct ecore_iwarp_ep *ep; + enum _ecore_status_t rc; + int tcp_start_offset; + u8 ts_hdr_size = 0; + int payload_len; + u32 hdr_size; + + OSAL_MEM_ZERO(&cm_info, sizeof(cm_info)); + + /* Check if packet was received with errors... */ + if (data->err_flags != 0) { + DP_NOTICE(p_hwfn, false, "Error received on SYN packet: 0x%x\n", + data->err_flags); + goto err; + } + + if (GET_FIELD(data->parse_flags, + PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) && + GET_FIELD(data->parse_flags, + PARSING_AND_ERR_FLAGS_L4CHKSMERROR)) { + DP_NOTICE(p_hwfn, false, "Syn packet received with checksum error\n"); + goto err; + } + + rc = ecore_iwarp_parse_rx_pkt( + p_hwfn, &cm_info, (u8 *)(buf->data) + data->u.placement_offset, + remote_mac_addr, local_mac_addr, &payload_len, + &tcp_start_offset); + if (rc) + goto err; + + /* Check if there is a listener for this 4-tuple */ + listener = ecore_iwarp_get_listener(p_hwfn, &cm_info); + if (!listener) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "SYN received on tuple not listened on parse_flags=%d packet len=%d\n", + data->parse_flags, data->length.packet_length); + + OSAL_MEMSET(&tx_pkt, 0, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.bd_flags = 0; + tx_pkt.l4_hdr_offset_w = (data->length.packet_length) >> 2; + tx_pkt.tx_dest = ECORE_LL2_TX_DEST_LB; + tx_pkt.first_frag = buf->data_phys_addr + + data->u.placement_offset; + tx_pkt.first_frag_len = data->length.packet_length; + tx_pkt.cookie = buf; + + rc = ecore_ll2_prepare_tx_packet( + p_hwfn, + p_hwfn->p_rdma_info->iwarp.ll2_syn_handle, + &tx_pkt, true); + + if (rc) { + DP_NOTICE(p_hwfn, false, + "Can't post SYN back to chip rc=%d\n", rc); + goto err; + } + return; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Received syn on listening port\n"); + + /* For debugging purpose... */ + if (listener->drop) + goto err; + + /* There may be an open ep on this connection if this is a syn + * retrasnmit... need to make sure there isn't... 
+ */ + if (ecore_iwarp_ep_exists(p_hwfn, listener, &cm_info)) + goto err; + + ep = ecore_iwarp_get_free_ep(p_hwfn); + if (ep == OSAL_NULL) + goto err; + + OSAL_SPIN_LOCK(&listener->lock); + OSAL_LIST_PUSH_TAIL(&ep->list_entry, &listener->ep_list); + OSAL_SPIN_UNLOCK(&listener->lock); + + OSAL_MEMCPY(ep->remote_mac_addr, + remote_mac_addr, + ETH_ALEN); + OSAL_MEMCPY(ep->local_mac_addr, + local_mac_addr, + ETH_ALEN); + + OSAL_MEMCPY(&ep->cm_info, &cm_info, sizeof(ep->cm_info)); + + if (p_hwfn->p_rdma_info->iwarp.tcp_flags & ECORE_IWARP_TS_EN) + ts_hdr_size = TIMESTAMP_HEADER_SIZE; + + hdr_size = ((cm_info.ip_version == ECORE_TCP_IPV4) ? 40 : 60) + + ts_hdr_size; + ep->mss = p_hwfn->p_rdma_info->iwarp.max_mtu - hdr_size; + ep->mss = OSAL_MIN_T(u16, ECORE_IWARP_MAX_FW_MSS, ep->mss); + + ep->listener = listener; + ep->event_cb = listener->event_cb; + ep->cb_context = listener->cb_context; + ep->connect_mode = TCP_CONNECT_PASSIVE; + + ep->syn = buf; + ep->syn_ip_payload_length = (u16)payload_len; + ep->syn_phy_addr = buf->data_phys_addr + data->u.placement_offset + + tcp_start_offset; + + rc = ecore_iwarp_tcp_offload(p_hwfn, ep); + if (rc != ECORE_SUCCESS) { + ecore_iwarp_return_ep(p_hwfn, ep); + goto err; + } + return; + +err: + ecore_iwarp_ll2_post_rx( + p_hwfn, buf, p_hwfn->p_rdma_info->iwarp.ll2_syn_handle); +} + +static void +ecore_iwarp_ll2_rel_rx_pkt(void *cxt, + u8 OSAL_UNUSED connection_handle, + void *cookie, + dma_addr_t OSAL_UNUSED rx_buf_addr, + bool OSAL_UNUSED b_last_packet) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_iwarp_ll2_buff *buffer = + (struct ecore_iwarp_ll2_buff *)cookie; + + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + buffer->data, + buffer->data_phys_addr, + buffer->buff_size); + + OSAL_FREE(p_hwfn->p_dev, buffer); +} + +static void +ecore_iwarp_ll2_comp_tx_pkt(void *cxt, + u8 connection_handle, + void *cookie, + dma_addr_t OSAL_UNUSED first_frag_addr, + bool OSAL_UNUSED b_last_fragment, + bool OSAL_UNUSED b_last_packet) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_iwarp_ll2_buff *buffer = + (struct ecore_iwarp_ll2_buff *)cookie; + struct ecore_iwarp_ll2_buff *piggy; + + if (!buffer) /* can happen in packed mpa unaligned... */ + return; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "LL2 CompTX buf=%p piggy_buf=%p handle=%d\n", + buffer, buffer->piggy_buf, connection_handle); + + /* we got a tx packet -> this was originally a rx packet... now we + * can post it back... 
+ */ + piggy = buffer->piggy_buf; + if (piggy) { + buffer->piggy_buf = OSAL_NULL; + ecore_iwarp_ll2_post_rx(p_hwfn, piggy, + connection_handle); + } + + ecore_iwarp_ll2_post_rx(p_hwfn, buffer, + connection_handle); + + if (connection_handle == p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle) + ecore_iwarp_process_pending_pkts(p_hwfn); + + return; +} + +static void +ecore_iwarp_ll2_rel_tx_pkt(void *cxt, + u8 OSAL_UNUSED connection_handle, + void *cookie, + dma_addr_t OSAL_UNUSED first_frag_addr, + bool OSAL_UNUSED b_last_fragment, + bool OSAL_UNUSED b_last_packet) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_iwarp_ll2_buff *buffer = + (struct ecore_iwarp_ll2_buff *)cookie; + + if (!buffer) + return; + + if (buffer->piggy_buf) { + OSAL_DMA_FREE_COHERENT( + p_hwfn->p_dev, + buffer->piggy_buf->data, + buffer->piggy_buf->data_phys_addr, + buffer->piggy_buf->buff_size); + + OSAL_FREE(p_hwfn->p_dev, buffer->piggy_buf); + } + + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + buffer->data, + buffer->data_phys_addr, + buffer->buff_size); + + OSAL_FREE(p_hwfn->p_dev, buffer); + return; +} + +/* Current known slowpath for iwarp ll2 is unalign flush. When this completion + * is received, need to reset the FPDU. + */ +static void +ecore_iwarp_ll2_slowpath(void *cxt, + u8 OSAL_UNUSED connection_handle, + u32 opaque_data_0, + u32 opaque_data_1) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct unaligned_opaque_data unalign_data; + struct ecore_iwarp_fpdu *fpdu; + + ecore_iwarp_mpa_get_data(p_hwfn, &unalign_data, + opaque_data_0, opaque_data_1); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "(0x%x) Flush fpdu\n", + unalign_data.cid); + + fpdu = ecore_iwarp_get_curr_fpdu(p_hwfn, (u16)unalign_data.cid); + if (fpdu) + OSAL_MEM_ZERO(fpdu, sizeof(*fpdu)); +} + +static int +ecore_iwarp_ll2_stop(struct ecore_hwfn *p_hwfn) +{ + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + int rc = 0; + + if (iwarp_info->ll2_syn_handle != ECORE_IWARP_HANDLE_INVAL) { + + rc = ecore_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_syn_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate syn connection\n"); + + ecore_ll2_release_connection(p_hwfn, + iwarp_info->ll2_syn_handle); + iwarp_info->ll2_syn_handle = ECORE_IWARP_HANDLE_INVAL; + } + + if (iwarp_info->ll2_ooo_handle != ECORE_IWARP_HANDLE_INVAL) { + rc = ecore_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_ooo_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate ooo connection\n"); + + ecore_ll2_release_connection(p_hwfn, + iwarp_info->ll2_ooo_handle); + iwarp_info->ll2_ooo_handle = ECORE_IWARP_HANDLE_INVAL; + } + + if (iwarp_info->ll2_mpa_handle != ECORE_IWARP_HANDLE_INVAL) { + rc = ecore_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_mpa_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate mpa connection\n"); + + ecore_ll2_release_connection(p_hwfn, + iwarp_info->ll2_mpa_handle); + iwarp_info->ll2_mpa_handle = ECORE_IWARP_HANDLE_INVAL; + } + + ecore_llh_remove_mac_filter(p_hwfn->p_dev, 0, + p_hwfn->p_rdma_info->iwarp.mac_addr); + + return rc; +} + +static int +ecore_iwarp_ll2_alloc_buffers(struct ecore_hwfn *p_hwfn, + int num_rx_bufs, + int buff_size, + u8 ll2_handle) +{ + struct ecore_iwarp_ll2_buff *buffer; + int rc = 0; + int i; + + for (i = 0; i < num_rx_bufs; i++) { + buffer = OSAL_ZALLOC(p_hwfn->p_dev, + GFP_KERNEL, sizeof(*buffer)); + if (!buffer) { + DP_INFO(p_hwfn, "Failed to allocate LL2 buffer desc\n"); + break; + } + + buffer->data = + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + 
&buffer->data_phys_addr, + buff_size); + + if (!buffer->data) { + DP_INFO(p_hwfn, "Failed to allocate LL2 buffers\n"); + OSAL_FREE(p_hwfn->p_dev, buffer); + rc = ECORE_NOMEM; + break; + } + + buffer->buff_size = buff_size; + rc = ecore_iwarp_ll2_post_rx(p_hwfn, buffer, ll2_handle); + + if (rc) + break; /* buffers will be deallocated by ecore_ll2 */ + } + return rc; +} + +#define ECORE_IWARP_CACHE_PADDING(size) \ + (((size) + ETH_CACHE_LINE_SIZE - 1) & ~(ETH_CACHE_LINE_SIZE - 1)) + +#define ECORE_IWARP_MAX_BUF_SIZE(mtu) \ + ECORE_IWARP_CACHE_PADDING(mtu + ETH_HLEN + 2*VLAN_HLEN + 2 +\ + ETH_CACHE_LINE_SIZE) + +static int +ecore_iwarp_ll2_start(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_start_in_params *params) +{ + struct ecore_iwarp_info *iwarp_info; + struct ecore_ll2_acquire_data data; + struct ecore_ll2_cbs cbs; + u32 mpa_buff_size; + int rc = ECORE_SUCCESS; + u16 n_ooo_bufs; + int i; + + iwarp_info = &p_hwfn->p_rdma_info->iwarp; + iwarp_info->ll2_syn_handle = ECORE_IWARP_HANDLE_INVAL; + iwarp_info->ll2_ooo_handle = ECORE_IWARP_HANDLE_INVAL; + iwarp_info->ll2_mpa_handle = ECORE_IWARP_HANDLE_INVAL; + + iwarp_info->max_mtu = params->max_mtu; + + OSAL_MEMCPY(p_hwfn->p_rdma_info->iwarp.mac_addr, params->mac_addr, + ETH_ALEN); + + rc = ecore_llh_add_mac_filter(p_hwfn->p_dev, 0, params->mac_addr); + if (rc != ECORE_SUCCESS) + return rc; + + /* Start SYN connection */ + cbs.rx_comp_cb = ecore_iwarp_ll2_comp_syn_pkt; + cbs.rx_release_cb = ecore_iwarp_ll2_rel_rx_pkt; + cbs.tx_comp_cb = ecore_iwarp_ll2_comp_tx_pkt; + cbs.tx_release_cb = ecore_iwarp_ll2_rel_tx_pkt; + cbs.cookie = p_hwfn; + + OSAL_MEMSET(&data, 0, sizeof(data)); + data.input.conn_type = ECORE_LL2_TYPE_IWARP; + data.input.mtu = ECORE_IWARP_MAX_SYN_PKT_SIZE; + data.input.rx_num_desc = ECORE_IWARP_LL2_SYN_RX_SIZE; + data.input.tx_num_desc = ECORE_IWARP_LL2_SYN_TX_SIZE; + data.input.tx_max_bds_per_packet = 1; /* will never be fragmented */ + data.input.tx_tc = PKT_LB_TC; + data.input.tx_dest = ECORE_LL2_TX_DEST_LB; + data.p_connection_handle = &iwarp_info->ll2_syn_handle; + data.cbs = &cbs; + + rc = ecore_ll2_acquire_connection(p_hwfn, &data); + if (rc) { + DP_NOTICE(p_hwfn, false, "Failed to acquire LL2 connection\n"); + ecore_llh_remove_mac_filter(p_hwfn->p_dev, 0, params->mac_addr); + return rc; + } + + rc = ecore_ll2_establish_connection(p_hwfn, iwarp_info->ll2_syn_handle); + if (rc) { + DP_NOTICE(p_hwfn, false, + "Failed to establish LL2 connection\n"); + goto err; + } + + rc = ecore_iwarp_ll2_alloc_buffers(p_hwfn, + ECORE_IWARP_LL2_SYN_RX_SIZE, + ECORE_IWARP_MAX_SYN_PKT_SIZE, + iwarp_info->ll2_syn_handle); + if (rc) + goto err; + + /* Start OOO connection */ + data.input.conn_type = ECORE_LL2_TYPE_OOO; + data.input.mtu = params->max_mtu; + + n_ooo_bufs = params->iwarp.ooo_num_rx_bufs; + + if (n_ooo_bufs > ECORE_IWARP_LL2_OOO_MAX_RX_SIZE) + n_ooo_bufs = ECORE_IWARP_LL2_OOO_MAX_RX_SIZE; + + data.input.rx_num_desc = n_ooo_bufs; + data.input.rx_num_ooo_buffers = n_ooo_bufs; + + p_hwfn->p_rdma_info->iwarp.num_ooo_rx_bufs = data.input.rx_num_desc; + data.input.tx_max_bds_per_packet = 1; /* will never be fragmented */ + data.input.tx_num_desc = ECORE_IWARP_LL2_OOO_DEF_TX_SIZE; + data.p_connection_handle = &iwarp_info->ll2_ooo_handle; + data.input.secondary_queue = true; + + rc = ecore_ll2_acquire_connection(p_hwfn, &data); + if (rc) + goto err; + + rc = ecore_ll2_establish_connection(p_hwfn, iwarp_info->ll2_ooo_handle); + if (rc) + goto err; + + /* Start MPA connection */ + cbs.rx_comp_cb = ecore_iwarp_ll2_comp_mpa_pkt; + 
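	/* Sizing sketch for the MPA Rx buffers allocated below (constant
	 * values assumed for illustration, not taken from this change):
	 * with ETH_HLEN == 14, VLAN_HLEN == 4 and ETH_CACHE_LINE_SIZE == 64,
	 * ECORE_IWARP_MAX_BUF_SIZE(1500) pads
	 * 1500 + 14 + 2*4 + 2 + 64 = 1588 up to the next cache line,
	 * i.e. mpa_buff_size would be 1600 for a 1500-byte MTU.
	 */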
cbs.slowpath_cb = ecore_iwarp_ll2_slowpath; + + OSAL_MEMSET(&data, 0, sizeof(data)); + data.input.conn_type = ECORE_LL2_TYPE_IWARP; + data.input.mtu = params->max_mtu; + data.input.rx_num_desc = n_ooo_bufs * 2; + /* we allocate the same amount for TX to reduce the chance we + * run out of tx descriptors + */ + data.input.tx_num_desc = data.input.rx_num_desc; + data.input.tx_max_bds_per_packet = ECORE_IWARP_MAX_BDS_PER_FPDU; + data.p_connection_handle = &iwarp_info->ll2_mpa_handle; + data.input.secondary_queue = true; + data.cbs = &cbs; + + rc = ecore_ll2_acquire_connection(p_hwfn, &data); + if (rc) + goto err; + + rc = ecore_ll2_establish_connection(p_hwfn, iwarp_info->ll2_mpa_handle); + if (rc) + goto err; + + mpa_buff_size = ECORE_IWARP_MAX_BUF_SIZE(params->max_mtu); + rc = ecore_iwarp_ll2_alloc_buffers(p_hwfn, + data.input.rx_num_desc, + mpa_buff_size, + iwarp_info->ll2_mpa_handle); + if (rc) + goto err; + + iwarp_info->partial_fpdus = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(*iwarp_info->partial_fpdus) * + (u16)p_hwfn->p_rdma_info->num_qps); + + if (!iwarp_info->partial_fpdus) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate ecore_iwarp_info(partial_fpdus)\n"); + goto err; + } + + iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps; + + /* The mpa_bufs array serves for pending RX packets received on the + * mpa ll2 that don't have place on the tx ring and require later + * processing. We can't fail on allocation of such a struct therefore + * we allocate enough to take care of all rx packets + */ + iwarp_info->mpa_bufs = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(*iwarp_info->mpa_bufs) * + data.input.rx_num_desc); + + if (!iwarp_info->mpa_bufs) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate mpa_bufs array mem_size=%d\n", + (u32)(sizeof(*iwarp_info->mpa_bufs) * + data.input.rx_num_desc)); + goto err; + } + + iwarp_info->mpa_intermediate_buf = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, mpa_buff_size); + if (!iwarp_info->mpa_intermediate_buf) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate mpa_intermediate_buf mem_size=%d\n", + mpa_buff_size); + goto err; + } + + OSAL_LIST_INIT(&iwarp_info->mpa_buf_pending_list); + OSAL_LIST_INIT(&iwarp_info->mpa_buf_list); + for (i = 0; i < data.input.rx_num_desc; i++) { + OSAL_LIST_PUSH_TAIL(&iwarp_info->mpa_bufs[i].list_entry, + &iwarp_info->mpa_buf_list); + } + + return rc; + +err: + ecore_iwarp_ll2_stop(p_hwfn); + + return rc; +} + +static void +ecore_iwarp_set_defaults(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_start_in_params *params) +{ + u32 rcv_wnd_size; + u32 n_ooo_bufs; + + /* rcv_wnd_size = 0: use defaults */ + rcv_wnd_size = params->iwarp.rcv_wnd_size; + if (!rcv_wnd_size) { + if (ecore_device_num_ports(p_hwfn->p_dev) == 4) { + rcv_wnd_size = ECORE_IS_AH(p_hwfn->p_dev) ? + ECORE_IWARP_RCV_WND_SIZE_AH_DEF_4_PORTS : + ECORE_IWARP_RCV_WND_SIZE_BB_DEF_4_PORTS; + } else { + rcv_wnd_size = ECORE_IS_AH(p_hwfn->p_dev) ? 
+ ECORE_IWARP_RCV_WND_SIZE_AH_DEF_2_PORTS : + ECORE_IWARP_RCV_WND_SIZE_BB_DEF_2_PORTS; + } + params->iwarp.rcv_wnd_size = rcv_wnd_size; + } + + n_ooo_bufs = params->iwarp.ooo_num_rx_bufs; + if (!n_ooo_bufs) { + n_ooo_bufs = (u32)(((u64)ECORE_MAX_OOO * + params->iwarp.rcv_wnd_size) / + params->max_mtu); + n_ooo_bufs = OSAL_MIN_T(u32, n_ooo_bufs, USHRT_MAX); + params->iwarp.ooo_num_rx_bufs = (u16)n_ooo_bufs; + } +} + +enum _ecore_status_t +ecore_iwarp_setup(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_start_in_params *params) +{ + enum _ecore_status_t rc = ECORE_SUCCESS; + struct ecore_iwarp_info *iwarp_info; + u32 rcv_wnd_size; + + iwarp_info = &(p_hwfn->p_rdma_info->iwarp); + + if (!params->iwarp.rcv_wnd_size || !params->iwarp.ooo_num_rx_bufs) + ecore_iwarp_set_defaults(p_hwfn, params); + + /* Scale 0 will set window of 0xFFFC (64K -4). + * Scale x will set window of 0xFFFC << (x) + * Therefore we subtract log2(64K) so that result is 0 + */ + rcv_wnd_size = params->iwarp.rcv_wnd_size; + if (rcv_wnd_size < ECORE_IWARP_RCV_WND_SIZE_MIN) + rcv_wnd_size = ECORE_IWARP_RCV_WND_SIZE_MIN; + + iwarp_info->rcv_wnd_scale = OSAL_MIN_T(u32, OSAL_LOG2(rcv_wnd_size) - + OSAL_LOG2(ECORE_IWARP_RCV_WND_SIZE_MIN), ECORE_IWARP_MAX_WND_SCALE); + iwarp_info->rcv_wnd_size = rcv_wnd_size >> iwarp_info->rcv_wnd_scale; + + iwarp_info->tcp_flags = params->iwarp.flags; + iwarp_info->crc_needed = params->iwarp.crc_needed; + switch (params->iwarp.mpa_rev) { + case ECORE_MPA_REV1: + iwarp_info->mpa_rev = MPA_NEGOTIATION_TYPE_BASIC; + break; + case ECORE_MPA_REV2: + iwarp_info->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED; + break; + } + + iwarp_info->peer2peer = params->iwarp.mpa_peer2peer; + iwarp_info->rtr_type = MPA_RTR_TYPE_NONE; + + if (params->iwarp.mpa_rtr & ECORE_MPA_RTR_TYPE_ZERO_SEND) + iwarp_info->rtr_type |= MPA_RTR_TYPE_ZERO_SEND; + + if (params->iwarp.mpa_rtr & ECORE_MPA_RTR_TYPE_ZERO_WRITE) + iwarp_info->rtr_type |= MPA_RTR_TYPE_ZERO_WRITE; + + if (params->iwarp.mpa_rtr & ECORE_MPA_RTR_TYPE_ZERO_READ) + iwarp_info->rtr_type |= MPA_RTR_TYPE_ZERO_READ; + + //DAVIDS OSAL_SPIN_LOCK_INIT(&p_hwfn->p_rdma_info->iwarp.qp_lock); + OSAL_LIST_INIT(&p_hwfn->p_rdma_info->iwarp.ep_list); + OSAL_LIST_INIT(&p_hwfn->p_rdma_info->iwarp.listen_list); + + ecore_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP, + ecore_iwarp_async_event); + ecore_ooo_setup(p_hwfn); + + rc = ecore_iwarp_ll2_start(p_hwfn, params); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_REV = %d. peer2peer=%d rtr=%x\n", + iwarp_info->mpa_rev, + iwarp_info->peer2peer, + iwarp_info->rtr_type); + + return rc; +} + +enum _ecore_status_t +ecore_iwarp_stop(struct ecore_hwfn *p_hwfn) +{ + enum _ecore_status_t rc; + + ecore_iwarp_free_prealloc_ep(p_hwfn); + rc = ecore_iwarp_wait_for_all_cids(p_hwfn); + if (rc != ECORE_SUCCESS) + return rc; + + ecore_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP); + + return ecore_iwarp_ll2_stop(p_hwfn); +} + +static void +ecore_iwarp_qp_in_error(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + u8 fw_return_code) +{ + struct ecore_iwarp_cm_event_params params; + + ecore_iwarp_modify_qp(p_hwfn, ep->qp, ECORE_IWARP_QP_STATE_ERROR, true); + + params.event = ECORE_IWARP_EVENT_CLOSE; + params.ep_context = ep; + params.cm_info = &ep->cm_info; + params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ? 
+ ECORE_SUCCESS : ECORE_CONN_RESET; + + ep->state = ECORE_IWARP_EP_CLOSED; + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_REMOVE_ENTRY(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + ep->event_cb(ep->cb_context, ¶ms); +} + +static void +ecore_iwarp_exception_received(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + int fw_ret_code) +{ + struct ecore_iwarp_cm_event_params params; + bool event_cb = false; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "EP(0x%x) fw_ret_code=%d\n", + ep->cid, fw_ret_code); + + switch (fw_ret_code) { + case IWARP_EXCEPTION_DETECTED_LLP_CLOSED: + params.status = ECORE_SUCCESS; + params.event = ECORE_IWARP_EVENT_DISCONNECT; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LLP_RESET: + params.status = ECORE_CONN_RESET; + params.event = ECORE_IWARP_EVENT_DISCONNECT; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_RQ_EMPTY: + params.event = ECORE_IWARP_EVENT_RQ_EMPTY; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_IRQ_FULL: + params.event = ECORE_IWARP_EVENT_IRQ_FULL; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LLP_TIMEOUT: + params.event = ECORE_IWARP_EVENT_LLP_TIMEOUT; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_REMOTE_PROTECTION_ERROR: + params.event = ECORE_IWARP_EVENT_REMOTE_PROTECTION_ERROR; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_CQ_OVERFLOW: + params.event = ECORE_IWARP_EVENT_CQ_OVERFLOW; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LOCAL_CATASTROPHIC: + params.event = ECORE_IWARP_EVENT_QP_CATASTROPHIC; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LOCAL_ACCESS_ERROR: + params.event = ECORE_IWARP_EVENT_LOCAL_ACCESS_ERROR; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_REMOTE_OPERATION_ERROR: + params.event = ECORE_IWARP_EVENT_REMOTE_OPERATION_ERROR; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_TERMINATE_RECEIVED: + params.event = ECORE_IWARP_EVENT_TERMINATE_RECEIVED; + event_cb = true; + break; + default: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Unhandled exception received...\n"); + break; + } + + if (event_cb) { + params.ep_context = ep; + params.cm_info = &ep->cm_info; + ep->event_cb(ep->cb_context, ¶ms); + } +} + +static void +ecore_iwarp_tcp_connect_unsuccessful(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + u8 fw_return_code) +{ + struct ecore_iwarp_cm_event_params params; + + OSAL_MEM_ZERO(¶ms, sizeof(params)); + params.event = ECORE_IWARP_EVENT_ACTIVE_COMPLETE; + params.ep_context = ep; + params.cm_info = &ep->cm_info; + ep->state = ECORE_IWARP_EP_CLOSED; + + switch (fw_return_code) { + case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "%s(0x%x) TCP connect got invalid packet\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), + ep->tcp_cid); + params.status = ECORE_CONN_RESET; + break; + case IWARP_CONN_ERROR_TCP_CONNECTION_RST: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "%s(0x%x) TCP Connection Reset\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), + ep->tcp_cid); + params.status = ECORE_CONN_RESET; + break; + case IWARP_CONN_ERROR_TCP_CONNECT_TIMEOUT: + DP_NOTICE(p_hwfn, false, "%s(0x%x) TCP timeout\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), + ep->tcp_cid); + params.status = ECORE_TIMEOUT; + break; + case IWARP_CONN_ERROR_MPA_NOT_SUPPORTED_VER: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA not supported VER\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), + ep->tcp_cid); + params.status = 
ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_INVALID_PACKET: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA Invalid Packet\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid); + params.status = ECORE_CONN_RESET; + break; + default: + DP_ERR(p_hwfn, "%s(0x%x) Unexpected return code tcp connect: %d\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid, + fw_return_code); + params.status = ECORE_CONN_RESET; + break; + } + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + ep->tcp_cid = ECORE_IWARP_INVALID_TCP_CID; + ecore_iwarp_return_ep(p_hwfn, ep); + } else { + ep->event_cb(ep->cb_context, ¶ms); + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_REMOVE_ENTRY(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + } +} + +static void +ecore_iwarp_connect_complete(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + u8 fw_return_code) +{ + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + /* Done with the SYN packet, post back to ll2 rx */ + ecore_iwarp_ll2_post_rx( + p_hwfn, ep->syn, + p_hwfn->p_rdma_info->iwarp.ll2_syn_handle); + + ep->syn = OSAL_NULL; + + if (ep->state == ECORE_IWARP_EP_ABORTING) + return; + + /* If connect failed - upper layer doesn't know about it */ + if (fw_return_code == RDMA_RETURN_OK) + ecore_iwarp_mpa_received(p_hwfn, ep); + else + ecore_iwarp_tcp_connect_unsuccessful(p_hwfn, ep, + fw_return_code); + + } else { + if (fw_return_code == RDMA_RETURN_OK) + ecore_iwarp_mpa_offload(p_hwfn, ep); + else + ecore_iwarp_tcp_connect_unsuccessful(p_hwfn, ep, + fw_return_code); + } +} + +static OSAL_INLINE bool +ecore_iwarp_check_ep_ok(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + if (ep == OSAL_NULL) { + DP_ERR(p_hwfn, "ERROR ON ASYNC ep=%p\n", ep); + return false; + } + + if (ep->sig != 0xdeadbeef) { + DP_ERR(p_hwfn, "ERROR ON ASYNC ep=%p\n", ep); + return false; + } + + return true; +} + +static enum _ecore_status_t +ecore_iwarp_async_event(struct ecore_hwfn *p_hwfn, + u8 fw_event_code, + u16 OSAL_UNUSED echo, + union event_ring_data *data, + u8 fw_return_code) +{ + struct regpair *fw_handle = &data->rdma_data.async_handle; + struct ecore_iwarp_ep *ep = OSAL_NULL; + u16 cid; + + ep = (struct ecore_iwarp_ep *)(osal_uintptr_t)HILO_64(fw_handle->hi, + fw_handle->lo); + + switch (fw_event_code) { + /* Async completion after TCP 3-way handshake */ + case IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE: + if (!ecore_iwarp_check_ep_ok(p_hwfn, ep)) + return ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "EP(0x%x) IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE fw_ret_code=%d\n", + ep->tcp_cid, fw_return_code); + ecore_iwarp_connect_complete(p_hwfn, ep, fw_return_code); + break; + case IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED: + if (!ecore_iwarp_check_ep_ok(p_hwfn, ep)) + return ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED fw_ret_code=%d\n", + ep->cid, fw_return_code); + ecore_iwarp_exception_received(p_hwfn, ep, fw_return_code); + break; + /* Async completion for Close Connection ramrod */ + case IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE: + if (!ecore_iwarp_check_ep_ok(p_hwfn, ep)) + return ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE fw_ret_code=%d\n", + ep->cid, fw_return_code); + ecore_iwarp_qp_in_error(p_hwfn, ep, fw_return_code); + break; + /* Async event for active side only */ + case IWARP_EVENT_TYPE_ASYNC_ENHANCED_MPA_REPLY_ARRIVED: + if 
(!ecore_iwarp_check_ep_ok(p_hwfn, ep)) + return ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_MPA_REPLY_ARRIVED fw_ret_code=%d\n", + ep->cid, fw_return_code); + ecore_iwarp_mpa_reply_arrived(p_hwfn, ep); + break; + /* MPA Negotiations completed */ + case IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE: + if (!ecore_iwarp_check_ep_ok(p_hwfn, ep)) + return ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE fw_ret_code=%d\n", + ep->cid, fw_return_code); + ecore_iwarp_mpa_complete(p_hwfn, ep, fw_return_code); + break; + case IWARP_EVENT_TYPE_ASYNC_CID_CLEANED: + cid = (u16)OSAL_LE32_TO_CPU(fw_handle->lo); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "(0x%x)IWARP_EVENT_TYPE_ASYNC_CID_CLEANED\n", + cid); + ecore_iwarp_cid_cleaned(p_hwfn, cid); + + break; + case IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW: + DP_NOTICE(p_hwfn, false, + "IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW\n"); + + p_hwfn->p_rdma_info->events.affiliated_event( + p_hwfn->p_rdma_info->events.context, + ECORE_IWARP_EVENT_CQ_OVERFLOW, + (void *)fw_handle); + break; + default: + DP_ERR(p_hwfn, "Received unexpected async iwarp event %d\n", + fw_event_code); + return ECORE_INVAL; + } + return ECORE_SUCCESS; +} + +enum _ecore_status_t +ecore_iwarp_create_listen(void *rdma_cxt, + struct ecore_iwarp_listen_in *iparams, + struct ecore_iwarp_listen_out *oparams) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_iwarp_listener *listener; + + listener = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*listener)); + + if (!listener) { + DP_NOTICE(p_hwfn, + false, + "ecore iwarp create listener failed: cannot allocate memory (listener). rc = %d\n", + ECORE_NOMEM); + return ECORE_NOMEM; + } + listener->ip_version = iparams->ip_version; + OSAL_MEMCPY(listener->ip_addr, + iparams->ip_addr, + sizeof(listener->ip_addr)); + listener->port = iparams->port; + listener->vlan = iparams->vlan; + + listener->event_cb = iparams->event_cb; + listener->cb_context = iparams->cb_context; + listener->max_backlog = iparams->max_backlog; + listener->state = ECORE_IWARP_LISTENER_STATE_ACTIVE; + oparams->handle = listener; + + OSAL_SPIN_LOCK_INIT(&listener->lock); + OSAL_LIST_INIT(&listener->ep_list); + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_PUSH_TAIL(&listener->list_entry, + &p_hwfn->p_rdma_info->iwarp.listen_list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "callback=%p handle=%p ip=%x:%x:%x:%x port=0x%x vlan=0x%x\n", + listener->event_cb, + listener, + listener->ip_addr[0], + listener->ip_addr[1], + listener->ip_addr[2], + listener->ip_addr[3], + listener->port, + listener->vlan); + + return ECORE_SUCCESS; +} + +static void +ecore_iwarp_pause_complete(struct ecore_iwarp_listener *listener) +{ + struct ecore_iwarp_cm_event_params params; + + if (listener->state == ECORE_IWARP_LISTENER_STATE_UNPAUSE) + listener->state = ECORE_IWARP_LISTENER_STATE_ACTIVE; + + params.event = ECORE_IWARP_EVENT_LISTEN_PAUSE_COMP; + listener->event_cb(listener->cb_context, ¶ms); +} + +static void +ecore_iwarp_tcp_abort_comp(struct ecore_hwfn *p_hwfn, void *cookie, + union event_ring_data OSAL_UNUSED *data, + u8 OSAL_UNUSED fw_return_code) +{ + struct ecore_iwarp_ep *ep = (struct ecore_iwarp_ep *)cookie; + struct ecore_iwarp_listener *listener = ep->listener; + + ecore_iwarp_return_ep(p_hwfn, ep); + + if (OSAL_LIST_IS_EMPTY(&listener->ep_list)) + listener->done = true; +} + +static void 
+ecore_iwarp_abort_inflight_connections(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_listener *listener) +{ + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_iwarp_ep *ep = OSAL_NULL; + struct ecore_sp_init_data init_data; + struct ecore_spq_comp_cb comp_data; + enum _ecore_status_t rc; + + /* remove listener from list before destroying listener */ + OSAL_LIST_REMOVE_ENTRY(&listener->list_entry, + &p_hwfn->p_rdma_info->iwarp.listen_list); + if (OSAL_LIST_IS_EMPTY(&listener->ep_list)) { + listener->done = true; + return; + } + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.p_comp_data = &comp_data; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_CB; + init_data.p_comp_data->function = ecore_iwarp_tcp_abort_comp; + + OSAL_LIST_FOR_EACH_ENTRY(ep, &listener->ep_list, + list_entry, struct ecore_iwarp_ep) { + ep->state = ECORE_IWARP_EP_ABORTING; + init_data.p_comp_data->cookie = ep; + init_data.cid = ep->tcp_cid; + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_ABORT_TCP_OFFLOAD, + PROTOCOLID_IWARP, + &init_data); + if (rc == ECORE_SUCCESS) + ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + } +} + +static void +ecore_iwarp_listener_state_transition(struct ecore_hwfn *p_hwfn, void *cookie, + union event_ring_data OSAL_UNUSED *data, + u8 OSAL_UNUSED fw_return_code) +{ + struct ecore_iwarp_listener *listener = (struct ecore_iwarp_listener *)cookie; + + switch (listener->state) { + case ECORE_IWARP_LISTENER_STATE_PAUSE: + case ECORE_IWARP_LISTENER_STATE_UNPAUSE: + ecore_iwarp_pause_complete(listener); + break; + case ECORE_IWARP_LISTENER_STATE_DESTROYING: + ecore_iwarp_abort_inflight_connections(p_hwfn, listener); + break; + default: + break; + } +} + +static enum _ecore_status_t +ecore_iwarp_empty_ramrod(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_listener *listener) +{ + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_spq_comp_cb comp_data; + struct ecore_sp_init_data init_data; + enum _ecore_status_t rc; + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.p_comp_data = &comp_data; + init_data.cid = ecore_spq_get_cid(p_hwfn); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_CB; + init_data.p_comp_data->function = ecore_iwarp_listener_state_transition; + init_data.p_comp_data->cookie = listener; + rc = ecore_sp_init_request(p_hwfn, &p_ent, + COMMON_RAMROD_EMPTY, + PROTOCOLID_COMMON, + &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + return rc; + + return rc; +} + +enum _ecore_status_t +ecore_iwarp_pause_listen(void *rdma_cxt, void *handle, + bool pause, bool comp) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_iwarp_listener *listener = + (struct ecore_iwarp_listener *)handle; + enum _ecore_status_t rc; + + listener->state = pause ? 
+ ECORE_IWARP_LISTENER_STATE_PAUSE : + ECORE_IWARP_LISTENER_STATE_UNPAUSE; + if (!comp) + return ECORE_SUCCESS; + + rc = ecore_iwarp_empty_ramrod(p_hwfn, listener); + if (rc != ECORE_SUCCESS) + return rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "listener=%p, state=%d\n", + listener, listener->state); + + return ECORE_PENDING; +} + +enum _ecore_status_t +ecore_iwarp_destroy_listen(void *rdma_cxt, void *handle) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_iwarp_listener *listener = + (struct ecore_iwarp_listener *)handle; + enum _ecore_status_t rc; + int wait_count = 0; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "handle=%p\n", handle); + + listener->state = ECORE_IWARP_LISTENER_STATE_DESTROYING; + rc = ecore_iwarp_empty_ramrod(p_hwfn, listener); + if (rc != ECORE_SUCCESS) + return rc; + + while (!listener->done) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Waiting for ep list to be empty...\n"); + OSAL_MSLEEP(100); + if (wait_count++ > 200) { + DP_NOTICE(p_hwfn, false, "ep list close timeout\n"); + break; + } + } + + OSAL_FREE(p_hwfn->p_dev, listener); + + return ECORE_SUCCESS; +} + +enum _ecore_status_t +ecore_iwarp_send_rtr(void *rdma_cxt, struct ecore_iwarp_send_rtr_in *iparams) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + struct ecore_rdma_qp *qp; + struct ecore_iwarp_ep *ep; + enum _ecore_status_t rc; + + ep = (struct ecore_iwarp_ep *)iparams->ep_context; + if (!ep) { + DP_ERR(p_hwfn, "Ep Context receive in send_rtr is NULL\n"); + return ECORE_INVAL; + } + + qp = ep->qp; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x) EP(0x%x)\n", + qp->icid, ep->tcp_cid); + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_CB; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_MPA_OFFLOAD_SEND_RTR, + PROTOCOLID_IWARP, &init_data); + + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ecore_iwarp_send_rtr, rc = 0x%x\n", + rc); + + return rc; +} + +enum _ecore_status_t +ecore_iwarp_query_qp(struct ecore_rdma_qp *qp, + struct ecore_rdma_query_qp_out_params *out_params) +{ + out_params->state = ecore_iwarp2roce_state(qp->iwarp_state); + return ECORE_SUCCESS; +} + +#ifdef _NTDDK_ +#pragma warning(pop) +#endif diff --git a/sys/dev/qlnx/qlnxe/ecore_ll2.c b/sys/dev/qlnx/qlnxe/ecore_ll2.c new file mode 100644 index 000000000000..95b31d3bebc9 --- /dev/null +++ b/sys/dev/qlnx/qlnxe/ecore_ll2.c @@ -0,0 +1,2211 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * File : ecore_ll2.c + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "bcm_osal.h" + +#include "ecore.h" +#include "ecore_status.h" +#include "ecore_ll2.h" +#include "reg_addr.h" +#include "ecore_int.h" +#include "ecore_cxt.h" +#include "ecore_sp_commands.h" +#include "ecore_hw.h" +#include "reg_addr.h" +#include "ecore_dev_api.h" +#include "ecore_iro.h" +#include "ecore_gtt_reg_addr.h" +#include "ecore_ooo.h" +#include "ecore_hw.h" +#include "ecore_mcp.h" + +#define ECORE_LL2_RX_REGISTERED(ll2) ((ll2)->rx_queue.b_cb_registred) +#define ECORE_LL2_TX_REGISTERED(ll2) ((ll2)->tx_queue.b_cb_registred) + +#ifdef _NTDDK_ +#pragma warning(push) +#pragma warning(disable : 28167) +#pragma warning(disable : 28123) +#pragma warning(disable : 28121) +#endif + +static struct ecore_ll2_info * +__ecore_ll2_handle_sanity(struct ecore_hwfn *p_hwfn, + u8 connection_handle, + bool b_lock, bool b_only_active) +{ + struct ecore_ll2_info *p_ll2_conn, *p_ret = OSAL_NULL; + + if (connection_handle >= ECORE_MAX_NUM_OF_LL2_CONNECTIONS) + return OSAL_NULL; + + if (!p_hwfn->p_ll2_info) + return OSAL_NULL; + + /* TODO - is there really need for the locked vs. unlocked + * variant? I simply used what was already there. + */ + p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle]; + + if (b_only_active) { + if (b_lock) + OSAL_MUTEX_ACQUIRE(&p_ll2_conn->mutex); + if (p_ll2_conn->b_active) + p_ret = p_ll2_conn; + if (b_lock) + OSAL_MUTEX_RELEASE(&p_ll2_conn->mutex); + } else { + p_ret = p_ll2_conn; + } + + return p_ret; +} + +static struct ecore_ll2_info * +ecore_ll2_handle_sanity(struct ecore_hwfn *p_hwfn, + u8 connection_handle) +{ + return __ecore_ll2_handle_sanity(p_hwfn, connection_handle, + false, true); +} + +static struct ecore_ll2_info * +ecore_ll2_handle_sanity_lock(struct ecore_hwfn *p_hwfn, + u8 connection_handle) +{ + return __ecore_ll2_handle_sanity(p_hwfn, connection_handle, + true, true); +} + +static struct ecore_ll2_info * +ecore_ll2_handle_sanity_inactive(struct ecore_hwfn *p_hwfn, + u8 connection_handle) +{ + return __ecore_ll2_handle_sanity(p_hwfn, connection_handle, + false, false); +} + +#ifndef LINUX_REMOVE +/* TODO - is this really been used by anyone? Is it a on future todo list? 
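 *
 * Usage sketch (the local names and consume_frag() are made up for
 * illustration): from within the Tx completion callback, where the first
 * fragment has already been handed over, the remaining fragments of the
 * packet currently being completed could be walked as:
 *
 *	dma_addr_t addr;
 *	bool last = false;
 *
 *	while (!last &&
 *	    ecore_ll2_get_fragment_of_tx_packet(p_hwfn, connection_handle,
 *		&addr, &last) == ECORE_SUCCESS)
 *		consume_frag(addr);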
*/ +enum _ecore_status_t +ecore_ll2_get_fragment_of_tx_packet(struct ecore_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t *p_addr, + bool *b_last_fragment) +{ + struct ecore_ll2_tx_packet *p_pkt; + struct ecore_ll2_info *p_ll2_conn; + u16 cur_frag_idx = 0; + + p_ll2_conn = ecore_ll2_handle_sanity(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) + return ECORE_INVAL; + p_pkt = &p_ll2_conn->tx_queue.cur_completing_packet; + + if (!p_ll2_conn->tx_queue.b_completing_packet || !p_addr) + return ECORE_INVAL; + + if (p_ll2_conn->tx_queue.cur_completing_bd_idx == p_pkt->bd_used) + return ECORE_INVAL; + + /* Packet is available and has at least one more frag - provide it */ + cur_frag_idx = p_ll2_conn->tx_queue.cur_completing_bd_idx++; + *p_addr = p_pkt->bds_set[cur_frag_idx].tx_frag; + if (b_last_fragment) + *b_last_fragment = p_pkt->bd_used == + p_ll2_conn->tx_queue.cur_completing_bd_idx; + + return ECORE_SUCCESS; +} +#endif + +static void ecore_ll2_txq_flush(struct ecore_hwfn *p_hwfn, + u8 connection_handle) +{ + bool b_last_packet = false, b_last_frag = false; + struct ecore_ll2_tx_packet *p_pkt = OSAL_NULL; + struct ecore_ll2_info *p_ll2_conn; + struct ecore_ll2_tx_queue *p_tx; + unsigned long flags = 0; + dma_addr_t tx_frag; + + p_ll2_conn = ecore_ll2_handle_sanity_inactive(p_hwfn, + connection_handle); + if (p_ll2_conn == OSAL_NULL) + return; + p_tx = &p_ll2_conn->tx_queue; + + OSAL_SPIN_LOCK_IRQSAVE(&p_tx->lock, flags); + while (!OSAL_LIST_IS_EMPTY(&p_tx->active_descq)) { + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_tx->active_descq, + struct ecore_ll2_tx_packet, + list_entry); + + if (p_pkt == OSAL_NULL) + break; + +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_tx->active_descq); + b_last_packet = OSAL_LIST_IS_EMPTY(&p_tx->active_descq); + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_tx->free_descq); + OSAL_SPIN_UNLOCK_IRQSAVE(&p_tx->lock, flags); + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_OOO) { + struct ecore_ooo_buffer *p_buffer; + + p_buffer = (struct ecore_ooo_buffer *)p_pkt->cookie; + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buffer); + } else { + p_tx->cur_completing_packet = *p_pkt; + p_tx->cur_completing_bd_idx = 1; + b_last_frag = p_tx->cur_completing_bd_idx == + p_pkt->bd_used; + + tx_frag = p_pkt->bds_set[0].tx_frag; + p_ll2_conn->cbs.tx_release_cb(p_ll2_conn->cbs.cookie, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, + b_last_packet); + } + OSAL_SPIN_LOCK_IRQSAVE(&p_tx->lock, flags); + } + OSAL_SPIN_UNLOCK_IRQSAVE(&p_tx->lock, flags); +} + +static enum _ecore_status_t +ecore_ll2_txq_completion(struct ecore_hwfn *p_hwfn, + void *p_cookie) +{ + struct ecore_ll2_info *p_ll2_conn = (struct ecore_ll2_info*)p_cookie; + struct ecore_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + u16 new_idx = 0, num_bds = 0, num_bds_in_packet = 0; + struct ecore_ll2_tx_packet *p_pkt; + bool b_last_frag = false; + unsigned long flags; + enum _ecore_status_t rc = ECORE_INVAL; + + OSAL_SPIN_LOCK_IRQSAVE(&p_tx->lock, flags); + if (p_tx->b_completing_packet) { + /* TODO - this looks completely unnecessary to me - the only + * way we can re-enter is by the DPC calling us again, but this + * would only happen AFTER we return, and we unset this at end + * of the function. 
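 *
 * Aside, with made-up index values: the (s16) casts in the BD accounting
 * below keep the count correct across the 16-bit wrap of the firmware
 * consumer index.  E.g. new_idx == 0x0003 and p_tx->bds_idx == 0xfffe
 * give (s16)0x0003 - (s16)0xfffe == 3 - (-2) == 5 completed BDs, the
 * same as (0x0003 - 0xfffe) & 0xffff, provided fewer than 32768 BDs are
 * ever outstanding.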
+ */ + rc = ECORE_BUSY; + goto out; + } + + new_idx = OSAL_LE16_TO_CPU(*p_tx->p_fw_cons); + num_bds = ((s16)new_idx - (s16)p_tx->bds_idx); + while (num_bds) { + if (OSAL_LIST_IS_EMPTY(&p_tx->active_descq)) + goto out; + + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_tx->active_descq, + struct ecore_ll2_tx_packet, + list_entry); + if (!p_pkt) + goto out; + + p_tx->b_completing_packet = true; + p_tx->cur_completing_packet = *p_pkt; + num_bds_in_packet = p_pkt->bd_used; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_tx->active_descq); + + if (num_bds < num_bds_in_packet) { + DP_NOTICE(p_hwfn, true, + "Rest of BDs does not cover whole packet\n"); + goto out; + } + + num_bds -= num_bds_in_packet; + p_tx->bds_idx += num_bds_in_packet; + while (num_bds_in_packet--) + ecore_chain_consume(&p_tx->txq_chain); + + p_tx->cur_completing_bd_idx = 1; + b_last_frag = p_tx->cur_completing_bd_idx == + p_pkt->bd_used; + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_tx->free_descq); + + OSAL_SPIN_UNLOCK_IRQSAVE(&p_tx->lock, flags); + + p_ll2_conn->cbs.tx_comp_cb(p_ll2_conn->cbs.cookie, + p_ll2_conn->my_id, + p_pkt->cookie, + p_pkt->bds_set[0].tx_frag, + b_last_frag, + !num_bds); + + OSAL_SPIN_LOCK_IRQSAVE(&p_tx->lock, flags); + } + + p_tx->b_completing_packet = false; + rc = ECORE_SUCCESS; +out: + OSAL_SPIN_UNLOCK_IRQSAVE(&p_tx->lock, flags); + return rc; +} + +static void ecore_ll2_rxq_parse_gsi(union core_rx_cqe_union *p_cqe, + struct ecore_ll2_comp_rx_data *data) +{ + data->parse_flags = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_gsi.parse_flags.flags); + data->length.data_length = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_gsi.data_length); + data->vlan = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_gsi.vlan); + data->opaque_data_0 = + OSAL_LE32_TO_CPU(p_cqe->rx_cqe_gsi.src_mac_addrhi); + data->opaque_data_1 = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_gsi.src_mac_addrlo); + data->u.data_length_error = + p_cqe->rx_cqe_gsi.data_length_error; + data->qp_id = OSAL_LE16_TO_CPU(p_cqe->rx_cqe_gsi.qp_id); + + data->src_qp = OSAL_LE32_TO_CPU(p_cqe->rx_cqe_gsi.src_qp); +} + +static void ecore_ll2_rxq_parse_reg(union core_rx_cqe_union *p_cqe, + struct ecore_ll2_comp_rx_data *data) +{ + data->parse_flags = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_fp.parse_flags.flags); + data->err_flags = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_fp.err_flags.flags); + data->length.packet_length = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_fp.packet_length); + data->vlan = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_fp.vlan); + data->opaque_data_0 = + OSAL_LE32_TO_CPU(p_cqe->rx_cqe_fp.opaque_data.data[0]); + data->opaque_data_1 = + OSAL_LE32_TO_CPU(p_cqe->rx_cqe_fp.opaque_data.data[1]); + data->u.placement_offset = + p_cqe->rx_cqe_fp.placement_offset; +} + +#if defined(_NTDDK_) +#pragma warning(suppress : 28167 26110) +#endif +static enum _ecore_status_t +ecore_ll2_handle_slowpath(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn, + union core_rx_cqe_union *p_cqe, + unsigned long *p_lock_flags) +{ + struct ecore_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct core_rx_slow_path_cqe *sp_cqe; + + sp_cqe = &p_cqe->rx_cqe_sp; + if (sp_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) { + DP_NOTICE(p_hwfn, true, + "LL2 - unexpected Rx CQE slowpath ramrod_cmd_id:%d\n", + sp_cqe->ramrod_cmd_id); + return ECORE_INVAL; + } + + if (p_ll2_conn->cbs.slowpath_cb == OSAL_NULL) { + DP_NOTICE(p_hwfn, true, + "LL2 - received RX_QUEUE_FLUSH but no callback was provided\n"); + return ECORE_INVAL; + } + + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, 
*p_lock_flags); + + p_ll2_conn->cbs.slowpath_cb(p_ll2_conn->cbs.cookie, + p_ll2_conn->my_id, + OSAL_LE32_TO_CPU(sp_cqe->opaque_data.data[0]), + OSAL_LE32_TO_CPU(sp_cqe->opaque_data.data[1])); + + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, *p_lock_flags); + + return ECORE_SUCCESS; +} + +static enum _ecore_status_t +ecore_ll2_rxq_handle_completion(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn, + union core_rx_cqe_union *p_cqe, + unsigned long *p_lock_flags, + bool b_last_cqe) +{ + struct ecore_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct ecore_ll2_rx_packet *p_pkt = OSAL_NULL; + struct ecore_ll2_comp_rx_data data; + + if (!OSAL_LIST_IS_EMPTY(&p_rx->active_descq)) + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_rx->active_descq, + struct ecore_ll2_rx_packet, + list_entry); + if (!p_pkt) { + DP_NOTICE(p_hwfn, false, + "[%d] LL2 Rx completion but active_descq is empty\n", + p_ll2_conn->input.conn_type); + + return ECORE_IO; + } + + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, &p_rx->active_descq); + + if (p_cqe->rx_cqe_sp.type == CORE_RX_CQE_TYPE_REGULAR) + ecore_ll2_rxq_parse_reg(p_cqe, &data); + else + ecore_ll2_rxq_parse_gsi(p_cqe, &data); + + if (ecore_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd) { + DP_NOTICE(p_hwfn, false, + "Mismatch between active_descq and the LL2 Rx chain\n"); + /* TODO - didn't return error value since this wasn't handled + * before, but this is obviously lacking. + */ + } + + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, &p_rx->free_descq); + + data.connection_handle = p_ll2_conn->my_id; + data.cookie = p_pkt->cookie; + data.rx_buf_addr = p_pkt->rx_buf_addr; + data.b_last_packet = b_last_cqe; + + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, *p_lock_flags); + p_ll2_conn->cbs.rx_comp_cb(p_ll2_conn->cbs.cookie, + &data); + + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, *p_lock_flags); + + return ECORE_SUCCESS; +} + +static enum _ecore_status_t ecore_ll2_rxq_completion(struct ecore_hwfn *p_hwfn, + void *cookie) +{ + struct ecore_ll2_info *p_ll2_conn = (struct ecore_ll2_info*)cookie; + struct ecore_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + union core_rx_cqe_union *cqe = OSAL_NULL; + u16 cq_new_idx = 0, cq_old_idx = 0; + unsigned long flags = 0; + enum _ecore_status_t rc = ECORE_SUCCESS; + + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, flags); + cq_new_idx = OSAL_LE16_TO_CPU(*p_rx->p_fw_cons); + cq_old_idx = ecore_chain_get_cons_idx(&p_rx->rcq_chain); + + while (cq_new_idx != cq_old_idx) { + bool b_last_cqe = (cq_new_idx == cq_old_idx); + + cqe = (union core_rx_cqe_union *)ecore_chain_consume(&p_rx->rcq_chain); + cq_old_idx = ecore_chain_get_cons_idx(&p_rx->rcq_chain); + + DP_VERBOSE(p_hwfn, ECORE_MSG_LL2, + "LL2 [sw. cons %04x, fw. 
at %04x] - Got Packet of type %02x\n", + cq_old_idx, cq_new_idx, cqe->rx_cqe_sp.type); + + switch (cqe->rx_cqe_sp.type) { + case CORE_RX_CQE_TYPE_SLOW_PATH: + rc = ecore_ll2_handle_slowpath(p_hwfn, p_ll2_conn, + cqe, &flags); + break; + case CORE_RX_CQE_TYPE_GSI_OFFLOAD: + case CORE_RX_CQE_TYPE_REGULAR: + rc = ecore_ll2_rxq_handle_completion(p_hwfn, p_ll2_conn, + cqe, &flags, + b_last_cqe); + break; + default: + rc = ECORE_IO; + } + } + + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, flags); + return rc; +} + +static void ecore_ll2_rxq_flush(struct ecore_hwfn *p_hwfn, + u8 connection_handle) +{ + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + struct ecore_ll2_rx_packet *p_pkt = OSAL_NULL; + struct ecore_ll2_rx_queue *p_rx; + unsigned long flags = 0; + + p_ll2_conn = ecore_ll2_handle_sanity_inactive(p_hwfn, + connection_handle); + if (p_ll2_conn == OSAL_NULL) + return; + p_rx = &p_ll2_conn->rx_queue; + + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, flags); + while (!OSAL_LIST_IS_EMPTY(&p_rx->active_descq)) { + bool b_last; + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_rx->active_descq, + struct ecore_ll2_rx_packet, + list_entry); + if (p_pkt == OSAL_NULL) + break; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_rx->active_descq); + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_rx->free_descq); + b_last = OSAL_LIST_IS_EMPTY(&p_rx->active_descq); + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, flags); + + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_OOO) { + struct ecore_ooo_buffer *p_buffer; + + p_buffer = (struct ecore_ooo_buffer *)p_pkt->cookie; + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buffer); + } else { + dma_addr_t rx_buf_addr = p_pkt->rx_buf_addr; + void *cookie = p_pkt->cookie; + + p_ll2_conn->cbs.rx_release_cb(p_ll2_conn->cbs.cookie, + p_ll2_conn->my_id, + cookie, + rx_buf_addr, + b_last); + } + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, flags); + } + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, flags); +} + +static bool +ecore_ll2_lb_rxq_handler_slowpath(struct ecore_hwfn *p_hwfn, + struct core_rx_slow_path_cqe *p_cqe) +{ + struct ooo_opaque *iscsi_ooo; + u32 cid; + + if (p_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) + return false; + + iscsi_ooo = (struct ooo_opaque *)&p_cqe->opaque_data; + if (iscsi_ooo->ooo_opcode != TCP_EVENT_DELETE_ISLES) + return false; + + /* Need to make a flush */ + cid = OSAL_LE32_TO_CPU(iscsi_ooo->cid); + ecore_ooo_release_connection_isles(p_hwfn->p_ooo_info, cid); + + return true; +} + +static enum _ecore_status_t +ecore_ll2_lb_rxq_handler(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct ecore_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + u16 packet_length = 0, parse_flags = 0, vlan = 0; + struct ecore_ll2_rx_packet *p_pkt = OSAL_NULL; + u32 num_ooo_add_to_peninsula = 0, cid; + union core_rx_cqe_union *cqe = OSAL_NULL; + u16 cq_new_idx = 0, cq_old_idx = 0; + struct ecore_ooo_buffer *p_buffer; + struct ooo_opaque *iscsi_ooo; + u8 placement_offset = 0; + u8 cqe_type; + + cq_new_idx = OSAL_LE16_TO_CPU(*p_rx->p_fw_cons); + cq_old_idx = ecore_chain_get_cons_idx(&p_rx->rcq_chain); + if (cq_new_idx == cq_old_idx) + return ECORE_SUCCESS; + + while (cq_new_idx != cq_old_idx) { + struct core_rx_fast_path_cqe *p_cqe_fp; + + cqe = (union core_rx_cqe_union *)ecore_chain_consume(&p_rx->rcq_chain); + cq_old_idx = ecore_chain_get_cons_idx(&p_rx->rcq_chain); + cqe_type = cqe->rx_cqe_sp.type; + + if (cqe_type == CORE_RX_CQE_TYPE_SLOW_PATH) + if (ecore_ll2_lb_rxq_handler_slowpath(p_hwfn, + 
&cqe->rx_cqe_sp)) + continue; + + if (cqe_type != CORE_RX_CQE_TYPE_REGULAR) { + DP_NOTICE(p_hwfn, true, + "Got a non-regular LB LL2 completion [type 0x%02x]\n", + cqe_type); + return ECORE_INVAL; + } + p_cqe_fp = &cqe->rx_cqe_fp; + + placement_offset = p_cqe_fp->placement_offset; + parse_flags = OSAL_LE16_TO_CPU(p_cqe_fp->parse_flags.flags); + packet_length = OSAL_LE16_TO_CPU(p_cqe_fp->packet_length); + vlan = OSAL_LE16_TO_CPU(p_cqe_fp->vlan); + iscsi_ooo = (struct ooo_opaque *)&p_cqe_fp->opaque_data; + ecore_ooo_save_history_entry(p_hwfn->p_ooo_info, iscsi_ooo); + cid = OSAL_LE32_TO_CPU(iscsi_ooo->cid); + + /* Process delete isle first*/ + if (iscsi_ooo->drop_size) + ecore_ooo_delete_isles(p_hwfn, p_hwfn->p_ooo_info, cid, + iscsi_ooo->drop_isle, + iscsi_ooo->drop_size); + + if (iscsi_ooo->ooo_opcode == TCP_EVENT_NOP) + continue; + + /* Now process create/add/join isles */ + if (OSAL_LIST_IS_EMPTY(&p_rx->active_descq)) { + DP_NOTICE(p_hwfn, true, + "LL2 OOO RX chain has no submitted buffers\n"); + return ECORE_IO; + } + + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_rx->active_descq, + struct ecore_ll2_rx_packet, + list_entry); + + if ((iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_NEW_ISLE) || + (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_RIGHT) || + (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_LEFT) || + (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_PEN) || + (iscsi_ooo->ooo_opcode == TCP_EVENT_JOIN)) { + if (!p_pkt) { + DP_NOTICE(p_hwfn, true, + "LL2 OOO RX packet is not valid\n"); + return ECORE_IO; + } +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_rx->active_descq); + p_buffer = (struct ecore_ooo_buffer *)p_pkt->cookie; + p_buffer->packet_length = packet_length; + p_buffer->parse_flags = parse_flags; + p_buffer->vlan = vlan; + p_buffer->placement_offset = placement_offset; + if (ecore_chain_consume(&p_rx->rxq_chain) != + p_pkt->rxq_bd) { + /**/ + } + ecore_ooo_dump_rx_event(p_hwfn, iscsi_ooo, p_buffer); + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_rx->free_descq); + + switch (iscsi_ooo->ooo_opcode) { + case TCP_EVENT_ADD_NEW_ISLE: + ecore_ooo_add_new_isle(p_hwfn, + p_hwfn->p_ooo_info, + cid, + iscsi_ooo->ooo_isle, + p_buffer); + break; + case TCP_EVENT_ADD_ISLE_RIGHT: + ecore_ooo_add_new_buffer(p_hwfn, + p_hwfn->p_ooo_info, + cid, + iscsi_ooo->ooo_isle, + p_buffer, + ECORE_OOO_RIGHT_BUF); + break; + case TCP_EVENT_ADD_ISLE_LEFT: + ecore_ooo_add_new_buffer(p_hwfn, + p_hwfn->p_ooo_info, + cid, + iscsi_ooo->ooo_isle, + p_buffer, + ECORE_OOO_LEFT_BUF); + break; + case TCP_EVENT_JOIN: + ecore_ooo_add_new_buffer(p_hwfn, + p_hwfn->p_ooo_info, + cid, + iscsi_ooo->ooo_isle + + 1, + p_buffer, + ECORE_OOO_LEFT_BUF); + ecore_ooo_join_isles(p_hwfn, + p_hwfn->p_ooo_info, + cid, + iscsi_ooo->ooo_isle); + break; + case TCP_EVENT_ADD_PEN: + num_ooo_add_to_peninsula++; + ecore_ooo_put_ready_buffer(p_hwfn->p_ooo_info, + p_buffer, true); + break; + } + } else { + DP_NOTICE(p_hwfn, true, + "Unexpected event (%d) TX OOO completion\n", + iscsi_ooo->ooo_opcode); + } + } + + return ECORE_SUCCESS; +} + +static void +ecore_ooo_submit_tx_buffers(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct ecore_ll2_tx_pkt_info tx_pkt; + struct ecore_ooo_buffer *p_buffer; + dma_addr_t first_frag; + u16 l4_hdr_offset_w; + u8 bd_flags; + enum _ecore_status_t rc; + + /* Submit Tx buffers here */ + while ((p_buffer = ecore_ooo_get_ready_buffer(p_hwfn->p_ooo_info))) { + l4_hdr_offset_w = 0; + bd_flags = 0; + + first_frag = p_buffer->rx_buffer_phys_addr 
+ + p_buffer->placement_offset; + SET_FIELD(bd_flags, CORE_TX_BD_DATA_FORCE_VLAN_MODE, 1); + SET_FIELD(bd_flags, CORE_TX_BD_DATA_L4_PROTOCOL, 1); + + OSAL_MEM_ZERO(&tx_pkt, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.vlan = p_buffer->vlan; + tx_pkt.bd_flags = bd_flags; + tx_pkt.l4_hdr_offset_w = l4_hdr_offset_w; + tx_pkt.tx_dest = (enum ecore_ll2_tx_dest)p_ll2_conn->tx_dest; + tx_pkt.first_frag = first_frag; + tx_pkt.first_frag_len = p_buffer->packet_length; + tx_pkt.cookie = p_buffer; + + rc = ecore_ll2_prepare_tx_packet(p_hwfn, p_ll2_conn->my_id, + &tx_pkt, true); + if (rc != ECORE_SUCCESS) { + ecore_ooo_put_ready_buffer(p_hwfn->p_ooo_info, + p_buffer, false); + break; + } + } +} + +static void +ecore_ooo_submit_rx_buffers(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct ecore_ooo_buffer *p_buffer; + enum _ecore_status_t rc; + + while ((p_buffer = ecore_ooo_get_free_buffer(p_hwfn->p_ooo_info))) { + rc = ecore_ll2_post_rx_buffer(p_hwfn, + p_ll2_conn->my_id, + p_buffer->rx_buffer_phys_addr, + 0, p_buffer, true); + if (rc != ECORE_SUCCESS) { + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buffer); + break; + } + } +} + +static enum _ecore_status_t +ecore_ll2_lb_rxq_completion(struct ecore_hwfn *p_hwfn, + void *p_cookie) +{ + struct ecore_ll2_info *p_ll2_conn = (struct ecore_ll2_info *)p_cookie; + enum _ecore_status_t rc; + + rc = ecore_ll2_lb_rxq_handler(p_hwfn, p_ll2_conn); + if (rc != ECORE_SUCCESS) + return rc; + + ecore_ooo_submit_rx_buffers(p_hwfn, p_ll2_conn); + ecore_ooo_submit_tx_buffers(p_hwfn, p_ll2_conn); + + return 0; +} + +static enum _ecore_status_t +ecore_ll2_lb_txq_completion(struct ecore_hwfn *p_hwfn, + void *p_cookie) +{ + struct ecore_ll2_info *p_ll2_conn = (struct ecore_ll2_info *)p_cookie; + struct ecore_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct ecore_ll2_tx_packet *p_pkt = OSAL_NULL; + struct ecore_ooo_buffer *p_buffer; + bool b_dont_submit_rx = false; + u16 new_idx = 0, num_bds = 0; + enum _ecore_status_t rc; + + new_idx = OSAL_LE16_TO_CPU(*p_tx->p_fw_cons); + num_bds = ((s16)new_idx - (s16)p_tx->bds_idx); + + if (!num_bds) + return ECORE_SUCCESS; + + while (num_bds) { + + if (OSAL_LIST_IS_EMPTY(&p_tx->active_descq)) + return ECORE_INVAL; + + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_tx->active_descq, + struct ecore_ll2_tx_packet, + list_entry); + if (!p_pkt) + return ECORE_INVAL; + + if (p_pkt->bd_used != 1) { + DP_NOTICE(p_hwfn, true, + "Unexpectedly many BDs(%d) in TX OOO completion\n", + p_pkt->bd_used); + return ECORE_INVAL; + } + + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_tx->active_descq); + + num_bds--; + p_tx->bds_idx++; + ecore_chain_consume(&p_tx->txq_chain); + + p_buffer = (struct ecore_ooo_buffer *)p_pkt->cookie; + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_tx->free_descq); + + if (b_dont_submit_rx) { + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buffer); + continue; + } + + rc = ecore_ll2_post_rx_buffer(p_hwfn, p_ll2_conn->my_id, + p_buffer->rx_buffer_phys_addr, 0, + p_buffer, true); + if (rc != ECORE_SUCCESS) { + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buffer); + b_dont_submit_rx = true; + } + } + + ecore_ooo_submit_tx_buffers(p_hwfn, p_ll2_conn); + + return ECORE_SUCCESS; +} + +static enum _ecore_status_t ecore_sp_ll2_rx_queue_start(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn, + u8 action_on_error) +{ + enum ecore_ll2_conn_type conn_type = p_ll2_conn->input.conn_type; + struct ecore_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct core_rx_start_ramrod_data *p_ramrod = 
OSAL_NULL; + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_sp_init_data init_data; + u16 cqe_pbl_size; + enum _ecore_status_t rc = ECORE_SUCCESS; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_RX_QUEUE_START, + PROTOCOLID_CORE, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.core_rx_queue_start; + + p_ramrod->sb_id = OSAL_CPU_TO_LE16(ecore_int_get_sp_sb_id(p_hwfn)); + p_ramrod->sb_index = p_rx->rx_sb_index; + p_ramrod->complete_event_flg = 1; + + p_ramrod->mtu = OSAL_CPU_TO_LE16(p_ll2_conn->input.mtu); + DMA_REGPAIR_LE(p_ramrod->bd_base, + p_rx->rxq_chain.p_phys_addr); + cqe_pbl_size = (u16)ecore_chain_get_page_cnt(&p_rx->rcq_chain); + p_ramrod->num_of_pbl_pages = OSAL_CPU_TO_LE16(cqe_pbl_size); + DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, + ecore_chain_get_pbl_phys(&p_rx->rcq_chain)); + + p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg; + p_ramrod->inner_vlan_stripping_en = + p_ll2_conn->input.rx_vlan_removal_en; + + if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits) && + (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_FCOE)) + p_ramrod->report_outer_vlan = 1; + p_ramrod->queue_id = p_ll2_conn->queue_id; + p_ramrod->main_func_queue = p_ll2_conn->main_func_queue; + + if (OSAL_TEST_BIT(ECORE_MF_LL2_NON_UNICAST, + &p_hwfn->p_dev->mf_bits) && + p_ramrod->main_func_queue && + ((conn_type != ECORE_LL2_TYPE_ROCE) && + (conn_type != ECORE_LL2_TYPE_IWARP))) { + p_ramrod->mf_si_bcast_accept_all = 1; + p_ramrod->mf_si_mcast_accept_all = 1; + } else { + p_ramrod->mf_si_bcast_accept_all = 0; + p_ramrod->mf_si_mcast_accept_all = 0; + } + + p_ramrod->action_on_error.error_type = action_on_error; + p_ramrod->gsi_offload_flag = p_ll2_conn->input.gsi_enable; + return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); +} + +static enum _ecore_status_t ecore_sp_ll2_tx_queue_start(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + enum ecore_ll2_conn_type conn_type = p_ll2_conn->input.conn_type; + struct ecore_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct core_tx_start_ramrod_data *p_ramrod = OSAL_NULL; + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_sp_init_data init_data; + u16 pq_id = 0, pbl_size; + enum _ecore_status_t rc = ECORE_NOTIMPL; + + if (!ECORE_LL2_TX_REGISTERED(p_ll2_conn)) + return ECORE_SUCCESS; + + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_OOO) + p_ll2_conn->tx_stats_en = 0; + else + p_ll2_conn->tx_stats_en = 1; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_TX_QUEUE_START, + PROTOCOLID_CORE, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.core_tx_queue_start; + + p_ramrod->sb_id = OSAL_CPU_TO_LE16(ecore_int_get_sp_sb_id(p_hwfn)); + p_ramrod->sb_index = p_tx->tx_sb_index; + p_ramrod->mtu = OSAL_CPU_TO_LE16(p_ll2_conn->input.mtu); + p_ramrod->stats_en = p_ll2_conn->tx_stats_en; + p_ramrod->stats_id = p_ll2_conn->tx_stats_id; + + DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, + ecore_chain_get_pbl_phys(&p_tx->txq_chain)); + pbl_size = (u16)ecore_chain_get_page_cnt(&p_tx->txq_chain); + p_ramrod->pbl_size = 
OSAL_CPU_TO_LE16(pbl_size); + + /* TODO RESC_ALLOC pq for ll2 */ + switch (p_ll2_conn->input.tx_tc) { + case PURE_LB_TC: + pq_id = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB); + break; + case PKT_LB_TC: + pq_id = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OOO); + break; + default: + pq_id = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + } + + p_ramrod->qm_pq_id = OSAL_CPU_TO_LE16(pq_id); + + switch (conn_type) { + case ECORE_LL2_TYPE_FCOE: + p_ramrod->conn_type = PROTOCOLID_FCOE; + break; + case ECORE_LL2_TYPE_ISCSI: + p_ramrod->conn_type = PROTOCOLID_ISCSI; + break; + case ECORE_LL2_TYPE_ROCE: + p_ramrod->conn_type = PROTOCOLID_ROCE; + break; + case ECORE_LL2_TYPE_IWARP: + p_ramrod->conn_type = PROTOCOLID_IWARP; + break; + case ECORE_LL2_TYPE_OOO: + if (p_hwfn->hw_info.personality == ECORE_PCI_ISCSI) { + p_ramrod->conn_type = PROTOCOLID_ISCSI; + } else { + p_ramrod->conn_type = PROTOCOLID_IWARP; + } + break; + default: + p_ramrod->conn_type = PROTOCOLID_ETH; + DP_NOTICE(p_hwfn, false, "Unknown connection type: %d\n", + conn_type); + } + + p_ramrod->gsi_offload_flag = p_ll2_conn->input.gsi_enable; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_db_recovery_add(p_hwfn->p_dev, p_tx->doorbell_addr, + &p_tx->db_msg, DB_REC_WIDTH_32B, + DB_REC_KERNEL); + return rc; +} + +static enum _ecore_status_t ecore_sp_ll2_rx_queue_stop(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct core_rx_stop_ramrod_data *p_ramrod = OSAL_NULL; + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_sp_init_data init_data; + enum _ecore_status_t rc = ECORE_NOTIMPL; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_RX_QUEUE_STOP, + PROTOCOLID_CORE, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.core_rx_queue_stop; + + p_ramrod->complete_event_flg = 1; + p_ramrod->queue_id = p_ll2_conn->queue_id; + + return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); +} + +static enum _ecore_status_t ecore_sp_ll2_tx_queue_stop(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct ecore_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_sp_init_data init_data; + enum _ecore_status_t rc = ECORE_NOTIMPL; + + ecore_db_recovery_del(p_hwfn->p_dev, p_tx->doorbell_addr, + &p_tx->db_msg); + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_TX_QUEUE_STOP, + PROTOCOLID_CORE, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); +} + +static enum _ecore_status_t +ecore_ll2_acquire_connection_rx(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_info) +{ + struct ecore_ll2_rx_packet *p_descq; + u32 capacity; + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (!p_ll2_info->input.rx_num_desc) + goto out; + + rc = ecore_chain_alloc(p_hwfn->p_dev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_NEXT_PTR, + ECORE_CHAIN_CNT_TYPE_U16, + p_ll2_info->input.rx_num_desc, + sizeof(struct core_rx_bd), + &p_ll2_info->rx_queue.rxq_chain, OSAL_NULL); + if (rc) { + DP_NOTICE(p_hwfn, false, 
+ "Failed to allocate ll2 rxq chain\n"); + goto out; + } + + capacity = ecore_chain_get_capacity(&p_ll2_info->rx_queue.rxq_chain); + p_descq = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + capacity * sizeof(struct ecore_ll2_rx_packet)); + if (!p_descq) { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "Failed to allocate ll2 Rx desc\n"); + goto out; + } + p_ll2_info->rx_queue.descq_array = p_descq; + + rc = ecore_chain_alloc(p_hwfn->p_dev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U16, + p_ll2_info->input.rx_num_desc, + sizeof(struct core_rx_fast_path_cqe), + &p_ll2_info->rx_queue.rcq_chain, OSAL_NULL); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate ll2 rcq chain\n"); + goto out; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_LL2, + "Allocated LL2 Rxq [Type %08x] with 0x%08x buffers\n", + p_ll2_info->input.conn_type, + p_ll2_info->input.rx_num_desc); + +out: + return rc; +} + +static enum _ecore_status_t +ecore_ll2_acquire_connection_tx(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_info) +{ + struct ecore_ll2_tx_packet *p_descq; + u32 capacity; + enum _ecore_status_t rc = ECORE_SUCCESS; + u32 desc_size; + + if (!p_ll2_info->input.tx_num_desc) + goto out; + + rc = ecore_chain_alloc(p_hwfn->p_dev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U16, + p_ll2_info->input.tx_num_desc, + sizeof(struct core_tx_bd), + &p_ll2_info->tx_queue.txq_chain, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto out; + + capacity = ecore_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain); + desc_size = (sizeof(*p_descq) + + (p_ll2_info->input.tx_max_bds_per_packet - 1) * + sizeof(p_descq->bds_set)); + + p_descq = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + capacity * desc_size); + if (!p_descq) { + rc = ECORE_NOMEM; + goto out; + } + p_ll2_info->tx_queue.descq_array = p_descq; + + DP_VERBOSE(p_hwfn, ECORE_MSG_LL2, + "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n", + p_ll2_info->input.conn_type, + p_ll2_info->input.tx_num_desc); + +out: + if (rc != ECORE_SUCCESS) + DP_NOTICE(p_hwfn, false, + "Can't allocate memory for Tx LL2 with 0x%08x buffers\n", + p_ll2_info->input.tx_num_desc); + return rc; +} + +static enum _ecore_status_t +ecore_ll2_acquire_connection_ooo(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_info, u16 mtu) +{ + struct ecore_ooo_buffer *p_buf = OSAL_NULL; + u32 rx_buffer_size = 0; + void *p_virt; + u16 buf_idx; + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (p_ll2_info->input.conn_type != ECORE_LL2_TYPE_OOO) + return rc; + + /* Correct number of requested OOO buffers if needed */ + if (!p_ll2_info->input.rx_num_ooo_buffers) { + u16 num_desc = p_ll2_info->input.rx_num_desc; + + if (!num_desc) + return ECORE_INVAL; + p_ll2_info->input.rx_num_ooo_buffers = num_desc * 2; + } + + /* TODO - use some defines for buffer size */ + rx_buffer_size = mtu + 14 + 4 + 8 + ETH_CACHE_LINE_SIZE; + rx_buffer_size = (rx_buffer_size + ETH_CACHE_LINE_SIZE - 1) & + ~(ETH_CACHE_LINE_SIZE - 1); + + for (buf_idx = 0; buf_idx < p_ll2_info->input.rx_num_ooo_buffers; + buf_idx++) { + p_buf = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*p_buf)); + if (!p_buf) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate ooo descriptor\n"); + rc = ECORE_NOMEM; + goto out; + } + + p_buf->rx_buffer_size = rx_buffer_size; + p_virt = OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + &p_buf->rx_buffer_phys_addr, + p_buf->rx_buffer_size); + if (!p_virt) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate ooo buffer\n"); + 
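+	/* DMA allocation for this OOO buffer failed: release the descriptor
+	 * allocated just above and unwind through the common error path.
+	 */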
OSAL_FREE(p_hwfn->p_dev, p_buf); + rc = ECORE_NOMEM; + goto out; + } + p_buf->rx_buffer_virt_addr = p_virt; + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buf); + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_LL2, + "Allocated [%04x] LL2 OOO buffers [each of size 0x%08x]\n", + p_ll2_info->input.rx_num_ooo_buffers, rx_buffer_size); + +out: + return rc; +} + +static enum _ecore_status_t +ecore_ll2_set_cbs(struct ecore_ll2_info *p_ll2_info, + const struct ecore_ll2_cbs *cbs) +{ + if (!cbs || (!cbs->rx_comp_cb || + !cbs->rx_release_cb || + !cbs->tx_comp_cb || + !cbs->tx_release_cb || + !cbs->cookie)) + return ECORE_INVAL; + + p_ll2_info->cbs.rx_comp_cb = cbs->rx_comp_cb; + p_ll2_info->cbs.rx_release_cb = cbs->rx_release_cb; + p_ll2_info->cbs.tx_comp_cb = cbs->tx_comp_cb; + p_ll2_info->cbs.tx_release_cb = cbs->tx_release_cb; + p_ll2_info->cbs.slowpath_cb = cbs->slowpath_cb; + p_ll2_info->cbs.cookie = cbs->cookie; + + return ECORE_SUCCESS; +} + +static enum core_error_handle +ecore_ll2_get_error_choice(enum ecore_ll2_error_handle err) +{ + switch (err) { + case ECORE_LL2_DROP_PACKET: + return LL2_DROP_PACKET; + case ECORE_LL2_DO_NOTHING: + return LL2_DO_NOTHING; + case ECORE_LL2_ASSERT: + return LL2_ASSERT; + default: + return LL2_DO_NOTHING; + } +} + +enum _ecore_status_t +ecore_ll2_acquire_connection(void *cxt, + struct ecore_ll2_acquire_data *data) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + ecore_int_comp_cb_t comp_rx_cb, comp_tx_cb; + struct ecore_ll2_info *p_ll2_info = OSAL_NULL; + enum _ecore_status_t rc; + u8 i, *p_tx_max; + + if (!data->p_connection_handle || !p_hwfn->p_ll2_info) { + DP_NOTICE(p_hwfn, false, "Invalid connection handle, ll2_info not allocated\n"); + return ECORE_INVAL; + } + + /* Find a free connection to be used */ + for (i = 0; (i < ECORE_MAX_NUM_OF_LL2_CONNECTIONS); i++) { + OSAL_MUTEX_ACQUIRE(&p_hwfn->p_ll2_info[i].mutex); + if (p_hwfn->p_ll2_info[i].b_active) { + OSAL_MUTEX_RELEASE(&p_hwfn->p_ll2_info[i].mutex); + continue; + } + + p_hwfn->p_ll2_info[i].b_active = true; + p_ll2_info = &p_hwfn->p_ll2_info[i]; + OSAL_MUTEX_RELEASE(&p_hwfn->p_ll2_info[i].mutex); + break; + } + if (p_ll2_info == OSAL_NULL) { + DP_NOTICE(p_hwfn, false, "No available ll2 connection\n"); + return ECORE_BUSY; + } + + OSAL_MEMCPY(&p_ll2_info->input, &data->input, + sizeof(p_ll2_info->input)); + + switch (data->input.tx_dest) { + case ECORE_LL2_TX_DEST_NW: + p_ll2_info->tx_dest = CORE_TX_DEST_NW; + break; + case ECORE_LL2_TX_DEST_LB: + p_ll2_info->tx_dest = CORE_TX_DEST_LB; + break; + case ECORE_LL2_TX_DEST_DROP: + p_ll2_info->tx_dest = CORE_TX_DEST_DROP; + break; + default: + return ECORE_INVAL; + } + + if ((data->input.conn_type == ECORE_LL2_TYPE_OOO) || + data->input.secondary_queue) + p_ll2_info->main_func_queue = false; + else + p_ll2_info->main_func_queue = true; + + /* Correct maximum number of Tx BDs */ + p_tx_max = &p_ll2_info->input.tx_max_bds_per_packet; + if (*p_tx_max == 0) + *p_tx_max = CORE_LL2_TX_MAX_BDS_PER_PACKET; + else + *p_tx_max = OSAL_MIN_T(u8, *p_tx_max, + CORE_LL2_TX_MAX_BDS_PER_PACKET); + + rc = ecore_ll2_set_cbs(p_ll2_info, data->cbs); + if (rc) { + DP_NOTICE(p_hwfn, false, "Invalid callback functions\n"); + goto q_allocate_fail; + } + + rc = ecore_ll2_acquire_connection_rx(p_hwfn, p_ll2_info); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "ll2 acquire rx connection failed\n"); + goto q_allocate_fail; + } + + rc = ecore_ll2_acquire_connection_tx(p_hwfn, p_ll2_info); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "ll2 acquire tx connection 
failed\n"); + goto q_allocate_fail; + } + + rc = ecore_ll2_acquire_connection_ooo(p_hwfn, p_ll2_info, + data->input.mtu); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "ll2 acquire ooo connection failed\n"); + goto q_allocate_fail; + } + + /* Register callbacks for the Rx/Tx queues */ + if (data->input.conn_type == ECORE_LL2_TYPE_OOO) { + comp_rx_cb = ecore_ll2_lb_rxq_completion; + comp_tx_cb = ecore_ll2_lb_txq_completion; + + } else { + comp_rx_cb = ecore_ll2_rxq_completion; + comp_tx_cb = ecore_ll2_txq_completion; + } + + if (data->input.rx_num_desc) { + ecore_int_register_cb(p_hwfn, comp_rx_cb, + &p_hwfn->p_ll2_info[i], + &p_ll2_info->rx_queue.rx_sb_index, + &p_ll2_info->rx_queue.p_fw_cons); + p_ll2_info->rx_queue.b_cb_registred = true; + } + + if (data->input.tx_num_desc) { + ecore_int_register_cb(p_hwfn, + comp_tx_cb, + &p_hwfn->p_ll2_info[i], + &p_ll2_info->tx_queue.tx_sb_index, + &p_ll2_info->tx_queue.p_fw_cons); + p_ll2_info->tx_queue.b_cb_registred = true; + } + + *(data->p_connection_handle) = i; + return rc; + +q_allocate_fail: + ecore_ll2_release_connection(p_hwfn, i); + return ECORE_NOMEM; +} + +static enum _ecore_status_t ecore_ll2_establish_connection_rx(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + enum ecore_ll2_error_handle error_input; + enum core_error_handle error_mode; + u8 action_on_error = 0; + + if (!ECORE_LL2_RX_REGISTERED(p_ll2_conn)) + return ECORE_SUCCESS; + + DIRECT_REG_WR(p_hwfn, p_ll2_conn->rx_queue.set_prod_addr, 0x0); + error_input = p_ll2_conn->input.ai_err_packet_too_big; + error_mode = ecore_ll2_get_error_choice(error_input); + SET_FIELD(action_on_error, + CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG, error_mode); + error_input = p_ll2_conn->input.ai_err_no_buf; + error_mode = ecore_ll2_get_error_choice(error_input); + SET_FIELD(action_on_error, + CORE_RX_ACTION_ON_ERROR_NO_BUFF, error_mode); + + return ecore_sp_ll2_rx_queue_start(p_hwfn, p_ll2_conn, action_on_error); +} + +static void +ecore_ll2_establish_connection_ooo(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + if (p_ll2_conn->input.conn_type != ECORE_LL2_TYPE_OOO) + return; + + ecore_ooo_release_all_isles(p_hwfn->p_ooo_info); + ecore_ooo_submit_rx_buffers(p_hwfn, p_ll2_conn); +} + +enum _ecore_status_t ecore_ll2_establish_connection(void *cxt, + u8 connection_handle) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct e4_core_conn_context *p_cxt; + struct ecore_ll2_info *p_ll2_conn; + struct ecore_cxt_info cxt_info; + struct ecore_ll2_rx_queue *p_rx; + struct ecore_ll2_tx_queue *p_tx; + struct ecore_ll2_tx_packet *p_pkt; + struct ecore_ptt *p_ptt; + enum _ecore_status_t rc = ECORE_NOTIMPL; + u32 i, capacity; + u32 desc_size; + u8 qid; + + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) + return ECORE_AGAIN; + + p_ll2_conn = ecore_ll2_handle_sanity_lock(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) { + rc = ECORE_INVAL; + goto out; + } + + p_rx = &p_ll2_conn->rx_queue; + p_tx = &p_ll2_conn->tx_queue; + + ecore_chain_reset(&p_rx->rxq_chain); + ecore_chain_reset(&p_rx->rcq_chain); + OSAL_LIST_INIT(&p_rx->active_descq); + OSAL_LIST_INIT(&p_rx->free_descq); + OSAL_LIST_INIT(&p_rx->posting_descq); + OSAL_SPIN_LOCK_INIT(&p_rx->lock); + capacity = ecore_chain_get_capacity(&p_rx->rxq_chain); + for (i = 0; i < capacity; i++) + OSAL_LIST_PUSH_TAIL(&p_rx->descq_array[i].list_entry, + &p_rx->free_descq); + *p_rx->p_fw_cons = 0; + + ecore_chain_reset(&p_tx->txq_chain); + OSAL_LIST_INIT(&p_tx->active_descq); + 
OSAL_LIST_INIT(&p_tx->free_descq); + OSAL_LIST_INIT(&p_tx->sending_descq); + OSAL_SPIN_LOCK_INIT(&p_tx->lock); + capacity = ecore_chain_get_capacity(&p_tx->txq_chain); + /* The size of the element in descq_array is flexible */ + desc_size = (sizeof(*p_pkt) + + (p_ll2_conn->input.tx_max_bds_per_packet - 1) * + sizeof(p_pkt->bds_set)); + + for (i = 0; i < capacity; i++) { + p_pkt = (struct ecore_ll2_tx_packet *)((u8 *)p_tx->descq_array + + desc_size*i); + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_tx->free_descq); + } + p_tx->cur_completing_bd_idx = 0; + p_tx->bds_idx = 0; + p_tx->b_completing_packet = false; + p_tx->cur_send_packet = OSAL_NULL; + p_tx->cur_send_frag_num = 0; + p_tx->cur_completing_frag_num = 0; + *p_tx->p_fw_cons = 0; + + rc = ecore_cxt_acquire_cid(p_hwfn, PROTOCOLID_CORE, &p_ll2_conn->cid); + if (rc) + goto out; + cxt_info.iid = p_ll2_conn->cid; + rc = ecore_cxt_get_cid_info(p_hwfn, &cxt_info); + if (rc) { + DP_NOTICE(p_hwfn, true, "Cannot find context info for cid=%d\n", + p_ll2_conn->cid); + goto out; + } + + p_cxt = cxt_info.p_cxt; + + /* @@@TBD we zero the context until we have ilt_reset implemented. */ + OSAL_MEM_ZERO(p_cxt, sizeof(*p_cxt)); + + qid = ecore_ll2_handle_to_queue_id(p_hwfn, connection_handle); + p_ll2_conn->queue_id = qid; + p_ll2_conn->tx_stats_id = qid; + p_rx->set_prod_addr = (u8 OSAL_IOMEM*)p_hwfn->regview + + GTT_BAR0_MAP_REG_TSDM_RAM + + TSTORM_LL2_RX_PRODS_OFFSET(qid); + p_tx->doorbell_addr = (u8 OSAL_IOMEM*)p_hwfn->doorbells + + DB_ADDR(p_ll2_conn->cid, + DQ_DEMS_LEGACY); + + /* prepare db data */ + SET_FIELD(p_tx->db_msg.params, CORE_DB_DATA_DEST, DB_DEST_XCM); + SET_FIELD(p_tx->db_msg.params, CORE_DB_DATA_AGG_CMD, + DB_AGG_CMD_SET); + SET_FIELD(p_tx->db_msg.params, CORE_DB_DATA_AGG_VAL_SEL, + DQ_XCM_CORE_TX_BD_PROD_CMD); + p_tx->db_msg.agg_flags = DQ_XCM_CORE_DQ_CF_CMD; + + rc = ecore_ll2_establish_connection_rx(p_hwfn, p_ll2_conn); + if (rc) + goto out; + + rc = ecore_sp_ll2_tx_queue_start(p_hwfn, p_ll2_conn); + if (rc) + goto out; + + if (!ECORE_IS_RDMA_PERSONALITY(p_hwfn)) + ecore_wr(p_hwfn, p_ptt, PRS_REG_USE_LIGHT_L2, 1); + + ecore_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn); + + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_FCOE) { + if (!OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, + &p_hwfn->p_dev->mf_bits)) + ecore_llh_add_protocol_filter(p_hwfn->p_dev, 0, + ECORE_LLH_FILTER_ETHERTYPE, + 0x8906, 0); + ecore_llh_add_protocol_filter(p_hwfn->p_dev, 0, + ECORE_LLH_FILTER_ETHERTYPE, + 0x8914, 0); + } + +out: + ecore_ptt_release(p_hwfn, p_ptt); + + return rc; +} + +static void ecore_ll2_post_rx_buffer_notify_fw(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_rx_queue *p_rx, + struct ecore_ll2_rx_packet *p_curp) +{ + struct ecore_ll2_rx_packet *p_posting_packet = OSAL_NULL; + struct core_ll2_rx_prod rx_prod = {0, 0, 0}; + bool b_notify_fw = false; + u16 bd_prod, cq_prod; + + /* This handles the flushing of already posted buffers */ + while (!OSAL_LIST_IS_EMPTY(&p_rx->posting_descq)) { + p_posting_packet = OSAL_LIST_FIRST_ENTRY(&p_rx->posting_descq, + struct ecore_ll2_rx_packet, + list_entry); +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_posting_packet->list_entry, &p_rx->posting_descq); + OSAL_LIST_PUSH_TAIL(&p_posting_packet->list_entry, &p_rx->active_descq); + b_notify_fw = true; + } + + /* This handles the supplied packet [if there is one] */ + if (p_curp) { + OSAL_LIST_PUSH_TAIL(&p_curp->list_entry, + &p_rx->active_descq); + b_notify_fw = true; + } + + if (!b_notify_fw) + return; + + 
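+	/* Mirror the local BD and CQE producer indices into the TSTORM Rx
+	 * producers RAM (set_prod_addr) so firmware picks up the newly
+	 * posted buffers.
+	 */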
bd_prod = ecore_chain_get_prod_idx(&p_rx->rxq_chain); + cq_prod = ecore_chain_get_prod_idx(&p_rx->rcq_chain); + rx_prod.bd_prod = OSAL_CPU_TO_LE16(bd_prod); + rx_prod.cqe_prod = OSAL_CPU_TO_LE16(cq_prod); + DIRECT_REG_WR(p_hwfn, p_rx->set_prod_addr, *((u32 *)&rx_prod)); +} + +enum _ecore_status_t ecore_ll2_post_rx_buffer(void *cxt, + u8 connection_handle, + dma_addr_t addr, + u16 buf_len, + void *cookie, + u8 notify_fw) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct core_rx_bd_with_buff_len *p_curb = OSAL_NULL; + struct ecore_ll2_rx_packet *p_curp = OSAL_NULL; + struct ecore_ll2_info *p_ll2_conn; + struct ecore_ll2_rx_queue *p_rx; + unsigned long flags; + void *p_data; + enum _ecore_status_t rc = ECORE_SUCCESS; + + p_ll2_conn = ecore_ll2_handle_sanity(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) + return ECORE_INVAL; + p_rx = &p_ll2_conn->rx_queue; + if (p_rx->set_prod_addr == OSAL_NULL) + return ECORE_IO; + + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, flags); + if (!OSAL_LIST_IS_EMPTY(&p_rx->free_descq)) + p_curp = OSAL_LIST_FIRST_ENTRY(&p_rx->free_descq, + struct ecore_ll2_rx_packet, + list_entry); + if (p_curp) { + if (ecore_chain_get_elem_left(&p_rx->rxq_chain) && + ecore_chain_get_elem_left(&p_rx->rcq_chain)) { + p_data = ecore_chain_produce(&p_rx->rxq_chain); + p_curb = (struct core_rx_bd_with_buff_len *)p_data; + ecore_chain_produce(&p_rx->rcq_chain); + } + } + + /* If we're lacking entires, let's try to flush buffers to FW */ + if (!p_curp || !p_curb) { + rc = ECORE_BUSY; + p_curp = OSAL_NULL; + goto out_notify; + } + + /* We have an Rx packet we can fill */ + DMA_REGPAIR_LE(p_curb->addr, addr); + p_curb->buff_length = OSAL_CPU_TO_LE16(buf_len); + p_curp->rx_buf_addr = addr; + p_curp->cookie = cookie; + p_curp->rxq_bd = p_curb; + p_curp->buf_length = buf_len; + OSAL_LIST_REMOVE_ENTRY(&p_curp->list_entry, + &p_rx->free_descq); + + /* Check if we only want to enqueue this packet without informing FW */ + if (!notify_fw) { + OSAL_LIST_PUSH_TAIL(&p_curp->list_entry, + &p_rx->posting_descq); + goto out; + } + +out_notify: + ecore_ll2_post_rx_buffer_notify_fw(p_hwfn, p_rx, p_curp); +out: + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, flags); + return rc; +} + +static void ecore_ll2_prepare_tx_packet_set(struct ecore_ll2_tx_queue *p_tx, + struct ecore_ll2_tx_packet *p_curp, + struct ecore_ll2_tx_pkt_info *pkt, + u8 notify_fw) +{ + OSAL_LIST_REMOVE_ENTRY(&p_curp->list_entry, + &p_tx->free_descq); + p_curp->cookie = pkt->cookie; + p_curp->bd_used = pkt->num_of_bds; + p_curp->notify_fw = notify_fw; + p_tx->cur_send_packet = p_curp; + p_tx->cur_send_frag_num = 0; + + p_curp->bds_set[p_tx->cur_send_frag_num].tx_frag = pkt->first_frag; + p_curp->bds_set[p_tx->cur_send_frag_num].frag_len = pkt->first_frag_len; + p_tx->cur_send_frag_num++; +} + +static void ecore_ll2_prepare_tx_packet_set_bd( + struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2, + struct ecore_ll2_tx_packet *p_curp, + struct ecore_ll2_tx_pkt_info *pkt) +{ + struct ecore_chain *p_tx_chain = &p_ll2->tx_queue.txq_chain; + u16 prod_idx = ecore_chain_get_prod_idx(p_tx_chain); + struct core_tx_bd *start_bd = OSAL_NULL; + enum core_roce_flavor_type roce_flavor; + enum core_tx_dest tx_dest; + u16 bd_data = 0, frag_idx; + + roce_flavor = (pkt->ecore_roce_flavor == ECORE_LL2_ROCE) ? 
+ CORE_ROCE : CORE_RROCE; + + switch (pkt->tx_dest) { + case ECORE_LL2_TX_DEST_NW: + tx_dest = CORE_TX_DEST_NW; + break; + case ECORE_LL2_TX_DEST_LB: + tx_dest = CORE_TX_DEST_LB; + break; + case ECORE_LL2_TX_DEST_DROP: + tx_dest = CORE_TX_DEST_DROP; + break; + default: + tx_dest = CORE_TX_DEST_LB; + break; + } + + start_bd = (struct core_tx_bd*)ecore_chain_produce(p_tx_chain); + + if (ECORE_IS_IWARP_PERSONALITY(p_hwfn) && + (p_ll2->input.conn_type == ECORE_LL2_TYPE_OOO)) { + start_bd->nw_vlan_or_lb_echo = + OSAL_CPU_TO_LE16(IWARP_LL2_IN_ORDER_TX_QUEUE); + } else { + start_bd->nw_vlan_or_lb_echo = OSAL_CPU_TO_LE16(pkt->vlan); + if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits) && + (p_ll2->input.conn_type == ECORE_LL2_TYPE_FCOE)) + pkt->remove_stag = true; + } + + SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W, + OSAL_CPU_TO_LE16(pkt->l4_hdr_offset_w)); + SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest); + bd_data |= pkt->bd_flags; + SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 0x1); + SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, pkt->num_of_bds); + SET_FIELD(bd_data, CORE_TX_BD_DATA_ROCE_FLAV, roce_flavor); + SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_CSUM, !!(pkt->enable_ip_cksum)); + SET_FIELD(bd_data, CORE_TX_BD_DATA_L4_CSUM, !!(pkt->enable_l4_cksum)); + SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_LEN, !!(pkt->calc_ip_len)); + SET_FIELD(bd_data, CORE_TX_BD_DATA_DISABLE_STAG_INSERTION, + !!(pkt->remove_stag)); + + start_bd->bd_data.as_bitfield = OSAL_CPU_TO_LE16(bd_data); + DMA_REGPAIR_LE(start_bd->addr, pkt->first_frag); + start_bd->nbytes = OSAL_CPU_TO_LE16(pkt->first_frag_len); + + DP_VERBOSE(p_hwfn, (ECORE_MSG_TX_QUEUED | ECORE_MSG_LL2), + "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n", + p_ll2->queue_id, p_ll2->cid, p_ll2->input.conn_type, + prod_idx, pkt->first_frag_len, pkt->num_of_bds, + OSAL_LE32_TO_CPU(start_bd->addr.hi), + OSAL_LE32_TO_CPU(start_bd->addr.lo)); + + if (p_ll2->tx_queue.cur_send_frag_num == pkt->num_of_bds) + return; + + /* Need to provide the packet with additional BDs for frags */ + for (frag_idx = p_ll2->tx_queue.cur_send_frag_num; + frag_idx < pkt->num_of_bds; frag_idx++) { + struct core_tx_bd **p_bd = &p_curp->bds_set[frag_idx].txq_bd; + + *p_bd = (struct core_tx_bd *)ecore_chain_produce(p_tx_chain); + (*p_bd)->bd_data.as_bitfield = 0; + (*p_bd)->bitfield1 = 0; + p_curp->bds_set[frag_idx].tx_frag = 0; + p_curp->bds_set[frag_idx].frag_len = 0; + } +} + +/* This should be called while the Txq spinlock is being held */ +static void ecore_ll2_tx_packet_notify(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + bool b_notify = p_ll2_conn->tx_queue.cur_send_packet->notify_fw; + struct ecore_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct ecore_ll2_tx_packet *p_pkt = OSAL_NULL; + u16 bd_prod; + + /* If there are missing BDs, don't do anything now */ + if (p_ll2_conn->tx_queue.cur_send_frag_num != + p_ll2_conn->tx_queue.cur_send_packet->bd_used) + return; + + + /* Push the current packet to the list and clean after it */ + OSAL_LIST_PUSH_TAIL(&p_ll2_conn->tx_queue.cur_send_packet->list_entry, + &p_ll2_conn->tx_queue.sending_descq); + p_ll2_conn->tx_queue.cur_send_packet = OSAL_NULL; + p_ll2_conn->tx_queue.cur_send_frag_num = 0; + + /* Notify FW of packet only if requested to */ + if (!b_notify) + return; + + bd_prod = ecore_chain_get_prod_idx(&p_ll2_conn->tx_queue.txq_chain); + + while (!OSAL_LIST_IS_EMPTY(&p_tx->sending_descq)) { + p_pkt = 
OSAL_LIST_FIRST_ENTRY(&p_tx->sending_descq, + struct ecore_ll2_tx_packet, + list_entry); + if (p_pkt == OSAL_NULL) + break; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_tx->sending_descq); + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, &p_tx->active_descq); + } + + p_tx->db_msg.spq_prod = OSAL_CPU_TO_LE16(bd_prod); + + /* Make sure the BDs data is updated before ringing the doorbell */ + OSAL_WMB(p_hwfn->p_dev); + + //DIRECT_REG_WR(p_hwfn, p_tx->doorbell_addr, *((u32 *)&p_tx->db_msg)); + DIRECT_REG_WR_DB(p_hwfn, p_tx->doorbell_addr, *((u32 *)&p_tx->db_msg)); + + DP_VERBOSE(p_hwfn, (ECORE_MSG_TX_QUEUED | ECORE_MSG_LL2), + "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Doorbelled [producer 0x%04x]\n", + p_ll2_conn->queue_id, p_ll2_conn->cid, + p_ll2_conn->input.conn_type, + p_tx->db_msg.spq_prod); +} + +enum _ecore_status_t ecore_ll2_prepare_tx_packet( + void *cxt, + u8 connection_handle, + struct ecore_ll2_tx_pkt_info *pkt, + bool notify_fw) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_ll2_tx_packet *p_curp = OSAL_NULL; + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + struct ecore_ll2_tx_queue *p_tx; + struct ecore_chain *p_tx_chain; + unsigned long flags; + enum _ecore_status_t rc = ECORE_SUCCESS; + + p_ll2_conn = ecore_ll2_handle_sanity(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) + return ECORE_INVAL; + p_tx = &p_ll2_conn->tx_queue; + p_tx_chain = &p_tx->txq_chain; + + if (pkt->num_of_bds > p_ll2_conn->input.tx_max_bds_per_packet) + return ECORE_IO; /* coalescing is requireed */ + + OSAL_SPIN_LOCK_IRQSAVE(&p_tx->lock, flags); + if (p_tx->cur_send_packet) { + rc = ECORE_EXISTS; + goto out; + } + + /* Get entry, but only if we have tx elements for it */ + if (!OSAL_LIST_IS_EMPTY(&p_tx->free_descq)) + p_curp = OSAL_LIST_FIRST_ENTRY(&p_tx->free_descq, + struct ecore_ll2_tx_packet, + list_entry); + if (p_curp && ecore_chain_get_elem_left(p_tx_chain) < pkt->num_of_bds) + p_curp = OSAL_NULL; + + if (!p_curp) { + rc = ECORE_BUSY; + goto out; + } + + /* Prepare packet and BD, and perhaps send a doorbell to FW */ + ecore_ll2_prepare_tx_packet_set(p_tx, p_curp, pkt, notify_fw); + + ecore_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp, + pkt); + + ecore_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); + +out: + OSAL_SPIN_UNLOCK_IRQSAVE(&p_tx->lock, flags); + return rc; +} + +enum _ecore_status_t ecore_ll2_set_fragment_of_tx_packet(void *cxt, + u8 connection_handle, + dma_addr_t addr, + u16 nbytes) +{ + struct ecore_ll2_tx_packet *p_cur_send_packet = OSAL_NULL; + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + u16 cur_send_frag_num = 0; + struct core_tx_bd *p_bd; + unsigned long flags; + + p_ll2_conn = ecore_ll2_handle_sanity(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) + return ECORE_INVAL; + + if (!p_ll2_conn->tx_queue.cur_send_packet) + return ECORE_INVAL; + + p_cur_send_packet = p_ll2_conn->tx_queue.cur_send_packet; + cur_send_frag_num = p_ll2_conn->tx_queue.cur_send_frag_num; + + if (cur_send_frag_num >= p_cur_send_packet->bd_used) + return ECORE_INVAL; + + /* Fill the BD information, and possibly notify FW */ + p_bd = p_cur_send_packet->bds_set[cur_send_frag_num].txq_bd; + DMA_REGPAIR_LE(p_bd->addr, addr); + p_bd->nbytes = OSAL_CPU_TO_LE16(nbytes); + p_cur_send_packet->bds_set[cur_send_frag_num].tx_frag = addr; + p_cur_send_packet->bds_set[cur_send_frag_num].frag_len = nbytes; + + 
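+	/* ecore_ll2_tx_packet_notify() below only rings the doorbell once
+	 * all of the packet's BDs have been supplied; until then it returns
+	 * without doing anything.
+	 */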
p_ll2_conn->tx_queue.cur_send_frag_num++; + + OSAL_SPIN_LOCK_IRQSAVE(&p_ll2_conn->tx_queue.lock, flags); + ecore_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); + OSAL_SPIN_UNLOCK_IRQSAVE(&p_ll2_conn->tx_queue.lock, flags); + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_ll2_terminate_connection(void *cxt, + u8 connection_handle) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + enum _ecore_status_t rc = ECORE_NOTIMPL; + struct ecore_ptt *p_ptt; + + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) + return ECORE_AGAIN; + + p_ll2_conn = ecore_ll2_handle_sanity_lock(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) { + rc = ECORE_INVAL; + goto out; + } + + /* Stop Tx & Rx of connection, if needed */ + if (ECORE_LL2_TX_REGISTERED(p_ll2_conn)) { + rc = ecore_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn); + if (rc != ECORE_SUCCESS) + goto out; + ecore_ll2_txq_flush(p_hwfn, connection_handle); + } + + if (ECORE_LL2_RX_REGISTERED(p_ll2_conn)) { + rc = ecore_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn); + if (rc) + goto out; + ecore_ll2_rxq_flush(p_hwfn, connection_handle); + } + + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_OOO) + ecore_ooo_release_all_isles(p_hwfn->p_ooo_info); + + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_FCOE) { + if (!OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, + &p_hwfn->p_dev->mf_bits)) + ecore_llh_remove_protocol_filter(p_hwfn->p_dev, 0, + ECORE_LLH_FILTER_ETHERTYPE, + 0x8906, 0); + ecore_llh_remove_protocol_filter(p_hwfn->p_dev, 0, + ECORE_LLH_FILTER_ETHERTYPE, + 0x8914, 0); + } + +out: + ecore_ptt_release(p_hwfn, p_ptt); + + return rc; +} + +static void ecore_ll2_release_connection_ooo(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct ecore_ooo_buffer *p_buffer; + + if (p_ll2_conn->input.conn_type != ECORE_LL2_TYPE_OOO) + return; + + ecore_ooo_release_all_isles(p_hwfn->p_ooo_info); + while ((p_buffer = ecore_ooo_get_free_buffer(p_hwfn->p_ooo_info))) { + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + p_buffer->rx_buffer_virt_addr, + p_buffer->rx_buffer_phys_addr, + p_buffer->rx_buffer_size); + OSAL_FREE(p_hwfn->p_dev, p_buffer); + } +} + +void ecore_ll2_release_connection(void *cxt, + u8 connection_handle) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + + p_ll2_conn = ecore_ll2_handle_sanity(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) + return; + + if (ECORE_LL2_RX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->rx_queue.b_cb_registred = false; + ecore_int_unregister_cb(p_hwfn, + p_ll2_conn->rx_queue.rx_sb_index); + } + + if (ECORE_LL2_TX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->tx_queue.b_cb_registred = false; + ecore_int_unregister_cb(p_hwfn, + p_ll2_conn->tx_queue.tx_sb_index); + } + + OSAL_FREE(p_hwfn->p_dev, p_ll2_conn->tx_queue.descq_array); + ecore_chain_free(p_hwfn->p_dev, &p_ll2_conn->tx_queue.txq_chain); + + OSAL_FREE(p_hwfn->p_dev, p_ll2_conn->rx_queue.descq_array); + ecore_chain_free(p_hwfn->p_dev, &p_ll2_conn->rx_queue.rxq_chain); + ecore_chain_free(p_hwfn->p_dev, &p_ll2_conn->rx_queue.rcq_chain); + + ecore_cxt_release_cid(p_hwfn, p_ll2_conn->cid); + + ecore_ll2_release_connection_ooo(p_hwfn, p_ll2_conn); + + OSAL_MUTEX_ACQUIRE(&p_ll2_conn->mutex); + p_ll2_conn->b_active = false; + OSAL_MUTEX_RELEASE(&p_ll2_conn->mutex); +} + +/* ECORE LL2: internal functions */ + +enum _ecore_status_t ecore_ll2_alloc(struct ecore_hwfn *p_hwfn) +{ + struct ecore_ll2_info *p_ll2_info; + u8 i; + + /* Allocate LL2's set 
struct */ + p_ll2_info = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(struct ecore_ll2_info) * + ECORE_MAX_NUM_OF_LL2_CONNECTIONS); + if (!p_ll2_info) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate `struct ecore_ll2'\n"); + return ECORE_NOMEM; + } + + p_hwfn->p_ll2_info = p_ll2_info; + + for (i = 0; i < ECORE_MAX_NUM_OF_LL2_CONNECTIONS; i++) { +#ifdef CONFIG_ECORE_LOCK_ALLOC + if (OSAL_MUTEX_ALLOC(p_hwfn, &p_ll2_info[i].mutex)) + goto handle_err; + if (OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_ll2_info[i].rx_queue.lock)) + goto handle_err; + if (OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_ll2_info[i].tx_queue.lock)) + goto handle_err; +#endif + p_ll2_info[i].my_id = i; + } + + return ECORE_SUCCESS; +#ifdef CONFIG_ECORE_LOCK_ALLOC +handle_err: + ecore_ll2_free(p_hwfn); + return ECORE_NOMEM; +#endif +} + +void ecore_ll2_setup(struct ecore_hwfn *p_hwfn) +{ + int i; + + for (i = 0; i < ECORE_MAX_NUM_OF_LL2_CONNECTIONS; i++) + OSAL_MUTEX_INIT(&p_hwfn->p_ll2_info[i].mutex); +} + +void ecore_ll2_free(struct ecore_hwfn *p_hwfn) +{ +#ifdef CONFIG_ECORE_LOCK_ALLOC + int i; +#endif + if (!p_hwfn->p_ll2_info) + return; + +#ifdef CONFIG_ECORE_LOCK_ALLOC + for (i = 0; i < ECORE_MAX_NUM_OF_LL2_CONNECTIONS; i++) { + OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->p_ll2_info[i].rx_queue.lock); + OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->p_ll2_info[i].tx_queue.lock); + OSAL_MUTEX_DEALLOC(&p_hwfn->p_ll2_info[i].mutex); + } +#endif + OSAL_FREE(p_hwfn->p_dev, p_hwfn->p_ll2_info); + p_hwfn->p_ll2_info = OSAL_NULL; +} + +static void _ecore_ll2_get_port_stats(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + struct ecore_ll2_stats *p_stats) +{ + struct core_ll2_port_stats port_stats; + + OSAL_MEMSET(&port_stats, 0, sizeof(port_stats)); + ecore_memcpy_from(p_hwfn, p_ptt, &port_stats, + BAR0_MAP_REG_TSDM_RAM + + TSTORM_LL2_PORT_STAT_OFFSET(MFW_PORT(p_hwfn)), + sizeof(port_stats)); + + p_stats->gsi_invalid_hdr += + HILO_64_REGPAIR(port_stats.gsi_invalid_hdr); + p_stats->gsi_invalid_pkt_length += + HILO_64_REGPAIR(port_stats.gsi_invalid_pkt_length); + p_stats->gsi_unsupported_pkt_typ += + HILO_64_REGPAIR(port_stats.gsi_unsupported_pkt_typ); + p_stats->gsi_crcchksm_error += + HILO_64_REGPAIR(port_stats.gsi_crcchksm_error); +} + +static void _ecore_ll2_get_tstats(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + struct ecore_ll2_info *p_ll2_conn, + struct ecore_ll2_stats *p_stats) +{ + struct core_ll2_tstorm_per_queue_stat tstats; + u8 qid = p_ll2_conn->queue_id; + u32 tstats_addr; + + OSAL_MEMSET(&tstats, 0, sizeof(tstats)); + tstats_addr = BAR0_MAP_REG_TSDM_RAM + + CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(qid); + ecore_memcpy_from(p_hwfn, p_ptt, &tstats, + tstats_addr, + sizeof(tstats)); + + p_stats->packet_too_big_discard += + HILO_64_REGPAIR(tstats.packet_too_big_discard); + p_stats->no_buff_discard += + HILO_64_REGPAIR(tstats.no_buff_discard); +} + +static void _ecore_ll2_get_ustats(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + struct ecore_ll2_info *p_ll2_conn, + struct ecore_ll2_stats *p_stats) +{ + struct core_ll2_ustorm_per_queue_stat ustats; + u8 qid = p_ll2_conn->queue_id; + u32 ustats_addr; + + OSAL_MEMSET(&ustats, 0, sizeof(ustats)); + ustats_addr = BAR0_MAP_REG_USDM_RAM + + CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(qid); + ecore_memcpy_from(p_hwfn, p_ptt, &ustats, + ustats_addr, + sizeof(ustats)); + + p_stats->rcv_ucast_bytes += HILO_64_REGPAIR(ustats.rcv_ucast_bytes); + p_stats->rcv_mcast_bytes += HILO_64_REGPAIR(ustats.rcv_mcast_bytes); + p_stats->rcv_bcast_bytes += HILO_64_REGPAIR(ustats.rcv_bcast_bytes); + 
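+	/* The packet counters use the same 64-bit hi/lo register-pair layout
+	 * as the byte counters above.
+	 */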
p_stats->rcv_ucast_pkts += HILO_64_REGPAIR(ustats.rcv_ucast_pkts); + p_stats->rcv_mcast_pkts += HILO_64_REGPAIR(ustats.rcv_mcast_pkts); + p_stats->rcv_bcast_pkts += HILO_64_REGPAIR(ustats.rcv_bcast_pkts); +} + +static void _ecore_ll2_get_pstats(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + struct ecore_ll2_info *p_ll2_conn, + struct ecore_ll2_stats *p_stats) +{ + struct core_ll2_pstorm_per_queue_stat pstats; + u8 stats_id = p_ll2_conn->tx_stats_id; + u32 pstats_addr; + + OSAL_MEMSET(&pstats, 0, sizeof(pstats)); + pstats_addr = BAR0_MAP_REG_PSDM_RAM + + CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(stats_id); + ecore_memcpy_from(p_hwfn, p_ptt, &pstats, + pstats_addr, + sizeof(pstats)); + + p_stats->sent_ucast_bytes += HILO_64_REGPAIR(pstats.sent_ucast_bytes); + p_stats->sent_mcast_bytes += HILO_64_REGPAIR(pstats.sent_mcast_bytes); + p_stats->sent_bcast_bytes += HILO_64_REGPAIR(pstats.sent_bcast_bytes); + p_stats->sent_ucast_pkts += HILO_64_REGPAIR(pstats.sent_ucast_pkts); + p_stats->sent_mcast_pkts += HILO_64_REGPAIR(pstats.sent_mcast_pkts); + p_stats->sent_bcast_pkts += HILO_64_REGPAIR(pstats.sent_bcast_pkts); +} + +enum _ecore_status_t __ecore_ll2_get_stats(void *cxt, + u8 connection_handle, + struct ecore_ll2_stats *p_stats) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + struct ecore_ptt *p_ptt; + + if ((connection_handle >= ECORE_MAX_NUM_OF_LL2_CONNECTIONS) || + !p_hwfn->p_ll2_info) { + return ECORE_INVAL; + } + + p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle]; + + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_ERR(p_hwfn, "Failed to acquire ptt\n"); + return ECORE_INVAL; + } + + if (p_ll2_conn->input.gsi_enable) + _ecore_ll2_get_port_stats(p_hwfn, p_ptt, p_stats); + + _ecore_ll2_get_tstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + + _ecore_ll2_get_ustats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + + if (p_ll2_conn->tx_stats_en) + _ecore_ll2_get_pstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + + ecore_ptt_release(p_hwfn, p_ptt); + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_ll2_get_stats(void *cxt, + u8 connection_handle, + struct ecore_ll2_stats *p_stats) +{ + OSAL_MEMSET(p_stats, 0, sizeof(*p_stats)); + + return __ecore_ll2_get_stats(cxt, connection_handle, p_stats); +} + +/**/ + +#ifdef _NTDDK_ +#pragma warning(pop) +#endif diff --git a/sys/dev/qlnx/qlnxe/ecore_ooo.c b/sys/dev/qlnx/qlnxe/ecore_ooo.c new file mode 100644 index 000000000000..50359b6e20c6 --- /dev/null +++ b/sys/dev/qlnx/qlnxe/ecore_ooo.c @@ -0,0 +1,603 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * File : ecore_ooo.c + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "bcm_osal.h" + +#include "ecore.h" +#include "ecore_status.h" +#include "ecore_ll2.h" +#include "ecore_ooo.h" +#include "ecore_iscsi.h" +#include "ecore_cxt.h" +/* + * Static OOO functions + */ + +static struct ecore_ooo_archipelago * +ecore_ooo_seek_archipelago(struct ecore_ooo_info *p_ooo_info, u32 cid) +{ + u32 idx = (cid & 0xffff) - p_ooo_info->cid_base; + struct ecore_ooo_archipelago *p_archipelago; + + if (idx >= p_ooo_info->max_num_archipelagos) + return OSAL_NULL; + + p_archipelago = &p_ooo_info->p_archipelagos_mem[idx]; + + if (OSAL_LIST_IS_EMPTY(&p_archipelago->isles_list)) + return OSAL_NULL; + + return p_archipelago; +} + +static struct ecore_ooo_isle *ecore_ooo_seek_isle(struct ecore_hwfn *p_hwfn, + struct ecore_ooo_info *p_ooo_info, + u32 cid, u8 isle) +{ + struct ecore_ooo_archipelago *p_archipelago = OSAL_NULL; + struct ecore_ooo_isle *p_isle = OSAL_NULL; + u8 the_num_of_isle = 1; + + p_archipelago = ecore_ooo_seek_archipelago(p_ooo_info, cid); + if (!p_archipelago) { + DP_NOTICE(p_hwfn, true, + "Connection %d is not found in OOO list\n", cid); + return OSAL_NULL; + } + + OSAL_LIST_FOR_EACH_ENTRY(p_isle, + &p_archipelago->isles_list, + list_entry, struct ecore_ooo_isle) { + if (the_num_of_isle == isle) + return p_isle; + the_num_of_isle++; + } + + return OSAL_NULL; +} + +void ecore_ooo_save_history_entry(struct ecore_ooo_info *p_ooo_info, + struct ooo_opaque *p_cqe) +{ + struct ecore_ooo_history *p_history = &p_ooo_info->ooo_history; + + if (p_history->head_idx == p_history->num_of_cqes) + p_history->head_idx = 0; + p_history->p_cqes[p_history->head_idx] = *p_cqe; + p_history->head_idx++; +} + +//#ifdef CONFIG_ECORE_ISCSI +#if defined(CONFIG_ECORE_ISCSI) || defined(CONFIG_ECORE_IWARP) +enum _ecore_status_t ecore_ooo_alloc(struct ecore_hwfn *p_hwfn) +{ + u16 max_num_archipelagos = 0, cid_base; + struct ecore_ooo_info *p_ooo_info; + u16 max_num_isles = 0; + u32 i; + + switch (p_hwfn->hw_info.personality) { + case ECORE_PCI_ISCSI: + max_num_archipelagos = + p_hwfn->pf_params.iscsi_pf_params.num_cons; + cid_base =(u16)ecore_cxt_get_proto_cid_start(p_hwfn, + PROTOCOLID_ISCSI); + break; + case ECORE_PCI_ETH_RDMA: + case ECORE_PCI_ETH_IWARP: + max_num_archipelagos = + (u16)ecore_cxt_get_proto_cid_count(p_hwfn, + PROTOCOLID_IWARP, + OSAL_NULL); + cid_base = (u16)ecore_cxt_get_proto_cid_start(p_hwfn, + PROTOCOLID_IWARP); + break; + default: + DP_NOTICE(p_hwfn, true, + "Failed to allocate ecore_ooo_info: unknown personalization\n"); + return ECORE_INVAL; + } + + max_num_isles = ECORE_MAX_NUM_ISLES + max_num_archipelagos; + + if (!max_num_archipelagos) { + DP_NOTICE(p_hwfn, true, + "Failed to allocate ecore_ooo_info: unknown amount of connections\n"); + return ECORE_INVAL; + } + + p_ooo_info = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(*p_ooo_info)); + if (!p_ooo_info) { + DP_NOTICE(p_hwfn, true, "Failed to allocate ecore_ooo_info\n"); + return 
ECORE_NOMEM; + } + p_ooo_info->cid_base = cid_base; /* We look only at the icid */ + p_ooo_info->max_num_archipelagos = max_num_archipelagos; + + OSAL_LIST_INIT(&p_ooo_info->free_buffers_list); + OSAL_LIST_INIT(&p_ooo_info->ready_buffers_list); + OSAL_LIST_INIT(&p_ooo_info->free_isles_list); + + p_ooo_info->p_isles_mem = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(struct ecore_ooo_isle) * + max_num_isles); + if (!p_ooo_info->p_isles_mem) { + DP_NOTICE(p_hwfn,true, + "Failed to allocate ecore_ooo_info (isles)\n"); + goto no_isles_mem; + } + + for (i = 0; i < max_num_isles; i++) { + OSAL_LIST_INIT(&p_ooo_info->p_isles_mem[i].buffers_list); + OSAL_LIST_PUSH_TAIL(&p_ooo_info->p_isles_mem[i].list_entry, + &p_ooo_info->free_isles_list); + } + + p_ooo_info->p_archipelagos_mem = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(struct ecore_ooo_archipelago) * + max_num_archipelagos); + if (!p_ooo_info->p_archipelagos_mem) { + DP_NOTICE(p_hwfn,true, + "Failed to allocate ecore_ooo_info(archpelagos)\n"); + goto no_archipelagos_mem; + } + + for (i = 0; i < max_num_archipelagos; i++) { + OSAL_LIST_INIT(&p_ooo_info->p_archipelagos_mem[i].isles_list); + } + + p_ooo_info->ooo_history.p_cqes = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(struct ooo_opaque) * + ECORE_MAX_NUM_OOO_HISTORY_ENTRIES); + if (!p_ooo_info->ooo_history.p_cqes) { + DP_NOTICE(p_hwfn,true, + "Failed to allocate ecore_ooo_info(history)\n"); + goto no_history_mem; + } + p_ooo_info->ooo_history.num_of_cqes = + ECORE_MAX_NUM_OOO_HISTORY_ENTRIES; + + p_hwfn->p_ooo_info = p_ooo_info; + return ECORE_SUCCESS; + +no_history_mem: + OSAL_FREE(p_hwfn->p_dev, p_ooo_info->p_archipelagos_mem); +no_archipelagos_mem: + OSAL_FREE(p_hwfn->p_dev, p_ooo_info->p_isles_mem); +no_isles_mem: + OSAL_FREE(p_hwfn->p_dev, p_ooo_info); + return ECORE_NOMEM; +} +#endif + +void ecore_ooo_release_connection_isles(struct ecore_ooo_info *p_ooo_info, + u32 cid) +{ + struct ecore_ooo_archipelago *p_archipelago; + struct ecore_ooo_buffer *p_buffer; + struct ecore_ooo_isle *p_isle; + + p_archipelago = ecore_ooo_seek_archipelago(p_ooo_info, cid); + if (!p_archipelago) + return; + + while (!OSAL_LIST_IS_EMPTY(&p_archipelago->isles_list)) { + p_isle = OSAL_LIST_FIRST_ENTRY( + &p_archipelago->isles_list, + struct ecore_ooo_isle, list_entry); + +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_isle->list_entry, + &p_archipelago->isles_list); + + while (!OSAL_LIST_IS_EMPTY(&p_isle->buffers_list)) { + p_buffer = + OSAL_LIST_FIRST_ENTRY( + &p_isle->buffers_list , + struct ecore_ooo_buffer, list_entry); + + if (p_buffer == OSAL_NULL) + break; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_buffer->list_entry, + &p_isle->buffers_list); + OSAL_LIST_PUSH_TAIL(&p_buffer->list_entry, + &p_ooo_info->free_buffers_list); + } + OSAL_LIST_PUSH_TAIL(&p_isle->list_entry, + &p_ooo_info->free_isles_list); + } + +} + +void ecore_ooo_release_all_isles(struct ecore_ooo_info *p_ooo_info) +{ + struct ecore_ooo_archipelago *p_archipelago; + struct ecore_ooo_buffer *p_buffer; + struct ecore_ooo_isle *p_isle; + u32 i; + + for (i = 0; i < p_ooo_info->max_num_archipelagos; i++) { + p_archipelago = &(p_ooo_info->p_archipelagos_mem[i]); + +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + while (!OSAL_LIST_IS_EMPTY(&p_archipelago->isles_list)) { + p_isle = OSAL_LIST_FIRST_ENTRY( + &p_archipelago->isles_list, + struct ecore_ooo_isle, list_entry); + +#if defined(_NTDDK_) +#pragma 
warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_isle->list_entry, + &p_archipelago->isles_list); + + while (!OSAL_LIST_IS_EMPTY(&p_isle->buffers_list)) { + p_buffer = + OSAL_LIST_FIRST_ENTRY( + &p_isle->buffers_list , + struct ecore_ooo_buffer, list_entry); + + if (p_buffer == OSAL_NULL) + break; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_buffer->list_entry, + &p_isle->buffers_list); + OSAL_LIST_PUSH_TAIL(&p_buffer->list_entry, + &p_ooo_info->free_buffers_list); + } + OSAL_LIST_PUSH_TAIL(&p_isle->list_entry, + &p_ooo_info->free_isles_list); + } + } + if (!OSAL_LIST_IS_EMPTY(&p_ooo_info->ready_buffers_list)) { + OSAL_LIST_SPLICE_TAIL_INIT(&p_ooo_info->ready_buffers_list, + &p_ooo_info->free_buffers_list); + } +} + +//#ifdef CONFIG_ECORE_ISCSI +#if defined(CONFIG_ECORE_ISCSI) || defined(CONFIG_ECORE_IWARP) +void ecore_ooo_setup(struct ecore_hwfn *p_hwfn) +{ + ecore_ooo_release_all_isles(p_hwfn->p_ooo_info); + OSAL_MEM_ZERO(p_hwfn->p_ooo_info->ooo_history.p_cqes, + p_hwfn->p_ooo_info->ooo_history.num_of_cqes * + sizeof(struct ooo_opaque)); + p_hwfn->p_ooo_info->ooo_history.head_idx = 0; +} + +void ecore_ooo_free(struct ecore_hwfn *p_hwfn) +{ + struct ecore_ooo_info *p_ooo_info = p_hwfn->p_ooo_info; + struct ecore_ooo_buffer *p_buffer; + + if (!p_ooo_info) + return; + + ecore_ooo_release_all_isles(p_ooo_info); + while (!OSAL_LIST_IS_EMPTY(&p_ooo_info->free_buffers_list)) { + p_buffer = OSAL_LIST_FIRST_ENTRY(&p_ooo_info-> + free_buffers_list, + struct ecore_ooo_buffer, + list_entry); + if (p_buffer == OSAL_NULL) + break; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_buffer->list_entry, + &p_ooo_info->free_buffers_list); + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + p_buffer->rx_buffer_virt_addr, + p_buffer->rx_buffer_phys_addr, + p_buffer->rx_buffer_size); + OSAL_FREE(p_hwfn->p_dev, p_buffer); + } + + OSAL_FREE(p_hwfn->p_dev, p_ooo_info->p_isles_mem); + OSAL_FREE(p_hwfn->p_dev, p_ooo_info->p_archipelagos_mem); + OSAL_FREE(p_hwfn->p_dev, p_ooo_info->ooo_history.p_cqes); + OSAL_FREE(p_hwfn->p_dev, p_ooo_info); + p_hwfn->p_ooo_info = OSAL_NULL; +} +#endif + +void ecore_ooo_put_free_buffer(struct ecore_ooo_info *p_ooo_info, + struct ecore_ooo_buffer *p_buffer) +{ + OSAL_LIST_PUSH_TAIL(&p_buffer->list_entry, + &p_ooo_info->free_buffers_list); +} + +struct ecore_ooo_buffer * +ecore_ooo_get_free_buffer(struct ecore_ooo_info *p_ooo_info) +{ + struct ecore_ooo_buffer *p_buffer = OSAL_NULL; + + if (!OSAL_LIST_IS_EMPTY(&p_ooo_info->free_buffers_list)) { + p_buffer = + OSAL_LIST_FIRST_ENTRY( + &p_ooo_info->free_buffers_list, + struct ecore_ooo_buffer, list_entry); + + OSAL_LIST_REMOVE_ENTRY(&p_buffer->list_entry, + &p_ooo_info->free_buffers_list); + } + + return p_buffer; +} + +void ecore_ooo_put_ready_buffer(struct ecore_ooo_info *p_ooo_info, + struct ecore_ooo_buffer *p_buffer, u8 on_tail) +{ + if (on_tail) { + OSAL_LIST_PUSH_TAIL(&p_buffer->list_entry, + &p_ooo_info->ready_buffers_list); + } else { + OSAL_LIST_PUSH_HEAD(&p_buffer->list_entry, + &p_ooo_info->ready_buffers_list); + } +} + +struct ecore_ooo_buffer * +ecore_ooo_get_ready_buffer(struct ecore_ooo_info *p_ooo_info) +{ + struct ecore_ooo_buffer *p_buffer = OSAL_NULL; + + if (!OSAL_LIST_IS_EMPTY(&p_ooo_info->ready_buffers_list)) { + p_buffer = + OSAL_LIST_FIRST_ENTRY( + &p_ooo_info->ready_buffers_list, + struct ecore_ooo_buffer, list_entry); + + OSAL_LIST_REMOVE_ENTRY(&p_buffer->list_entry, + &p_ooo_info->ready_buffers_list); 
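+		/* The buffer is now detached from the ready list; the caller
+		 * is responsible for returning it to a free or ready list
+		 * once it has been consumed.
+		 */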
+ } + + return p_buffer; +} + +void ecore_ooo_delete_isles(struct ecore_hwfn *p_hwfn, + struct ecore_ooo_info *p_ooo_info, + u32 cid, + u8 drop_isle, + u8 drop_size) +{ + struct ecore_ooo_archipelago *p_archipelago = OSAL_NULL; + struct ecore_ooo_isle *p_isle = OSAL_NULL; + u8 isle_idx; + + p_archipelago = ecore_ooo_seek_archipelago(p_ooo_info, cid); + for (isle_idx = 0; isle_idx < drop_size; isle_idx++) { + p_isle = ecore_ooo_seek_isle(p_hwfn, p_ooo_info, + cid, drop_isle); + if (!p_isle) { + DP_NOTICE(p_hwfn, true, + "Isle %d is not found(cid %d)\n", + drop_isle, cid); + return; + } + if (OSAL_LIST_IS_EMPTY(&p_isle->buffers_list)) { + DP_NOTICE(p_hwfn, true, + "Isle %d is empty(cid %d)\n", + drop_isle, cid); + } else { + OSAL_LIST_SPLICE_TAIL_INIT(&p_isle->buffers_list, + &p_ooo_info->free_buffers_list); + } +#if defined(_NTDDK_) +#pragma warning(suppress : 6011) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_isle->list_entry, + &p_archipelago->isles_list); + p_ooo_info->cur_isles_number--; + OSAL_LIST_PUSH_HEAD(&p_isle->list_entry, + &p_ooo_info->free_isles_list); + } +} + +void ecore_ooo_add_new_isle(struct ecore_hwfn *p_hwfn, + struct ecore_ooo_info *p_ooo_info, + u32 cid, u8 ooo_isle, + struct ecore_ooo_buffer *p_buffer) +{ + struct ecore_ooo_archipelago *p_archipelago = OSAL_NULL; + struct ecore_ooo_isle *p_prev_isle = OSAL_NULL; + struct ecore_ooo_isle *p_isle = OSAL_NULL; + + if (ooo_isle > 1) { + p_prev_isle = ecore_ooo_seek_isle(p_hwfn, p_ooo_info, cid, ooo_isle - 1); + if (!p_prev_isle) { + DP_NOTICE(p_hwfn, true, + "Isle %d is not found(cid %d)\n", + ooo_isle - 1, cid); + return; + } + } + p_archipelago = ecore_ooo_seek_archipelago(p_ooo_info, cid); + if (!p_archipelago && (ooo_isle != 1)) { + DP_NOTICE(p_hwfn, true, + "Connection %d is not found in OOO list\n", cid); + return; + } + + if (!OSAL_LIST_IS_EMPTY(&p_ooo_info->free_isles_list)) { + p_isle = + OSAL_LIST_FIRST_ENTRY( + &p_ooo_info->free_isles_list, + struct ecore_ooo_isle, list_entry); + + OSAL_LIST_REMOVE_ENTRY(&p_isle->list_entry, + &p_ooo_info->free_isles_list); + if (!OSAL_LIST_IS_EMPTY(&p_isle->buffers_list)) { + DP_NOTICE(p_hwfn, true, "Free isle is not empty\n"); + OSAL_LIST_INIT(&p_isle->buffers_list); + } + } else { + DP_NOTICE(p_hwfn, true, "No more free isles\n"); + return; + } + + if (!p_archipelago) { + u32 idx = (cid & 0xffff) - p_ooo_info->cid_base; + + p_archipelago = &p_ooo_info->p_archipelagos_mem[idx]; + } + OSAL_LIST_PUSH_HEAD(&p_buffer->list_entry, &p_isle->buffers_list); + p_ooo_info->cur_isles_number++; + p_ooo_info->gen_isles_number++; + if (p_ooo_info->cur_isles_number > p_ooo_info->max_isles_number) + p_ooo_info->max_isles_number = p_ooo_info->cur_isles_number; + if (!p_prev_isle) { + OSAL_LIST_PUSH_HEAD(&p_isle->list_entry, &p_archipelago->isles_list); + } else { + OSAL_LIST_INSERT_ENTRY_AFTER(&p_isle->list_entry, + &p_prev_isle->list_entry, + &p_archipelago->isles_list); + } +} + +void ecore_ooo_add_new_buffer(struct ecore_hwfn *p_hwfn, + struct ecore_ooo_info *p_ooo_info, + u32 cid, + u8 ooo_isle, + struct ecore_ooo_buffer *p_buffer, + u8 buffer_side) +{ + struct ecore_ooo_isle * p_isle = OSAL_NULL; + p_isle = ecore_ooo_seek_isle(p_hwfn, p_ooo_info, cid, ooo_isle); + if (!p_isle) { + DP_NOTICE(p_hwfn, true, + "Isle %d is not found(cid %d)\n", + ooo_isle, cid); + return; + } + if (buffer_side == ECORE_OOO_LEFT_BUF) { + OSAL_LIST_PUSH_HEAD(&p_buffer->list_entry, + &p_isle->buffers_list); + } else { + OSAL_LIST_PUSH_TAIL(&p_buffer->list_entry, + &p_isle->buffers_list); + } +} + +void 
ecore_ooo_join_isles(struct ecore_hwfn *p_hwfn, + struct ecore_ooo_info *p_ooo_info, + u32 cid, u8 left_isle) +{ + struct ecore_ooo_archipelago *p_archipelago = OSAL_NULL; + struct ecore_ooo_isle *p_right_isle = OSAL_NULL; + struct ecore_ooo_isle *p_left_isle = OSAL_NULL; + + p_right_isle = ecore_ooo_seek_isle(p_hwfn, p_ooo_info, cid, + left_isle + 1); + if (!p_right_isle) { + DP_NOTICE(p_hwfn, true, + "Right isle %d is not found(cid %d)\n", + left_isle + 1, cid); + return; + } + p_archipelago = ecore_ooo_seek_archipelago(p_ooo_info, cid); + OSAL_LIST_REMOVE_ENTRY(&p_right_isle->list_entry, + &p_archipelago->isles_list); + p_ooo_info->cur_isles_number--; + if (left_isle) { + p_left_isle = ecore_ooo_seek_isle(p_hwfn, p_ooo_info, cid, + left_isle); + if (!p_left_isle) { + DP_NOTICE(p_hwfn, true, + "Left isle %d is not found(cid %d)\n", + left_isle, cid); + return; + } + OSAL_LIST_SPLICE_TAIL_INIT(&p_right_isle->buffers_list, + &p_left_isle->buffers_list); + } else { + OSAL_LIST_SPLICE_TAIL_INIT(&p_right_isle->buffers_list, + &p_ooo_info->ready_buffers_list); + } + OSAL_LIST_PUSH_TAIL(&p_right_isle->list_entry, + &p_ooo_info->free_isles_list); +} + +void ecore_ooo_dump_rx_event(struct ecore_hwfn *p_hwfn, + struct ooo_opaque *iscsi_ooo, + struct ecore_ooo_buffer *p_buffer) +{ + int i; + u32 dp_module = ECORE_MSG_OOO; + u32 ph_hi, ph_lo; + u8 *packet_buffer = 0; + + if (p_hwfn->dp_level > ECORE_LEVEL_VERBOSE) + return; + if (!(p_hwfn->dp_module & dp_module)) + return; + + packet_buffer = (u8 *)p_buffer->rx_buffer_virt_addr + + p_buffer->placement_offset; + DP_VERBOSE(p_hwfn, dp_module, + "******************************************************\n"); + ph_hi = DMA_HI(p_buffer->rx_buffer_phys_addr); + ph_lo = DMA_LO(p_buffer->rx_buffer_phys_addr); + DP_VERBOSE(p_hwfn, dp_module, + "0x%x-%x: CID 0x%x, OP 0x%x, ISLE 0x%x\n", + ph_hi, ph_lo, + iscsi_ooo->cid, iscsi_ooo->ooo_opcode, iscsi_ooo->ooo_isle); + for (i = 0; i < 64; i = i + 8) { + DP_VERBOSE(p_hwfn, dp_module, + "0x%x-%x: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", + ph_hi, ph_lo, + packet_buffer[i], + packet_buffer[i + 1], + packet_buffer[i + 2], + packet_buffer[i + 3], + packet_buffer[i + 4], + packet_buffer[i + 5], + packet_buffer[i + 6], + packet_buffer[i + 7]); + } +} diff --git a/sys/dev/qlnx/qlnxe/ecore_rdma.c b/sys/dev/qlnx/qlnxe/ecore_rdma.c new file mode 100644 index 000000000000..eb23aeb5cbfe --- /dev/null +++ b/sys/dev/qlnx/qlnxe/ecore_rdma.c @@ -0,0 +1,2697 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * File : ecore_rdma.c + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "bcm_osal.h" +#include "ecore.h" +#include "ecore_status.h" +#include "ecore_sp_commands.h" +#include "ecore_cxt.h" +#include "ecore_rdma.h" +#include "reg_addr.h" +#include "ecore_rt_defs.h" +#include "ecore_init_ops.h" +#include "ecore_hw.h" +#include "ecore_mcp.h" +#include "ecore_init_fw_funcs.h" +#include "ecore_int.h" +#include "pcics_reg_driver.h" +#include "ecore_iro.h" +#include "ecore_gtt_reg_addr.h" +#include "ecore_hsi_iwarp.h" +#include "ecore_ll2.h" +#include "ecore_ooo.h" +#ifndef LINUX_REMOVE +#include "ecore_tcp_ip.h" +#endif + +enum _ecore_status_t ecore_rdma_bmap_alloc(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + u32 max_count, + char *name) +{ + u32 size_in_bytes; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "max_count = %08x\n", max_count); + + bmap->max_count = max_count; + + if (!max_count) { + bmap->bitmap = OSAL_NULL; + return ECORE_SUCCESS; + } + + size_in_bytes = sizeof(unsigned long) * + DIV_ROUND_UP(max_count, (sizeof(unsigned long) * 8)); + + bmap->bitmap = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, size_in_bytes); + if (!bmap->bitmap) + { + DP_NOTICE(p_hwfn, false, + "ecore bmap alloc failed: cannot allocate memory (bitmap). rc = %d\n", + ECORE_NOMEM); + return ECORE_NOMEM; + } + + OSAL_SNPRINTF(bmap->name, QEDR_MAX_BMAP_NAME, "%s", name); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ECORE_SUCCESS\n"); + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_rdma_bmap_alloc_id(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + u32 *id_num) +{ + *id_num = OSAL_FIND_FIRST_ZERO_BIT(bmap->bitmap, bmap->max_count); + if (*id_num >= bmap->max_count) + return ECORE_INVAL; + + OSAL_SET_BIT(*id_num, bmap->bitmap); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "%s bitmap: allocated id %d\n", + bmap->name, *id_num); + + return ECORE_SUCCESS; +} + +void ecore_bmap_set_id(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + u32 id_num) +{ + if (id_num >= bmap->max_count) { + DP_NOTICE(p_hwfn, true, + "%s bitmap: cannot set id %d max is %d\n", + bmap->name, id_num, bmap->max_count); + + return; + } + + OSAL_SET_BIT(id_num, bmap->bitmap); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "%s bitmap: set id %d\n", + bmap->name, id_num); +} + +void ecore_bmap_release_id(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + u32 id_num) +{ + bool b_acquired; + + if (id_num >= bmap->max_count) + return; + + b_acquired = OSAL_TEST_AND_CLEAR_BIT(id_num, bmap->bitmap); + if (!b_acquired) + { + DP_NOTICE(p_hwfn, false, "%s bitmap: id %d already released\n", + bmap->name, id_num); + return; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "%s bitmap: released id %d\n", + bmap->name, id_num); +} + +int ecore_bmap_test_id(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + u32 id_num) +{ + if (id_num >= bmap->max_count) { + DP_NOTICE(p_hwfn, true, + "%s bitmap: id %d too high. 
max is %d\n", + bmap->name, id_num, bmap->max_count); + return -1; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "%s bitmap: tested id %d\n", + bmap->name, id_num); + + return OSAL_TEST_BIT(id_num, bmap->bitmap); +} + +static bool ecore_bmap_is_empty(struct ecore_bmap *bmap) +{ + return (bmap->max_count == + OSAL_FIND_FIRST_BIT(bmap->bitmap, bmap->max_count)); +} + +#ifndef LINUX_REMOVE +u32 ecore_rdma_get_sb_id(struct ecore_hwfn *p_hwfn, u32 rel_sb_id) +{ + /* first sb id for RoCE is after all the l2 sb */ + return FEAT_NUM(p_hwfn, ECORE_PF_L2_QUE) + rel_sb_id; +} + +u32 ecore_rdma_query_cau_timer_res(void) +{ + return ECORE_CAU_DEF_RX_TIMER_RES; +} +#endif + +enum _ecore_status_t ecore_rdma_info_alloc(struct ecore_hwfn *p_hwfn) +{ + struct ecore_rdma_info *p_rdma_info; + + p_rdma_info = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*p_rdma_info)); + if (!p_rdma_info) { + DP_NOTICE(p_hwfn, false, + "ecore rdma alloc failed: cannot allocate memory (rdma info).\n"); + return ECORE_NOMEM; + } + p_hwfn->p_rdma_info = p_rdma_info; + +#ifdef CONFIG_ECORE_LOCK_ALLOC + if (OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_rdma_info->lock)) { + ecore_rdma_info_free(p_hwfn); + return ECORE_NOMEM; + } +#endif + OSAL_SPIN_LOCK_INIT(&p_rdma_info->lock); + + return ECORE_SUCCESS; +} + +void ecore_rdma_info_free(struct ecore_hwfn *p_hwfn) +{ +#ifdef CONFIG_ECORE_LOCK_ALLOC + OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->p_rdma_info->lock); +#endif + OSAL_FREE(p_hwfn->p_dev, p_hwfn->p_rdma_info); + p_hwfn->p_rdma_info = OSAL_NULL; +} + +static enum _ecore_status_t ecore_rdma_inc_ref_cnt(struct ecore_hwfn *p_hwfn) +{ + enum _ecore_status_t rc = ECORE_INVAL; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + if (p_hwfn->p_rdma_info->active) { + p_hwfn->p_rdma_info->ref_cnt++; + rc = ECORE_SUCCESS; + } else { + DP_INFO(p_hwfn, "Ref cnt requested for inactive rdma\n"); + } + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + return rc; +} + +static void ecore_rdma_dec_ref_cnt(struct ecore_hwfn *p_hwfn) +{ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + p_hwfn->p_rdma_info->ref_cnt--; + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +static void ecore_rdma_activate(struct ecore_hwfn *p_hwfn) +{ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + p_hwfn->p_rdma_info->active = true; + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +/* Part of deactivating rdma is letting all the relevant flows complete before + * we start shutting down: Currently query-stats which can be called from MCP + * context. + */ +/* The longest time it can take a rdma flow to complete */ +#define ECORE_RDMA_MAX_FLOW_TIME (100) +static enum _ecore_status_t ecore_rdma_deactivate(struct ecore_hwfn *p_hwfn) +{ + int wait_count; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + p_hwfn->p_rdma_info->active = false; + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + /* We'll give each flow it's time to complete... 
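+ * One ECORE_RDMA_MAX_FLOW_TIME msec sleep is allowed per reference that was + * outstanding when deactivation started; if ref_cnt is still non-zero after + * that budget is spent, the stop flow gives up and returns ECORE_TIMEOUT.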
*/ + wait_count = p_hwfn->p_rdma_info->ref_cnt; + + while (p_hwfn->p_rdma_info->ref_cnt) { + OSAL_MSLEEP(ECORE_RDMA_MAX_FLOW_TIME); + if (--wait_count == 0) { + DP_NOTICE(p_hwfn, false, + "Timeout on refcnt=%d\n", + p_hwfn->p_rdma_info->ref_cnt); + return ECORE_TIMEOUT; + } + } + return ECORE_SUCCESS; +} + +static enum _ecore_status_t ecore_rdma_alloc(struct ecore_hwfn *p_hwfn) +{ + struct ecore_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + u32 num_cons, num_tasks; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Allocating RDMA\n"); + + if (!p_rdma_info) + return ECORE_INVAL; + + if (p_hwfn->hw_info.personality == ECORE_PCI_ETH_IWARP) + p_rdma_info->proto = PROTOCOLID_IWARP; + else + p_rdma_info->proto = PROTOCOLID_ROCE; + + num_cons = ecore_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, + OSAL_NULL); + + if (IS_IWARP(p_hwfn)) + p_rdma_info->num_qps = num_cons; + else + p_rdma_info->num_qps = num_cons / 2; + + /* INTERNAL: RoCE & iWARP use the same taskid */ + num_tasks = ecore_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE); + + /* Each MR uses a single task */ + p_rdma_info->num_mrs = num_tasks; + + /* Queue zone lines are shared between RoCE and L2 in such a way that + * they can be used by each without obstructing the other. + */ + p_rdma_info->queue_zone_base = (u16) RESC_START(p_hwfn, ECORE_L2_QUEUE); + p_rdma_info->max_queue_zones = (u16) RESC_NUM(p_hwfn, ECORE_L2_QUEUE); + + /* Allocate a struct with device params and fill it */ + p_rdma_info->dev = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*p_rdma_info->dev)); + if (!p_rdma_info->dev) + { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "ecore rdma alloc failed: cannot allocate memory (rdma info dev). rc = %d\n", + rc); + return rc; + } + + /* Allocate a struct with port params and fill it */ + p_rdma_info->port = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*p_rdma_info->port)); + if (!p_rdma_info->port) + { + DP_NOTICE(p_hwfn, false, + "ecore rdma alloc failed: cannot allocate memory (rdma info port)\n"); + return ECORE_NOMEM; + } + + /* Allocate bit map for pd's */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->pd_map, RDMA_MAX_PDS, + "PD"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate pd_map,rc = %d\n", + rc); + return rc; + } + + /* Allocate bit map for XRC Domains */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->xrcd_map, + ECORE_RDMA_MAX_XRCDS, "XRCD"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate xrcd_map,rc = %d\n", + rc); + return rc; + } + + /* Allocate DPI bitmap */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->dpi_map, + p_hwfn->dpi_count, "DPI"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate DPI bitmap, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for cq's. The maximum number of CQs is bounded to + * twice the number of QPs. + */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, + num_cons, "CQ"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate cq bitmap, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for toggle bit for cq icids + * We toggle the bit every time we create or resize cq for a given icid. + * The maximum number of CQs is bounded to the number of connections we + * support. (num_qps in iWARP or num_qps/2 in RoCE). 
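+ * ecore_rdma_toggle_bit_create_resize_cq() flips the current value and the + * result is sent to the FW in the create CQ ramrod; if posting that ramrod + * fails, the bit is flipped back to restore the previous state.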
+ */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits, + num_cons, "Toggle"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate toogle bits, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for itids */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->tid_map, + p_rdma_info->num_mrs, "MR"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate itids bitmaps, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for qps. */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->qp_map, + p_rdma_info->num_qps, "QP"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate qp bitmap, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for cids used for responders/requesters. */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cid_map, num_cons, + "REAL CID"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate cid bitmap, rc = %d\n", rc); + return rc; + } + + /* The first SRQ follows the last XRC SRQ. This means that the + * SRQ IDs start from an offset equals to max_xrc_srqs. + */ + p_rdma_info->srq_id_offset = (u16)ecore_cxt_get_xrc_srq_count(p_hwfn); + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->xrc_srq_map, + p_rdma_info->srq_id_offset, "XRC SRQ"); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate xrc srq bitmap, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for srqs */ + p_rdma_info->num_srqs = ecore_cxt_get_srq_count(p_hwfn); + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->srq_map, + p_rdma_info->num_srqs, + "SRQ"); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate srq bitmap, rc = %d\n", rc); + + return rc; + } + + if (IS_IWARP(p_hwfn)) + rc = ecore_iwarp_alloc(p_hwfn); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + + return rc; +} + +void ecore_rdma_bmap_free(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + bool check) +{ + int weight, line, item, last_line, last_item; + u64 *pmap; + + if (!bmap || !bmap->bitmap) + return; + + if (!check) + goto end; + + weight = OSAL_BITMAP_WEIGHT(bmap->bitmap, bmap->max_count); + if (!weight) + goto end; + + DP_NOTICE(p_hwfn, false, + "%s bitmap not free - size=%d, weight=%d, 512 bits per line\n", + bmap->name, bmap->max_count, weight); + + pmap = (u64 *)bmap->bitmap; + last_line = bmap->max_count / (64*8); + last_item = last_line * 8 + (((bmap->max_count % (64*8)) + 63) / 64); + + /* print aligned non-zero lines, if any */ + for (item = 0, line = 0; line < last_line; line++, item += 8) { + if (OSAL_BITMAP_WEIGHT((unsigned long *)&pmap[item], 64*8)) + DP_NOTICE(p_hwfn, false, + "line 0x%04x: 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + line, (unsigned long long)pmap[item], + (unsigned long long)pmap[item+1], + (unsigned long long)pmap[item+2], + (unsigned long long)pmap[item+3], + (unsigned long long)pmap[item+4], + (unsigned long long)pmap[item+5], + (unsigned long long)pmap[item+6], + (unsigned long long)pmap[item+7]); + } + + /* print last unaligned non-zero line, if any */ + if ((bmap->max_count % (64*8)) && + (OSAL_BITMAP_WEIGHT((unsigned long *)&pmap[item], + bmap->max_count-item*64))) { + u8 str_last_line[200] = { 0 }; + int offset; + + offset = OSAL_SPRINTF(str_last_line, "line 0x%04x: ", line); + for (; item < last_item; item++) { + offset += OSAL_SPRINTF(str_last_line+offset, + "0x%016llx ", + (unsigned long 
long)pmap[item]); + } + DP_NOTICE(p_hwfn, false, "%s\n", str_last_line); + } + +end: + OSAL_FREE(p_hwfn->p_dev, bmap->bitmap); + bmap->bitmap = OSAL_NULL; +} + + +void ecore_rdma_resc_free(struct ecore_hwfn *p_hwfn) +{ + if (IS_IWARP(p_hwfn)) + ecore_iwarp_resc_free(p_hwfn); + + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->cid_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->qp_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->pd_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->xrcd_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->cq_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->toggle_bits, 0); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tid_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->srq_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->xrc_srq_map, 1); + + OSAL_FREE(p_hwfn->p_dev, p_hwfn->p_rdma_info->port); + p_hwfn->p_rdma_info->port = OSAL_NULL; + + OSAL_FREE(p_hwfn->p_dev, p_hwfn->p_rdma_info->dev); + p_hwfn->p_rdma_info->dev = OSAL_NULL; +} + +static OSAL_INLINE void ecore_rdma_free_reserved_lkey(struct ecore_hwfn *p_hwfn) +{ + ecore_rdma_free_tid(p_hwfn, p_hwfn->p_rdma_info->dev->reserved_lkey); +} + +static void ecore_rdma_free_ilt(struct ecore_hwfn *p_hwfn) +{ + /* Free Connection CXT */ + ecore_cxt_free_ilt_range( + p_hwfn, ECORE_ELEM_CXT, + ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto), + ecore_cxt_get_proto_cid_count(p_hwfn, + p_hwfn->p_rdma_info->proto, + OSAL_NULL)); + + /* Free Task CXT ( Intentionally RoCE as task-id is shared between + * RoCE and iWARP + */ + ecore_cxt_free_ilt_range(p_hwfn, ECORE_ELEM_TASK, 0, + ecore_cxt_get_proto_tid_count( + p_hwfn, PROTOCOLID_ROCE)); + + /* Free TSDM CXT */ + ecore_cxt_free_ilt_range(p_hwfn, ECORE_ELEM_SRQ, 0, + ecore_cxt_get_srq_count(p_hwfn)); +} + +static void ecore_rdma_free(struct ecore_hwfn *p_hwfn) +{ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "\n"); + + ecore_rdma_free_reserved_lkey(p_hwfn); + + ecore_rdma_resc_free(p_hwfn); + + ecore_rdma_free_ilt(p_hwfn); +} + +static void ecore_rdma_get_guid(struct ecore_hwfn *p_hwfn, u8 *guid) +{ + u8 mac_addr[6]; + + OSAL_MEMCPY(&mac_addr[0], &p_hwfn->hw_info.hw_mac_addr[0], ETH_ALEN); + guid[0] = mac_addr[0] ^ 2; + guid[1] = mac_addr[1]; + guid[2] = mac_addr[2]; + guid[3] = 0xff; + guid[4] = 0xfe; + guid[5] = mac_addr[3]; + guid[6] = mac_addr[4]; + guid[7] = mac_addr[5]; +} + + +static void ecore_rdma_init_events( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_start_in_params *params) +{ + struct ecore_rdma_events *events; + + events = &p_hwfn->p_rdma_info->events; + + events->unaffiliated_event = params->events->unaffiliated_event; + events->affiliated_event = params->events->affiliated_event; + events->context = params->events->context; +} + +static void ecore_rdma_init_devinfo( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_start_in_params *params) +{ + struct ecore_rdma_device *dev = p_hwfn->p_rdma_info->dev; + u32 pci_status_control; + + /* Vendor specific information */ + dev->vendor_id = p_hwfn->p_dev->vendor_id; + dev->vendor_part_id = p_hwfn->p_dev->device_id; + dev->hw_ver = 0; + dev->fw_ver = STORM_FW_VERSION; + + ecore_rdma_get_guid(p_hwfn, (u8 *)(&dev->sys_image_guid)); + dev->node_guid = dev->sys_image_guid; + + dev->max_sge = OSAL_MIN_T(u32, RDMA_MAX_SGE_PER_SQ_WQE, + RDMA_MAX_SGE_PER_RQ_WQE); + + if (p_hwfn->p_dev->rdma_max_sge) { + dev->max_sge = OSAL_MIN_T(u32, + 
p_hwfn->p_dev->rdma_max_sge, + dev->max_sge); + } + + /* Set these values according to configuration + * MAX SGE for SRQ is not defined by FW for now + * define it in driver. + * TODO: Get this value from FW. + */ + dev->max_srq_sge = ECORE_RDMA_MAX_SGE_PER_SRQ_WQE; + if (p_hwfn->p_dev->rdma_max_srq_sge) { + dev->max_srq_sge = OSAL_MIN_T(u32, + p_hwfn->p_dev->rdma_max_srq_sge, + dev->max_srq_sge); + } + + dev->max_inline = ROCE_REQ_MAX_INLINE_DATA_SIZE; + dev->max_inline = (p_hwfn->p_dev->rdma_max_inline) ? + OSAL_MIN_T(u32, + p_hwfn->p_dev->rdma_max_inline, + dev->max_inline) : + dev->max_inline; + + dev->max_wqe = ECORE_RDMA_MAX_WQE; + dev->max_cnq = (u8)FEAT_NUM(p_hwfn, ECORE_RDMA_CNQ); + + /* The number of QPs may be higher than ECORE_ROCE_MAX_QPS. because + * it is up-aligned to 16 and then to ILT page size within ecore cxt. + * This is OK in terms of ILT but we don't want to configure the FW + * above its abilities + */ + dev->max_qp = OSAL_MIN_T(u64, ROCE_MAX_QPS, + p_hwfn->p_rdma_info->num_qps); + + /* CQs uses the same icids that QPs use hence they are limited by the + * number of icids. There are two icids per QP. + */ + dev->max_cq = dev->max_qp * 2; + + /* The number of mrs is smaller by 1 since the first is reserved */ + dev->max_mr = p_hwfn->p_rdma_info->num_mrs - 1; + dev->max_mr_size = ECORE_RDMA_MAX_MR_SIZE; + /* The maximum CQE capacity per CQ supported */ + /* max number of cqes will be in two layer pbl, + * 8 is the pointer size in bytes + * 32 is the size of cq element in bytes + */ + if (params->roce.cq_mode == ECORE_RDMA_CQ_MODE_32_BITS) + dev->max_cqe = ECORE_RDMA_MAX_CQE_32_BIT; + else + dev->max_cqe = ECORE_RDMA_MAX_CQE_16_BIT; + + dev->max_mw = 0; + dev->max_fmr = ECORE_RDMA_MAX_FMR; + dev->max_mr_mw_fmr_pbl = (OSAL_PAGE_SIZE/8) * (OSAL_PAGE_SIZE/8); + dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * OSAL_PAGE_SIZE; + dev->max_pkey = ECORE_RDMA_MAX_P_KEY; + /* Right now we dont take any parameters from user + * So assign predefined max_srq to num_srqs. + */ + dev->max_srq = p_hwfn->p_rdma_info->num_srqs; + + /* SRQ WQE size */ + dev->max_srq_wr = ECORE_RDMA_MAX_SRQ_WQE_ELEM; + + dev->max_qp_resp_rd_atomic_resc = RDMA_RING_PAGE_SIZE / + (RDMA_RESP_RD_ATOMIC_ELM_SIZE*2); + dev->max_qp_req_rd_atomic_resc = RDMA_RING_PAGE_SIZE / + RDMA_REQ_RD_ATOMIC_ELM_SIZE; + + dev->max_dev_resp_rd_atomic_resc = + dev->max_qp_resp_rd_atomic_resc * p_hwfn->p_rdma_info->num_qps; + dev->page_size_caps = ECORE_RDMA_PAGE_SIZE_CAPS; + dev->dev_ack_delay = ECORE_RDMA_ACK_DELAY; + dev->max_pd = RDMA_MAX_PDS; + dev->max_ah = dev->max_qp; + dev->max_stats_queues = (u8)RESC_NUM(p_hwfn, ECORE_RDMA_STATS_QUEUE); + + /* Set capablities */ + dev->dev_caps = 0; + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_RNR_NAK, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_PORT_ACTIVE_EVENT, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_PORT_CHANGE_EVENT, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_RESIZE_CQ, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_BASE_MEMORY_EXT, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_BASE_QUEUE_EXT, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_ZBVA, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1); + + /* Check atomic operations support in PCI configuration space. 
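+ * The ECORE_RDMA_DEV_CAP_ATOMIC_OP capability is advertised only when the + * AtomicOp requester enable bit is already set in the Device Status/Control 2 + * register read below.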
*/ + OSAL_PCI_READ_CONFIG_DWORD(p_hwfn->p_dev, + PCICFG_DEVICE_STATUS_CONTROL_2, + &pci_status_control); + + if (pci_status_control & + PCICFG_DEVICE_STATUS_CONTROL_2_ATOMIC_REQ_ENABLE) + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_ATOMIC_OP, 1); + + if (IS_IWARP(p_hwfn)) + ecore_iwarp_init_devinfo(p_hwfn); +} + +static void ecore_rdma_init_port( + struct ecore_hwfn *p_hwfn) +{ + struct ecore_rdma_port *port = p_hwfn->p_rdma_info->port; + struct ecore_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + port->port_state = p_hwfn->mcp_info->link_output.link_up ? + ECORE_RDMA_PORT_UP : ECORE_RDMA_PORT_DOWN; + + port->max_msg_size = OSAL_MIN_T(u64, + (dev->max_mr_mw_fmr_size * + p_hwfn->p_dev->rdma_max_sge), + ((u64)1 << 31)); + + port->pkey_bad_counter = 0; +} + +static enum _ecore_status_t ecore_rdma_init_hw( + struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt) +{ + u32 ll2_ethertype_en; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Initializing HW\n"); + p_hwfn->b_rdma_enabled_in_prs = false; + + if (IS_IWARP(p_hwfn)) + return ecore_iwarp_init_hw(p_hwfn, p_ptt); + + ecore_wr(p_hwfn, + p_ptt, + PRS_REG_ROCE_DEST_QP_MAX_PF, + 0); + + p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE; + + /* We delay writing to this reg until first cid is allocated. See + * ecore_cxt_dynamic_ilt_alloc function for more details + */ + + ll2_ethertype_en = ecore_rd(p_hwfn, + p_ptt, + PRS_REG_LIGHT_L2_ETHERTYPE_EN); + ecore_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, + (ll2_ethertype_en | 0x01)); + +#ifndef REAL_ASIC_ONLY + if (ECORE_IS_BB_A0(p_hwfn->p_dev) && ECORE_IS_CMT(p_hwfn->p_dev)) { + ecore_wr(p_hwfn, + p_ptt, + NIG_REG_LLH_ENG_CLS_ENG_ID_TBL, + 0); + ecore_wr(p_hwfn, + p_ptt, + NIG_REG_LLH_ENG_CLS_ENG_ID_TBL + 4, + 0); + } +#endif + + if (ecore_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) + { + DP_NOTICE(p_hwfn, + true, + "The first RoCE's cid should be even\n"); + return ECORE_UNKNOWN_ERROR; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Initializing HW - Done\n"); + return ECORE_SUCCESS; +} + +static enum _ecore_status_t +ecore_rdma_start_fw(struct ecore_hwfn *p_hwfn, +#ifdef CONFIG_DCQCN + struct ecore_ptt *p_ptt, +#else + struct ecore_ptt OSAL_UNUSED *p_ptt, +#endif + struct ecore_rdma_start_in_params *params) +{ + struct rdma_init_func_ramrod_data *p_ramrod; + struct rdma_init_func_hdr *pheader; + struct ecore_rdma_info *p_rdma_info; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + u16 igu_sb_id, sb_id; + u8 ll2_queue_id; + u32 cnq_id; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Starting FW\n"); + + p_rdma_info = p_hwfn->p_rdma_info; + + /* Save the number of cnqs for the function close ramrod */ + p_rdma_info->num_cnqs = params->desired_cnq; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_INIT, + p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + if (IS_IWARP(p_hwfn)) { + ecore_iwarp_init_fw_ramrod(p_hwfn, + &p_ent->ramrod.iwarp_init_func); + p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma; + } else { + +#ifdef CONFIG_DCQCN + rc = ecore_roce_dcqcn_cfg(p_hwfn, ¶ms->roce.dcqcn_params, + &p_ent->ramrod.roce_init_func, p_ptt); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, + "Failed to configure DCQCN. 
rc = %d.\n", rc); + return rc; + } +#endif + p_ramrod = &p_ent->ramrod.roce_init_func.rdma; + + /* The ll2_queue_id is used only for UD QPs */ + ll2_queue_id = ecore_ll2_handle_to_queue_id( + p_hwfn, params->roce.ll2_handle); + p_ent->ramrod.roce_init_func.roce.ll2_queue_id = ll2_queue_id; + + } + + pheader = &p_ramrod->params_header; + pheader->cnq_start_offset = (u8)RESC_START(p_hwfn, ECORE_RDMA_CNQ_RAM); + pheader->num_cnqs = params->desired_cnq; + + /* The first SRQ ILT page is used for XRC SRQs and all the following + * pages contain regular SRQs. Hence the first regular SRQ ID is the + * maximum number XRC SRQs. + */ + pheader->first_reg_srq_id = p_rdma_info->srq_id_offset; + pheader->reg_srq_base_addr = + ecore_cxt_get_ilt_page_size(p_hwfn, ILT_CLI_TSDM); + + if (params->roce.cq_mode == ECORE_RDMA_CQ_MODE_16_BITS) + pheader->cq_ring_mode = 1; /* 1=16 bits */ + else + pheader->cq_ring_mode = 0; /* 0=32 bits */ + + for (cnq_id = 0; cnq_id < params->desired_cnq; cnq_id++) + { + sb_id = (u16)OSAL_GET_RDMA_SB_ID(p_hwfn, cnq_id); + igu_sb_id = ecore_get_igu_sb_id(p_hwfn, sb_id); + p_ramrod->cnq_params[cnq_id].sb_num = + OSAL_CPU_TO_LE16(igu_sb_id); + + p_ramrod->cnq_params[cnq_id].sb_index = + p_hwfn->pf_params.rdma_pf_params.gl_pi; + + p_ramrod->cnq_params[cnq_id].num_pbl_pages = + params->cnq_pbl_list[cnq_id].num_pbl_pages; + + p_ramrod->cnq_params[cnq_id].pbl_base_addr.hi = + DMA_HI_LE(params->cnq_pbl_list[cnq_id].pbl_ptr); + p_ramrod->cnq_params[cnq_id].pbl_base_addr.lo = + DMA_LO_LE(params->cnq_pbl_list[cnq_id].pbl_ptr); + + /* we arbitrarily decide that cnq_id will be as qz_offset */ + p_ramrod->cnq_params[cnq_id].queue_zone_num = + OSAL_CPU_TO_LE16(p_rdma_info->queue_zone_base + cnq_id); + } + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + return rc; +} + +enum _ecore_status_t ecore_rdma_alloc_tid(void *rdma_cxt, + u32 *itid) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Allocate TID\n"); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->tid_map, + itid); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "Failed in allocating tid\n"); + goto out; + } + + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, ECORE_ELEM_TASK, *itid); +out: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Allocate TID - done, rc = %d\n", rc); + return rc; +} + +static OSAL_INLINE enum _ecore_status_t ecore_rdma_reserve_lkey( + struct ecore_hwfn *p_hwfn) +{ + struct ecore_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + /* Tid 0 will be used as the key for "reserved MR". + * The driver should allocate memory for it so it can be loaded but no + * ramrod should be passed on it. 
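+ * This is expected to be the first tid allocation, so the id handed back + * below should equal RDMA_RESERVED_LKEY; any other value is treated as an + * error and ECORE_INVAL is returned.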
+ */ + ecore_rdma_alloc_tid(p_hwfn, &dev->reserved_lkey); + if (dev->reserved_lkey != RDMA_RESERVED_LKEY) + { + DP_NOTICE(p_hwfn, true, + "Reserved lkey should be equal to RDMA_RESERVED_LKEY\n"); + return ECORE_INVAL; + } + + return ECORE_SUCCESS; +} + +static enum _ecore_status_t ecore_rdma_setup(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + struct ecore_rdma_start_in_params *params) +{ + enum _ecore_status_t rc = 0; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "RDMA setup\n"); + + ecore_rdma_init_devinfo(p_hwfn, params); + ecore_rdma_init_port(p_hwfn); + ecore_rdma_init_events(p_hwfn, params); + + rc = ecore_rdma_reserve_lkey(p_hwfn); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_rdma_init_hw(p_hwfn, p_ptt); + if (rc != ECORE_SUCCESS) + return rc; + + if (IS_IWARP(p_hwfn)) { + rc = ecore_iwarp_setup(p_hwfn, params); + if (rc != ECORE_SUCCESS) + return rc; + } else { + rc = ecore_roce_setup(p_hwfn); + if (rc != ECORE_SUCCESS) + return rc; + } + + return ecore_rdma_start_fw(p_hwfn, p_ptt, params); +} + + +enum _ecore_status_t ecore_rdma_stop(void *rdma_cxt) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_close_func_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + struct ecore_ptt *p_ptt; + u32 ll2_ethertype_en; + enum _ecore_status_t rc = ECORE_TIMEOUT; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "RDMA stop\n"); + + rc = ecore_rdma_deactivate(p_hwfn); + if (rc != ECORE_SUCCESS) + return rc; + + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Failed to acquire PTT\n"); + return rc; + } + +#ifdef CONFIG_DCQCN + ecore_roce_stop_rl(p_hwfn); +#endif + + /* Disable RoCE search */ + ecore_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 0); + p_hwfn->b_rdma_enabled_in_prs = false; + + ecore_wr(p_hwfn, + p_ptt, + PRS_REG_ROCE_DEST_QP_MAX_PF, + 0); + + ll2_ethertype_en = ecore_rd(p_hwfn, + p_ptt, + PRS_REG_LIGHT_L2_ETHERTYPE_EN); + + ecore_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, + (ll2_ethertype_en & 0xFFFE)); + +#ifndef REAL_ASIC_ONLY + /* INTERNAL: In CMT mode, re-initialize nig to direct packets to both + * enginesfor L2 performance, Roce requires all traffic to go just to + * engine 0. + */ + if (ECORE_IS_BB_A0(p_hwfn->p_dev) && ECORE_IS_CMT(p_hwfn->p_dev)) { + DP_ERR(p_hwfn->p_dev, + "On Everest 4 Big Bear Board revision A0 when RoCE driver is loaded L2 performance is sub-optimal (all traffic is routed to engine 0). 
For optimal L2 results either remove RoCE driver or use board revision B0\n"); + + ecore_wr(p_hwfn, + p_ptt, + NIG_REG_LLH_ENG_CLS_ENG_ID_TBL, + 0x55555555); + ecore_wr(p_hwfn, + p_ptt, + NIG_REG_LLH_ENG_CLS_ENG_ID_TBL + 0x4, + 0x55555555); + } +#endif + + if (IS_IWARP(p_hwfn)) { + rc = ecore_iwarp_stop(p_hwfn); + if (rc != ECORE_SUCCESS) { + ecore_ptt_release(p_hwfn, p_ptt); + return 0; + } + } else { + rc = ecore_roce_stop(p_hwfn); + if (rc != ECORE_SUCCESS) { + ecore_ptt_release(p_hwfn, p_ptt); + return 0; + } + } + + ecore_ptt_release(p_hwfn, p_ptt); + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + /* Stop RoCE */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_CLOSE, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + goto out; + + p_ramrod = &p_ent->ramrod.rdma_close_func; + + p_ramrod->num_cnqs = p_hwfn->p_rdma_info->num_cnqs; + p_ramrod->cnq_start_offset = (u8)RESC_START(p_hwfn, ECORE_RDMA_CNQ_RAM); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + +out: + ecore_rdma_free(p_hwfn); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "RDMA stop done, rc = %d\n", rc); + return rc; +} + +enum _ecore_status_t ecore_rdma_add_user(void *rdma_cxt, + struct ecore_rdma_add_user_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + u32 dpi_start_offset; + u32 returned_id = 0; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Adding User\n"); + + /* Allocate DPI */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, + &returned_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + if (rc != ECORE_SUCCESS) + DP_NOTICE(p_hwfn, false, "Failed in allocating dpi\n"); + + out_params->dpi = (u16)returned_id; + + /* Calculate the corresponding DPI address */ + dpi_start_offset = p_hwfn->dpi_start_offset; + + out_params->dpi_addr = (u64)(osal_int_ptr_t)((u8 OSAL_IOMEM*)p_hwfn->doorbells + + dpi_start_offset + + ((out_params->dpi) * p_hwfn->dpi_size)); + + out_params->dpi_phys_addr = p_hwfn->db_phys_addr + dpi_start_offset + + out_params->dpi * p_hwfn->dpi_size; + + out_params->dpi_size = p_hwfn->dpi_size; + out_params->wid_count = p_hwfn->wid_count; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Adding user - done, rc = %d\n", rc); + return rc; +} + +struct ecore_rdma_port *ecore_rdma_query_port(void *rdma_cxt) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_rdma_port *p_port = p_hwfn->p_rdma_info->port; + struct ecore_mcp_link_state *p_link_output; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "RDMA Query port\n"); + + /* The link state is saved only for the leading hwfn */ + p_link_output = + &ECORE_LEADING_HWFN(p_hwfn->p_dev)->mcp_info->link_output; + + /* Link may have changed... */ + p_port->port_state = p_link_output->link_up ? 
ECORE_RDMA_PORT_UP + : ECORE_RDMA_PORT_DOWN; + + p_port->link_speed = p_link_output->speed; + + p_port->max_msg_size = RDMA_MAX_DATA_SIZE_IN_WQE; + + return p_port; +} + +struct ecore_rdma_device *ecore_rdma_query_device(void *rdma_cxt) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Query device\n"); + + /* Return struct with device parameters */ + return p_hwfn->p_rdma_info->dev; +} + +void ecore_rdma_free_tid(void *rdma_cxt, + u32 itid) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "itid = %08x\n", itid); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->tid_map, + itid); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +void ecore_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) +{ + struct ecore_hwfn *p_hwfn; + u16 qz_num; + u32 addr; + + p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + if (qz_offset > p_hwfn->p_rdma_info->max_queue_zones) { + DP_NOTICE(p_hwfn, false, + "queue zone offset %d is too large (max is %d)\n", + qz_offset, p_hwfn->p_rdma_info->max_queue_zones); + return; + } + + qz_num = p_hwfn->p_rdma_info->queue_zone_base + qz_offset; + addr = GTT_BAR0_MAP_REG_USDM_RAM + + USTORM_COMMON_QUEUE_CONS_OFFSET(qz_num); + + REG_WR16(p_hwfn, addr, prod); + + /* keep prod updates ordered */ + OSAL_WMB(p_hwfn->p_dev); +} + +enum _ecore_status_t ecore_rdma_alloc_pd(void *rdma_cxt, + u16 *pd) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + u32 returned_id; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Alloc PD\n"); + + /* Allocates an unused protection domain */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->pd_map, + &returned_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + if (rc != ECORE_SUCCESS) + DP_NOTICE(p_hwfn, false, "Failed in allocating pd id\n"); + + *pd = (u16)returned_id; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Alloc PD - done, rc = %d\n", rc); + return rc; +} + +void ecore_rdma_free_pd(void *rdma_cxt, + u16 pd) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "pd = %08x\n", pd); + + /* Returns a previously allocated protection domain for reuse */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->pd_map, pd); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +enum _ecore_status_t ecore_rdma_alloc_xrcd(void *rdma_cxt, + u16 *xrcd_id) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + u32 returned_id; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Alloc XRCD\n"); + + /* Allocates an unused XRC domain */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->xrcd_map, + &returned_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + if (rc != ECORE_SUCCESS) + DP_NOTICE(p_hwfn, false, "Failed in allocating xrcd id\n"); + + *xrcd_id = (u16)returned_id; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Alloc XRCD - done, rc = %d\n", rc); + return rc; +} + +void ecore_rdma_free_xrcd(void *rdma_cxt, + u16 xrcd_id) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "xrcd_id = %08x\n", xrcd_id); + + /* Returns a previously allocated protection domain for reuse */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, 
&p_hwfn->p_rdma_info->xrcd_map, xrcd_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +static enum ecore_rdma_toggle_bit +ecore_rdma_toggle_bit_create_resize_cq(struct ecore_hwfn *p_hwfn, + u16 icid) +{ + struct ecore_rdma_info *p_info = p_hwfn->p_rdma_info; + enum ecore_rdma_toggle_bit toggle_bit; + u32 bmap_id; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", icid); + + /* the function toggle the bit that is related to a given icid + * and returns the new toggle bit's value + */ + bmap_id = icid - ecore_cxt_get_proto_cid_start(p_hwfn, p_info->proto); + + OSAL_SPIN_LOCK(&p_info->lock); + toggle_bit = !OSAL_TEST_AND_FLIP_BIT(bmap_id, p_info->toggle_bits.bitmap); + OSAL_SPIN_UNLOCK(&p_info->lock); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ECORE_RDMA_TOGGLE_BIT_= %d\n", + toggle_bit); + + return toggle_bit; +} + +enum _ecore_status_t ecore_rdma_create_cq(void *rdma_cxt, + struct ecore_rdma_create_cq_in_params *params, + u16 *icid) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_rdma_info *p_info = p_hwfn->p_rdma_info; + struct rdma_create_cq_ramrod_data *p_ramrod; + enum ecore_rdma_toggle_bit toggle_bit; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + u32 returned_id; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "cq_handle = %08x%08x\n", + params->cq_handle_hi, params->cq_handle_lo); + + /* Allocate icid */ + OSAL_SPIN_LOCK(&p_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, &p_info->cq_map, &returned_id); + OSAL_SPIN_UNLOCK(&p_info->lock); + + if (rc != ECORE_SUCCESS) + { + DP_NOTICE(p_hwfn, false, "Can't create CQ, rc = %d\n", rc); + return rc; + } + + *icid = (u16)(returned_id + + ecore_cxt_get_proto_cid_start( + p_hwfn, p_info->proto)); + + /* Check if icid requires a page allocation */ + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, ECORE_ELEM_CXT, *icid); + if (rc != ECORE_SUCCESS) + goto err; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = *icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + /* Send create CQ ramrod */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_CREATE_CQ, + p_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_create_cq; + + p_ramrod->cq_handle.hi = OSAL_CPU_TO_LE32(params->cq_handle_hi); + p_ramrod->cq_handle.lo = OSAL_CPU_TO_LE32(params->cq_handle_lo); + p_ramrod->dpi = OSAL_CPU_TO_LE16(params->dpi); + p_ramrod->is_two_level_pbl = params->pbl_two_level; + p_ramrod->max_cqes = OSAL_CPU_TO_LE32(params->cq_size); + DMA_REGPAIR_LE(p_ramrod->pbl_addr, params->pbl_ptr); + p_ramrod->pbl_num_pages = OSAL_CPU_TO_LE16(params->pbl_num_pages); + p_ramrod->cnq_id = (u8)RESC_START(p_hwfn, ECORE_RDMA_CNQ_RAM) + + params->cnq_id; + p_ramrod->int_timeout = params->int_timeout; + /* INTERNAL: Two layer PBL is currently not supported, ignoring next line */ + /* INTERNAL: p_ramrod->pbl_log_page_size = params->pbl_page_size_log - 12; */ + + /* toggle the bit for every resize or create cq for a given icid */ + toggle_bit = ecore_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid); + + p_ramrod->toggle_bit = toggle_bit; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) { + /* restore toggle bit */ + ecore_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid); + goto err; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Created CQ, rc = %d\n", rc); + return rc; + +err: + /* release allocated icid */ + 
OSAL_SPIN_LOCK(&p_info->lock); + ecore_bmap_release_id(p_hwfn, &p_info->cq_map, returned_id); + OSAL_SPIN_UNLOCK(&p_info->lock); + + DP_NOTICE(p_hwfn, false, "Create CQ failed, rc = %d\n", rc); + + return rc; +} + +enum _ecore_status_t ecore_rdma_destroy_cq(void *rdma_cxt, + struct ecore_rdma_destroy_cq_in_params *in_params, + struct ecore_rdma_destroy_cq_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_destroy_cq_output_params *p_ramrod_res; + struct rdma_destroy_cq_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + enum _ecore_status_t rc = ECORE_NOMEM; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", in_params->icid); + + p_ramrod_res = (struct rdma_destroy_cq_output_params *) + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, &ramrod_res_phys, + sizeof(struct rdma_destroy_cq_output_params)); + if (!p_ramrod_res) + { + DP_NOTICE(p_hwfn, false, + "ecore destroy cq failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = in_params->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + /* Send destroy CQ ramrod */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DESTROY_CQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_destroy_cq; + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + out_params->num_cq_notif = + OSAL_LE16_TO_CPU(p_ramrod_res->cnq_num); + + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(struct rdma_destroy_cq_output_params)); + + /* Free icid */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->cq_map, + (in_params->icid - ecore_cxt_get_proto_cid_start( + p_hwfn, p_hwfn->p_rdma_info->proto))); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Destroyed CQ, rc = %d\n", rc); + return rc; + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(struct rdma_destroy_cq_output_params)); + + return rc; +} + +void ecore_rdma_set_fw_mac(u16 *p_fw_mac, u8 *p_ecore_mac) +{ + p_fw_mac[0] = OSAL_CPU_TO_LE16((p_ecore_mac[0] << 8) + p_ecore_mac[1]); + p_fw_mac[1] = OSAL_CPU_TO_LE16((p_ecore_mac[2] << 8) + p_ecore_mac[3]); + p_fw_mac[2] = OSAL_CPU_TO_LE16((p_ecore_mac[4] << 8) + p_ecore_mac[5]); +} + + +enum _ecore_status_t ecore_rdma_query_qp(void *rdma_cxt, + struct ecore_rdma_qp *qp, + struct ecore_rdma_query_qp_out_params *out_params) + +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + enum _ecore_status_t rc = ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* The following fields are filled in from qp and not FW as they can't + * be modified by FW + */ + out_params->mtu = qp->mtu; + out_params->dest_qp = qp->dest_qp; + out_params->incoming_atomic_en = qp->incoming_atomic_en; + out_params->e2e_flow_control_en = qp->e2e_flow_control_en; + out_params->incoming_rdma_read_en = qp->incoming_rdma_read_en; + out_params->incoming_rdma_write_en = qp->incoming_rdma_write_en; + out_params->dgid = qp->dgid; + out_params->flow_label = qp->flow_label; + out_params->hop_limit_ttl = qp->hop_limit_ttl; + out_params->traffic_class_tos = 
qp->traffic_class_tos; + out_params->timeout = qp->ack_timeout; + out_params->rnr_retry = qp->rnr_retry_cnt; + out_params->retry_cnt = qp->retry_cnt; + out_params->min_rnr_nak_timer = qp->min_rnr_nak_timer; + out_params->pkey_index = 0; + out_params->max_rd_atomic = qp->max_rd_atomic_req; + out_params->max_dest_rd_atomic = qp->max_rd_atomic_resp; + out_params->sqd_async = qp->sqd_async; + + if (IS_IWARP(p_hwfn)) + rc = ecore_iwarp_query_qp(qp, out_params); + else + rc = ecore_roce_query_qp(p_hwfn, qp, out_params); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Query QP, rc = %d\n", rc); + return rc; +} + + +enum _ecore_status_t ecore_rdma_destroy_qp(void *rdma_cxt, + struct ecore_rdma_qp *qp, + struct ecore_rdma_destroy_qp_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (!rdma_cxt || !qp) { + DP_ERR(p_hwfn, + "ecore rdma destroy qp failed: invalid NULL input. rdma_cxt=%p, qp=%p\n", + rdma_cxt, qp); + return ECORE_INVAL; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x)\n", qp->icid); + + if (IS_IWARP(p_hwfn)) + rc = ecore_iwarp_destroy_qp(p_hwfn, qp); + else + rc = ecore_roce_destroy_qp(p_hwfn, qp, out_params); + + /* free qp params struct */ + OSAL_FREE(p_hwfn->p_dev, qp); + + return rc; +} + + +struct ecore_rdma_qp *ecore_rdma_create_qp(void *rdma_cxt, + struct ecore_rdma_create_qp_in_params *in_params, + struct ecore_rdma_create_qp_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_rdma_qp *qp; + u8 max_stats_queues; + enum _ecore_status_t rc = 0; + + if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info) { + DP_ERR(p_hwfn->p_dev, + "ecore roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", + rdma_cxt, + in_params, + out_params); + return OSAL_NULL; + } + + /* Some sanity checks... */ + max_stats_queues = p_hwfn->p_rdma_info->dev->max_stats_queues; + if (in_params->stats_queue >= max_stats_queues) { + DP_ERR(p_hwfn->p_dev, + "ecore rdma create qp failed due to invalid statistics queue %d. 
maximum is %d\n", + in_params->stats_queue, max_stats_queues); + return OSAL_NULL; + } + + if (IS_IWARP(p_hwfn)) { + if (in_params->sq_num_pages*sizeof(struct regpair) > + IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE) { + DP_NOTICE(p_hwfn->p_dev, true, "Sq num pages: %d exceeds maximum\n", + in_params->sq_num_pages); + return OSAL_NULL; + } + if (in_params->rq_num_pages*sizeof(struct regpair) > + IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE) { + DP_NOTICE(p_hwfn->p_dev, true, + "Rq num pages: %d exceeds maximum\n", + in_params->rq_num_pages); + return OSAL_NULL; + } + } + + qp = OSAL_ZALLOC(p_hwfn->p_dev, + GFP_KERNEL, + sizeof(struct ecore_rdma_qp)); + if (!qp) + { + DP_NOTICE(p_hwfn, false, "Failed to allocate ecore_rdma_qp\n"); + return OSAL_NULL; + } + + qp->cur_state = ECORE_ROCE_QP_STATE_RESET; +#ifdef CONFIG_ECORE_IWARP + qp->iwarp_state = ECORE_IWARP_QP_STATE_IDLE; +#endif + qp->qp_handle.hi = OSAL_CPU_TO_LE32(in_params->qp_handle_hi); + qp->qp_handle.lo = OSAL_CPU_TO_LE32(in_params->qp_handle_lo); + qp->qp_handle_async.hi = OSAL_CPU_TO_LE32(in_params->qp_handle_async_hi); + qp->qp_handle_async.lo = OSAL_CPU_TO_LE32(in_params->qp_handle_async_lo); + qp->use_srq = in_params->use_srq; + qp->signal_all = in_params->signal_all; + qp->fmr_and_reserved_lkey = in_params->fmr_and_reserved_lkey; + qp->pd = in_params->pd; + qp->dpi = in_params->dpi; + qp->sq_cq_id = in_params->sq_cq_id; + qp->sq_num_pages = in_params->sq_num_pages; + qp->sq_pbl_ptr = in_params->sq_pbl_ptr; + qp->rq_cq_id = in_params->rq_cq_id; + qp->rq_num_pages = in_params->rq_num_pages; + qp->rq_pbl_ptr = in_params->rq_pbl_ptr; + qp->srq_id = in_params->srq_id; + qp->req_offloaded = false; + qp->resp_offloaded = false; + /* e2e_flow_control cannot be done in case of S-RQ. + * Refer to 9.7.7.2 End-to-End Flow Control section of IB spec + */ + qp->e2e_flow_control_en = qp->use_srq ? 
false : true; + qp->stats_queue = in_params->stats_queue; + qp->qp_type = in_params->qp_type; + qp->xrcd_id = in_params->xrcd_id; + + if (IS_IWARP(p_hwfn)) { + rc = ecore_iwarp_create_qp(p_hwfn, qp, out_params); + qp->qpid = qp->icid; + } else { + rc = ecore_roce_alloc_qp_idx(p_hwfn, &qp->qp_idx); + qp->icid = ECORE_ROCE_QP_TO_ICID(qp->qp_idx); + qp->qpid = ((0xFF << 16) | qp->icid); + } + + if (rc != ECORE_SUCCESS) { + OSAL_FREE(p_hwfn->p_dev, qp); + return OSAL_NULL; + } + + out_params->icid = qp->icid; + out_params->qp_id = qp->qpid; + + /* INTERNAL: max_sq_sges future use only*/ + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Create QP, rc = %d\n", rc); + return qp; +} + +#define ECORE_RDMA_ECN_SHIFT 0 +#define ECORE_RDMA_ECN_MASK 0x3 +#define ECORE_RDMA_DSCP_SHIFT 2 +#define ECORE_RDMA_DSCP_MASK 0x3f +#define ECORE_RDMA_VLAN_PRIO_SHIFT 13 +#define ECORE_RDMA_VLAN_PRIO_MASK 0x7 +enum _ecore_status_t ecore_rdma_modify_qp( + void *rdma_cxt, + struct ecore_rdma_qp *qp, + struct ecore_rdma_modify_qp_in_params *params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + enum ecore_roce_qp_state prev_state; + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (GET_FIELD(params->modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN)) + { + qp->incoming_rdma_read_en = params->incoming_rdma_read_en; + qp->incoming_rdma_write_en = params->incoming_rdma_write_en; + qp->incoming_atomic_en = params->incoming_atomic_en; + } + + /* Update QP structure with the updated values */ + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ROCE_MODE)) + { + qp->roce_mode = params->roce_mode; + } + if (GET_FIELD(params->modify_flags, ECORE_ROCE_MODIFY_QP_VALID_PKEY)) + { + qp->pkey = params->pkey; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN)) + { + qp->e2e_flow_control_en = params->e2e_flow_control_en; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_DEST_QP)) + { + qp->dest_qp = params->dest_qp; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)) + { + /* Indicates that the following parameters have changed: + * Traffic class, flow label, hop limit, source GID, + * destination GID, loopback indicator + */ + qp->flow_label = params->flow_label; + qp->hop_limit_ttl = params->hop_limit_ttl; + + qp->sgid = params->sgid; + qp->dgid = params->dgid; + qp->udp_src_port = params->udp_src_port; + qp->vlan_id = params->vlan_id; + qp->traffic_class_tos = params->traffic_class_tos; + + /* apply global override values */ + if (p_hwfn->p_rdma_info->glob_cfg.vlan_pri_en) + SET_FIELD(qp->vlan_id, ECORE_RDMA_VLAN_PRIO, + p_hwfn->p_rdma_info->glob_cfg.vlan_pri); + + if (p_hwfn->p_rdma_info->glob_cfg.ecn_en) + SET_FIELD(qp->traffic_class_tos, ECORE_RDMA_ECN, + p_hwfn->p_rdma_info->glob_cfg.ecn); + + if (p_hwfn->p_rdma_info->glob_cfg.dscp_en) + SET_FIELD(qp->traffic_class_tos, ECORE_RDMA_DSCP, + p_hwfn->p_rdma_info->glob_cfg.dscp); + + qp->mtu = params->mtu; + + OSAL_MEMCPY((u8 *)&qp->remote_mac_addr[0], + (u8 *)¶ms->remote_mac_addr[0], ETH_ALEN); + if (params->use_local_mac) { + OSAL_MEMCPY((u8 *)&qp->local_mac_addr[0], + (u8 *)¶ms->local_mac_addr[0], + ETH_ALEN); + } else { + OSAL_MEMCPY((u8 *)&qp->local_mac_addr[0], + (u8 *)&p_hwfn->hw_info.hw_mac_addr, + ETH_ALEN); + } + } + if (GET_FIELD(params->modify_flags, ECORE_ROCE_MODIFY_QP_VALID_RQ_PSN)) + { + qp->rq_psn = params->rq_psn; + } + if (GET_FIELD(params->modify_flags, ECORE_ROCE_MODIFY_QP_VALID_SQ_PSN)) + { + qp->sq_psn = params->sq_psn; + } + if 
(GET_FIELD(params->modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ)) + { + qp->max_rd_atomic_req = params->max_rd_atomic_req; + } + if (GET_FIELD(params->modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP)) + { + qp->max_rd_atomic_resp = params->max_rd_atomic_resp; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT)) + { + qp->ack_timeout = params->ack_timeout; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_RETRY_CNT)) + { + qp->retry_cnt = params->retry_cnt; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT)) + { + qp->rnr_retry_cnt = params->rnr_retry_cnt; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER)) + { + qp->min_rnr_nak_timer = params->min_rnr_nak_timer; + } + + qp->sqd_async = params->sqd_async; + + prev_state = qp->cur_state; + if (GET_FIELD(params->modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_NEW_STATE)) + { + qp->cur_state = params->new_state; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "qp->cur_state=%d\n", + qp->cur_state); + } + + if (qp->qp_type == ECORE_RDMA_QP_TYPE_XRC_INI) { + qp->has_req = 1; + } else if (qp->qp_type == ECORE_RDMA_QP_TYPE_XRC_TGT) + { + qp->has_resp = 1; + } else { + qp->has_req = 1; + qp->has_resp = 1; + } + + if (IS_IWARP(p_hwfn)) { + enum ecore_iwarp_qp_state new_state = + ecore_roce2iwarp_state(qp->cur_state); + + rc = ecore_iwarp_modify_qp(p_hwfn, qp, new_state, 0); + } else { + rc = ecore_roce_modify_qp(p_hwfn, qp, prev_state, params); + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Modify QP, rc = %d\n", rc); + return rc; +} + +enum _ecore_status_t ecore_rdma_register_tid(void *rdma_cxt, + struct ecore_rdma_register_tid_in_params *params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_register_tid_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum rdma_tid_type tid_type; + u8 fw_return_code; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "itid = %08x\n", params->itid); + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_REGISTER_MR, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + if (p_hwfn->p_rdma_info->last_tid < params->itid) { + p_hwfn->p_rdma_info->last_tid = params->itid; + } + + p_ramrod = &p_ent->ramrod.rdma_register_tid; + + p_ramrod->flags = 0; + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_TWO_LEVEL_PBL, + params->pbl_two_level); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED, + params->zbva); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR, + params->phy_mr); + + /* Don't initialize D/C field, as it may override other bits. 
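+ * Consequently the page size log field below is only written for tids that + * are neither FMRs nor DMA MRs.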
*/ + if (!(params->tid_type == ECORE_RDMA_TID_FMR) && + !(params->dma_mr)) + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG, + params->page_size_log - 12); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ, + params->remote_read); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_WRITE, + params->remote_write); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_ATOMIC, + params->remote_atomic); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_WRITE, + params->local_write); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_READ, + params->local_read); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_ENABLE_MW_BIND, + params->mw_bind); + + SET_FIELD(p_ramrod->flags1, + RDMA_REGISTER_TID_RAMROD_DATA_PBL_PAGE_SIZE_LOG, + params->pbl_page_size_log - 12); + + SET_FIELD(p_ramrod->flags2, + RDMA_REGISTER_TID_RAMROD_DATA_DMA_MR, + params->dma_mr); + + switch (params->tid_type) + { + case ECORE_RDMA_TID_REGISTERED_MR: + tid_type = RDMA_TID_REGISTERED_MR; + break; + case ECORE_RDMA_TID_FMR: + tid_type = RDMA_TID_FMR; + break; + case ECORE_RDMA_TID_MW_TYPE1: + tid_type = RDMA_TID_MW_TYPE1; + break; + case ECORE_RDMA_TID_MW_TYPE2A: + tid_type = RDMA_TID_MW_TYPE2A; + break; + default: + rc = ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + SET_FIELD(p_ramrod->flags1, + RDMA_REGISTER_TID_RAMROD_DATA_TID_TYPE, + tid_type); + + p_ramrod->itid = OSAL_CPU_TO_LE32(params->itid); + p_ramrod->key = params->key; + p_ramrod->pd = OSAL_CPU_TO_LE16(params->pd); + p_ramrod->length_hi = (u8)(params->length >> 32); + p_ramrod->length_lo = DMA_LO_LE(params->length); + if (params->zbva) + { + /* Lower 32 bits of the registered MR address. 
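+ * (va.hi is explicitly cleared in this branch since only 32 bits are used.)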
+ * In case of zero based MR, will hold FBO + */ + p_ramrod->va.hi = 0; + p_ramrod->va.lo = OSAL_CPU_TO_LE32(params->fbo); + } else { + DMA_REGPAIR_LE(p_ramrod->va, params->vaddr); + } + DMA_REGPAIR_LE(p_ramrod->pbl_base, params->pbl_ptr); + + /* DIF */ + if (params->dif_enabled) { + SET_FIELD(p_ramrod->flags2, + RDMA_REGISTER_TID_RAMROD_DATA_DIF_ON_HOST_FLG, 1); + DMA_REGPAIR_LE(p_ramrod->dif_error_addr, + params->dif_error_addr); + DMA_REGPAIR_LE(p_ramrod->dif_runt_addr, params->dif_runt_addr); + } + + rc = ecore_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) + return rc; + + if (fw_return_code != RDMA_RETURN_OK) { + DP_NOTICE(p_hwfn, true, "fw_return_code = %d\n", fw_return_code); + return ECORE_UNKNOWN_ERROR; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Register TID, rc = %d\n", rc); + return rc; +} + +static OSAL_INLINE int ecore_rdma_send_deregister_tid_ramrod( + struct ecore_hwfn *p_hwfn, + u32 itid, + u8 *fw_return_code) +{ + struct ecore_sp_init_data init_data; + struct rdma_deregister_tid_ramrod_data *p_ramrod; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DEREGISTER_MR, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.rdma_deregister_tid; + p_ramrod->itid = OSAL_CPU_TO_LE32(itid); + + rc = ecore_spq_post(p_hwfn, p_ent, fw_return_code); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + return rc; +} + +#define ECORE_RDMA_DEREGISTER_TIMEOUT_MSEC (1) + +enum _ecore_status_t ecore_rdma_deregister_tid(void *rdma_cxt, + u32 itid) +{ + enum _ecore_status_t rc; + u8 fw_ret_code; + struct ecore_ptt *p_ptt; + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + /* First attempt */ + rc = ecore_rdma_send_deregister_tid_ramrod(p_hwfn, itid, &fw_ret_code); + if (rc != ECORE_SUCCESS) + return rc; + + if (fw_ret_code != RDMA_RETURN_NIG_DRAIN_REQ) + goto done; + + /* Second attempt, after 1msec, if device still holds data. + * This can occur since 'destroy QP' returns to the caller rather fast. + * The synchronous part of it returns after freeing a few of the + * resources but not all of them, allowing the consumer to continue its + * flow. All of the resources will be freed after the asynchronous part + * of the destroy QP is complete. 
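+ * If the FW still answers RDMA_RETURN_NIG_DRAIN_REQ after this retry, a third + * and final attempt is made below following an explicit ecore_mcp_drain().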
+ */ + OSAL_MSLEEP(ECORE_RDMA_DEREGISTER_TIMEOUT_MSEC); + rc = ecore_rdma_send_deregister_tid_ramrod(p_hwfn, itid, &fw_ret_code); + if (rc != ECORE_SUCCESS) + return rc; + + if (fw_ret_code != RDMA_RETURN_NIG_DRAIN_REQ) + goto done; + + /* Third and last attempt, perform NIG drain and resend the ramrod */ + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) + return ECORE_TIMEOUT; + + rc = ecore_mcp_drain(p_hwfn, p_ptt); + if (rc != ECORE_SUCCESS) { + ecore_ptt_release(p_hwfn, p_ptt); + return rc; + } + + ecore_ptt_release(p_hwfn, p_ptt); + + rc = ecore_rdma_send_deregister_tid_ramrod(p_hwfn, itid, &fw_ret_code); + if (rc != ECORE_SUCCESS) + return rc; + +done: + if (fw_ret_code == RDMA_RETURN_OK) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "De-registered itid=%d\n", + itid); + return ECORE_SUCCESS; + } else if (fw_ret_code == RDMA_RETURN_DEREGISTER_MR_BAD_STATE_ERR) { + /* INTERNAL: This error is returned in case trying to deregister + * a MR that is not allocated. We define "allocated" as either: + * 1. Registered. + * 2. This is an FMR MR type, which is not currently registered + * but can accept FMR WQEs on SQ. + */ + DP_NOTICE(p_hwfn, false, "itid=%d, fw_ret_code=%d\n", itid, + fw_ret_code); + return ECORE_INVAL; + } else { /* fw_ret_code == RDMA_RETURN_NIG_DRAIN_REQ */ + DP_NOTICE(p_hwfn, true, + "deregister failed after three attempts. itid=%d, fw_ret_code=%d\n", + itid, fw_ret_code); + return ECORE_UNKNOWN_ERROR; + } +} + +static struct ecore_bmap *ecore_rdma_get_srq_bmap(struct ecore_hwfn *p_hwfn, bool is_xrc) +{ + if (is_xrc) + return &p_hwfn->p_rdma_info->xrc_srq_map; + + return &p_hwfn->p_rdma_info->srq_map; +} + +u16 ecore_rdma_get_fw_srq_id(struct ecore_hwfn *p_hwfn, u16 id, bool is_xrc) +{ + if (is_xrc) + return id; + + return id + p_hwfn->p_rdma_info->srq_id_offset; +} + +enum _ecore_status_t +ecore_rdma_modify_srq(void *rdma_cxt, + struct ecore_rdma_modify_srq_in_params *in_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_srq_modify_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + u16 opaque_fid, fw_srq_id; + enum _ecore_status_t rc; + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + /* Send modify SRQ ramrod */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_MODIFY_SRQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.rdma_modify_srq; + + fw_srq_id = ecore_rdma_get_fw_srq_id(p_hwfn, in_params->srq_id, + in_params->is_xrc); + p_ramrod->srq_id.srq_idx = OSAL_CPU_TO_LE16(fw_srq_id); + opaque_fid = p_hwfn->hw_info.opaque_fid; + p_ramrod->srq_id.opaque_fid = OSAL_CPU_TO_LE16(opaque_fid); + p_ramrod->wqe_limit = OSAL_CPU_TO_LE16(in_params->wqe_limit); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + return rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "modified SRQ id = %x, is_xrc=%u\n", + in_params->srq_id, in_params->is_xrc); + + return rc; +} + +enum _ecore_status_t +ecore_rdma_destroy_srq(void *rdma_cxt, + struct ecore_rdma_destroy_srq_in_params *in_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_srq_destroy_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + u16 opaque_fid, fw_srq_id; + struct ecore_bmap *bmap; + enum _ecore_status_t rc; + + opaque_fid = p_hwfn->hw_info.opaque_fid; + + OSAL_MEMSET(&init_data, 0, 
sizeof(init_data)); + init_data.opaque_fid = opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + /* Send destroy SRQ ramrod */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DESTROY_SRQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.rdma_destroy_srq; + + fw_srq_id = ecore_rdma_get_fw_srq_id(p_hwfn, in_params->srq_id, + in_params->is_xrc); + p_ramrod->srq_id.srq_idx = OSAL_CPU_TO_LE16(fw_srq_id); + p_ramrod->srq_id.opaque_fid = OSAL_CPU_TO_LE16(opaque_fid); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + if (rc != ECORE_SUCCESS) + return rc; + + bmap = ecore_rdma_get_srq_bmap(p_hwfn, in_params->is_xrc); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, bmap, in_params->srq_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "XRC/SRQ destroyed Id = %x, is_xrc=%u\n", + in_params->srq_id, in_params->is_xrc); + + return rc; +} + +enum _ecore_status_t +ecore_rdma_create_srq(void *rdma_cxt, + struct ecore_rdma_create_srq_in_params *in_params, + struct ecore_rdma_create_srq_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_srq_create_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + enum ecore_cxt_elem_type elem_type; + struct ecore_spq_entry *p_ent; + u16 opaque_fid, fw_srq_id; + struct ecore_bmap *bmap; + u32 returned_id; + enum _ecore_status_t rc; + + /* Allocate XRC/SRQ ID */ + bmap = ecore_rdma_get_srq_bmap(p_hwfn, in_params->is_xrc); + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, bmap, &returned_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, + "failed to allocate xrc/srq id (is_xrc=%u)\n", + in_params->is_xrc); + return rc; + } + /* Allocate XRC/SRQ ILT page */ + elem_type = (in_params->is_xrc) ? 
(ECORE_ELEM_XRC_SRQ) : (ECORE_ELEM_SRQ); + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, elem_type, returned_id); + if (rc != ECORE_SUCCESS) + goto err; + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.opaque_fid = opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + /* Create XRC/SRQ ramrod */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_CREATE_SRQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_create_srq; + + p_ramrod->pbl_base_addr.hi = DMA_HI_LE(in_params->pbl_base_addr); + p_ramrod->pbl_base_addr.lo = DMA_LO_LE(in_params->pbl_base_addr); + p_ramrod->pages_in_srq_pbl = OSAL_CPU_TO_LE16(in_params->num_pages); + p_ramrod->pd_id = OSAL_CPU_TO_LE16(in_params->pd_id); + p_ramrod->srq_id.opaque_fid = OSAL_CPU_TO_LE16(opaque_fid); + p_ramrod->page_size = OSAL_CPU_TO_LE16(in_params->page_size); + p_ramrod->producers_addr.hi = DMA_HI_LE(in_params->prod_pair_addr); + p_ramrod->producers_addr.lo = DMA_LO_LE(in_params->prod_pair_addr); + fw_srq_id = ecore_rdma_get_fw_srq_id(p_hwfn, (u16) returned_id, + in_params->is_xrc); + p_ramrod->srq_id.srq_idx = OSAL_CPU_TO_LE16(fw_srq_id); + + if (in_params->is_xrc) { + SET_FIELD(p_ramrod->flags, + RDMA_SRQ_CREATE_RAMROD_DATA_XRC_FLAG, + 1); + SET_FIELD(p_ramrod->flags, + RDMA_SRQ_CREATE_RAMROD_DATA_RESERVED_KEY_EN, + in_params->reserved_key_en); + p_ramrod->xrc_srq_cq_cid = OSAL_CPU_TO_LE32(in_params->cq_cid); + p_ramrod->xrc_domain = OSAL_CPU_TO_LE16(in_params->xrcd_id); + } + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + if (rc != ECORE_SUCCESS) + goto err; + + out_params->srq_id = (u16)returned_id; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "XRC/SRQ created Id = %x (is_xrc=%u)\n", + out_params->srq_id, in_params->is_xrc); + return rc; + +err: + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, bmap, returned_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + return rc; +} + +bool ecore_rdma_allocated_qps(struct ecore_hwfn *p_hwfn) +{ + bool result; + + /* if rdma info has not been allocated, naturally there are no qps */ + if (!p_hwfn->p_rdma_info) + return false; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + if (!p_hwfn->p_rdma_info->qp_map.bitmap) + result = false; + else + result = !ecore_bmap_is_empty(&p_hwfn->p_rdma_info->qp_map); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + return result; +} + +enum _ecore_status_t ecore_rdma_resize_cq(void *rdma_cxt, + struct ecore_rdma_resize_cq_in_params *in_params, + struct ecore_rdma_resize_cq_out_params *out_params) +{ + enum _ecore_status_t rc; + enum ecore_rdma_toggle_bit toggle_bit; + struct ecore_spq_entry *p_ent; + struct rdma_resize_cq_ramrod_data *p_ramrod; + u8 fw_return_code; + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + dma_addr_t ramrod_res_phys; + struct rdma_resize_cq_output_params *p_ramrod_res; + struct ecore_sp_init_data init_data; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", in_params->icid); + + /* Send resize CQ ramrod */ + + p_ramrod_res = (struct rdma_resize_cq_output_params *) + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, &ramrod_res_phys, + sizeof(*p_ramrod_res)); + if (!p_ramrod_res) + { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "ecore resize cq failed: cannot allocate memory (ramrod). 
rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = in_params->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_RESIZE_CQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_resize_cq; + + p_ramrod->flags = 0; + + /* toggle the bit for every resize or create cq for a given icid */ + toggle_bit = ecore_rdma_toggle_bit_create_resize_cq(p_hwfn, + in_params->icid); + + SET_FIELD(p_ramrod->flags, + RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT, + toggle_bit); + + SET_FIELD(p_ramrod->flags, + RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL, + in_params->pbl_two_level); + + p_ramrod->pbl_log_page_size = in_params->pbl_page_size_log - 12; + p_ramrod->pbl_num_pages = OSAL_CPU_TO_LE16(in_params->pbl_num_pages); + p_ramrod->max_cqes = OSAL_CPU_TO_LE32(in_params->cq_size); + p_ramrod->pbl_addr.hi = DMA_HI_LE(in_params->pbl_ptr); + p_ramrod->pbl_addr.lo = DMA_LO_LE(in_params->pbl_ptr); + + p_ramrod->output_params_addr.hi = DMA_HI_LE(ramrod_res_phys); + p_ramrod->output_params_addr.lo = DMA_LO_LE(ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc != ECORE_SUCCESS) + goto err; + + if (fw_return_code != RDMA_RETURN_OK) + { + DP_NOTICE(p_hwfn, fw_return_code != RDMA_RETURN_RESIZE_CQ_ERR, + "fw_return_code = %d\n", fw_return_code); + DP_NOTICE(p_hwfn, + true, "fw_return_code = %d\n", fw_return_code); + rc = ECORE_UNKNOWN_ERROR; + goto err; + } + + out_params->prod = OSAL_LE32_TO_CPU(p_ramrod_res->old_cq_prod); + out_params->cons = OSAL_LE32_TO_CPU(p_ramrod_res->old_cq_cons); + + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(*p_ramrod_res)); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + + return rc; + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(*p_ramrod_res)); + DP_NOTICE(p_hwfn, false, "rc = %d\n", rc); + + return rc; +} + +enum _ecore_status_t ecore_rdma_start(void *rdma_cxt, + struct ecore_rdma_start_in_params *params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_ptt *p_ptt; + enum _ecore_status_t rc = ECORE_TIMEOUT; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "desired_cnq = %08x\n", params->desired_cnq); + + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) + goto err; + + rc = ecore_rdma_alloc(p_hwfn); + if (rc) + goto err1; + + rc = ecore_rdma_setup(p_hwfn, p_ptt, params); + if (rc) + goto err2; + + ecore_ptt_release(p_hwfn, p_ptt); + + ecore_rdma_activate(p_hwfn); + return rc; + +err2: + ecore_rdma_free(p_hwfn); +err1: + ecore_ptt_release(p_hwfn, p_ptt); +err: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "RDMA start - error, rc = %d\n", rc); + return rc; +} + +enum _ecore_status_t ecore_rdma_query_stats(void *rdma_cxt, u8 stats_queue, + struct ecore_rdma_stats_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + u8 abs_stats_queue, max_stats_queues; + u32 pstats_addr, tstats_addr, addr; + struct ecore_rdma_info *info; + struct ecore_ptt *p_ptt; +#ifdef CONFIG_ECORE_IWARP + u32 xstats_addr; +#endif + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (!p_hwfn) + return ECORE_INVAL; + + if (!p_hwfn->p_rdma_info) { + DP_INFO(p_hwfn->p_dev, "ecore rdma query stats failed due to NULL rdma_info\n"); + return ECORE_INVAL; + } + + info = p_hwfn->p_rdma_info; + + rc = 
ecore_rdma_inc_ref_cnt(p_hwfn); + if (rc != ECORE_SUCCESS) + return rc; + + max_stats_queues = p_hwfn->p_rdma_info->dev->max_stats_queues; + if (stats_queue >= max_stats_queues) { + DP_ERR(p_hwfn->p_dev, + "ecore rdma query stats failed due to invalid statistics queue %d. maximum is %d\n", + stats_queue, max_stats_queues); + rc = ECORE_INVAL; + goto err; + } + + /* Statistics collected in statistics queues (for PF/VF) */ + abs_stats_queue = RESC_START(p_hwfn, ECORE_RDMA_STATS_QUEUE) + + stats_queue; + pstats_addr = BAR0_MAP_REG_PSDM_RAM + + PSTORM_RDMA_QUEUE_STAT_OFFSET(abs_stats_queue); + tstats_addr = BAR0_MAP_REG_TSDM_RAM + + TSTORM_RDMA_QUEUE_STAT_OFFSET(abs_stats_queue); + +#ifdef CONFIG_ECORE_IWARP + /* Statistics per PF ID */ + xstats_addr = BAR0_MAP_REG_XSDM_RAM + + XSTORM_IWARP_RXMIT_STATS_OFFSET(p_hwfn->rel_pf_id); +#endif + + OSAL_MEMSET(&info->rdma_sent_pstats, 0, sizeof(info->rdma_sent_pstats)); + OSAL_MEMSET(&info->rdma_rcv_tstats, 0, sizeof(info->rdma_rcv_tstats)); + OSAL_MEMSET(&info->roce.event_stats, 0, sizeof(info->roce.event_stats)); + OSAL_MEMSET(&info->roce.dcqcn_rx_stats, 0,sizeof(info->roce.dcqcn_rx_stats)); + OSAL_MEMSET(&info->roce.dcqcn_tx_stats, 0,sizeof(info->roce.dcqcn_tx_stats)); +#ifdef CONFIG_ECORE_IWARP + OSAL_MEMSET(&info->iwarp.stats, 0, sizeof(info->iwarp.stats)); +#endif + + p_ptt = ecore_ptt_acquire(p_hwfn); + + if (!p_ptt) { + rc = ECORE_TIMEOUT; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + goto err; + } + + ecore_memcpy_from(p_hwfn, p_ptt, &info->rdma_sent_pstats, + pstats_addr, sizeof(struct rdma_sent_stats)); + + ecore_memcpy_from(p_hwfn, p_ptt, &info->rdma_rcv_tstats, + tstats_addr, sizeof(struct rdma_rcv_stats)); + + addr = BAR0_MAP_REG_TSDM_RAM + + TSTORM_ROCE_EVENTS_STAT_OFFSET(p_hwfn->rel_pf_id); + ecore_memcpy_from(p_hwfn, p_ptt, &info->roce.event_stats, addr, + sizeof(struct roce_events_stats)); + + addr = BAR0_MAP_REG_YSDM_RAM + + YSTORM_ROCE_DCQCN_RECEIVED_STATS_OFFSET(p_hwfn->rel_pf_id); + ecore_memcpy_from(p_hwfn, p_ptt, &info->roce.dcqcn_rx_stats, addr, + sizeof(struct roce_dcqcn_received_stats)); + + addr = BAR0_MAP_REG_PSDM_RAM + + PSTORM_ROCE_DCQCN_SENT_STATS_OFFSET(p_hwfn->rel_pf_id); + ecore_memcpy_from(p_hwfn, p_ptt, &info->roce.dcqcn_tx_stats, addr, + sizeof(struct roce_dcqcn_sent_stats)); + +#ifdef CONFIG_ECORE_IWARP + ecore_memcpy_from(p_hwfn, p_ptt, &info->iwarp.stats, + xstats_addr, sizeof(struct iwarp_rxmit_stats_drv)); +#endif + + ecore_ptt_release(p_hwfn, p_ptt); + + OSAL_MEMSET(out_params, 0, sizeof(*out_params)); + + out_params->sent_bytes = + HILO_64_REGPAIR(info->rdma_sent_pstats.sent_bytes); + out_params->sent_pkts = + HILO_64_REGPAIR(info->rdma_sent_pstats.sent_pkts); + out_params->rcv_bytes = + HILO_64_REGPAIR(info->rdma_rcv_tstats.rcv_bytes); + out_params->rcv_pkts = + HILO_64_REGPAIR(info->rdma_rcv_tstats.rcv_pkts); + + out_params->silent_drops = + OSAL_LE16_TO_CPU(info->roce.event_stats.silent_drops); + out_params->rnr_nacks_sent = + OSAL_LE16_TO_CPU(info->roce.event_stats.rnr_naks_sent); + out_params->icrc_errors = + OSAL_LE32_TO_CPU(info->roce.event_stats.icrc_error_count); + out_params->retransmit_events = + OSAL_LE32_TO_CPU(info->roce.event_stats.retransmit_count); + out_params->ecn_pkt_rcv = + HILO_64_REGPAIR(info->roce.dcqcn_rx_stats.ecn_pkt_rcv); + out_params->cnp_pkt_rcv = + HILO_64_REGPAIR(info->roce.dcqcn_rx_stats.cnp_pkt_rcv); + out_params->cnp_pkt_sent = + HILO_64_REGPAIR(info->roce.dcqcn_tx_stats.cnp_pkt_sent); + +#ifdef CONFIG_ECORE_IWARP + out_params->iwarp_tx_fast_rxmit_cnt = + 
HILO_64_REGPAIR(info->iwarp.stats.tx_fast_retransmit_event_cnt); + out_params->iwarp_tx_slow_start_cnt = + HILO_64_REGPAIR( + info->iwarp.stats.tx_go_to_slow_start_event_cnt); + out_params->unalign_rx_comp = info->iwarp.unalign_rx_comp; +#endif + +err: + ecore_rdma_dec_ref_cnt(p_hwfn); + + return rc; +} + +enum _ecore_status_t +ecore_rdma_query_counters(void *rdma_cxt, + struct ecore_rdma_counters_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + unsigned long *bitmap; + unsigned int nbits; + + if (!p_hwfn->p_rdma_info) + return ECORE_INVAL; + + OSAL_MEMSET(out_params, 0, sizeof(*out_params)); + + bitmap = p_hwfn->p_rdma_info->pd_map.bitmap; + nbits = p_hwfn->p_rdma_info->pd_map.max_count; + out_params->pd_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_pd = nbits; + + bitmap = p_hwfn->p_rdma_info->dpi_map.bitmap; + nbits = p_hwfn->p_rdma_info->dpi_map.max_count; + out_params->dpi_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_dpi = nbits; + + bitmap = p_hwfn->p_rdma_info->cq_map.bitmap; + nbits = p_hwfn->p_rdma_info->cq_map.max_count; + out_params->cq_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_cq = nbits; + + bitmap = p_hwfn->p_rdma_info->qp_map.bitmap; + nbits = p_hwfn->p_rdma_info->qp_map.max_count; + out_params->qp_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_qp = nbits; + + bitmap = p_hwfn->p_rdma_info->tid_map.bitmap; + nbits = p_hwfn->p_rdma_info->tid_map.max_count; + out_params->tid_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_tid = nbits; + + bitmap = p_hwfn->p_rdma_info->srq_map.bitmap; + nbits = p_hwfn->p_rdma_info->srq_map.max_count; + out_params->srq_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_srq = nbits; + + bitmap = p_hwfn->p_rdma_info->xrc_srq_map.bitmap; + nbits = p_hwfn->p_rdma_info->xrc_srq_map.max_count; + out_params->xrc_srq_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_xrc_srq = nbits; + + bitmap = p_hwfn->p_rdma_info->xrcd_map.bitmap; + nbits = p_hwfn->p_rdma_info->xrcd_map.max_count; + out_params->xrcd_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_xrcd = nbits; + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_rdma_resize_cnq(void *rdma_cxt, + struct ecore_rdma_resize_cnq_in_params *params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "cnq_id = %08x\n", params->cnq_id); + + /* @@@TBD: waiting for fw (there is no ramrod yet) */ + return ECORE_NOTIMPL; +} + +void ecore_rdma_remove_user(void *rdma_cxt, + u16 dpi) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "dpi = %08x\n", dpi); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, dpi); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +#ifndef LINUX_REMOVE +enum _ecore_status_t +ecore_rdma_set_glob_cfg(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_glob_cfg *in_params, + u32 glob_cfg_bits) +{ + struct ecore_rdma_glob_cfg glob_cfg; + enum _ecore_status_t rc = ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn->p_dev, ECORE_MSG_RDMA, + "dscp %d dscp en %d ecn %d ecn en %d vlan pri %d vlan_pri_en %d\n", + in_params->dscp, in_params->dscp_en, + in_params->ecn, in_params->ecn_en, in_params->vlan_pri, + in_params->vlan_pri_en); + + /* Read global cfg to local */ + OSAL_MEMCPY(&glob_cfg, &p_hwfn->p_rdma_info->glob_cfg, + sizeof(glob_cfg)); + + if (glob_cfg_bits & ECORE_RDMA_DCSP_BIT_MASK) { + 
if (in_params->dscp > MAX_DSCP) { + DP_ERR(p_hwfn->p_dev, "invalid glob dscp %d\n", + in_params->dscp); + return ECORE_INVAL; + } + glob_cfg.dscp = in_params->dscp; + } + + if (glob_cfg_bits & ECORE_RDMA_DCSP_EN_BIT_MASK) { + if (in_params->dscp_en > 1) { + DP_ERR(p_hwfn->p_dev, "invalid glob_dscp_en %d\n", + in_params->dscp_en); + return ECORE_INVAL; + } + glob_cfg.dscp_en = in_params->dscp_en; + } + + if (glob_cfg_bits & ECORE_RDMA_ECN_BIT_MASK) { + if (in_params->ecn > INET_ECN_ECT_0) { + DP_ERR(p_hwfn->p_dev, "invalid glob ecn %d\n", + in_params->ecn); + return ECORE_INVAL; + } + glob_cfg.ecn = in_params->ecn; + } + + if (glob_cfg_bits & ECORE_RDMA_ECN_EN_BIT_MASK) { + if (in_params->ecn_en > 1) { + DP_ERR(p_hwfn->p_dev, "invalid glob ecn en %d\n", + in_params->ecn_en); + return ECORE_INVAL; + } + glob_cfg.ecn_en = in_params->ecn_en; + } + + if (glob_cfg_bits & ECORE_RDMA_VLAN_PRIO_BIT_MASK) { + if (in_params->vlan_pri > MAX_VLAN_PRIO) { + DP_ERR(p_hwfn->p_dev, "invalid glob vlan pri %d\n", + in_params->vlan_pri); + return ECORE_INVAL; + } + glob_cfg.vlan_pri = in_params->vlan_pri; + } + + if (glob_cfg_bits & ECORE_RDMA_VLAN_PRIO_EN_BIT_MASK) { + if (in_params->vlan_pri_en > 1) { + DP_ERR(p_hwfn->p_dev, "invalid glob vlan pri en %d\n", + in_params->vlan_pri_en); + return ECORE_INVAL; + } + glob_cfg.vlan_pri_en = in_params->vlan_pri_en; + } + + /* Write back local cfg to global */ + OSAL_MEMCPY(&p_hwfn->p_rdma_info->glob_cfg, &glob_cfg, + sizeof(glob_cfg)); + + return rc; +} + +enum _ecore_status_t +ecore_rdma_get_glob_cfg(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_glob_cfg *out_params) +{ + OSAL_MEMCPY(out_params, &p_hwfn->p_rdma_info->glob_cfg, + sizeof(struct ecore_rdma_glob_cfg)); + + return ECORE_SUCCESS; +} +#endif /* LINUX_REMOVE */ diff --git a/sys/dev/qlnx/qlnxe/ecore_roce.c b/sys/dev/qlnx/qlnxe/ecore_roce.c new file mode 100644 index 000000000000..7a5d1f6c38e9 --- /dev/null +++ b/sys/dev/qlnx/qlnxe/ecore_roce.c @@ -0,0 +1,1579 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * File : ecore_roce.c + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "bcm_osal.h" +#include "ecore.h" +#include "ecore_status.h" +#include "ecore_sp_commands.h" +#include "ecore_cxt.h" +#include "ecore_rdma.h" +#include "reg_addr.h" +#include "ecore_rt_defs.h" +#include "ecore_init_ops.h" +#include "ecore_hw.h" +#include "ecore_mcp.h" +#include "ecore_init_fw_funcs.h" +#include "ecore_int.h" +#include "pcics_reg_driver.h" +#include "ecore_iro.h" +#include "ecore_gtt_reg_addr.h" +#ifndef LINUX_REMOVE +#include "ecore_tcp_ip.h" +#endif + +#ifdef _NTDDK_ +#pragma warning(push) +#pragma warning(disable : 28167) +#pragma warning(disable : 28123) +#pragma warning(disable : 28182) +#pragma warning(disable : 6011) +#endif + +static void ecore_roce_free_icid(struct ecore_hwfn *p_hwfn, u16 icid); + +static enum _ecore_status_t +ecore_roce_async_event(struct ecore_hwfn *p_hwfn, + u8 fw_event_code, + u16 OSAL_UNUSED echo, + union event_ring_data *data, + u8 OSAL_UNUSED fw_return_code) +{ + if (fw_event_code == ROCE_ASYNC_EVENT_DESTROY_QP_DONE) { + u16 icid = (u16)OSAL_LE32_TO_CPU( + data->rdma_data.rdma_destroy_qp_data.cid); + + /* icid release in this async event can occur only if the icid + * was offloaded to the FW. In case it wasn't offloaded this is + * handled in ecore_roce_sp_destroy_qp. + */ + ecore_roce_free_icid(p_hwfn, icid); + } else + p_hwfn->p_rdma_info->events.affiliated_event( + p_hwfn->p_rdma_info->events.context, + fw_event_code, + (void *)&data->rdma_data.async_handle); + + return ECORE_SUCCESS; +} + + + +#ifdef CONFIG_DCQCN +static enum _ecore_status_t ecore_roce_start_rl( + struct ecore_hwfn *p_hwfn, + struct ecore_roce_dcqcn_params *dcqcn_params) +{ + struct ecore_rl_update_params params; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "\n"); + OSAL_MEMSET(¶ms, 0, sizeof(params)); + + params.rl_id_first = (u8)RESC_START(p_hwfn, ECORE_RL); + params.rl_id_last = RESC_START(p_hwfn, ECORE_RL) + + ecore_init_qm_get_num_pf_rls(p_hwfn); + params.dcqcn_update_param_flg = 1; + params.rl_init_flg = 1; + params.rl_start_flg = 1; + params.rl_stop_flg = 0; + params.rl_dc_qcn_flg = 1; + + params.rl_bc_rate = dcqcn_params->rl_bc_rate; + params.rl_max_rate = dcqcn_params->rl_max_rate; + params.rl_r_ai = dcqcn_params->rl_r_ai; + params.rl_r_hai = dcqcn_params->rl_r_hai; + params.dcqcn_gd = dcqcn_params->dcqcn_gd; + params.dcqcn_k_us = dcqcn_params->dcqcn_k_us; + params.dcqcn_timeuot_us = dcqcn_params->dcqcn_timeout_us; + + return ecore_sp_rl_update(p_hwfn, ¶ms); +} + +enum _ecore_status_t ecore_roce_stop_rl(struct ecore_hwfn *p_hwfn) +{ + struct ecore_rl_update_params params; + + if (!p_hwfn->p_rdma_info->roce.dcqcn_reaction_point) + return ECORE_SUCCESS; + + OSAL_MEMSET(¶ms, 0, sizeof(params)); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "\n"); + + params.rl_id_first = (u8)RESC_START(p_hwfn, ECORE_RL); + params.rl_id_last = RESC_START(p_hwfn, ECORE_RL) + + ecore_init_qm_get_num_pf_rls(p_hwfn); + params.rl_stop_flg = 1; + + return ecore_sp_rl_update(p_hwfn, ¶ms); +} + +#define NIG_REG_ROCE_DUPLICATE_TO_HOST_BTH 2 +#define NIG_REG_ROCE_DUPLICATE_TO_HOST_ECN 1 + +enum _ecore_status_t ecore_roce_dcqcn_cfg( + struct ecore_hwfn *p_hwfn, + struct ecore_roce_dcqcn_params *params, + struct roce_init_func_ramrod_data *p_ramrod, + struct ecore_ptt *p_ptt) +{ + u32 val = 0; + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (!p_hwfn->pf_params.rdma_pf_params.enable_dcqcn || + p_hwfn->p_rdma_info->proto == PROTOCOLID_IWARP) + return rc; + + p_hwfn->p_rdma_info->roce.dcqcn_enabled = 0; + if 
(params->notification_point) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Configuring dcqcn notification point: timeout = 0x%x\n", + params->cnp_send_timeout); + p_ramrod->roce.cnp_send_timeout = params->cnp_send_timeout; + p_hwfn->p_rdma_info->roce.dcqcn_enabled = 1; + /* Configure NIG to duplicate to host and storm when: + * - ECN == 2'b11 (notification point) + */ + val |= 1 << NIG_REG_ROCE_DUPLICATE_TO_HOST_ECN; + } + + if (params->reaction_point) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Configuring dcqcn reaction point\n"); + p_hwfn->p_rdma_info->roce.dcqcn_enabled = 1; + p_hwfn->p_rdma_info->roce.dcqcn_reaction_point = 1; + /* Configure NIG to duplicate to host and storm when: + * - BTH opcode equals bth_hdr_flow_ctrl_opcode_2 + * (reaction point) + */ + val |= 1 << NIG_REG_ROCE_DUPLICATE_TO_HOST_BTH; + + rc = ecore_roce_start_rl(p_hwfn, params); + } + + if (rc) + return rc; + + p_ramrod->roce.cnp_dscp = params->cnp_dscp; + p_ramrod->roce.cnp_vlan_priority = params->cnp_vlan_priority; + + ecore_wr(p_hwfn, + p_ptt, + NIG_REG_ROCE_DUPLICATE_TO_HOST, + val); + + return rc; +} +#endif + + +enum _ecore_status_t ecore_roce_stop(struct ecore_hwfn *p_hwfn) +{ + struct ecore_bmap *cid_map = &p_hwfn->p_rdma_info->cid_map; + int wait_count = 0; + + /* when destroying a RoCE QP the control is returned to the + * user after the synchronous part. The asynchronous part may + * take a little longer. We delay for a short while if an + * async destroy QP is still expected. Beyond the added delay + * we clear the bitmap anyway. + */ + while (OSAL_BITMAP_WEIGHT(cid_map->bitmap, cid_map->max_count)) { + OSAL_MSLEEP(100); + if (wait_count++ > 20) { + DP_NOTICE(p_hwfn, false, + "cid bitmap wait timed out\n"); + break; + } + } + + ecore_spq_unregister_async_cb(p_hwfn, PROTOCOLID_ROCE); + + return ECORE_SUCCESS; +} + + +static void ecore_rdma_copy_gids(struct ecore_rdma_qp *qp, __le32 *src_gid, + __le32 *dst_gid) { + u32 i; + + if (qp->roce_mode == ROCE_V2_IPV4) { + /* The IPv4 addresses shall be aligned to the highest word. + * The lower words must be zero. 
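+ * (The address is written to GID dword 3 below, while dwords 0..2 are cleared by the OSAL_MEMSET calls.)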
+ */ + OSAL_MEMSET(src_gid, 0, sizeof(union ecore_gid)); + OSAL_MEMSET(dst_gid, 0, sizeof(union ecore_gid)); + src_gid[3] = OSAL_CPU_TO_LE32(qp->sgid.ipv4_addr); + dst_gid[3] = OSAL_CPU_TO_LE32(qp->dgid.ipv4_addr); + } else { + /* RoCE, and RoCE v2 - IPv6: GIDs and IPv6 addresses coincide in + * location and size + */ + for (i = 0; i < OSAL_ARRAY_SIZE(qp->sgid.dwords); i++) { + src_gid[i] = OSAL_CPU_TO_LE32(qp->sgid.dwords[i]); + dst_gid[i] = OSAL_CPU_TO_LE32(qp->dgid.dwords[i]); + } + } +} + +static enum roce_flavor ecore_roce_mode_to_flavor(enum roce_mode roce_mode) +{ + enum roce_flavor flavor; + + switch (roce_mode) { + case ROCE_V1: + flavor = PLAIN_ROCE; + break; + case ROCE_V2_IPV4: + flavor = RROCE_IPV4; + break; + case ROCE_V2_IPV6: + flavor = (enum roce_flavor)ROCE_V2_IPV6; + break; + default: + flavor = (enum roce_flavor)MAX_ROCE_MODE; + break; + } + return flavor; +} + +#if 0 +static void ecore_roce_free_cid_pair(struct ecore_hwfn *p_hwfn, u16 cid) +{ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->qp_map, cid); + ecore_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->qp_map, cid + 1); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} +#endif + +static void ecore_roce_free_qp(struct ecore_hwfn *p_hwfn, u16 qp_idx) +{ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->qp_map, qp_idx); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +#define ECORE_ROCE_CREATE_QP_ATTEMPTS (20) +#define ECORE_ROCE_CREATE_QP_MSLEEP (10) + +static enum _ecore_status_t ecore_roce_wait_free_cids(struct ecore_hwfn *p_hwfn, u32 qp_idx) +{ + struct ecore_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + bool cids_free = false; + u32 icid, iter = 0; + int req, resp; + + icid = ECORE_ROCE_QP_TO_ICID(qp_idx); + + /* Make sure that the cids that were used by the QP index are free. + * This is necessary because the destroy flow returns to the user before + * the device finishes clean up. + * It can happen in the following flows: + * (1) ib_destroy_qp followed by an ib_create_qp + * (2) ib_modify_qp to RESET followed (not immediately), by an + * ib_modify_qp to RTR + */ + + do { + OSAL_SPIN_LOCK(&p_rdma_info->lock); + resp = ecore_bmap_test_id(p_hwfn, &p_rdma_info->cid_map, icid); + req = ecore_bmap_test_id(p_hwfn, &p_rdma_info->cid_map, icid + 1); + if (!resp && !req) + cids_free = true; + + OSAL_SPIN_UNLOCK(&p_rdma_info->lock); + + if (!cids_free) { + OSAL_MSLEEP(ECORE_ROCE_CREATE_QP_MSLEEP); + iter++; + } + } while (!cids_free && iter < ECORE_ROCE_CREATE_QP_ATTEMPTS); + + if (!cids_free) { + DP_ERR(p_hwfn->p_dev, + "responder and/or requester CIDs are still in use. 
resp=%d, req=%d\n", + resp, req); + return ECORE_AGAIN; + } + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_roce_alloc_qp_idx( + struct ecore_hwfn *p_hwfn, u16 *qp_idx16) +{ + struct ecore_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + u32 start_cid, icid, cid, qp_idx; + enum _ecore_status_t rc; + + OSAL_SPIN_LOCK(&p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, &p_rdma_info->qp_map, &qp_idx); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "failed to allocate qp\n"); + OSAL_SPIN_UNLOCK(&p_rdma_info->lock); + return rc; + } + + OSAL_SPIN_UNLOCK(&p_rdma_info->lock); + + /* Verify that the cid bits of this qp index are clear */ + rc = ecore_roce_wait_free_cids(p_hwfn, qp_idx); + if (rc) { + rc = ECORE_UNKNOWN_ERROR; + goto err; + } + + /* Allocate a DMA-able context for an ILT page, if not existing, for the + * associated iids. + * Note: If second allocation fails there's no need to free the first as + * it will be used in the future. + */ + icid = ECORE_ROCE_QP_TO_ICID(qp_idx); + start_cid = ecore_cxt_get_proto_cid_start(p_hwfn, p_rdma_info->proto); + cid = start_cid + icid; + + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, ECORE_ELEM_CXT, cid); + if (rc != ECORE_SUCCESS) + goto err; + + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, ECORE_ELEM_CXT, cid + 1); + if (rc != ECORE_SUCCESS) + goto err; + + /* qp index is under 2^16 */ + *qp_idx16 = (u16)qp_idx; + + return ECORE_SUCCESS; + +err: + ecore_roce_free_qp(p_hwfn, (u16)qp_idx); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + + return rc; +} + +static void ecore_roce_set_cid(struct ecore_hwfn *p_hwfn, + u32 cid) +{ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_set_id(p_hwfn, + &p_hwfn->p_rdma_info->cid_map, + cid); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +static enum _ecore_status_t ecore_roce_sp_create_responder( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp) +{ + struct roce_create_qp_resp_ramrod_data *p_ramrod; + u16 regular_latency_queue, low_latency_queue; + struct ecore_sp_init_data init_data; + enum roce_flavor roce_flavor; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + u32 cid_start; + u16 fw_srq_id; + bool is_xrc; + + if (!qp->has_resp) + return ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "qp_idx = %08x\n", qp->qp_idx); + + /* Allocate DMA-able memory for IRQ */ + qp->irq_num_pages = 1; + qp->irq = OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + &qp->irq_phys_addr, + RDMA_RING_PAGE_SIZE); + if (!qp->irq) { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "ecore create responder failed: cannot allocate memory (irq). 
rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_CREATE_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.roce_create_qp_resp; + + p_ramrod->flags = 0; + + roce_flavor = ecore_roce_mode_to_flavor(qp->roce_mode); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_ROCE_FLAVOR, + roce_flavor); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN, + qp->e2e_flow_control_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_SRQ_FLG, + qp->use_srq); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_XRC_FLAG, + ecore_rdma_is_xrc_qp(qp)); + + /* TBD: future use only + * #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_MASK + * #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_SHIFT + */ + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER, + qp->min_rnr_nak_timer); + + p_ramrod->max_ird = + qp->max_rd_atomic_resp; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->irq_num_pages = qp->irq_num_pages; + p_ramrod->p_key = OSAL_CPU_TO_LE16(qp->pkey); + p_ramrod->flow_label = OSAL_CPU_TO_LE32(qp->flow_label); + p_ramrod->dst_qp_id = OSAL_CPU_TO_LE32(qp->dest_qp); + p_ramrod->mtu = OSAL_CPU_TO_LE16(qp->mtu); + p_ramrod->initial_psn = OSAL_CPU_TO_LE32(qp->rq_psn); + p_ramrod->pd = OSAL_CPU_TO_LE16(qp->pd); + p_ramrod->rq_num_pages = OSAL_CPU_TO_LE16(qp->rq_num_pages); + DMA_REGPAIR_LE(p_ramrod->rq_pbl_addr, qp->rq_pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->irq_pbl_addr, qp->irq_phys_addr); + ecore_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + p_ramrod->qp_handle_for_async.hi = + OSAL_CPU_TO_LE32(qp->qp_handle_async.hi); + p_ramrod->qp_handle_for_async.lo = + OSAL_CPU_TO_LE32(qp->qp_handle_async.lo); + p_ramrod->qp_handle_for_cqe.hi = OSAL_CPU_TO_LE32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = OSAL_CPU_TO_LE32(qp->qp_handle.lo); + p_ramrod->cq_cid = OSAL_CPU_TO_LE32((p_hwfn->hw_info.opaque_fid << 16) | qp->rq_cq_id); + p_ramrod->xrc_domain = OSAL_CPU_TO_LE16(qp->xrcd_id); + +#ifdef CONFIG_DCQCN + /* when dcqcn is enabled physical queues are determined accoridng to qp id */ + if (p_hwfn->p_rdma_info->roce.dcqcn_enabled) + regular_latency_queue = + ecore_get_cm_pq_idx_rl(p_hwfn, + (qp->icid >> 1) % + ROCE_DCQCN_RP_MAX_QPS); + else +#endif + regular_latency_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + low_latency_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LLT); + + p_ramrod->regular_latency_phy_queue = OSAL_CPU_TO_LE16(regular_latency_queue); + p_ramrod->low_latency_phy_queue = OSAL_CPU_TO_LE16(low_latency_queue); + p_ramrod->dpi = OSAL_CPU_TO_LE16(qp->dpi); + + ecore_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); + ecore_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr); + + 
p_ramrod->udp_src_port = qp->udp_src_port; + p_ramrod->vlan_id = OSAL_CPU_TO_LE16(qp->vlan_id); + is_xrc = ecore_rdma_is_xrc_qp(qp); + fw_srq_id = ecore_rdma_get_fw_srq_id(p_hwfn, qp->srq_id, is_xrc); + p_ramrod->srq_id.srq_idx = OSAL_CPU_TO_LE16(fw_srq_id); + p_ramrod->srq_id.opaque_fid = OSAL_CPU_TO_LE16(p_hwfn->hw_info.opaque_fid); + + p_ramrod->stats_counter_id = RESC_START(p_hwfn, ECORE_RDMA_STATS_QUEUE) + + qp->stats_queue; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d regular physical queue = 0x%x, low latency physical queue 0x%x\n", + rc, regular_latency_queue, low_latency_queue); + + if (rc != ECORE_SUCCESS) + goto err; + + qp->resp_offloaded = true; + qp->cq_prod.resp = 0; + + cid_start = ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + ecore_roce_set_cid(p_hwfn, qp->icid - cid_start); + + return rc; + +err: + DP_NOTICE(p_hwfn, false, "create responder - failed, rc = %d\n", rc); + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->irq, + qp->irq_phys_addr, + qp->irq_num_pages * + RDMA_RING_PAGE_SIZE); + + return rc; +} + +static enum _ecore_status_t ecore_roce_sp_create_requester( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp) +{ + struct roce_create_qp_req_ramrod_data *p_ramrod; + u16 regular_latency_queue, low_latency_queue; + struct ecore_sp_init_data init_data; + enum roce_flavor roce_flavor; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + u32 cid_start; + + if (!qp->has_req) + return ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* Allocate DMA-able memory for ORQ */ + qp->orq_num_pages = 1; + qp->orq = OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + &qp->orq_phys_addr, + RDMA_RING_PAGE_SIZE); + if (!qp->orq) + { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "ecore create requester failed: cannot allocate memory (orq). 
rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + ROCE_RAMROD_CREATE_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.roce_create_qp_req; + + p_ramrod->flags = 0; + + roce_flavor = ecore_roce_mode_to_flavor(qp->roce_mode); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_ROCE_FLAVOR, + roce_flavor); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_FMR_AND_RESERVED_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_SIGNALED_COMP, + qp->signal_all); + + /* TBD: + * future use only + * #define ROCE_CREATE_QP_REQ_RAMROD_DATA_PRI_MASK + * #define ROCE_CREATE_QP_REQ_RAMROD_DATA_PRI_SHIFT + */ + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, + qp->retry_cnt); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_RNR_NAK_CNT, + qp->rnr_retry_cnt); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_XRC_FLAG, + ecore_rdma_is_xrc_qp(qp)); + + p_ramrod->max_ord = qp->max_rd_atomic_req; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->orq_num_pages = qp->orq_num_pages; + p_ramrod->p_key = OSAL_CPU_TO_LE16(qp->pkey); + p_ramrod->flow_label = OSAL_CPU_TO_LE32(qp->flow_label); + p_ramrod->dst_qp_id = OSAL_CPU_TO_LE32(qp->dest_qp); + p_ramrod->ack_timeout_val = OSAL_CPU_TO_LE32(qp->ack_timeout); + p_ramrod->mtu = OSAL_CPU_TO_LE16(qp->mtu); + p_ramrod->initial_psn = OSAL_CPU_TO_LE32(qp->sq_psn); + p_ramrod->pd = OSAL_CPU_TO_LE16(qp->pd); + p_ramrod->sq_num_pages = OSAL_CPU_TO_LE16(qp->sq_num_pages); + DMA_REGPAIR_LE(p_ramrod->sq_pbl_addr, qp->sq_pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->orq_pbl_addr, qp->orq_phys_addr); + ecore_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + p_ramrod->qp_handle_for_async.hi = + OSAL_CPU_TO_LE32(qp->qp_handle_async.hi); + p_ramrod->qp_handle_for_async.lo = + OSAL_CPU_TO_LE32(qp->qp_handle_async.lo); + p_ramrod->qp_handle_for_cqe.hi = OSAL_CPU_TO_LE32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = OSAL_CPU_TO_LE32(qp->qp_handle.lo); + p_ramrod->cq_cid = OSAL_CPU_TO_LE32((p_hwfn->hw_info.opaque_fid << 16) | + qp->sq_cq_id); + +#ifdef CONFIG_DCQCN + /* when dcqcn is enabled physical queues are determined accoridng to qp id */ + if (p_hwfn->p_rdma_info->roce.dcqcn_enabled) + regular_latency_queue = + ecore_get_cm_pq_idx_rl(p_hwfn, + (qp->icid >> 1) % + ROCE_DCQCN_RP_MAX_QPS); + else +#endif + regular_latency_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + low_latency_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LLT); + + p_ramrod->regular_latency_phy_queue = OSAL_CPU_TO_LE16(regular_latency_queue); + p_ramrod->low_latency_phy_queue = OSAL_CPU_TO_LE16(low_latency_queue); + p_ramrod->dpi = OSAL_CPU_TO_LE16(qp->dpi); + + ecore_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); + ecore_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr); + + p_ramrod->udp_src_port = qp->udp_src_port; + p_ramrod->vlan_id = OSAL_CPU_TO_LE16(qp->vlan_id); + p_ramrod->stats_counter_id = RESC_START(p_hwfn, ECORE_RDMA_STATS_QUEUE) + + qp->stats_queue; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + + if (rc != ECORE_SUCCESS) + goto 
err; + + qp->req_offloaded = true; + qp->cq_prod.req = 0; + + cid_start = ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + ecore_roce_set_cid(p_hwfn, qp->icid + 1 - cid_start); + + return rc; + +err: + DP_NOTICE(p_hwfn, false, "Create requester - failed, rc = %d\n", rc); + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->orq, + qp->orq_phys_addr, + qp->orq_num_pages * + RDMA_RING_PAGE_SIZE); + return rc; +} + +static enum _ecore_status_t ecore_roce_sp_modify_responder( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + bool move_to_err, + u32 modify_flags) +{ + struct roce_modify_qp_resp_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + if (!qp->has_resp) + return ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (move_to_err && !qp->resp_offloaded) + return ECORE_SUCCESS; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + ROCE_EVENT_MODIFY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + { + DP_NOTICE(p_hwfn, false, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.roce_modify_qp_resp; + + p_ramrod->flags = 0; + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MOVE_TO_ERR_FLG, + move_to_err); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN, + qp->e2e_flow_control_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_OPS_EN_FLG, + GET_FIELD(modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_P_KEY_FLG, + GET_FIELD(modify_flags, ECORE_ROCE_MODIFY_QP_VALID_PKEY)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_ADDRESS_VECTOR_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MAX_IRD_FLG, + GET_FIELD(modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP)); + + /* TBD: future use only + * #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PRI_FLG_MASK + * #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PRI_FLG_SHIFT + */ + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER)); + + p_ramrod->fields = 0; + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER, + qp->min_rnr_nak_timer); + + p_ramrod->max_ird = qp->max_rd_atomic_resp; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->p_key = OSAL_CPU_TO_LE16(qp->pkey); + p_ramrod->flow_label = OSAL_CPU_TO_LE32(qp->flow_label); + p_ramrod->mtu = OSAL_CPU_TO_LE16(qp->mtu); + ecore_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Modify responder, rc = %d\n", rc); + return rc; +} + +static enum _ecore_status_t ecore_roce_sp_modify_requester( + struct 
ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + bool move_to_sqd, + bool move_to_err, + u32 modify_flags) +{ + struct roce_modify_qp_req_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + if (!qp->has_req) + return ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (move_to_err && !(qp->req_offloaded)) + return ECORE_SUCCESS; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + ROCE_EVENT_MODIFY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.roce_modify_qp_req; + + p_ramrod->flags = 0; + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_ERR_FLG, + move_to_err); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_SQD_FLG, + move_to_sqd); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_EN_SQD_ASYNC_NOTIFY, + qp->sqd_async); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_P_KEY_FLG, + GET_FIELD(modify_flags, ECORE_ROCE_MODIFY_QP_VALID_PKEY)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ADDRESS_VECTOR_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MAX_ORD_FLG, + GET_FIELD(modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_RETRY_CNT)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ACK_TIMEOUT_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT)); + + /* TBD: future use only + * #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PRI_FLG_MASK + * #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PRI_FLG_SHIFT + */ + + p_ramrod->fields = 0; + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, + qp->retry_cnt); + + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT, + qp->rnr_retry_cnt); + + p_ramrod->max_ord = qp->max_rd_atomic_req; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->p_key = OSAL_CPU_TO_LE16(qp->pkey); + p_ramrod->flow_label = OSAL_CPU_TO_LE32(qp->flow_label); + p_ramrod->ack_timeout_val = OSAL_CPU_TO_LE32(qp->ack_timeout); + p_ramrod->mtu = OSAL_CPU_TO_LE16(qp->mtu); + ecore_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Modify requester, rc = %d\n", rc); + return rc; +} + +static enum _ecore_status_t ecore_roce_sp_destroy_qp_responder( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + u32 *num_invalidated_mw, + u32 *cq_prod) +{ + struct roce_destroy_qp_resp_output_params *p_ramrod_res; + struct roce_destroy_qp_resp_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + enum _ecore_status_t rc; + + if (!qp->has_resp) { + *num_invalidated_mw = 0; + *cq_prod = 0; + return ECORE_SUCCESS; + } 
+ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + *num_invalidated_mw = 0; + + if (!qp->resp_offloaded) { + *cq_prod = qp->cq_prod.resp; + return ECORE_SUCCESS; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + ROCE_RAMROD_DESTROY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.roce_destroy_qp_resp; + + p_ramrod_res = (struct roce_destroy_qp_resp_output_params *)OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + &ramrod_res_phys, sizeof(*p_ramrod_res)); + + if (!p_ramrod_res) + { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "ecore destroy responder failed: cannot allocate memory (ramrod). rc = %d\n", + rc); + return rc; + } + + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + *num_invalidated_mw + = OSAL_LE32_TO_CPU(p_ramrod_res->num_invalidated_mw); + *cq_prod = OSAL_LE32_TO_CPU(p_ramrod_res->cq_prod); + qp->cq_prod.resp = *cq_prod; + + /* Free IRQ - only if ramrod succeeded, in case FW is still using it */ + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->irq, + qp->irq_phys_addr, + qp->irq_num_pages * + RDMA_RING_PAGE_SIZE); + + qp->resp_offloaded = false; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Destroy responder, rc = %d\n", rc); + + /* "fall through" */ + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(*p_ramrod_res)); + + return rc; +} + +static enum _ecore_status_t ecore_roce_sp_destroy_qp_requester( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + u32 *num_bound_mw, + u32 *cq_prod) +{ + struct roce_destroy_qp_req_output_params *p_ramrod_res; + struct roce_destroy_qp_req_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + enum _ecore_status_t rc; + + if (!qp->has_req) { + *num_bound_mw = 0; + *cq_prod = 0; + return ECORE_SUCCESS; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (!qp->req_offloaded) { + *cq_prod = qp->cq_prod.req; + return ECORE_SUCCESS; + } + + p_ramrod_res = (struct roce_destroy_qp_req_output_params *) + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, &ramrod_res_phys, + sizeof(*p_ramrod_res)); + if (!p_ramrod_res) + { + DP_NOTICE(p_hwfn, false, + "ecore destroy requester failed: cannot allocate memory (ramrod)\n"); + return ECORE_NOMEM; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_DESTROY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.roce_destroy_qp_req; + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + *num_bound_mw = OSAL_LE32_TO_CPU(p_ramrod_res->num_bound_mw); + *cq_prod = OSAL_LE32_TO_CPU(p_ramrod_res->cq_prod); + qp->cq_prod.req = *cq_prod; + + /* Free ORQ - only if ramrod succeeded, in case FW is still using it */ + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->orq, + qp->orq_phys_addr, + qp->orq_num_pages * + RDMA_RING_PAGE_SIZE); + + qp->req_offloaded = false; 
+ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Destroy requester, rc = %d\n", rc); + + /* "fall through" */ + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(*p_ramrod_res)); + + return rc; +} + +static OSAL_INLINE enum _ecore_status_t ecore_roce_sp_query_responder( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + struct ecore_rdma_query_qp_out_params *out_params) +{ + struct roce_query_qp_resp_output_params *p_resp_ramrod_res; + struct roce_query_qp_resp_ramrod_data *p_resp_ramrod; + struct ecore_sp_init_data init_data; + dma_addr_t resp_ramrod_res_phys; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc = ECORE_SUCCESS; + bool error_flag; + + if (!qp->resp_offloaded) { + /* Don't send query qp for the responder */ + out_params->rq_psn = qp->rq_psn; + + return ECORE_SUCCESS; + } + + /* Send a query responder ramrod to the FW */ + p_resp_ramrod_res = (struct roce_query_qp_resp_output_params *) + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, &resp_ramrod_res_phys, + sizeof(*p_resp_ramrod_res)); + if (!p_resp_ramrod_res) + { + DP_NOTICE(p_hwfn, false, + "ecore query qp failed: cannot allocate memory (ramrod)\n"); + return ECORE_NOMEM; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_resp_ramrod = &p_ent->ramrod.roce_query_qp_resp; + DMA_REGPAIR_LE(p_resp_ramrod->output_params_addr, resp_ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + out_params->rq_psn = OSAL_LE32_TO_CPU(p_resp_ramrod_res->psn); + error_flag = GET_FIELD( + OSAL_LE32_TO_CPU(p_resp_ramrod_res->err_flag), + ROCE_QUERY_QP_RESP_OUTPUT_PARAMS_ERROR_FLG); + if (error_flag) + qp->cur_state = ECORE_ROCE_QP_STATE_ERR; + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_resp_ramrod_res, + resp_ramrod_res_phys, + sizeof(*p_resp_ramrod_res)); + + return rc; +} + +static OSAL_INLINE enum _ecore_status_t ecore_roce_sp_query_requester( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + struct ecore_rdma_query_qp_out_params *out_params, + bool *sq_draining) +{ + struct roce_query_qp_req_output_params *p_req_ramrod_res; + struct roce_query_qp_req_ramrod_data *p_req_ramrod; + struct ecore_sp_init_data init_data; + dma_addr_t req_ramrod_res_phys; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc = ECORE_SUCCESS; + bool error_flag; + + if (!qp->req_offloaded) + { + /* Don't send query qp for the requester */ + out_params->sq_psn = qp->sq_psn; + out_params->draining = false; + + *sq_draining = 0; + + return ECORE_SUCCESS; + } + + /* Send a query requester ramrod to the FW */ + p_req_ramrod_res = (struct roce_query_qp_req_output_params *) + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, &req_ramrod_res_phys, + sizeof(*p_req_ramrod_res)); + if (!p_req_ramrod_res) + { + DP_NOTICE(p_hwfn, false, + "ecore query qp failed: cannot allocate memory (ramrod). 
rc = %d\n", + rc); + return ECORE_NOMEM; + } + + /* Get SPQ entry */ + init_data.cid = qp->icid + 1; + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_req_ramrod = &p_ent->ramrod.roce_query_qp_req; + DMA_REGPAIR_LE(p_req_ramrod->output_params_addr, req_ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + out_params->sq_psn = OSAL_LE32_TO_CPU(p_req_ramrod_res->psn); + error_flag = GET_FIELD(OSAL_LE32_TO_CPU(p_req_ramrod_res->flags), + ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_ERR_FLG); + if (error_flag) + qp->cur_state = ECORE_ROCE_QP_STATE_ERR; + else + *sq_draining = GET_FIELD( + OSAL_LE32_TO_CPU(p_req_ramrod_res->flags), + ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_SQ_DRAINING_FLG); + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_req_ramrod_res, + req_ramrod_res_phys, sizeof(*p_req_ramrod_res)); + + return rc; +} + +enum _ecore_status_t ecore_roce_query_qp( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + struct ecore_rdma_query_qp_out_params *out_params) +{ + enum _ecore_status_t rc; + + rc = ecore_roce_sp_query_responder(p_hwfn, qp, out_params); + if (rc) + return rc; + + rc = ecore_roce_sp_query_requester(p_hwfn, qp, out_params, + &out_params->draining); + if (rc) + return rc; + + out_params->state = qp->cur_state; + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_roce_destroy_qp(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + struct ecore_rdma_destroy_qp_out_params *out_params) +{ + u32 cq_prod_resp = qp->cq_prod.resp, cq_prod_req = qp->cq_prod.req; + u32 num_invalidated_mw = 0; + u32 num_bound_mw = 0; + enum _ecore_status_t rc; + + /* Destroys the specified QP + * Note: if qp state != RESET/ERR/INIT then upper driver first need to + * call modify qp to move the qp to ERR state + */ + if ((qp->cur_state != ECORE_ROCE_QP_STATE_RESET) && + (qp->cur_state != ECORE_ROCE_QP_STATE_ERR) && + (qp->cur_state != ECORE_ROCE_QP_STATE_INIT)) + { + DP_NOTICE(p_hwfn, + true, + "QP must be in error, reset or init state before destroying it\n"); + return ECORE_INVAL; + } + + if (qp->cur_state != ECORE_ROCE_QP_STATE_RESET) { + rc = ecore_roce_sp_destroy_qp_responder(p_hwfn, + qp, + &num_invalidated_mw, + &cq_prod_resp); + if (rc != ECORE_SUCCESS) + return rc; + + /* Send destroy requester ramrod */ + rc = ecore_roce_sp_destroy_qp_requester(p_hwfn, qp, + &num_bound_mw, + &cq_prod_req); + if (rc != ECORE_SUCCESS) + return rc; + + /* resp_ofload was true, num_invalidated_mw is valid */ + if (num_invalidated_mw != num_bound_mw) { + DP_NOTICE(p_hwfn, + true, + "number of invalidate memory windows is different from bounded ones\n"); + return ECORE_INVAL; + } + } + + ecore_roce_free_qp(p_hwfn, qp->qp_idx); + + out_params->rq_cq_prod = cq_prod_resp; + out_params->sq_cq_prod = cq_prod_req; + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_roce_destroy_ud_qp(void *rdma_cxt, u16 cid) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + if (!rdma_cxt) { + DP_ERR(p_hwfn->p_dev, + "destroy ud qp failed due to NULL rdma_cxt\n"); + return ECORE_INVAL; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_DESTROY_UD_QP, + PROTOCOLID_ROCE, 
&init_data); + if (rc != ECORE_SUCCESS) + goto err; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + ecore_roce_free_qp(p_hwfn, ECORE_ROCE_ICID_TO_QP(cid)); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "freed a ud qp with cid=%d\n", cid); + + return ECORE_SUCCESS; + +err: + DP_ERR(p_hwfn, "failed destroying a ud qp with cid=%d\n", cid); + + return rc; +} + + +enum _ecore_status_t ecore_roce_create_ud_qp(void *rdma_cxt, + struct ecore_rdma_create_qp_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + u16 icid, qp_idx; + + if (!rdma_cxt || !out_params) { + DP_ERR(p_hwfn->p_dev, + "ecore roce create ud qp failed due to NULL entry (rdma_cxt=%p, out=%p)\n", + rdma_cxt, out_params); + return ECORE_INVAL; + } + + rc = ecore_roce_alloc_qp_idx(p_hwfn, &qp_idx); + if (rc != ECORE_SUCCESS) + goto err; + + icid = ECORE_ROCE_QP_TO_ICID(qp_idx); + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_CREATE_UD_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err1; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err1; + + out_params->icid = icid; + out_params->qp_id = ((0xFF << 16) | icid); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "created a ud qp with icid=%d\n", + icid); + + return ECORE_SUCCESS; + +err1: + ecore_roce_free_qp(p_hwfn, qp_idx); + +err: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "failed creating a ud qp\n"); + + return rc; +} + + +enum _ecore_status_t +ecore_roce_modify_qp(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + enum ecore_roce_qp_state prev_state, + struct ecore_rdma_modify_qp_in_params *params) +{ + u32 num_invalidated_mw = 0, num_bound_mw = 0; + enum _ecore_status_t rc = ECORE_SUCCESS; + + /* Perform additional operations according to the current state and the + * next state + */ + if (((prev_state == ECORE_ROCE_QP_STATE_INIT) || + (prev_state == ECORE_ROCE_QP_STATE_RESET)) && + (qp->cur_state == ECORE_ROCE_QP_STATE_RTR)) + { + /* Init->RTR or Reset->RTR */ + + /* Verify the cid bits that of this qp index are clear */ + rc = ecore_roce_wait_free_cids(p_hwfn, qp->qp_idx); + if (rc) + return rc; + + rc = ecore_roce_sp_create_responder(p_hwfn, qp); + return rc; + + } else if ((prev_state == ECORE_ROCE_QP_STATE_RTR) && + (qp->cur_state == ECORE_ROCE_QP_STATE_RTS)) + { + /* RTR-> RTS */ + rc = ecore_roce_sp_create_requester(p_hwfn, qp); + if (rc != ECORE_SUCCESS) + return rc; + + /* Send modify responder ramrod */ + rc = ecore_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + return rc; + + } else if ((prev_state == ECORE_ROCE_QP_STATE_RTS) && + (qp->cur_state == ECORE_ROCE_QP_STATE_RTS)) + { + /* RTS->RTS */ + rc = ecore_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + return rc; + + } else if ((prev_state == ECORE_ROCE_QP_STATE_RTS) && + (qp->cur_state == ECORE_ROCE_QP_STATE_SQD)) + { + /* RTS->SQD */ + rc = ecore_roce_sp_modify_requester(p_hwfn, qp, true, false, + params->modify_flags); + return rc; + + } else if ((prev_state == ECORE_ROCE_QP_STATE_SQD) && + (qp->cur_state == 
ECORE_ROCE_QP_STATE_SQD)) + { + /* SQD->SQD */ + rc = ecore_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + return rc; + + } else if ((prev_state == ECORE_ROCE_QP_STATE_SQD) && + (qp->cur_state == ECORE_ROCE_QP_STATE_RTS)) + { + /* SQD->RTS */ + rc = ecore_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + + return rc; + } else if (qp->cur_state == ECORE_ROCE_QP_STATE_ERR) { + /* ->ERR */ + rc = ecore_roce_sp_modify_responder(p_hwfn, qp, true, + params->modify_flags); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_roce_sp_modify_requester(p_hwfn, qp, false, true, + params->modify_flags); + return rc; + + } else if (qp->cur_state == ECORE_ROCE_QP_STATE_RESET) { + /* Any state -> RESET */ + + /* Send destroy responder ramrod */ + rc = ecore_roce_sp_destroy_qp_responder(p_hwfn, qp, + &num_invalidated_mw, + &qp->cq_prod.resp); + + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_roce_sp_destroy_qp_requester(p_hwfn, qp, + &num_bound_mw, + &qp->cq_prod.req); + + + if (rc != ECORE_SUCCESS) + return rc; + + if (num_invalidated_mw != num_bound_mw) { + DP_NOTICE(p_hwfn, + true, + "number of invalidate memory windows is different from bounded ones\n"); + return ECORE_INVAL; + } + } else { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ECORE_SUCCESS\n"); + } + + return rc; +} + +static void ecore_roce_free_icid(struct ecore_hwfn *p_hwfn, u16 icid) +{ + struct ecore_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + u32 start_cid, cid; + + start_cid = ecore_cxt_get_proto_cid_start(p_hwfn, p_rdma_info->proto); + cid = icid - start_cid; + + OSAL_SPIN_LOCK(&p_rdma_info->lock); + + ecore_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, cid); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +static void ecore_rdma_dpm_conf(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt) +{ + u32 val; + + val = (p_hwfn->dcbx_no_edpm || p_hwfn->db_bar_no_edpm) ? 0 : 1; + + ecore_wr(p_hwfn, p_ptt, DORQ_REG_PF_DPM_ENABLE, val); + DP_VERBOSE(p_hwfn, (ECORE_MSG_DCB | ECORE_MSG_RDMA), + "Changing DPM_EN state to %d (DCBX=%d, DB_BAR=%d)\n", + val, p_hwfn->dcbx_no_edpm, p_hwfn->db_bar_no_edpm); +} + +/* This function disables EDPM due to DCBx considerations */ +void ecore_roce_dpm_dcbx(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt) +{ + u8 val; + + /* if any QPs are already active, we want to disable DPM, since their + * context information contains information from before the latest DCBx + * update. Otherwise enable it. + */ + val = (ecore_rdma_allocated_qps(p_hwfn)) ? 
true : false; + p_hwfn->dcbx_no_edpm = (u8)val; + + ecore_rdma_dpm_conf(p_hwfn, p_ptt); +} + +/* This function disables EDPM due to doorbell bar considerations */ +void ecore_rdma_dpm_bar(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt) +{ + p_hwfn->db_bar_no_edpm = true; + + ecore_rdma_dpm_conf(p_hwfn, p_ptt); +} + +enum _ecore_status_t ecore_roce_setup(struct ecore_hwfn *p_hwfn) +{ + return ecore_spq_register_async_cb(p_hwfn, PROTOCOLID_ROCE, + ecore_roce_async_event); +} + +#ifdef _NTDDK_ +#pragma warning(pop) +#endif diff --git a/sys/dev/qlnx/qlnxe/qlnx_rdma.c b/sys/dev/qlnx/qlnxe/qlnx_rdma.c new file mode 100644 index 000000000000..dc105e1e9e45 --- /dev/null +++ b/sys/dev/qlnx/qlnxe/qlnx_rdma.c @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * File : qlnx_rdma.c + * Author: David C Somayajulu + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + + +#include "qlnx_os.h" +#include "bcm_osal.h" + +#include "reg_addr.h" +#include "ecore_gtt_reg_addr.h" +#include "ecore.h" +#include "ecore_chain.h" +#include "ecore_status.h" +#include "ecore_hw.h" +#include "ecore_rt_defs.h" +#include "ecore_init_ops.h" +#include "ecore_int.h" +#include "ecore_cxt.h" +#include "ecore_spq.h" +#include "ecore_init_fw_funcs.h" +#include "ecore_sp_commands.h" +#include "ecore_dev_api.h" +#include "ecore_l2_api.h" +#ifdef CONFIG_ECORE_SRIOV +#include "ecore_sriov.h" +#include "ecore_vf.h" +#endif +#ifdef CONFIG_ECORE_LL2 +#include "ecore_ll2.h" +#endif +#ifdef CONFIG_ECORE_FCOE +#include "ecore_fcoe.h" +#endif +#ifdef CONFIG_ECORE_ISCSI +#include "ecore_iscsi.h" +#endif +#include "ecore_mcp.h" +#include "ecore_hw_defs.h" +#include "mcp_public.h" + +#ifdef CONFIG_ECORE_RDMA +#include "ecore_rdma.h" +#endif + +#ifdef CONFIG_ECORE_ROCE +#include "ecore_roce.h" +#endif + +#ifdef CONFIG_ECORE_IWARP +#include "ecore_iwarp.h" +#endif + +#include "ecore_iro.h" +#include "nvm_cfg.h" +#include "ecore_dev_api.h" +#include "ecore_dbg_fw_funcs.h" + +#include "qlnx_ioctl.h" +#include "qlnx_def.h" +#include "qlnx_rdma.h" +#include "qlnx_ver.h" +#include <sys/smp.h> + +struct mtx qlnx_rdma_dev_lock; +struct qlnx_rdma_if *qlnx_rdma_if = NULL; + +qlnx_host_t *qlnx_host_list = NULL; + +void +qlnx_rdma_init(void) +{ + if (!mtx_initialized(&qlnx_rdma_dev_lock)) { + mtx_init(&qlnx_rdma_dev_lock, "qlnx_rdma_dev_lock", NULL, MTX_DEF); + } + return; +} + +void +qlnx_rdma_deinit(void) +{ + if (mtx_initialized(&qlnx_rdma_dev_lock) && (qlnx_host_list == NULL)) { + mtx_destroy(&qlnx_rdma_dev_lock); + } + return; +} + +static void +_qlnx_rdma_dev_add(struct qlnx_host *ha) +{ + QL_DPRINT12(ha, "enter ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if); + + if (qlnx_rdma_if == NULL) + return; + + if (ha->personality != ECORE_PCI_ETH_IWARP && + ha->personality != ECORE_PCI_ETH_ROCE) + return; + + ha->qlnx_rdma = qlnx_rdma_if->add(ha); + + QL_DPRINT12(ha, "exit (ha = %p, qlnx_rdma = %p)\n", ha, ha->qlnx_rdma); + return; +} + +void +qlnx_rdma_dev_add(struct qlnx_host *ha) +{ + QL_DPRINT12(ha, "enter ha = %p\n", ha); + + if (ha->personality != ECORE_PCI_ETH_IWARP && + ha->personality != ECORE_PCI_ETH_ROCE) + return; + + mtx_lock(&qlnx_rdma_dev_lock); + + if (qlnx_host_list == NULL) { + qlnx_host_list = ha; + ha->next = NULL; + } else { + ha->next = qlnx_host_list; + qlnx_host_list = ha; + } + + mtx_unlock(&qlnx_rdma_dev_lock); + + _qlnx_rdma_dev_add(ha); + + QL_DPRINT12(ha, "exit (%p)\n", ha); + + return; +} + +static int +_qlnx_rdma_dev_remove(struct qlnx_host *ha) +{ + int ret = 0; + + QL_DPRINT12(ha, "enter ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if); + + if (qlnx_rdma_if == NULL) + return (ret); + + if (ha->personality != ECORE_PCI_ETH_IWARP && + ha->personality != ECORE_PCI_ETH_ROCE) + return (ret); + + ret = qlnx_rdma_if->remove(ha, ha->qlnx_rdma); + + QL_DPRINT12(ha, "exit ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if); + return (ret); +} + +int +qlnx_rdma_dev_remove(struct qlnx_host *ha) +{ + int ret = 0; + qlnx_host_t *ha_prev; + qlnx_host_t *ha_cur; + + QL_DPRINT12(ha, "enter ha = %p\n", ha); + + if ((qlnx_host_list == NULL) || (ha == NULL)) + return (ret); + + if (ha->personality != ECORE_PCI_ETH_IWARP && + ha->personality != ECORE_PCI_ETH_ROCE) + return (ret); + + ret = _qlnx_rdma_dev_remove(ha); + + if (ret) + return (ret); + + mtx_lock(&qlnx_rdma_dev_lock); + 
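+	/* With qlnx_rdma_dev_lock held, locate 'ha' in the singly-linked
+	 * qlnx_host_list (head case first, then a ha_prev/ha_cur walk) so
+	 * the entry can be detached from the list.
+	 */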
+ if (qlnx_host_list == ha) { + qlnx_host_list = ha->next; + ha->next = NULL; + mtx_unlock(&qlnx_rdma_dev_lock); + QL_DPRINT12(ha, "exit0 ha = %p\n", ha); + return (ret); + } + + ha_prev = ha_cur = qlnx_host_list; + + while ((ha_cur != ha) && (ha_cur != NULL)) { + ha_prev = ha_cur; + ha_cur = ha_cur->next; + } + + if (ha_cur == ha) { + ha_prev = ha->next; + ha->next = NULL; + } + + mtx_unlock(&qlnx_rdma_dev_lock); + + QL_DPRINT12(ha, "exit1 ha = %p\n", ha); + return (ret); +} + +int +qlnx_rdma_register_if(qlnx_rdma_if_t *rdma_if) +{ + qlnx_host_t *ha; + + if (mtx_initialized(&qlnx_rdma_dev_lock)) { + + mtx_lock(&qlnx_rdma_dev_lock); + qlnx_rdma_if = rdma_if; + + ha = qlnx_host_list; + + while (ha != NULL) { + _qlnx_rdma_dev_add(ha); + ha = ha->next; + } + + mtx_unlock(&qlnx_rdma_dev_lock); + + return (0); + } + + return (-1); +} + +int +qlnx_rdma_deregister_if(qlnx_rdma_if_t *rdma_if) +{ + int ret = 0; + qlnx_host_t *ha; + + printf("%s: enter rdma_if = %p\n", __func__, rdma_if); + + if (mtx_initialized(&qlnx_rdma_dev_lock)) { + + mtx_lock(&qlnx_rdma_dev_lock); + + ha = qlnx_host_list; + + while (ha != NULL) { + + mtx_unlock(&qlnx_rdma_dev_lock); + + if (ha->dbg_level & 0xF000) + ret = EBUSY; + else + ret = _qlnx_rdma_dev_remove(ha); + + device_printf(ha->pci_dev, "%s [%d]: ret = 0x%x\n", + __func__, __LINE__, ret); + if (ret) + return (ret); + + mtx_lock(&qlnx_rdma_dev_lock); + + ha->qlnx_rdma = NULL; + + ha = ha->next; + } + + if (!ret) + qlnx_rdma_if = NULL; + + mtx_unlock(&qlnx_rdma_dev_lock); + + } + printf("%s: exit rdma_if = %p\n", __func__, rdma_if); + + return (ret); +} + + +void +qlnx_rdma_dev_open(struct qlnx_host *ha) +{ + QL_DPRINT12(ha, "enter ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if); + + if (qlnx_rdma_if == NULL) + return; + + if (ha->personality != ECORE_PCI_ETH_IWARP && + ha->personality != ECORE_PCI_ETH_ROCE) + return; + + qlnx_rdma_if->notify(ha, ha->qlnx_rdma, QLNX_ETHDEV_UP); + + QL_DPRINT12(ha, "exit ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if); + return; +} + + +void +qlnx_rdma_dev_close(struct qlnx_host *ha) +{ + QL_DPRINT12(ha, "enter ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if); + + if (qlnx_rdma_if == NULL) + return; + + if (ha->personality != ECORE_PCI_ETH_IWARP && + ha->personality != ECORE_PCI_ETH_ROCE) + return; + + qlnx_rdma_if->notify(ha, ha->qlnx_rdma, QLNX_ETHDEV_DOWN); + + QL_DPRINT12(ha, "exit ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if); + return; +} + +int +qlnx_rdma_get_num_irqs(struct qlnx_host *ha) +{ + return (QLNX_NUM_CNQ + ecore_rdma_get_sb_id(&ha->cdev.hwfns[0], 0) + 2); +} + + diff --git a/sys/dev/qlnx/qlnxe/qlnx_rdma.h b/sys/dev/qlnx/qlnxe/qlnx_rdma.h new file mode 100644 index 000000000000..03c43c8b8201 --- /dev/null +++ b/sys/dev/qlnx/qlnxe/qlnx_rdma.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + + +/* + * File: qlnx_rdma.h + * Author: David C Somayajulu + */ + +#ifndef _QLNX_RDMA_H_ +#define _QLNX_RDMA_H_ + +enum qlnx_rdma_event { + QLNX_ETHDEV_UP = 0x10, + QLNX_ETHDEV_DOWN = 0x11, + QLNX_ETHDEV_CHANGE_ADDR = 0x12 +}; + +struct qlnx_rdma_if { + void * (*add)(void *ha); + int (*remove)(void *ha, void *qlnx_rdma_dev); + void (*notify)(void *ha, void *qlnx_rdma_dev, enum qlnx_rdma_event); +}; +typedef struct qlnx_rdma_if qlnx_rdma_if_t; + +extern int qlnx_rdma_register_if(qlnx_rdma_if_t *rdma_if); +extern int qlnx_rdma_deregister_if(qlnx_rdma_if_t *rdma_if); + +#define QLNX_NUM_CNQ 1 + +extern int qlnx_rdma_get_num_irqs(struct qlnx_host *ha); +extern void qlnx_rdma_dev_add(struct qlnx_host *ha); +extern void qlnx_rdma_dev_open(struct qlnx_host *ha); +extern void qlnx_rdma_dev_close(struct qlnx_host *ha); +extern int qlnx_rdma_dev_remove(struct qlnx_host *ha); +extern void qlnx_rdma_changeaddr(struct qlnx_host *ha); + +extern void qlnx_rdma_init(void); +extern void qlnx_rdma_deinit(void); + +#endif /* #ifndef _QLNX_RDMA_H_ */ diff --git a/sys/dev/qlnx/qlnxr/qlnxr_cm.c b/sys/dev/qlnx/qlnxr/qlnxr_cm.c new file mode 100644 index 000000000000..23c8c3000765 --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_cm.c @@ -0,0 +1,887 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "qlnxr_def.h" +#include "rdma_common.h" +#include "qlnxr_cm.h" + +void +qlnxr_inc_sw_gsi_cons(struct qlnxr_qp_hwq_info *info) +{ + info->gsi_cons = (info->gsi_cons + 1) % info->max_wr; +} + +void +qlnxr_store_gsi_qp_cq(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ib_qp_init_attr *attrs) +{ + QL_DPRINT12(dev->ha, "enter\n"); + + dev->gsi_qp_created = 1; + dev->gsi_sqcq = get_qlnxr_cq((attrs->send_cq)); + dev->gsi_rqcq = get_qlnxr_cq((attrs->recv_cq)); + dev->gsi_qp = qp; + + QL_DPRINT12(dev->ha, "exit\n"); + + return; +} + +void +qlnxr_ll2_complete_tx_packet(void *cxt, + uint8_t connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, + bool b_last_packet) +{ + struct qlnxr_dev *dev = (struct qlnxr_dev *)cxt; + struct ecore_roce_ll2_packet *pkt = cookie; + struct qlnxr_cq *cq = dev->gsi_sqcq; + struct qlnxr_qp *qp = dev->gsi_qp; + unsigned long flags; + + QL_DPRINT12(dev->ha, "enter\n"); + + qlnx_dma_free_coherent(&dev->ha->cdev, pkt->header.vaddr, + pkt->header.baddr, pkt->header.len); + kfree(pkt); + + spin_lock_irqsave(&qp->q_lock, flags); + + qlnxr_inc_sw_gsi_cons(&qp->sq); + + spin_unlock_irqrestore(&qp->q_lock, flags); + + if (cq->ibcq.comp_handler) + (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); + + QL_DPRINT12(dev->ha, "exit\n"); + + return; +} + +void +qlnxr_ll2_complete_rx_packet(void *cxt, + struct ecore_ll2_comp_rx_data *data) +{ + struct qlnxr_dev *dev = (struct qlnxr_dev *)cxt; + struct qlnxr_cq *cq = dev->gsi_rqcq; + // struct qlnxr_qp *qp = dev->gsi_qp; + struct qlnxr_qp *qp = NULL; + unsigned long flags; + uint32_t qp_num = 0; + // uint32_t delay_count = 0, gsi_cons = 0; + //void * dest_va; + + QL_DPRINT12(dev->ha, "enter\n"); + + if (data->u.data_length_error) { + /* TODO: add statistic */ + } + + if (data->cookie == NULL) { + QL_DPRINT12(dev->ha, "cookie is NULL, bad sign\n"); + } + + qp_num = (0xFF << 16) | data->qp_id; + + if (data->qp_id == 1) { + qp = dev->gsi_qp; + } else { + /* TODO: This will be needed for UD QP support */ + /* For RoCEv1 this is invalid */ + QL_DPRINT12(dev->ha, "invalid QP\n"); + return; + } + /* note: currently only one recv sg is supported */ + QL_DPRINT12(dev->ha, "MAD received on QP : %x\n", data->rx_buf_addr); + + spin_lock_irqsave(&qp->q_lock, flags); + + qp->rqe_wr_id[qp->rq.gsi_cons].rc = + data->u.data_length_error ? -EINVAL : 0; + qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = data->vlan; + /* note: length stands for data length i.e. GRH is excluded */ + qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length = + data->length.data_length; + *((u32 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[0]) = + ntohl(data->opaque_data_0); + *((u16 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[4]) = + ntohs((u16)data->opaque_data_1); + + qlnxr_inc_sw_gsi_cons(&qp->rq); + + spin_unlock_irqrestore(&qp->q_lock, flags); + + if (cq->ibcq.comp_handler) + (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); + + QL_DPRINT12(dev->ha, "exit\n"); + + return; +} + +void qlnxr_ll2_release_rx_packet(void *cxt, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + bool b_last_packet) +{ + /* Do nothing... 
*/ +} + +static void +qlnxr_destroy_gsi_cq(struct qlnxr_dev *dev, + struct ib_qp_init_attr *attrs) +{ + struct ecore_rdma_destroy_cq_in_params iparams; + struct ecore_rdma_destroy_cq_out_params oparams; + struct qlnxr_cq *cq; + + QL_DPRINT12(dev->ha, "enter\n"); + + cq = get_qlnxr_cq((attrs->send_cq)); + iparams.icid = cq->icid; + ecore_rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); + ecore_chain_free(&dev->ha->cdev, &cq->pbl); + + cq = get_qlnxr_cq((attrs->recv_cq)); + /* if a dedicated recv_cq was used, delete it too */ + if (iparams.icid != cq->icid) { + iparams.icid = cq->icid; + ecore_rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); + ecore_chain_free(&dev->ha->cdev, &cq->pbl); + } + + QL_DPRINT12(dev->ha, "exit\n"); + + return; +} + +static inline int +qlnxr_check_gsi_qp_attrs(struct qlnxr_dev *dev, + struct ib_qp_init_attr *attrs) +{ + QL_DPRINT12(dev->ha, "enter\n"); + + if (attrs->cap.max_recv_sge > QLNXR_GSI_MAX_RECV_SGE) { + QL_DPRINT11(dev->ha, + "(attrs->cap.max_recv_sge > QLNXR_GSI_MAX_RECV_SGE)\n"); + return -EINVAL; + } + + if (attrs->cap.max_recv_wr > QLNXR_GSI_MAX_RECV_WR) { + QL_DPRINT11(dev->ha, + "(attrs->cap.max_recv_wr > QLNXR_GSI_MAX_RECV_WR)\n"); + return -EINVAL; + } + + if (attrs->cap.max_send_wr > QLNXR_GSI_MAX_SEND_WR) { + QL_DPRINT11(dev->ha, + "(attrs->cap.max_send_wr > QLNXR_GSI_MAX_SEND_WR)\n"); + return -EINVAL; + } + + QL_DPRINT12(dev->ha, "exit\n"); + + return 0; +} + + +static int +qlnxr_ll2_post_tx(struct qlnxr_dev *dev, struct ecore_roce_ll2_packet *pkt) +{ + enum ecore_ll2_roce_flavor_type roce_flavor; + struct ecore_ll2_tx_pkt_info ll2_tx_pkt; + int rc; + int i; + + QL_DPRINT12(dev->ha, "enter\n"); + + memset(&ll2_tx_pkt, 0, sizeof(ll2_tx_pkt)); + + if (pkt->roce_mode != ROCE_V1) { + QL_DPRINT11(dev->ha, "roce_mode != ROCE_V1\n"); + return (-1); + } + + roce_flavor = (pkt->roce_mode == ROCE_V1) ? + ECORE_LL2_ROCE : ECORE_LL2_RROCE; + + ll2_tx_pkt.num_of_bds = 1 /* hdr */ + pkt->n_seg; + ll2_tx_pkt.vlan = 0; /* ??? 
*/ + ll2_tx_pkt.tx_dest = ECORE_LL2_TX_DEST_NW; + ll2_tx_pkt.ecore_roce_flavor = roce_flavor; + ll2_tx_pkt.first_frag = pkt->header.baddr; + ll2_tx_pkt.first_frag_len = pkt->header.len; + ll2_tx_pkt.cookie = pkt; + ll2_tx_pkt.enable_ip_cksum = 1; // Only for RoCEv2:IPv4 + + /* tx header */ + rc = ecore_ll2_prepare_tx_packet(dev->rdma_ctx, + dev->gsi_ll2_handle, + &ll2_tx_pkt, + 1); + if (rc) { + + QL_DPRINT11(dev->ha, "ecore_ll2_prepare_tx_packet failed\n"); + + /* TX failed while posting header - release resources*/ + qlnx_dma_free_coherent(&dev->ha->cdev, + pkt->header.vaddr, + pkt->header.baddr, + pkt->header.len); + + kfree(pkt); + + return rc; + } + + /* tx payload */ + for (i = 0; i < pkt->n_seg; i++) { + rc = ecore_ll2_set_fragment_of_tx_packet(dev->rdma_ctx, + dev->gsi_ll2_handle, + pkt->payload[i].baddr, + pkt->payload[i].len); + if (rc) { + /* if failed not much to do here, partial packet has + * been posted we can't free memory, will need to wait + * for completion + */ + QL_DPRINT11(dev->ha, + "ecore_ll2_set_fragment_of_tx_packet failed\n"); + return rc; + } + } + struct ecore_ll2_stats stats = {0}; + rc = ecore_ll2_get_stats(dev->rdma_ctx, dev->gsi_ll2_handle, &stats); + if (rc) { + QL_DPRINT11(dev->ha, "failed to obtain ll2 stats\n"); + } + QL_DPRINT12(dev->ha, "exit\n"); + + return 0; +} + +int +qlnxr_ll2_stop(struct qlnxr_dev *dev) +{ + int rc; + + QL_DPRINT12(dev->ha, "enter\n"); + + if (dev->gsi_ll2_handle == 0xFF) + return 0; + + /* remove LL2 MAC address filter */ + rc = qlnx_rdma_ll2_set_mac_filter(dev->rdma_ctx, + dev->gsi_ll2_mac_address, NULL); + + rc = ecore_ll2_terminate_connection(dev->rdma_ctx, + dev->gsi_ll2_handle); + + ecore_ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle); + + dev->gsi_ll2_handle = 0xFF; + + QL_DPRINT12(dev->ha, "exit rc = %d\n", rc); + return rc; +} + +int qlnxr_ll2_start(struct qlnxr_dev *dev, + struct ib_qp_init_attr *attrs, + struct qlnxr_qp *qp) +{ + struct ecore_ll2_acquire_data data; + struct ecore_ll2_cbs cbs; + int rc; + + QL_DPRINT12(dev->ha, "enter\n"); + + /* configure and start LL2 */ + cbs.rx_comp_cb = qlnxr_ll2_complete_rx_packet; + cbs.tx_comp_cb = qlnxr_ll2_complete_tx_packet; + cbs.rx_release_cb = qlnxr_ll2_release_rx_packet; + cbs.tx_release_cb = qlnxr_ll2_complete_tx_packet; + cbs.cookie = dev; + dev->gsi_ll2_handle = 0xFF; + + memset(&data, 0, sizeof(data)); + data.input.conn_type = ECORE_LL2_TYPE_ROCE; + data.input.mtu = dev->ha->ifp->if_mtu; + data.input.rx_num_desc = 8 * 1024; + data.input.rx_drop_ttl0_flg = 1; + data.input.rx_vlan_removal_en = 0; + data.input.tx_num_desc = 8 * 1024; + data.input.tx_tc = 0; + data.input.tx_dest = ECORE_LL2_TX_DEST_NW; + data.input.ai_err_packet_too_big = ECORE_LL2_DROP_PACKET; + data.input.ai_err_no_buf = ECORE_LL2_DROP_PACKET; + data.input.gsi_enable = 1; + data.p_connection_handle = &dev->gsi_ll2_handle; + data.cbs = &cbs; + + rc = ecore_ll2_acquire_connection(dev->rdma_ctx, &data); + + if (rc) { + QL_DPRINT11(dev->ha, + "ecore_ll2_acquire_connection failed: %d\n", + rc); + return rc; + } + + QL_DPRINT11(dev->ha, + "ll2 connection acquired successfully\n"); + rc = ecore_ll2_establish_connection(dev->rdma_ctx, + dev->gsi_ll2_handle); + + if (rc) { + QL_DPRINT11(dev->ha, + "ecore_ll2_establish_connection failed\n", rc); + goto err1; + } + + QL_DPRINT11(dev->ha, + "ll2 connection established successfully\n"); + rc = qlnx_rdma_ll2_set_mac_filter(dev->rdma_ctx, NULL, + dev->ha->primary_mac); + if (rc) { + QL_DPRINT11(dev->ha, "qlnx_rdma_ll2_set_mac_filter failed\n", rc); + goto 
err2; + } + + QL_DPRINT12(dev->ha, "exit rc = %d\n", rc); + return 0; + +err2: + ecore_ll2_terminate_connection(dev->rdma_ctx, dev->gsi_ll2_handle); +err1: + ecore_ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle); + + QL_DPRINT12(dev->ha, "exit rc = %d\n", rc); + return rc; +} + +struct ib_qp* +qlnxr_create_gsi_qp(struct qlnxr_dev *dev, + struct ib_qp_init_attr *attrs, + struct qlnxr_qp *qp) +{ + int rc; + + QL_DPRINT12(dev->ha, "enter\n"); + + rc = qlnxr_check_gsi_qp_attrs(dev, attrs); + + if (rc) { + QL_DPRINT11(dev->ha, "qlnxr_check_gsi_qp_attrs failed\n"); + return ERR_PTR(rc); + } + + rc = qlnxr_ll2_start(dev, attrs, qp); + if (rc) { + QL_DPRINT11(dev->ha, "qlnxr_ll2_start failed\n"); + return ERR_PTR(rc); + } + + /* create QP */ + qp->ibqp.qp_num = 1; + qp->rq.max_wr = attrs->cap.max_recv_wr; + qp->sq.max_wr = attrs->cap.max_send_wr; + + qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id), + GFP_KERNEL); + if (!qp->rqe_wr_id) { + QL_DPRINT11(dev->ha, "(!qp->rqe_wr_id)\n"); + goto err; + } + + qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id), + GFP_KERNEL); + if (!qp->wqe_wr_id) { + QL_DPRINT11(dev->ha, "(!qp->wqe_wr_id)\n"); + goto err; + } + + qlnxr_store_gsi_qp_cq(dev, qp, attrs); + memcpy(dev->gsi_ll2_mac_address, dev->ha->primary_mac, ETH_ALEN); + + /* the GSI CQ is handled by the driver so remove it from the FW */ + qlnxr_destroy_gsi_cq(dev, attrs); + dev->gsi_rqcq->cq_type = QLNXR_CQ_TYPE_GSI; + dev->gsi_rqcq->cq_type = QLNXR_CQ_TYPE_GSI; + + QL_DPRINT12(dev->ha, "exit &qp->ibqp = %p\n", &qp->ibqp); + + return &qp->ibqp; +err: + kfree(qp->rqe_wr_id); + + rc = qlnxr_ll2_stop(dev); + + QL_DPRINT12(dev->ha, "exit with error\n"); + + return ERR_PTR(-ENOMEM); +} + +int +qlnxr_destroy_gsi_qp(struct qlnxr_dev *dev) +{ + int rc = 0; + + QL_DPRINT12(dev->ha, "enter\n"); + + rc = qlnxr_ll2_stop(dev); + + QL_DPRINT12(dev->ha, "exit rc = %d\n", rc); + return (rc); +} + + +static inline bool +qlnxr_get_vlan_id_gsi(struct ib_ah_attr *ah_attr, u16 *vlan_id) +{ + u16 tmp_vlan_id; + union ib_gid *dgid = &ah_attr->grh.dgid; + + tmp_vlan_id = (dgid->raw[11] << 8) | dgid->raw[12]; + if (tmp_vlan_id < 0x1000) { + *vlan_id = tmp_vlan_id; + return true; + } else { + *vlan_id = 0; + return false; + } +} + +#define QLNXR_MAX_UD_HEADER_SIZE (100) +#define QLNXR_GSI_QPN (1) +static inline int +qlnxr_gsi_build_header(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ib_send_wr *swr, + struct ib_ud_header *udh, + int *roce_mode) +{ + bool has_vlan = false, has_grh_ipv6 = true; + struct ib_ah_attr *ah_attr = &get_qlnxr_ah((ud_wr(swr)->ah))->attr; + struct ib_global_route *grh = &ah_attr->grh; + union ib_gid sgid; + int send_size = 0; + u16 vlan_id = 0; + u16 ether_type; + +#if __FreeBSD_version >= 1102000 + int rc = 0; + int ip_ver = 0; + bool has_udp = false; +#endif /* #if __FreeBSD_version >= 1102000 */ + + +#if !DEFINE_IB_AH_ATTR_WITH_DMAC + u8 mac[ETH_ALEN]; +#endif + int i; + + send_size = 0; + for (i = 0; i < swr->num_sge; ++i) + send_size += swr->sg_list[i].length; + + has_vlan = qlnxr_get_vlan_id_gsi(ah_attr, &vlan_id); + ether_type = ETH_P_ROCE; + *roce_mode = ROCE_V1; + if (grh->sgid_index < QLNXR_MAX_SGID) + sgid = dev->sgid_tbl[grh->sgid_index]; + else + sgid = dev->sgid_tbl[0]; + +#if __FreeBSD_version >= 1102000 + + rc = ib_ud_header_init(send_size, false /* LRH */, true /* ETH */, + has_vlan, has_grh_ipv6, ip_ver, has_udp, + 0 /* immediate */, udh); + + if (rc) { + QL_DPRINT11(dev->ha, "gsi post send: failed to init header\n"); + return rc; + } + 
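+	/* Pre-1102000 FreeBSD fallback below: ib_ud_header_init() is called
+	 * without the ip_ver/has_udp parameters and its result is not checked.
+	 */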
+#else + ib_ud_header_init(send_size, false /* LRH */, true /* ETH */, + has_vlan, has_grh_ipv6, 0 /* immediate */, udh); + +#endif /* #if __FreeBSD_version >= 1102000 */ + + /* ENET + VLAN headers*/ +#if DEFINE_IB_AH_ATTR_WITH_DMAC + memcpy(udh->eth.dmac_h, ah_attr->dmac, ETH_ALEN); +#else + qlnxr_get_dmac(dev, ah_attr, mac); + memcpy(udh->eth.dmac_h, mac, ETH_ALEN); +#endif + memcpy(udh->eth.smac_h, dev->ha->primary_mac, ETH_ALEN); + if (has_vlan) { + udh->eth.type = htons(ETH_P_8021Q); + udh->vlan.tag = htons(vlan_id); + udh->vlan.type = htons(ether_type); + } else { + udh->eth.type = htons(ether_type); + } + + for (int j = 0; j < 4; j++) { + QL_DPRINT12(dev->ha, "destination mac: %x\n", + udh->eth.dmac_h[j]); + } + for (int j = 0; j < 4; j++) { + QL_DPRINT12(dev->ha, "source mac: %x\n", + udh->eth.smac_h[j]); + } + + QL_DPRINT12(dev->ha, "QP: %p, opcode: %d, wq: %lx, roce: %x, hops:%d," + "imm : %d, vlan :%d, AH: %p\n", + qp, swr->opcode, swr->wr_id, *roce_mode, grh->hop_limit, + 0, has_vlan, get_qlnxr_ah((ud_wr(swr)->ah))); + + if (has_grh_ipv6) { + /* GRH / IPv6 header */ + udh->grh.traffic_class = grh->traffic_class; + udh->grh.flow_label = grh->flow_label; + udh->grh.hop_limit = grh->hop_limit; + udh->grh.destination_gid = grh->dgid; + memcpy(&udh->grh.source_gid.raw, &sgid.raw, + sizeof(udh->grh.source_gid.raw)); + QL_DPRINT12(dev->ha, "header: tc: %x, flow_label : %x, " + "hop_limit: %x \n", udh->grh.traffic_class, + udh->grh.flow_label, udh->grh.hop_limit); + for (i = 0; i < 16; i++) { + QL_DPRINT12(dev->ha, "udh dgid = %x\n", udh->grh.destination_gid.raw[i]); + } + for (i = 0; i < 16; i++) { + QL_DPRINT12(dev->ha, "udh sgid = %x\n", udh->grh.source_gid.raw[i]); + } + udh->grh.next_header = 0x1b; + } +#ifdef DEFINE_IB_UD_HEADER_INIT_UDP_PRESENT + /* This is for RoCEv2 */ + else { + /* IPv4 header */ + u32 ipv4_addr; + + udh->ip4.protocol = IPPROTO_UDP; + udh->ip4.tos = htonl(grh->flow_label); + udh->ip4.frag_off = htons(IP_DF); + udh->ip4.ttl = grh->hop_limit; + + ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw); + udh->ip4.saddr = ipv4_addr; + ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw); + udh->ip4.daddr = ipv4_addr; + /* note: checksum is calculated by the device */ + } +#endif + + /* BTH */ + udh->bth.solicited_event = !!(swr->send_flags & IB_SEND_SOLICITED); + udh->bth.pkey = QLNXR_ROCE_PKEY_DEFAULT;/* TODO: ib_get_cahced_pkey?! */ + //udh->bth.destination_qpn = htonl(ud_wr(swr)->remote_qpn); + udh->bth.destination_qpn = OSAL_CPU_TO_BE32(ud_wr(swr)->remote_qpn); + //udh->bth.psn = htonl((qp->sq_psn++) & ((1 << 24) - 1)); + udh->bth.psn = OSAL_CPU_TO_BE32((qp->sq_psn++) & ((1 << 24) - 1)); + udh->bth.opcode = IB_OPCODE_UD_SEND_ONLY; + + /* DETH */ + //udh->deth.qkey = htonl(0x80010000); /* qp->qkey */ /* TODO: what is?! */ + //udh->deth.source_qpn = htonl(QLNXR_GSI_QPN); + udh->deth.qkey = OSAL_CPU_TO_BE32(0x80010000); /* qp->qkey */ /* TODO: what is?! 
*/ + udh->deth.source_qpn = OSAL_CPU_TO_BE32(QLNXR_GSI_QPN); + QL_DPRINT12(dev->ha, "exit\n"); + return 0; +} + +static inline int +qlnxr_gsi_build_packet(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, struct ib_send_wr *swr, + struct ecore_roce_ll2_packet **p_packet) +{ + u8 ud_header_buffer[QLNXR_MAX_UD_HEADER_SIZE]; + struct ecore_roce_ll2_packet *packet; + int roce_mode, header_size; + struct ib_ud_header udh; + int i, rc; + + QL_DPRINT12(dev->ha, "enter\n"); + + *p_packet = NULL; + + rc = qlnxr_gsi_build_header(dev, qp, swr, &udh, &roce_mode); + if (rc) { + QL_DPRINT11(dev->ha, + "qlnxr_gsi_build_header failed rc = %d\n", rc); + return rc; + } + + header_size = ib_ud_header_pack(&udh, &ud_header_buffer); + + packet = kzalloc(sizeof(*packet), GFP_ATOMIC); + if (!packet) { + QL_DPRINT11(dev->ha, "packet == NULL\n"); + return -ENOMEM; + } + + packet->header.vaddr = qlnx_dma_alloc_coherent(&dev->ha->cdev, + &packet->header.baddr, + header_size); + if (!packet->header.vaddr) { + QL_DPRINT11(dev->ha, "packet->header.vaddr == NULL\n"); + kfree(packet); + return -ENOMEM; + } + + if (memcmp(udh.eth.smac_h, udh.eth.dmac_h, ETH_ALEN)) + packet->tx_dest = ECORE_ROCE_LL2_TX_DEST_NW; + else + packet->tx_dest = ECORE_ROCE_LL2_TX_DEST_LB; + + packet->roce_mode = roce_mode; + memcpy(packet->header.vaddr, ud_header_buffer, header_size); + packet->header.len = header_size; + packet->n_seg = swr->num_sge; + qp->wqe_wr_id[qp->sq.prod].bytes_len = IB_GRH_BYTES; //RDMA_GRH_BYTES + for (i = 0; i < packet->n_seg; i++) { + packet->payload[i].baddr = swr->sg_list[i].addr; + packet->payload[i].len = swr->sg_list[i].length; + qp->wqe_wr_id[qp->sq.prod].bytes_len += + packet->payload[i].len; + QL_DPRINT11(dev->ha, "baddr: %p, len: %d\n", + packet->payload[i].baddr, + packet->payload[i].len); + } + + *p_packet = packet; + + QL_DPRINT12(dev->ha, "exit, packet->n_seg: %d\n", packet->n_seg); + return 0; +} + +int +qlnxr_gsi_post_send(struct ib_qp *ibqp, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct ecore_roce_ll2_packet *pkt = NULL; + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + struct qlnxr_dev *dev = qp->dev; + unsigned long flags; + int rc; + + QL_DPRINT12(dev->ha, "exit\n"); + + if (qp->state != ECORE_ROCE_QP_STATE_RTS) { + QL_DPRINT11(dev->ha, + "(qp->state != ECORE_ROCE_QP_STATE_RTS)\n"); + *bad_wr = wr; + return -EINVAL; + } + + if (wr->num_sge > RDMA_MAX_SGE_PER_SQ_WQE) { + QL_DPRINT11(dev->ha, + "(wr->num_sge > RDMA_MAX_SGE_PER_SQ_WQE)\n"); + rc = -EINVAL; + goto err; + } + + if (wr->opcode != IB_WR_SEND) { + QL_DPRINT11(dev->ha, "(wr->opcode > IB_WR_SEND)\n"); + rc = -EINVAL; + goto err; + } + + spin_lock_irqsave(&qp->q_lock, flags); + + rc = qlnxr_gsi_build_packet(dev, qp, wr, &pkt); + if(rc) { + spin_unlock_irqrestore(&qp->q_lock, flags); + QL_DPRINT11(dev->ha, "qlnxr_gsi_build_packet failed\n"); + goto err; + } + + rc = qlnxr_ll2_post_tx(dev, pkt); + + if (!rc) { + qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; + qp->wqe_wr_id[qp->sq.prod].signaled = + !!(wr->send_flags & IB_SEND_SIGNALED); + qp->wqe_wr_id[qp->sq.prod].opcode = IB_WC_SEND; + qlnxr_inc_sw_prod(&qp->sq); + QL_DPRINT11(dev->ha, "packet sent over gsi qp\n"); + } else { + QL_DPRINT11(dev->ha, "qlnxr_ll2_post_tx failed\n"); + rc = -EAGAIN; + *bad_wr = wr; + } + + spin_unlock_irqrestore(&qp->q_lock, flags); + + if (wr->next != NULL) { + *bad_wr = wr->next; + rc=-EINVAL; + } + + QL_DPRINT12(dev->ha, "exit\n"); + return rc; + +err: + *bad_wr = wr; + QL_DPRINT12(dev->ha, "exit error\n"); + return rc; +} + +#define 
QLNXR_LL2_RX_BUFFER_SIZE (4 * 1024) +int +qlnxr_gsi_post_recv(struct ib_qp *ibqp, + struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qlnxr_dev *dev = get_qlnxr_dev((ibqp->device)); + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + unsigned long flags; + int rc = 0; + + QL_DPRINT12(dev->ha, "enter, wr: %p\n", wr); + + if ((qp->state != ECORE_ROCE_QP_STATE_RTR) && + (qp->state != ECORE_ROCE_QP_STATE_RTS)) { + *bad_wr = wr; + QL_DPRINT11(dev->ha, "exit 0\n"); + return -EINVAL; + } + + spin_lock_irqsave(&qp->q_lock, flags); + + while (wr) { + if (wr->num_sge > QLNXR_GSI_MAX_RECV_SGE) { + QL_DPRINT11(dev->ha, "exit 1\n"); + goto err; + } + + rc = ecore_ll2_post_rx_buffer(dev->rdma_ctx, + dev->gsi_ll2_handle, + wr->sg_list[0].addr, + wr->sg_list[0].length, + 0 /* cookie */, + 1 /* notify_fw */); + if (rc) { + QL_DPRINT11(dev->ha, "exit 2\n"); + goto err; + } + + memset(&qp->rqe_wr_id[qp->rq.prod], 0, + sizeof(qp->rqe_wr_id[qp->rq.prod])); + qp->rqe_wr_id[qp->rq.prod].sg_list[0] = wr->sg_list[0]; + qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id; + + qlnxr_inc_sw_prod(&qp->rq); + + wr = wr->next; + } + + spin_unlock_irqrestore(&qp->q_lock, flags); + + QL_DPRINT12(dev->ha, "exit rc = %d\n", rc); + return rc; +err: + + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + + QL_DPRINT12(dev->ha, "exit with -ENOMEM\n"); + return -ENOMEM; +} + +int +qlnxr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device)); + struct qlnxr_cq *cq = get_qlnxr_cq(ibcq); + struct qlnxr_qp *qp = dev->gsi_qp; + unsigned long flags; + int i = 0; + + QL_DPRINT12(dev->ha, "enter\n"); + + spin_lock_irqsave(&cq->cq_lock, flags); + + while (i < num_entries && qp->rq.cons != qp->rq.gsi_cons) { + memset(&wc[i], 0, sizeof(*wc)); + + wc[i].qp = &qp->ibqp; + wc[i].wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; + wc[i].opcode = IB_WC_RECV; + wc[i].pkey_index = 0; + wc[i].status = (qp->rqe_wr_id[qp->rq.cons].rc)? + IB_WC_GENERAL_ERR:IB_WC_SUCCESS; + /* 0 - currently only one recv sg is supported */ + wc[i].byte_len = qp->rqe_wr_id[qp->rq.cons].sg_list[0].length; + wc[i].wc_flags |= IB_WC_GRH | IB_WC_IP_CSUM_OK; + +#if __FreeBSD_version >= 1100000 + memcpy(&wc[i].smac, qp->rqe_wr_id[qp->rq.cons].smac, ETH_ALEN); + wc[i].wc_flags |= IB_WC_WITH_SMAC; + + if (qp->rqe_wr_id[qp->rq.cons].vlan_id) { + wc[i].wc_flags |= IB_WC_WITH_VLAN; + wc[i].vlan_id = qp->rqe_wr_id[qp->rq.cons].vlan_id; + } + +#endif + qlnxr_inc_sw_cons(&qp->rq); + i++; + } + + while (i < num_entries && qp->sq.cons != qp->sq.gsi_cons) { + memset(&wc[i], 0, sizeof(*wc)); + + wc[i].qp = &qp->ibqp; + wc[i].wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id; + wc[i].opcode = IB_WC_SEND; + wc[i].status = IB_WC_SUCCESS; + + qlnxr_inc_sw_cons(&qp->sq); + i++; + } + + spin_unlock_irqrestore(&cq->cq_lock, flags); + + QL_DPRINT12(dev->ha, "exit i = %d\n", i); + return i; +} + diff --git a/sys/dev/qlnx/qlnxr/qlnxr_cm.h b/sys/dev/qlnx/qlnxr/qlnxr_cm.h new file mode 100644 index 000000000000..79afc547362d --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_cm.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + + + +#ifndef __QLNXR_CM_H__ +#define __QLNXR_CM_H__ + + +/* ECORE LL2 has a limit to the number of buffers it can handle. + * FYI, OFED used 512 and 128 for recv and send. + */ +#define QLNXR_GSI_MAX_RECV_WR (4096) +#define QLNXR_GSI_MAX_SEND_WR (4096) + +#define QLNXR_GSI_MAX_RECV_SGE (1) /* LL2 FW limitation */ + +/* future OFED/kernel will have these */ +#define ETH_P_ROCE (0x8915) +#define QLNXR_ROCE_V2_UDP_SPORT (0000) + +#if __FreeBSD_version >= 1102000 + +#define rdma_wr(_wr) rdma_wr(_wr) +#define ud_wr(_wr) ud_wr(_wr) +#define atomic_wr(_wr) atomic_wr(_wr) + +#else + +#define rdma_wr(_wr) (&(_wr->wr.rdma)) +#define ud_wr(_wr) (&(_wr->wr.ud)) +#define atomic_wr(_wr) (&(_wr->wr.atomic)) + +#endif /* #if __FreeBSD_version >= 1102000 */ + +static inline u32 qlnxr_get_ipv4_from_gid(u8 *gid) +{ + return *(u32 *)(void *)&gid[12]; +} + +struct ecore_roce_ll2_header { + void *vaddr; + dma_addr_t baddr; + size_t len; +}; + +struct ecore_roce_ll2_buffer { + dma_addr_t baddr; + size_t len; +}; + +struct ecore_roce_ll2_packet { + struct ecore_roce_ll2_header header; + int n_seg; + struct ecore_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE]; + int roce_mode; + enum ecore_roce_ll2_tx_dest tx_dest; +}; + +/* RDMA CM */ + +extern int qlnxr_gsi_poll_cq(struct ib_cq *ibcq, + int num_entries, + struct ib_wc *wc); + +extern int qlnxr_gsi_post_recv(struct ib_qp *ibqp, + struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + +extern int qlnxr_gsi_post_send(struct ib_qp *ibqp, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); + +extern struct ib_qp* qlnxr_create_gsi_qp(struct qlnxr_dev *dev, + struct ib_qp_init_attr *attrs, + struct qlnxr_qp *qp); + +extern void qlnxr_store_gsi_qp_cq(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ib_qp_init_attr *attrs); + +extern void qlnxr_inc_sw_gsi_cons(struct qlnxr_qp_hwq_info *info); + +extern int qlnxr_destroy_gsi_qp(struct qlnxr_dev *dev); + +#endif /* #ifndef __QLNXR_CM_H__ */ diff --git a/sys/dev/qlnx/qlnxr/qlnxr_def.h b/sys/dev/qlnx/qlnxr/qlnxr_def.h new file mode 100644 index 000000000000..8e4394d4c43e --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_def.h @@ -0,0 +1,924 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + + +/* + * File: qlnxr_def.h + * Author: David C Somayajulu + */ + +#ifndef __QLNX_DEF_H_ +#define __QLNX_DEF_H_ + +#include <sys/ktr.h> + +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/idr.h> +#include <linux/completion.h> +#include <linux/netdevice.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/wait.h> +#include <linux/kref.h> +#include <linux/timer.h> +#include <linux/io.h> +#include <linux/fs.h> +#include <sys/vmem.h> + +#include <asm/byteorder.h> + +#include <netinet/in.h> +#include <net/ipv6.h> +#include <netinet/toecore.h> + +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_verbs.h> +#include <rdma/iw_cm.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_mad.h> +#include <rdma/ib_sa.h> + +#if __FreeBSD_version < 1100000 +#undef MODULE_VERSION +#endif + +#include "qlnx_os.h" +#include "bcm_osal.h" + +#include "reg_addr.h" +#include "ecore_gtt_reg_addr.h" +#include "ecore.h" +#include "ecore_chain.h" +#include "ecore_status.h" +#include "ecore_hw.h" +#include "ecore_rt_defs.h" +#include "ecore_init_ops.h" +#include "ecore_int.h" +#include "ecore_cxt.h" +#include "ecore_spq.h" +#include "ecore_init_fw_funcs.h" +#include "ecore_sp_commands.h" +#include "ecore_dev_api.h" +#include "ecore_l2_api.h" +#ifdef CONFIG_ECORE_SRIOV +#include "ecore_sriov.h" +#include "ecore_vf.h" +#endif +#ifdef CONFIG_ECORE_LL2 +#include "ecore_ll2.h" +#endif +#ifdef CONFIG_ECORE_FCOE +#include "ecore_fcoe.h" +#endif +#ifdef CONFIG_ECORE_ISCSI +#include "ecore_iscsi.h" +#endif +#include "ecore_mcp.h" +#include "ecore_hw_defs.h" +#include "mcp_public.h" + +#ifdef CONFIG_ECORE_RDMA +#include "ecore_rdma.h" +#include "ecore_rdma_api.h" +#endif + +#ifdef CONFIG_ECORE_ROCE +#include "ecore_roce.h" +#endif + +#ifdef CONFIG_ECORE_IWARP +#include "ecore_iwarp.h" +#endif + +#include "ecore_iro.h" +#include "nvm_cfg.h" + +#include "ecore_dbg_fw_funcs.h" +#include "rdma_common.h" + +#include "qlnx_ioctl.h" +#include "qlnx_def.h" +#include "qlnx_rdma.h" +#include "qlnxr_verbs.h" +#include "qlnxr_user.h" +#include "qlnx_ver.h" +#include <sys/smp.h> + +#define QLNXR_ROCE_INTERFACE_VERSION 1801 + +#define QLNXR_MODULE_VERSION "8.18.1.0" +#define QLNXR_NODE_DESC "QLogic 579xx RoCE HCA" + +#define OC_SKH_DEVICE_PF 0x720 +#define OC_SKH_DEVICE_VF 
0x728 +#define QLNXR_MAX_AH 512 + +/* QLNXR Limitations */ + +/* SQ/RQ Limitations + * An S/RQ PBL contains a list a pointers to pages. Each page contains S/RQE + * elements. Several S/RQE elements make an S/RQE, up to a certain maximum that + * is different between SQ and RQ. The size of the PBL was chosen such as not to + * limit the MAX_WR supported by ECORE, and rounded up to a power of two. + */ +/* SQ */ +#define QLNXR_MAX_SQ_PBL (0x8000) /* 2^15 bytes */ +#define QLNXR_MAX_SQ_PBL_ENTRIES (0x10000 / sizeof(void *)) /* number */ +#define QLNXR_SQE_ELEMENT_SIZE (sizeof(struct rdma_sq_sge)) /* bytes */ +#define QLNXR_MAX_SQE_ELEMENTS_PER_SQE (ROCE_REQ_MAX_SINGLE_SQ_WQE_SIZE / \ + QLNXR_SQE_ELEMENT_SIZE) /* number */ +#define QLNXR_MAX_SQE_ELEMENTS_PER_PAGE ((RDMA_RING_PAGE_SIZE) / \ + QLNXR_SQE_ELEMENT_SIZE) /* number */ +#define QLNXR_MAX_SQE ((QLNXR_MAX_SQ_PBL_ENTRIES) * (RDMA_RING_PAGE_SIZE) / \ + (QLNXR_SQE_ELEMENT_SIZE) / (QLNXR_MAX_SQE_ELEMENTS_PER_SQE)) +/* RQ */ +#define QLNXR_MAX_RQ_PBL (0x2000) /* 2^13 bytes */ +#define QLNXR_MAX_RQ_PBL_ENTRIES (0x10000 / sizeof(void *)) /* number */ +#define QLNXR_RQE_ELEMENT_SIZE (sizeof(struct rdma_rq_sge)) /* bytes */ +#define QLNXR_MAX_RQE_ELEMENTS_PER_RQE (RDMA_MAX_SGE_PER_RQ_WQE) /* number */ +#define QLNXR_MAX_RQE_ELEMENTS_PER_PAGE ((RDMA_RING_PAGE_SIZE) / \ + QLNXR_RQE_ELEMENT_SIZE) /* number */ +#define QLNXR_MAX_RQE ((QLNXR_MAX_RQ_PBL_ENTRIES) * (RDMA_RING_PAGE_SIZE) / \ + (QLNXR_RQE_ELEMENT_SIZE) / (QLNXR_MAX_RQE_ELEMENTS_PER_RQE)) + +/* CQE Limitation + * Although FW supports two layer PBL we use single layer since it is more + * than enough. For that layer we use a maximum size of 512 kB, again, because + * it reaches the maximum number of page pointers. Notice is the '-1' in the + * calculation that comes from having a u16 for the number of pages i.e. 0xffff + * is the maximum number of pages (in single layer). + */ +#define QLNXR_CQE_SIZE (sizeof(union rdma_cqe)) +#define QLNXR_MAX_CQE_PBL_SIZE (512*1024) /* 512kB */ +#define QLNXR_MAX_CQE_PBL_ENTRIES (((QLNXR_MAX_CQE_PBL_SIZE) / \ + sizeof(u64)) - 1) /* 64k -1 */ +#define QLNXR_MAX_CQES ((u32)((QLNXR_MAX_CQE_PBL_ENTRIES) * (ECORE_CHAIN_PAGE_SIZE)\ + / QLNXR_CQE_SIZE)) /* 8M -4096/32 = 8,388,480 */ + +/* CNQ size Limitation + * The maximum CNQ size is not reachable because the FW supports a chain of u16 + * (specifically 64k-1). The FW can buffer CNQ elements avoiding an overflow, on + * the expense of performance. Hence we set it to an arbitrarily smaller value + * than the maximum. + */ +#define QLNXR_ROCE_MAX_CNQ_SIZE (0x4000) /* 2^16 */ + +#define QLNXR_MAX_PORT (1) +#define QLNXR_PORT (1) + +#define QLNXR_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME) + +#define convert_to_64bit(lo, hi) ((u64)hi << 32 | (u64)lo) + +/* The following number is used to determine if a handle recevied from the FW + * actually point to a CQ/QP. + */ +#define QLNXR_CQ_MAGIC_NUMBER (0x11223344) +#define QLNXR_QP_MAGIC_NUMBER (0x77889900) + +/* Fast path debug prints */ +#define FP_DP_VERBOSE(...) +/* #define FP_DP_VERBOSE(...) 
DP_VERBOSE(__VA_ARGS__) */ + +#define FW_PAGE_SIZE (RDMA_RING_PAGE_SIZE) + +#define QLNXR_MSG_INIT 0x10000, +#define QLNXR_MSG_FAIL 0x10000, +#define QLNXR_MSG_CQ 0x20000, +#define QLNXR_MSG_RQ 0x40000, +#define QLNXR_MSG_SQ 0x80000, +#define QLNXR_MSG_QP (QLNXR_MSG_SQ | QLNXR_MSG_RQ), +#define QLNXR_MSG_MR 0x100000, +#define QLNXR_MSG_GSI 0x200000, +#define QLNXR_MSG_MISC 0x400000, +#define QLNXR_MSG_SRQ 0x800000, +#define QLNXR_MSG_IWARP 0x1000000, + +#define QLNXR_ROCE_PKEY_MAX 1 +#define QLNXR_ROCE_PKEY_TABLE_LEN 1 +#define QLNXR_ROCE_PKEY_DEFAULT 0xffff + +#define QLNXR_MAX_SGID 128 /* TBD - add more source gids... */ + +#define QLNXR_ENET_STATE_BIT (0) + +#define QLNXR_MAX_MSIX (16) + + +struct qlnxr_cnq { + struct qlnxr_dev *dev; + struct ecore_chain pbl; + struct ecore_sb_info *sb; + char name[32]; + u64 n_comp; + __le16 *hw_cons_ptr; + u8 index; + int irq_rid; + struct resource *irq; + void *irq_handle; +}; + +struct qlnxr_device_attr { + /* Vendor specific information */ + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 fw_ver; + + u64 node_guid; /* node GUID */ + u64 sys_image_guid; /* System image GUID */ + + u8 max_cnq; + u8 max_sge; /* Maximum # of scatter/gather entries + * per Work Request supported + */ + u16 max_inline; + u32 max_sqe; /* Maximum number of send outstanding send work + * requests on any Work Queue supported + */ + u32 max_rqe; /* Maximum number of receive outstanding receive + * work requests on any Work Queue supported + */ + u8 max_qp_resp_rd_atomic_resc; /* Maximum number of RDMA Reads + * & atomic operation that can + * be outstanding per QP + */ + + u8 max_qp_req_rd_atomic_resc; /* The maximum depth per QP for + * initiation of RDMA Read + * & atomic operations + */ + u64 max_dev_resp_rd_atomic_resc; + u32 max_cq; + u32 max_qp; + u32 max_mr; /* Maximum # of MRs supported */ + u64 max_mr_size; /* Size (in bytes) of largest contiguous memory + * block that can be registered by this device + */ + u32 max_cqe; + u32 max_mw; /* Maximum # of memory windows supported */ + u32 max_fmr; + u32 max_mr_mw_fmr_pbl; + u64 max_mr_mw_fmr_size; + u32 max_pd; /* Maximum # of protection domains supported */ + u32 max_ah; + u8 max_pkey; + u32 max_srq; /* Maximum number of SRQs */ + u32 max_srq_wr; /* Maximum number of WRs per SRQ */ + u8 max_srq_sge; /* Maximum number of SGE per WQE */ + u8 max_stats_queues; /* Maximum number of statistics queues */ + u32 dev_caps; + + /* Abilty to support RNR-NAK generation */ + +#define QLNXR_ROCE_DEV_CAP_RNR_NAK_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_RNR_NAK_SHIFT 0 + /* Abilty to support shutdown port */ +#define QLNXR_ROCE_DEV_CAP_SHUTDOWN_PORT_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_SHUTDOWN_PORT_SHIFT 1 + /* Abilty to support port active event */ +#define QLNXR_ROCE_DEV_CAP_PORT_ACTIVE_EVENT_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_PORT_ACTIVE_EVENT_SHIFT 2 + /* Abilty to support port change event */ +#define QLNXR_ROCE_DEV_CAP_PORT_CHANGE_EVENT_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_PORT_CHANGE_EVENT_SHIFT 3 + /* Abilty to support system image GUID */ +#define QLNXR_ROCE_DEV_CAP_SYS_IMAGE_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_SYS_IMAGE_SHIFT 4 + /* Abilty to support bad P_Key counter support */ +#define QLNXR_ROCE_DEV_CAP_BAD_PKEY_CNT_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_BAD_PKEY_CNT_SHIFT 5 + /* Abilty to support atomic operations */ +#define QLNXR_ROCE_DEV_CAP_ATOMIC_OP_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_ATOMIC_OP_SHIFT 6 +#define QLNXR_ROCE_DEV_CAP_RESIZE_CQ_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_RESIZE_CQ_SHIFT 7 + /* Abilty to 
support modifying the maximum number of + * outstanding work requests per QP + */ +#define QLNXR_ROCE_DEV_CAP_RESIZE_MAX_WR_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_RESIZE_MAX_WR_SHIFT 8 + + /* Abilty to support automatic path migration */ +#define QLNXR_ROCE_DEV_CAP_AUTO_PATH_MIG_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_AUTO_PATH_MIG_SHIFT 9 + /* Abilty to support the base memory management extensions */ +#define QLNXR_ROCE_DEV_CAP_BASE_MEMORY_EXT_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_BASE_MEMORY_EXT_SHIFT 10 +#define QLNXR_ROCE_DEV_CAP_BASE_QUEUE_EXT_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_BASE_QUEUE_EXT_SHIFT 11 + /* Abilty to support multipile page sizes per memory region */ +#define QLNXR_ROCE_DEV_CAP_MULTI_PAGE_PER_MR_EXT_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_MULTI_PAGE_PER_MR_EXT_SHIFT 12 + /* Abilty to support block list physical buffer list */ +#define QLNXR_ROCE_DEV_CAP_BLOCK_MODE_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_BLOCK_MODE_SHIFT 13 + /* Abilty to support zero based virtual addresses */ +#define QLNXR_ROCE_DEV_CAP_ZBVA_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_ZBVA_SHIFT 14 + /* Abilty to support local invalidate fencing */ +#define QLNXR_ROCE_DEV_CAP_LOCAL_INV_FENCE_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_LOCAL_INV_FENCE_SHIFT 15 + /* Abilty to support Loopback on QP */ +#define QLNXR_ROCE_DEV_CAP_LB_INDICATOR_MASK 0x1 +#define QLNXR_ROCE_DEV_CAP_LB_INDICATOR_SHIFT 16 + u64 page_size_caps; + u8 dev_ack_delay; + u32 reserved_lkey; /* Value of reserved L_key */ + u32 bad_pkey_counter;/* Bad P_key counter support + * indicator + */ + struct ecore_rdma_events events; +}; + +struct qlnxr_dev { + struct ib_device ibdev; + qlnx_host_t *ha; + struct ecore_dev *cdev; + + /* Added to extend Applications Support */ + struct pci_dev *pdev; + uint32_t dp_module; + uint8_t dp_level; + + void *rdma_ctx; + + struct mtx idr_lock; + struct idr qpidr; + + uint32_t wq_multiplier; + int num_cnq; + + struct ecore_sb_info sb_array[QLNXR_MAX_MSIX]; + struct qlnxr_cnq cnq_array[QLNXR_MAX_MSIX]; + + int sb_start; + + int gsi_qp_created; + struct qlnxr_cq *gsi_sqcq; + struct qlnxr_cq *gsi_rqcq; + struct qlnxr_qp *gsi_qp; + + /* TBD: we'll need an array of these probablly per DPI... */ + void __iomem *db_addr; + uint64_t db_phys_addr; + uint32_t db_size; + uint16_t dpi; + + uint64_t guid; + enum ib_atomic_cap atomic_cap; + + union ib_gid sgid_tbl[QLNXR_MAX_SGID]; + struct mtx sgid_lock; + struct notifier_block nb_inet; + struct notifier_block nb_inet6; + + uint8_t mr_key; + struct list_head entry; + + struct dentry *dbgfs; + + uint8_t gsi_ll2_mac_address[ETH_ALEN]; + uint8_t gsi_ll2_handle; + + unsigned long enet_state; + + struct workqueue_struct *iwarp_wq; + + volatile uint32_t pd_count; + struct qlnxr_device_attr attr; + uint8_t user_dpm_enabled; +}; + +typedef struct qlnxr_dev qlnxr_dev_t; + + +struct qlnxr_pd { + struct ib_pd ibpd; + u32 pd_id; + struct qlnxr_ucontext *uctx; +}; + +struct qlnxr_ucontext { + struct ib_ucontext ibucontext; + struct qlnxr_dev *dev; + struct qlnxr_pd *pd; + u64 dpi_addr; + u64 dpi_phys_addr; + u32 dpi_size; + u16 dpi; + + struct list_head mm_head; + struct mutex mm_list_lock; +}; + + + +struct qlnxr_dev_attr { + struct ib_device_attr ib_attr; +}; + +struct qlnxr_dma_mem { + void *va; + dma_addr_t pa; + u32 size; +}; + +struct qlnxr_pbl { + struct list_head list_entry; + void *va; + dma_addr_t pa; +}; + +struct qlnxr_queue_info { + void *va; + dma_addr_t dma; + u32 size; + u16 len; + u16 entry_size; /* Size of an element in the queue */ + u16 id; /* qid, where to ring the doorbell. 
*/ + u16 head, tail; + bool created; +}; + +struct qlnxr_eq { + struct qlnxr_queue_info q; + u32 vector; + int cq_cnt; + struct qlnxr_dev *dev; + char irq_name[32]; +}; + +struct qlnxr_mq { + struct qlnxr_queue_info sq; + struct qlnxr_queue_info cq; + bool rearm_cq; +}; + +struct phy_info { + u16 auto_speeds_supported; + u16 fixed_speeds_supported; + u16 phy_type; + u16 interface_type; +}; + +union db_prod64 { + struct rdma_pwm_val32_data data; + u64 raw; +}; + +enum qlnxr_cq_type { + QLNXR_CQ_TYPE_GSI, + QLNXR_CQ_TYPE_KERNEL, + QLNXR_CQ_TYPE_USER +}; + +struct qlnxr_pbl_info { + u32 num_pbls; + u32 num_pbes; + u32 pbl_size; + u32 pbe_size; + bool two_layered; +}; + +struct qlnxr_userq { + struct ib_umem *umem; + struct qlnxr_pbl_info pbl_info; + struct qlnxr_pbl *pbl_tbl; + u64 buf_addr; + size_t buf_len; +}; + +struct qlnxr_cq { + struct ib_cq ibcq; /* must be first */ + + enum qlnxr_cq_type cq_type; + uint32_t sig; + uint16_t icid; + + /* relevant to cqs created from kernel space only (ULPs) */ + spinlock_t cq_lock; + uint8_t arm_flags; + struct ecore_chain pbl; + + void __iomem *db_addr; /* db address for cons update*/ + union db_prod64 db; + + uint8_t pbl_toggle; + union rdma_cqe *latest_cqe; + union rdma_cqe *toggle_cqe; + + /* TODO: remove since it is redundant with 32 bit chains */ + uint32_t cq_cons; + + /* relevant to cqs created from user space only (applications) */ + struct qlnxr_userq q; + + /* destroy-IRQ handler race prevention */ + uint8_t destroyed; + uint16_t cnq_notif; +}; + + +struct qlnxr_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; +}; + +union db_prod32 { + struct rdma_pwm_val16_data data; + u32 raw; +}; + +struct qlnxr_qp_hwq_info { + /* WQE Elements*/ + struct ecore_chain pbl; + u64 p_phys_addr_tbl; + u32 max_sges; + + /* WQE */ + u16 prod; /* WQE prod index for SW ring */ + u16 cons; /* WQE cons index for SW ring */ + u16 wqe_cons; + u16 gsi_cons; /* filled in by GSI implementation */ + u16 max_wr; + + /* DB */ + void __iomem *db; /* Doorbell address */ + union db_prod32 db_data; /* Doorbell data */ + + /* Required for iwarp_only */ + void __iomem *iwarp_db2; /* Doorbell address */ + union db_prod32 iwarp_db2_data; /* Doorbell data */ +}; + +#define QLNXR_INC_SW_IDX(p_info, index) \ + do { \ + p_info->index = (p_info->index + 1) & \ + ecore_chain_get_capacity(p_info->pbl) \ + } while (0) + +struct qlnxr_srq_hwq_info { + u32 max_sges; + u32 max_wr; + struct ecore_chain pbl; + u64 p_phys_addr_tbl; + u32 wqe_prod; /* WQE prod index in HW ring */ + u32 sge_prod; /* SGE prod index in HW ring */ + u32 wr_prod_cnt; /* wr producer count */ + u32 wr_cons_cnt; /* wr consumer count */ + u32 num_elems; + + u32 *virt_prod_pair_addr; /* producer pair virtual address */ + dma_addr_t phy_prod_pair_addr; /* producer pair physical address */ +}; + +struct qlnxr_srq { + struct ib_srq ibsrq; + struct qlnxr_dev *dev; + /* relevant to cqs created from user space only (applications) */ + struct qlnxr_userq usrq; + struct qlnxr_srq_hwq_info hw_srq; + struct ib_umem *prod_umem; + u16 srq_id; + /* lock to protect srq recv post */ + spinlock_t lock; +}; + +enum qlnxr_qp_err_bitmap { + QLNXR_QP_ERR_SQ_FULL = 1 << 0, + QLNXR_QP_ERR_RQ_FULL = 1 << 1, + QLNXR_QP_ERR_BAD_SR = 1 << 2, + QLNXR_QP_ERR_BAD_RR = 1 << 3, + QLNXR_QP_ERR_SQ_PBL_FULL = 1 << 4, + QLNXR_QP_ERR_RQ_PBL_FULL = 1 << 5, +}; + +struct mr_info { + struct qlnxr_pbl *pbl_table; + struct qlnxr_pbl_info pbl_info; + struct list_head free_pbl_list; + struct list_head inuse_pbl_list; + u32 completed; + u32 completed_handled; +}; 
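The QLNXR_ROCE_DEV_CAP_* mask/shift pairs declared in struct qlnxr_device_attr above each describe a one-bit capability packed into the dev_caps word. A minimal sketch, assuming only those definitions, of how such a bit could be read; the helper name qlnxr_dev_cap_test is illustrative only and is not part of the driver:

static inline int
qlnxr_dev_cap_test(u32 dev_caps, u32 mask, u32 shift)
{
	/* each capability field is (value & mask) placed at 'shift' */
	return ((dev_caps >> shift) & mask) != 0;
}

/* e.g. checking atomic-operation support on a struct qlnxr_device_attr *attr:
 *	qlnxr_dev_cap_test(attr->dev_caps,
 *	    QLNXR_ROCE_DEV_CAP_ATOMIC_OP_MASK,
 *	    QLNXR_ROCE_DEV_CAP_ATOMIC_OP_SHIFT);
 */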
+ +#if __FreeBSD_version < 1102000 +#define DEFINE_IB_FAST_REG +#else +#define DEFINE_ALLOC_MR +#endif + +#ifdef DEFINE_IB_FAST_REG +struct qlnxr_fast_reg_page_list { + struct ib_fast_reg_page_list ibfrpl; + struct qlnxr_dev *dev; + struct mr_info info; +}; +#endif +struct qlnxr_qp { + struct ib_qp ibqp; /* must be first */ + struct qlnxr_dev *dev; + struct qlnxr_iw_ep *ep; + struct qlnxr_qp_hwq_info sq; + struct qlnxr_qp_hwq_info rq; + + u32 max_inline_data; + +#if __FreeBSD_version >= 1100000 + spinlock_t q_lock ____cacheline_aligned; +#else + spinlock_t q_lock; +#endif + + struct qlnxr_cq *sq_cq; + struct qlnxr_cq *rq_cq; + struct qlnxr_srq *srq; + enum ecore_roce_qp_state state; /* QP state */ + u32 id; + struct qlnxr_pd *pd; + enum ib_qp_type qp_type; + struct ecore_rdma_qp *ecore_qp; + u32 qp_id; + u16 icid; + u16 mtu; + int sgid_idx; + u32 rq_psn; + u32 sq_psn; + u32 qkey; + u32 dest_qp_num; + u32 sig; /* unique signature to identify valid QP */ + + /* relevant to qps created from kernel space only (ULPs) */ + u8 prev_wqe_size; + u16 wqe_cons; + u32 err_bitmap; + bool signaled; + /* SQ shadow */ + struct { + u64 wr_id; + enum ib_wc_opcode opcode; + u32 bytes_len; + u8 wqe_size; + bool signaled; + dma_addr_t icrc_mapping; + u32 *icrc; +#ifdef DEFINE_IB_FAST_REG + struct qlnxr_fast_reg_page_list *frmr; +#endif + struct qlnxr_mr *mr; + } *wqe_wr_id; + + /* RQ shadow */ + struct { + u64 wr_id; + struct ib_sge sg_list[RDMA_MAX_SGE_PER_RQ_WQE]; + uint8_t wqe_size; + + /* for GSI only */ + u8 smac[ETH_ALEN]; + u16 vlan_id; + int rc; + } *rqe_wr_id; + + /* relevant to qps created from user space only (applications) */ + struct qlnxr_userq usq; + struct qlnxr_userq urq; + atomic_t refcnt; + bool destroyed; +}; + +enum qlnxr_mr_type { + QLNXR_MR_USER, + QLNXR_MR_KERNEL, + QLNXR_MR_DMA, + QLNXR_MR_FRMR +}; + + +struct qlnxr_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + + struct ecore_rdma_register_tid_in_params hw_mr; + enum qlnxr_mr_type type; + + struct qlnxr_dev *dev; + struct mr_info info; + + u64 *pages; + u32 npages; + + u64 *iova_start; /* valid only for kernel_mr */ +}; + + +struct qlnxr_mm { + struct { + u64 phy_addr; + unsigned long len; + } key; + struct list_head entry; +}; + +struct qlnxr_iw_listener { + struct qlnxr_dev *dev; + struct iw_cm_id *cm_id; + int backlog; + void *ecore_handle; +}; + +struct qlnxr_iw_ep { + struct qlnxr_dev *dev; + struct iw_cm_id *cm_id; + struct qlnxr_qp *qp; + void *ecore_context; + u8 during_connect; +}; + +static inline void +qlnxr_inc_sw_cons(struct qlnxr_qp_hwq_info *info) +{ + info->cons = (info->cons + 1) % info->max_wr; + info->wqe_cons++; +} + +static inline void +qlnxr_inc_sw_prod(struct qlnxr_qp_hwq_info *info) +{ + info->prod = (info->prod + 1) % info->max_wr; +} + +static inline struct qlnxr_dev * +get_qlnxr_dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct qlnxr_dev, ibdev); +} + +static inline struct qlnxr_ucontext * +get_qlnxr_ucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct qlnxr_ucontext, ibucontext); +} + +static inline struct qlnxr_pd * +get_qlnxr_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct qlnxr_pd, ibpd); +} + +static inline struct qlnxr_cq * +get_qlnxr_cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct qlnxr_cq, ibcq); +} + +static inline struct qlnxr_qp * +get_qlnxr_qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct qlnxr_qp, ibqp); +} + +static inline struct qlnxr_mr * +get_qlnxr_mr(struct ib_mr *ibmr) +{ + return 
container_of(ibmr, struct qlnxr_mr, ibmr); +} + +static inline struct qlnxr_ah * +get_qlnxr_ah(struct ib_ah *ibah) +{ + return container_of(ibah, struct qlnxr_ah, ibah); +} + +static inline struct qlnxr_srq * +get_qlnxr_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct qlnxr_srq, ibsrq); +} + +static inline bool qlnxr_qp_has_srq(struct qlnxr_qp *qp) +{ + return !!qp->srq; +} + +static inline bool qlnxr_qp_has_sq(struct qlnxr_qp *qp) +{ + if (qp->qp_type == IB_QPT_GSI) + return 0; + + return 1; +} + +static inline bool qlnxr_qp_has_rq(struct qlnxr_qp *qp) +{ + if (qp->qp_type == IB_QPT_GSI || qlnxr_qp_has_srq(qp)) + return 0; + + return 1; +} + + +#ifdef DEFINE_IB_FAST_REG +static inline struct qlnxr_fast_reg_page_list *get_qlnxr_frmr_list( + struct ib_fast_reg_page_list *ifrpl) +{ + return container_of(ifrpl, struct qlnxr_fast_reg_page_list, ibfrpl); +} +#endif + +#define SET_FIELD2(value, name, flag) \ + do { \ + (value) |= ((flag) << (name ## _SHIFT)); \ + } while (0) + +#define QLNXR_RESP_IMM (RDMA_CQE_RESPONDER_IMM_FLG_MASK << \ + RDMA_CQE_RESPONDER_IMM_FLG_SHIFT) +#define QLNXR_RESP_RDMA (RDMA_CQE_RESPONDER_RDMA_FLG_MASK << \ + RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT) +#define QLNXR_RESP_INV (RDMA_CQE_RESPONDER_INV_FLG_MASK << \ + RDMA_CQE_RESPONDER_INV_FLG_SHIFT) + +#define QLNXR_RESP_RDMA_IMM (QLNXR_RESP_IMM | QLNXR_RESP_RDMA) + +static inline int +qlnxr_get_dmac(struct qlnxr_dev *dev, struct ib_ah_attr *ah_attr, u8 *mac_addr) +{ +#ifdef DEFINE_NO_IP_BASED_GIDS + u8 *guid = &ah_attr->grh.dgid.raw[8]; /* GID's 64 MSBs are the GUID */ +#endif + union ib_gid zero_sgid = { { 0 } }; + struct in6_addr in6; + + if (!memcmp(&ah_attr->grh.dgid, &zero_sgid, sizeof(union ib_gid))) { + memset(mac_addr, 0x00, ETH_ALEN); + return -EINVAL; + } + + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); + +#ifdef DEFINE_NO_IP_BASED_GIDS + /* get the MAC address from the GUID i.e. EUI-64 to MAC address */ + mac_addr[0] = guid[0] ^ 2; /* toggle the local/universal bit to local */ + mac_addr[1] = guid[1]; + mac_addr[2] = guid[2]; + mac_addr[3] = guid[5]; + mac_addr[4] = guid[6]; + mac_addr[5] = guid[7]; +#else + memcpy(mac_addr, ah_attr->dmac, ETH_ALEN); +#endif + return 0; +} + +extern int qlnx_rdma_ll2_set_mac_filter(void *rdma_ctx, uint8_t *old_mac_address, + uint8_t *new_mac_address); + + +#define QLNXR_ROCE_PKEY_MAX 1 +#define QLNXR_ROCE_PKEY_TABLE_LEN 1 +#define QLNXR_ROCE_PKEY_DEFAULT 0xffff + +#if __FreeBSD_version < 1100000 +#define DEFINE_IB_AH_ATTR_WITH_DMAC (0) +#define DEFINE_IB_UMEM_WITH_CHUNK (1) +#else +#define DEFINE_IB_AH_ATTR_WITH_DMAC (1) +#endif + +#define QLNX_IS_IWARP(rdev) IS_IWARP(ECORE_LEADING_HWFN(rdev->cdev)) +#define QLNX_IS_ROCE(rdev) IS_ROCE(ECORE_LEADING_HWFN(rdev->cdev)) + +#define MAX_RXMIT_CONNS 16 + +#endif /* #ifndef __QLNX_DEF_H_ */ diff --git a/sys/dev/qlnx/qlnxr/qlnxr_os.c b/sys/dev/qlnx/qlnxr/qlnxr_os.c new file mode 100644 index 000000000000..a9e426e1ab18 --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_os.c @@ -0,0 +1,1366 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +/* + * File: qlnxr_os.c + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "qlnxr_def.h" + +SYSCTL_NODE(_dev, OID_AUTO, qnxr, CTLFLAG_RW, 0, "Qlogic RDMA module"); + +uint32_t delayed_ack = 0; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, delayed_ack, CTLFLAG_RW, &delayed_ack, 1, + "iWARP: Delayed Ack: 0 - Disabled 1 - Enabled. Default: Disabled"); + +uint32_t timestamp = 1; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, timestamp, CTLFLAG_RW, &timestamp, 1, + "iWARP: Timestamp: 0 - Disabled 1 - Enabled. Default: Enabled"); + +uint32_t rcv_wnd_size = 0; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, rcv_wnd_size, CTLFLAG_RW, &rcv_wnd_size, 1, + "iWARP: Receive Window Size in K. Default 1M"); + +uint32_t crc_needed = 1; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, crc_needed, CTLFLAG_RW, &crc_needed, 1, + "iWARP: CRC needed 0 - Disabled 1 - Enabled. Default: Enabled"); + +uint32_t peer2peer = 1; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, peer2peer, CTLFLAG_RW, &peer2peer, 1, + "iWARP: Support peer2peer ULPs 0 - Disabled 1 - Enabled. Default: Enabled"); + +uint32_t mpa_enhanced = 1; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, mpa_enhanced, CTLFLAG_RW, &mpa_enhanced, 1, + "iWARP: MPA Enhanced mode. Default: 1"); + +uint32_t rtr_type = 7; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, rtr_type, CTLFLAG_RW, &rtr_type, 1, + "iWARP: RDMAP opcode to use for the RTR message: BITMAP 1: RDMA_SEND 2: RDMA_WRITE 4: RDMA_READ. 
Default: 7"); + + +#define QNXR_WQ_MULTIPLIER_MIN (1) +#define QNXR_WQ_MULTIPLIER_MAX (7) +#define QNXR_WQ_MULTIPLIER_DFT (3) + +uint32_t wq_multiplier = QNXR_WQ_MULTIPLIER_DFT; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, wq_multiplier, CTLFLAG_RW, &wq_multiplier, 1, + "When creating a WQ the actual number of WQE created will" + " be multiplied by this number (default is 3)."); + +static ssize_t +show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct qlnxr_dev *dev = dev_get_drvdata(device); + + return sprintf(buf, "0x%x\n", dev->cdev->vendor_id); +} + +static ssize_t +show_hca_type(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qlnxr_dev *dev = dev_get_drvdata(device); + return sprintf(buf, "QLogic0x%x\n", dev->cdev->device_id); +} + +static ssize_t +show_fw_ver(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qlnxr_dev *dev = dev_get_drvdata(device); + uint32_t fw_ver = (uint32_t) dev->attr.fw_ver; + + return sprintf(buf, "%d.%d.%d\n", + (fw_ver >> 24) & 0xff, (fw_ver >> 16) & 0xff, + (fw_ver >> 8) & 0xff); +} + +static ssize_t +show_board(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qlnxr_dev *dev = dev_get_drvdata(device); + return sprintf(buf, "%x\n", dev->cdev->device_id); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct device_attribute *qlnxr_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_hca_type, + &dev_attr_fw_ver, + &dev_attr_board_id +}; + +static void +qlnxr_ib_dispatch_event(qlnxr_dev_t *dev, uint8_t port_num, + enum ib_event_type type) +{ + struct ib_event ibev; + + QL_DPRINT12(dev->ha, "enter\n"); + + ibev.device = &dev->ibdev; + ibev.element.port_num = port_num; + ibev.event = type; + + ib_dispatch_event(&ibev); + + QL_DPRINT12(dev->ha, "exit\n"); +} + +static int +__qlnxr_iw_destroy_listen(struct iw_cm_id *cm_id) +{ + qlnxr_iw_destroy_listen(cm_id); + + return (0); +} + +static int +qlnxr_register_device(qlnxr_dev_t *dev) +{ + struct ib_device *ibdev; + struct iw_cm_verbs *iwcm = NULL; + int ret; + + QL_DPRINT12(dev->ha, "enter\n"); + + ibdev = &dev->ibdev; + + strlcpy(ibdev->name, "qlnxr%d", IB_DEVICE_NAME_MAX); + + memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid)); + memcpy(&ibdev->node_guid, dev->ha->primary_mac, ETHER_ADDR_LEN); + + memcpy(ibdev->node_desc, QLNXR_NODE_DESC, sizeof(QLNXR_NODE_DESC)); + + ibdev->owner = THIS_MODULE; + ibdev->uverbs_abi_ver = 7; + ibdev->local_dma_lkey = 0; + + ibdev->uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV); + + if (QLNX_IS_IWARP(dev)) { + ibdev->node_type = RDMA_NODE_RNIC; + 
ibdev->query_gid = qlnxr_iw_query_gid; + } else { + ibdev->node_type = RDMA_NODE_IB_CA; + ibdev->query_gid = qlnxr_query_gid; + ibdev->uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); + ibdev->create_srq = qlnxr_create_srq; + ibdev->destroy_srq = qlnxr_destroy_srq; + ibdev->modify_srq = qlnxr_modify_srq; + ibdev->query_srq = qlnxr_query_srq; + ibdev->post_srq_recv = qlnxr_post_srq_recv; + } + + ibdev->phys_port_cnt = 1; + ibdev->num_comp_vectors = dev->num_cnq; + + /* mandatory verbs. */ + ibdev->query_device = qlnxr_query_device; + ibdev->query_port = qlnxr_query_port; + ibdev->modify_port = qlnxr_modify_port; + + ibdev->alloc_ucontext = qlnxr_alloc_ucontext; + ibdev->dealloc_ucontext = qlnxr_dealloc_ucontext; + /* mandatory to support user space verbs consumer. */ + ibdev->mmap = qlnxr_mmap; + + ibdev->alloc_pd = qlnxr_alloc_pd; + ibdev->dealloc_pd = qlnxr_dealloc_pd; + + ibdev->create_cq = qlnxr_create_cq; + ibdev->destroy_cq = qlnxr_destroy_cq; + ibdev->resize_cq = qlnxr_resize_cq; + ibdev->req_notify_cq = qlnxr_arm_cq; + + ibdev->create_qp = qlnxr_create_qp; + ibdev->modify_qp = qlnxr_modify_qp; + ibdev->query_qp = qlnxr_query_qp; + ibdev->destroy_qp = qlnxr_destroy_qp; + + ibdev->query_pkey = qlnxr_query_pkey; + ibdev->create_ah = qlnxr_create_ah; + ibdev->destroy_ah = qlnxr_destroy_ah; + ibdev->query_ah = qlnxr_query_ah; + ibdev->modify_ah = qlnxr_modify_ah; + ibdev->get_dma_mr = qlnxr_get_dma_mr; + ibdev->dereg_mr = qlnxr_dereg_mr; + ibdev->reg_user_mr = qlnxr_reg_user_mr; + +#if __FreeBSD_version >= 1102000 + ibdev->alloc_mr = qlnxr_alloc_mr; + ibdev->map_mr_sg = qlnxr_map_mr_sg; + ibdev->get_port_immutable = qlnxr_get_port_immutable; +#else + ibdev->reg_phys_mr = qlnxr_reg_kernel_mr; + ibdev->alloc_fast_reg_mr = qlnxr_alloc_frmr; + ibdev->alloc_fast_reg_page_list = qlnxr_alloc_frmr_page_list; + ibdev->free_fast_reg_page_list = qlnxr_free_frmr_page_list; +#endif /* #if __FreeBSD_version >= 1102000 */ + + ibdev->poll_cq = qlnxr_poll_cq; + ibdev->post_send = qlnxr_post_send; + ibdev->post_recv = qlnxr_post_recv; + ibdev->process_mad = qlnxr_process_mad; + + + + ibdev->dma_device = &dev->pdev->dev; + + ibdev->get_link_layer = qlnxr_link_layer; + + if (QLNX_IS_IWARP(dev)) { + iwcm = kmalloc(sizeof(*iwcm), GFP_KERNEL); + + device_printf(dev->ha->pci_dev, "device is IWARP\n"); + if (iwcm == NULL) + return (-ENOMEM); + + ibdev->iwcm = iwcm; + + iwcm->connect = qlnxr_iw_connect; + iwcm->accept = qlnxr_iw_accept; + iwcm->reject = qlnxr_iw_reject; + +#if (__FreeBSD_version >= 1004000) && (__FreeBSD_version < 1102000) + + iwcm->create_listen_ep = qlnxr_iw_create_listen; + iwcm->destroy_listen_ep = qlnxr_iw_destroy_listen; +#else + iwcm->create_listen = qlnxr_iw_create_listen; + iwcm->destroy_listen = __qlnxr_iw_destroy_listen; +#endif + iwcm->add_ref = qlnxr_iw_qp_add_ref; + iwcm->rem_ref = qlnxr_iw_qp_rem_ref; + iwcm->get_qp = qlnxr_iw_get_qp; + } + + ret = ib_register_device(ibdev, NULL); + if (ret) { + kfree(iwcm); + } + + QL_DPRINT12(dev->ha, "exit\n"); + return ret; +} + +#define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo)) + +static void +qlnxr_intr(void *handle) +{ + struct qlnxr_cnq *cnq = handle; + struct qlnxr_cq *cq; + struct regpair *cq_handle; + u16 hw_comp_cons, sw_comp_cons; + qlnx_host_t *ha; + + ha = cnq->dev->ha; + + QL_DPRINT12(ha, "enter cnq = %p\n", handle); + + ecore_sb_ack(cnq->sb, 
IGU_INT_DISABLE, 0 /*do not update*/); + + ecore_sb_update_sb_idx(cnq->sb); + + hw_comp_cons = le16_to_cpu(*cnq->hw_cons_ptr); + sw_comp_cons = ecore_chain_get_cons_idx(&cnq->pbl); + + rmb(); + + QL_DPRINT12(ha, "cnq = %p hw_comp_cons = 0x%x sw_comp_cons = 0x%x\n", + handle, hw_comp_cons, sw_comp_cons); + + while (sw_comp_cons != hw_comp_cons) { + cq_handle = (struct regpair *)ecore_chain_consume(&cnq->pbl); + cq = (struct qlnxr_cq *)(uintptr_t)HILO_U64(cq_handle->hi, + cq_handle->lo); + + if (cq == NULL) { + QL_DPRINT11(ha, "cq == NULL\n"); + break; + } + + if (cq->sig != QLNXR_CQ_MAGIC_NUMBER) { + QL_DPRINT11(ha, + "cq->sig = 0x%x QLNXR_CQ_MAGIC_NUMBER = 0x%x\n", + cq->sig, QLNXR_CQ_MAGIC_NUMBER); + break; + } + cq->arm_flags = 0; + + if (!cq->destroyed && cq->ibcq.comp_handler) { + QL_DPRINT11(ha, "calling comp_handler = %p " + "ibcq = %p cq_context = %p\n", + cq->ibcq.comp_handler, &cq->ibcq, cq->ibcq.cq_context); + + (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); + } + cq->cnq_notif++; + + sw_comp_cons = ecore_chain_get_cons_idx(&cnq->pbl); + + cnq->n_comp++; + } + + ecore_rdma_cnq_prod_update(cnq->dev->rdma_ctx, cnq->index, sw_comp_cons); + + ecore_sb_ack(cnq->sb, IGU_INT_ENABLE, 1 /*update*/); + + QL_DPRINT12(ha, "exit cnq = %p\n", handle); + return; +} + +static void +qlnxr_release_irqs(struct qlnxr_dev *dev) +{ + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + for (i = 0; i < dev->num_cnq; i++) { + if (dev->cnq_array[i].irq_handle) + (void)bus_teardown_intr(dev->ha->pci_dev, + dev->cnq_array[i].irq, + dev->cnq_array[i].irq_handle); + + if (dev->cnq_array[i].irq) + (void) bus_release_resource(dev->ha->pci_dev, + SYS_RES_IRQ, + dev->cnq_array[i].irq_rid, + dev->cnq_array[i].irq); + } + QL_DPRINT12(ha, "exit\n"); + return; +} + +static int +qlnxr_setup_irqs(struct qlnxr_dev *dev) +{ + int start_irq_rid; + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + start_irq_rid = dev->sb_start + 2; + + QL_DPRINT12(ha, "enter start_irq_rid = %d num_rss = %d\n", + start_irq_rid, dev->ha->num_rss); + + + for (i = 0; i < dev->num_cnq; i++) { + + dev->cnq_array[i].irq_rid = start_irq_rid + i; + + dev->cnq_array[i].irq = bus_alloc_resource_any(dev->ha->pci_dev, + SYS_RES_IRQ, + &dev->cnq_array[i].irq_rid, + (RF_ACTIVE | RF_SHAREABLE)); + + if (dev->cnq_array[i].irq == NULL) { + + QL_DPRINT11(ha, + "bus_alloc_resource_any failed irq_rid = %d\n", + dev->cnq_array[i].irq_rid); + + goto qlnxr_setup_irqs_err; + } + + if (bus_setup_intr(dev->ha->pci_dev, + dev->cnq_array[i].irq, + (INTR_TYPE_NET | INTR_MPSAFE), + NULL, qlnxr_intr, &dev->cnq_array[i], + &dev->cnq_array[i].irq_handle)) { + + QL_DPRINT11(ha, "bus_setup_intr failed\n"); + goto qlnxr_setup_irqs_err; + } + QL_DPRINT12(ha, "irq_rid = %d irq = %p irq_handle = %p\n", + dev->cnq_array[i].irq_rid, dev->cnq_array[i].irq, + dev->cnq_array[i].irq_handle); + } + + QL_DPRINT12(ha, "exit\n"); + return (0); + +qlnxr_setup_irqs_err: + qlnxr_release_irqs(dev); + + QL_DPRINT12(ha, "exit -1\n"); + return (-1); +} + +static void +qlnxr_free_resources(struct qlnxr_dev *dev) +{ + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter dev->num_cnq = %d\n", dev->num_cnq); + + if (QLNX_IS_IWARP(dev)) { + if (dev->iwarp_wq != NULL) + destroy_workqueue(dev->iwarp_wq); + } + + for (i = 0; i < dev->num_cnq; i++) { + qlnx_free_mem_sb(dev->ha, &dev->sb_array[i]); + ecore_chain_free(&dev->ha->cdev, &dev->cnq_array[i].pbl); + } + + bzero(dev->cnq_array, (sizeof(struct qlnxr_cnq) * QLNXR_MAX_MSIX)); + bzero(dev->sb_array, 
(sizeof(struct ecore_sb_info) * QLNXR_MAX_MSIX)); + bzero(dev->sgid_tbl, (sizeof(union ib_gid) * QLNXR_MAX_SGID)); + + if (mtx_initialized(&dev->idr_lock)) + mtx_destroy(&dev->idr_lock); + + if (mtx_initialized(&dev->sgid_lock)) + mtx_destroy(&dev->sgid_lock); + + QL_DPRINT12(ha, "exit\n"); + return; +} + + +static int +qlnxr_alloc_resources(struct qlnxr_dev *dev) +{ + uint16_t n_entries; + int i, rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + bzero(dev->sgid_tbl, (sizeof (union ib_gid) * QLNXR_MAX_SGID)); + + mtx_init(&dev->idr_lock, "idr_lock", NULL, MTX_DEF); + mtx_init(&dev->sgid_lock, "sgid_lock", NULL, MTX_DEF); + + idr_init(&dev->qpidr); + + bzero(dev->sb_array, (sizeof (struct ecore_sb_info) * QLNXR_MAX_MSIX)); + bzero(dev->cnq_array, (sizeof (struct qlnxr_cnq) * QLNXR_MAX_MSIX)); + + dev->sb_start = ecore_rdma_get_sb_id(dev->rdma_ctx, 0); + + QL_DPRINT12(ha, "dev->sb_start = 0x%x\n", dev->sb_start); + + /* Allocate CNQ PBLs */ + + n_entries = min_t(u32, ECORE_RDMA_MAX_CNQ_SIZE, QLNXR_ROCE_MAX_CNQ_SIZE); + + for (i = 0; i < dev->num_cnq; i++) { + rc = qlnx_alloc_mem_sb(dev->ha, &dev->sb_array[i], + dev->sb_start + i); + if (rc) + goto qlnxr_alloc_resources_exit; + + rc = ecore_chain_alloc(&dev->ha->cdev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U16, + n_entries, + sizeof(struct regpair *), + &dev->cnq_array[i].pbl, + NULL); + + /* configure cnq, except name since ibdev.name is still NULL */ + dev->cnq_array[i].dev = dev; + dev->cnq_array[i].sb = &dev->sb_array[i]; + dev->cnq_array[i].hw_cons_ptr = + &(dev->sb_array[i].sb_virt->pi_array[ECORE_ROCE_PROTOCOL_INDEX]); + dev->cnq_array[i].index = i; + sprintf(dev->cnq_array[i].name, "qlnxr%d@pci:%d", + i, (dev->ha->pci_func)); + + } + + QL_DPRINT12(ha, "exit\n"); + return 0; + +qlnxr_alloc_resources_exit: + + qlnxr_free_resources(dev); + + QL_DPRINT12(ha, "exit -ENOMEM\n"); + return -ENOMEM; +} + +void +qlnxr_affiliated_event(void *context, u8 e_code, void *fw_handle) +{ +#define EVENT_TYPE_NOT_DEFINED 0 +#define EVENT_TYPE_CQ 1 +#define EVENT_TYPE_QP 2 +#define EVENT_TYPE_GENERAL 3 + + struct qlnxr_dev *dev = (struct qlnxr_dev *)context; + struct regpair *async_handle = (struct regpair *)fw_handle; + u64 roceHandle64 = ((u64)async_handle->hi << 32) + async_handle->lo; + struct qlnxr_cq *cq = (struct qlnxr_cq *)(uintptr_t)roceHandle64; + struct qlnxr_qp *qp = (struct qlnxr_qp *)(uintptr_t)roceHandle64; + u8 event_type = EVENT_TYPE_NOT_DEFINED; + struct ib_event event; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter context = %p e_code = 0x%x fw_handle = %p\n", + context, e_code, fw_handle); + + if (QLNX_IS_IWARP(dev)) { + switch (e_code) { + + case ECORE_IWARP_EVENT_CQ_OVERFLOW: + event.event = IB_EVENT_CQ_ERR; + event_type = EVENT_TYPE_CQ; + break; + + default: + QL_DPRINT12(ha, + "unsupported event %d on handle=%llx\n", + e_code, roceHandle64); + break; + } + } else { + switch (e_code) { + + case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR: + event.event = IB_EVENT_CQ_ERR; + event_type = EVENT_TYPE_CQ; + break; + + case ROCE_ASYNC_EVENT_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + event_type = EVENT_TYPE_QP; + break; + + case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR: + event.event = IB_EVENT_QP_FATAL; + event_type = EVENT_TYPE_QP; + break; + + case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR: + event.event = IB_EVENT_QP_REQ_ERR; + event_type = EVENT_TYPE_QP; + break; + + case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR: + event.event = IB_EVENT_QP_ACCESS_ERR; + 
event_type = EVENT_TYPE_QP; + break; + + /* NOTE the following are not implemented in FW + * ROCE_ASYNC_EVENT_CQ_ERR + * ROCE_ASYNC_EVENT_COMM_EST + */ + /* TODO associate the following events - + * ROCE_ASYNC_EVENT_SRQ_LIMIT + * ROCE_ASYNC_EVENT_LAST_WQE_REACHED + * ROCE_ASYNC_EVENT_LOCAL_CATASTROPHIC_ERR (un-affiliated) + */ + default: + QL_DPRINT12(ha, + "unsupported event 0x%x on fw_handle = %p\n", + e_code, fw_handle); + break; + } + } + + switch (event_type) { + + case EVENT_TYPE_CQ: + if (cq && cq->sig == QLNXR_CQ_MAGIC_NUMBER) { + struct ib_cq *ibcq = &cq->ibcq; + + if (ibcq->event_handler) { + event.device = ibcq->device; + event.element.cq = ibcq; + ibcq->event_handler(&event, ibcq->cq_context); + } + } else { + QL_DPRINT11(ha, + "CQ event with invalid CQ pointer" + " Handle = %llx\n", roceHandle64); + } + QL_DPRINT12(ha, + "CQ event 0x%x on handle = %p\n", e_code, cq); + break; + + case EVENT_TYPE_QP: + if (qp && qp->sig == QLNXR_QP_MAGIC_NUMBER) { + struct ib_qp *ibqp = &qp->ibqp; + + if (ibqp->event_handler) { + event.device = ibqp->device; + event.element.qp = ibqp; + ibqp->event_handler(&event, ibqp->qp_context); + } + } else { + QL_DPRINT11(ha, + "QP event 0x%x with invalid QP pointer" + " qp handle = %p\n", + e_code, roceHandle64); + } + QL_DPRINT12(ha, "QP event 0x%x on qp handle = %p\n", + e_code, qp); + break; + + case EVENT_TYPE_GENERAL: + break; + + default: + break; + + } + + QL_DPRINT12(ha, "exit\n"); + + return; +} + +void +qlnxr_unaffiliated_event(void *context, u8 e_code) +{ + struct qlnxr_dev *dev = (struct qlnxr_dev *)context; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter/exit \n"); + return; +} + + +static int +qlnxr_set_device_attr(struct qlnxr_dev *dev) +{ + struct ecore_rdma_device *ecore_attr; + struct qlnxr_device_attr *attr; + u32 page_size; + + ecore_attr = ecore_rdma_query_device(dev->rdma_ctx); + + page_size = ~dev->attr.page_size_caps + 1; + if(page_size > PAGE_SIZE) { + QL_DPRINT12(dev->ha, "Kernel page size : %ld is smaller than" + " minimum page size : %ld required by qlnxr\n", + PAGE_SIZE, page_size); + return -ENODEV; + } + attr = &dev->attr; + attr->vendor_id = ecore_attr->vendor_id; + attr->vendor_part_id = ecore_attr->vendor_part_id; + + QL_DPRINT12(dev->ha, "in qlnxr_set_device_attr, vendor : %x device : %x\n", + attr->vendor_id, attr->vendor_part_id); + + attr->hw_ver = ecore_attr->hw_ver; + attr->fw_ver = ecore_attr->fw_ver; + attr->node_guid = ecore_attr->node_guid; + attr->sys_image_guid = ecore_attr->sys_image_guid; + attr->max_cnq = ecore_attr->max_cnq; + attr->max_sge = ecore_attr->max_sge; + attr->max_inline = ecore_attr->max_inline; + attr->max_sqe = min_t(u32, ecore_attr->max_wqe, QLNXR_MAX_SQE); + attr->max_rqe = min_t(u32, ecore_attr->max_wqe, QLNXR_MAX_RQE); + attr->max_qp_resp_rd_atomic_resc = ecore_attr->max_qp_resp_rd_atomic_resc; + attr->max_qp_req_rd_atomic_resc = ecore_attr->max_qp_req_rd_atomic_resc; + attr->max_dev_resp_rd_atomic_resc = + ecore_attr->max_dev_resp_rd_atomic_resc; + attr->max_cq = ecore_attr->max_cq; + attr->max_qp = ecore_attr->max_qp; + attr->max_mr = ecore_attr->max_mr; + attr->max_mr_size = ecore_attr->max_mr_size; + attr->max_cqe = min_t(u64, ecore_attr->max_cqe, QLNXR_MAX_CQES); + attr->max_mw = ecore_attr->max_mw; + attr->max_fmr = ecore_attr->max_fmr; + attr->max_mr_mw_fmr_pbl = ecore_attr->max_mr_mw_fmr_pbl; + attr->max_mr_mw_fmr_size = ecore_attr->max_mr_mw_fmr_size; + attr->max_pd = ecore_attr->max_pd; + attr->max_ah = ecore_attr->max_ah; + attr->max_pkey = 
ecore_attr->max_pkey; + attr->max_srq = ecore_attr->max_srq; + attr->max_srq_wr = ecore_attr->max_srq_wr; + //attr->dev_caps = ecore_attr->dev_caps; + attr->page_size_caps = ecore_attr->page_size_caps; + attr->dev_ack_delay = ecore_attr->dev_ack_delay; + attr->reserved_lkey = ecore_attr->reserved_lkey; + attr->bad_pkey_counter = ecore_attr->bad_pkey_counter; + attr->max_stats_queues = ecore_attr->max_stats_queues; + + return 0; +} + + +static int +qlnxr_init_hw(struct qlnxr_dev *dev) +{ + struct ecore_rdma_events events; + struct ecore_rdma_add_user_out_params out_params; + struct ecore_rdma_cnq_params *cur_pbl; + struct ecore_rdma_start_in_params *in_params; + dma_addr_t p_phys_table; + u32 page_cnt; + int rc = 0; + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + in_params = kzalloc(sizeof(*in_params), GFP_KERNEL); + if (!in_params) { + rc = -ENOMEM; + goto out; + } + + bzero(&out_params, sizeof(struct ecore_rdma_add_user_out_params)); + bzero(&events, sizeof(struct ecore_rdma_events)); + + in_params->desired_cnq = dev->num_cnq; + + for (i = 0; i < dev->num_cnq; i++) { + cur_pbl = &in_params->cnq_pbl_list[i]; + + page_cnt = ecore_chain_get_page_cnt(&dev->cnq_array[i].pbl); + cur_pbl->num_pbl_pages = page_cnt; + + p_phys_table = ecore_chain_get_pbl_phys(&dev->cnq_array[i].pbl); + cur_pbl->pbl_ptr = (u64)p_phys_table; + } + + events.affiliated_event = qlnxr_affiliated_event; + events.unaffiliated_event = qlnxr_unaffiliated_event; + events.context = dev; + + in_params->events = &events; + in_params->roce.cq_mode = ECORE_RDMA_CQ_MODE_32_BITS; + in_params->max_mtu = dev->ha->max_frame_size; + + + if (QLNX_IS_IWARP(dev)) { + if (delayed_ack) + in_params->iwarp.flags |= ECORE_IWARP_DA_EN; + + if (timestamp) + in_params->iwarp.flags |= ECORE_IWARP_TS_EN; + + in_params->iwarp.rcv_wnd_size = rcv_wnd_size*1024; + in_params->iwarp.crc_needed = crc_needed; + in_params->iwarp.ooo_num_rx_bufs = + (MAX_RXMIT_CONNS * in_params->iwarp.rcv_wnd_size) / + in_params->max_mtu; + + in_params->iwarp.mpa_peer2peer = peer2peer; + in_params->iwarp.mpa_rev = + mpa_enhanced ? 
ECORE_MPA_REV2 : ECORE_MPA_REV1; + in_params->iwarp.mpa_rtr = rtr_type; + } + + memcpy(&in_params->mac_addr[0], dev->ha->primary_mac, ETH_ALEN); + + rc = ecore_rdma_start(dev->rdma_ctx, in_params); + if (rc) + goto out; + + rc = ecore_rdma_add_user(dev->rdma_ctx, &out_params); + if (rc) + goto out; + + dev->db_addr = (void *)(uintptr_t)out_params.dpi_addr; + dev->db_phys_addr = out_params.dpi_phys_addr; + dev->db_size = out_params.dpi_size; + dev->dpi = out_params.dpi; + + qlnxr_set_device_attr(dev); + + QL_DPRINT12(ha, + "cdev->doorbells = %p, db_phys_addr = %p db_size = 0x%x\n", + (void *)ha->cdev.doorbells, + (void *)ha->cdev.db_phys_addr, ha->cdev.db_size); + + QL_DPRINT12(ha, + "db_addr = %p db_phys_addr = %p db_size = 0x%x dpi = 0x%x\n", + (void *)dev->db_addr, (void *)dev->db_phys_addr, + dev->db_size, dev->dpi); +out: + kfree(in_params); + + QL_DPRINT12(ha, "exit\n"); + return rc; +} + +static void +qlnxr_build_sgid_mac(union ib_gid *sgid, unsigned char *mac_addr, + bool is_vlan, u16 vlan_id) +{ + sgid->global.subnet_prefix = OSAL_CPU_TO_BE64(0xfe80000000000000LL); + sgid->raw[8] = mac_addr[0] ^ 2; + sgid->raw[9] = mac_addr[1]; + sgid->raw[10] = mac_addr[2]; + if (is_vlan) { + sgid->raw[11] = vlan_id >> 8; + sgid->raw[12] = vlan_id & 0xff; + } else { + sgid->raw[11] = 0xff; + sgid->raw[12] = 0xfe; + } + sgid->raw[13] = mac_addr[3]; + sgid->raw[14] = mac_addr[4]; + sgid->raw[15] = mac_addr[5]; +} +static bool +qlnxr_add_sgid(struct qlnxr_dev *dev, union ib_gid *new_sgid); + +static void +qlnxr_add_ip_based_gid(struct qlnxr_dev *dev, struct ifnet *ifp) +{ + struct ifaddr *ifa; + union ib_gid gid; + + CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) { + + QL_DPRINT12(dev->ha, "IP address : %x\n", ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr); + ipv6_addr_set_v4mapped( + ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr, + (struct in6_addr *)&gid); + QL_DPRINT12(dev->ha, "gid generated : %llx\n", gid); + + qlnxr_add_sgid(dev, &gid); + } + } + for (int i = 0; i < 16; i++) { + QL_DPRINT12(dev->ha, "gid generated : %x\n", gid.raw[i]); + } +} + +static bool +qlnxr_add_sgid(struct qlnxr_dev *dev, union ib_gid *new_sgid) +{ + union ib_gid zero_sgid = { { 0 } }; + int i; + //unsigned long flags; + mtx_lock(&dev->sgid_lock); + for (i = 0; i < QLNXR_MAX_SGID; i++) { + if (!memcmp(&dev->sgid_tbl[i], &zero_sgid, + sizeof(union ib_gid))) { + /* found free entry */ + memcpy(&dev->sgid_tbl[i], new_sgid, + sizeof(union ib_gid)); + QL_DPRINT12(dev->ha, "copying sgid : %llx\n", + *new_sgid); + mtx_unlock(&dev->sgid_lock); + //TODO ib_dispatch event here? 
+ return true; + } else if (!memcmp(&dev->sgid_tbl[i], new_sgid, + sizeof(union ib_gid))) { + /* entry already present, no addition required */ + mtx_unlock(&dev->sgid_lock); + QL_DPRINT12(dev->ha, "sgid present : %llx\n", + *new_sgid); + return false; + } + } + if (i == QLNXR_MAX_SGID) { + QL_DPRINT12(dev->ha, "didn't find an empty entry in sgid_tbl\n"); + } + mtx_unlock(&dev->sgid_lock); + return false; +} + +static bool qlnxr_del_sgid(struct qlnxr_dev *dev, union ib_gid *gid) +{ + int found = false; + int i; + //unsigned long flags; + + QL_DPRINT12(dev->ha, "removing gid %llx %llx\n", + gid->global.interface_id, + gid->global.subnet_prefix); + mtx_lock(&dev->sgid_lock); + /* first is the default sgid which cannot be deleted */ + for (i = 1; i < QLNXR_MAX_SGID; i++) { + if (!memcmp(&dev->sgid_tbl[i], gid, sizeof(union ib_gid))) { + /* found matching entry */ + memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid)); + found = true; + break; + } + } + mtx_unlock(&dev->sgid_lock); + + return found; +} + +#if __FreeBSD_version < 1100000 + +static inline int +is_vlan_dev(struct ifnet *ifp) +{ + return (ifp->if_type == IFT_L2VLAN); +} + +static inline uint16_t +vlan_dev_vlan_id(struct ifnet *ifp) +{ + uint16_t vtag; + + if (VLAN_TAG(ifp, &vtag) == 0) + return (vtag); + + return (0); +} + +#endif /* #if __FreeBSD_version < 1100000 */ + +static void +qlnxr_add_sgids(struct qlnxr_dev *dev) +{ + qlnx_host_t *ha = dev->ha; + u16 vlan_id; + bool is_vlan; + union ib_gid vgid; + + qlnxr_add_ip_based_gid(dev, ha->ifp); + /* MAC/VLAN base GIDs */ + is_vlan = is_vlan_dev(ha->ifp); + vlan_id = (is_vlan) ? vlan_dev_vlan_id(ha->ifp) : 0; + qlnxr_build_sgid_mac(&vgid, ha->primary_mac, is_vlan, vlan_id); + qlnxr_add_sgid(dev, &vgid); +} + +static int +qlnxr_add_default_sgid(struct qlnxr_dev *dev) +{ + /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */ + union ib_gid *sgid = &dev->sgid_tbl[0]; + struct ecore_rdma_device *qattr; + qlnx_host_t *ha; + ha = dev->ha; + + qattr = ecore_rdma_query_device(dev->rdma_ctx); + if(sgid == NULL) + QL_DPRINT12(ha, "sgid = NULL?\n"); + + sgid->global.subnet_prefix = OSAL_CPU_TO_BE64(0xfe80000000000000LL); + QL_DPRINT12(ha, "node_guid = %llx", dev->attr.node_guid); + memcpy(&sgid->raw[8], &qattr->node_guid, + sizeof(qattr->node_guid)); + //memcpy(&sgid->raw[8], &dev->attr.node_guid, + // sizeof(dev->attr.node_guid)); + QL_DPRINT12(ha, "DEFAULT sgid=[%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x]\n", + sgid->raw[0], sgid->raw[1], sgid->raw[2], sgid->raw[3], sgid->raw[4], sgid->raw[5], + sgid->raw[6], sgid->raw[7], sgid->raw[8], sgid->raw[9], sgid->raw[10], sgid->raw[11], + sgid->raw[12], sgid->raw[13], sgid->raw[14], sgid->raw[15]); + return 0; +} + +static int qlnxr_addr_event (struct qlnxr_dev *dev, + unsigned long event, + struct ifnet *ifp, + union ib_gid *gid) +{ + bool is_vlan = false; + union ib_gid vgid; + u16 vlan_id = 0xffff; + + QL_DPRINT12(dev->ha, "Link event occurred\n"); + is_vlan = is_vlan_dev(dev->ha->ifp); + vlan_id = (is_vlan) ? 
vlan_dev_vlan_id(dev->ha->ifp) : 0; + + switch (event) { + case NETDEV_UP : + qlnxr_add_sgid(dev, gid); + if (is_vlan) { + qlnxr_build_sgid_mac(&vgid, dev->ha->primary_mac, is_vlan, vlan_id); + qlnxr_add_sgid(dev, &vgid); + } + break; + case NETDEV_DOWN : + qlnxr_del_sgid(dev, gid); + if (is_vlan) { + qlnxr_build_sgid_mac(&vgid, dev->ha->primary_mac, is_vlan, vlan_id); + qlnxr_del_sgid(dev, &vgid); + } + break; + default : + break; + } + return 1; +} + +static int qlnxr_inetaddr_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct ifaddr *ifa = ptr; + union ib_gid gid; + struct qlnxr_dev *dev = container_of(notifier, struct qlnxr_dev, nb_inet); + qlnx_host_t *ha = dev->ha; + + ipv6_addr_set_v4mapped( + ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr, + (struct in6_addr *)&gid); + return qlnxr_addr_event(dev, event, ha->ifp, &gid); +} + +static int +qlnxr_register_inet(struct qlnxr_dev *dev) +{ + int ret; + dev->nb_inet.notifier_call = qlnxr_inetaddr_event; + ret = register_inetaddr_notifier(&dev->nb_inet); + if (ret) { + QL_DPRINT12(dev->ha, "Failed to register inetaddr\n"); + return ret; + } + /* TODO : add for CONFIG_IPV6) */ + return 0; +} + +static int +qlnxr_build_sgid_tbl(struct qlnxr_dev *dev) +{ + qlnxr_add_default_sgid(dev); + qlnxr_add_sgids(dev); + return 0; +} + +static struct qlnx_rdma_if qlnxr_drv; + +static void * +qlnxr_add(void *eth_dev) +{ + struct qlnxr_dev *dev; + int ret; + //device_t pci_dev; + qlnx_host_t *ha; + + ha = eth_dev; + + QL_DPRINT12(ha, "enter [ha = %p]\n", ha); + + dev = (struct qlnxr_dev *)ib_alloc_device(sizeof(struct qlnxr_dev)); + + if (dev == NULL) + return (NULL); + + dev->ha = eth_dev; + dev->cdev = &ha->cdev; + /* Added to extend Application support */ + dev->pdev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL); + + dev->pdev->dev = *(dev->ha->pci_dev); + dev->pdev->device = pci_get_device(dev->ha->pci_dev); + dev->pdev->vendor = pci_get_vendor(dev->ha->pci_dev); + + dev->rdma_ctx = &ha->cdev.hwfns[0]; + dev->wq_multiplier = wq_multiplier; + dev->num_cnq = QLNX_NUM_CNQ; + + QL_DPRINT12(ha, + "ha = %p dev = %p ha->cdev = %p\n", + ha, dev, &ha->cdev); + QL_DPRINT12(ha, + "dev->cdev = %p dev->rdma_ctx = %p\n", + dev->cdev, dev->rdma_ctx); + + ret = qlnxr_alloc_resources(dev); + + if (ret) + goto qlnxr_add_err; + + ret = qlnxr_setup_irqs(dev); + + if (ret) { + qlnxr_free_resources(dev); + goto qlnxr_add_err; + } + + ret = qlnxr_init_hw(dev); + + if (ret) { + qlnxr_release_irqs(dev); + qlnxr_free_resources(dev); + goto qlnxr_add_err; + } + + qlnxr_register_device(dev); + for (int i = 0; i < ARRAY_SIZE(qlnxr_class_attributes); ++i) { + if (device_create_file(&dev->ibdev.dev, qlnxr_class_attributes[i])) + goto sysfs_err; + } + qlnxr_build_sgid_tbl(dev); + //ret = qlnxr_register_inet(dev); + QL_DPRINT12(ha, "exit\n"); + if (!test_and_set_bit(QLNXR_ENET_STATE_BIT, &dev->enet_state)) { + QL_DPRINT12(ha, "dispatching IB_PORT_ACTIVE event\n"); + qlnxr_ib_dispatch_event(dev, QLNXR_PORT, + IB_EVENT_PORT_ACTIVE); + } + + return (dev); +sysfs_err: + for (int i = 0; i < ARRAY_SIZE(qlnxr_class_attributes); ++i) { + device_remove_file(&dev->ibdev.dev, qlnxr_class_attributes[i]); + } + ib_unregister_device(&dev->ibdev); + +qlnxr_add_err: + ib_dealloc_device(&dev->ibdev); + + QL_DPRINT12(ha, "exit failed\n"); + return (NULL); +} + +static void +qlnxr_remove_sysfiles(struct qlnxr_dev *dev) +{ + int i; + for (i = 0; i < ARRAY_SIZE(qlnxr_class_attributes); ++i) + device_remove_file(&dev->ibdev.dev, qlnxr_class_attributes[i]); +} + 
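qlnxr_add() above, together with qlnxr_remove() and qlnxr_notify() below, is not wired up through a device probe/attach routine in this file; it is handed to the if_qlnxe Ethernet driver as a callback table. A minimal sketch of that hookup, written with a designated initializer and assuming only the fields that qlnxr_mod_load() below actually assigns (the _sketch name is illustrative, not part of the commit):

/* Sketch: equivalent to the field-by-field assignments in qlnxr_mod_load(). */
static struct qlnx_rdma_if qlnxr_drv_sketch = {
	.add    = qlnxr_add,	/* attach an ib_device to a qlnxe port */
	.remove = qlnxr_remove,	/* detach and free it again */
	.notify = qlnxr_notify,	/* link up/down and MAC-change events */
};

Calling qlnx_rdma_register_if(&qlnxr_drv_sketch) would then let the Ethernet driver invoke these callbacks for each port it brings up, and qlnx_rdma_deregister_if() undoes the registration at module unload.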
+static int +qlnxr_remove(void *eth_dev, void *qlnx_rdma_dev) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = qlnx_rdma_dev; + ha = eth_dev; + + if ((ha == NULL) || (dev == NULL)) + return (0); + + QL_DPRINT12(ha, "enter ha = %p qlnx_rdma_dev = %p pd_count = %d\n", + ha, qlnx_rdma_dev, dev->pd_count); + + qlnxr_ib_dispatch_event(dev, QLNXR_PORT, + IB_EVENT_PORT_ERR); + + if (QLNX_IS_IWARP(dev)) { + if (dev->pd_count) + return (EBUSY); + } + + ib_unregister_device(&dev->ibdev); + + if (QLNX_IS_ROCE(dev)) { + if (dev->pd_count) + return (EBUSY); + } + + ecore_rdma_remove_user(dev->rdma_ctx, dev->dpi); + ecore_rdma_stop(dev->rdma_ctx); + + qlnxr_release_irqs(dev); + + qlnxr_free_resources(dev); + + qlnxr_remove_sysfiles(dev); + ib_dealloc_device(&dev->ibdev); + + QL_DPRINT12(ha, "exit ha = %p qlnx_rdma_dev = %p\n", ha, qlnx_rdma_dev); + return (0); +} + +int +qlnx_rdma_ll2_set_mac_filter(void *rdma_ctx, uint8_t *old_mac_address, + uint8_t *new_mac_address) +{ + struct ecore_hwfn *p_hwfn = rdma_ctx; + struct qlnx_host *ha; + int ret = 0; + + ha = (struct qlnx_host *)(p_hwfn->p_dev); + QL_DPRINT2(ha, "enter rdma_ctx (%p)\n", rdma_ctx); + + if (old_mac_address) + ecore_llh_remove_mac_filter(p_hwfn->p_dev, 0, old_mac_address); + + if (new_mac_address) + ret = ecore_llh_add_mac_filter(p_hwfn->p_dev, 0, new_mac_address); + + QL_DPRINT2(ha, "exit rdma_ctx (%p)\n", rdma_ctx); + return (ret); +} + +static void +qlnxr_mac_address_change(struct qlnxr_dev *dev) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter/exit\n"); + + return; +} + +static void +qlnxr_notify(void *eth_dev, void *qlnx_rdma_dev, enum qlnx_rdma_event event) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = qlnx_rdma_dev; + + if (dev == NULL) + return; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter (%p, %d)\n", qlnx_rdma_dev, event); + + switch (event) { + + case QLNX_ETHDEV_UP: + if (!test_and_set_bit(QLNXR_ENET_STATE_BIT, &dev->enet_state)) + qlnxr_ib_dispatch_event(dev, QLNXR_PORT, + IB_EVENT_PORT_ACTIVE); + break; + + case QLNX_ETHDEV_CHANGE_ADDR: + qlnxr_mac_address_change(dev); + break; + + case QLNX_ETHDEV_DOWN: + if (test_and_set_bit(QLNXR_ENET_STATE_BIT, &dev->enet_state)) + qlnxr_ib_dispatch_event(dev, QLNXR_PORT, + IB_EVENT_PORT_ERR); + break; + } + + QL_DPRINT12(ha, "exit (%p, %d)\n", qlnx_rdma_dev, event); + return; +} + +static int +qlnxr_mod_load(void) +{ + int ret; + + + qlnxr_drv.add = qlnxr_add; + qlnxr_drv.remove = qlnxr_remove; + qlnxr_drv.notify = qlnxr_notify; + + ret = qlnx_rdma_register_if(&qlnxr_drv); + + return (0); +} + +static int +qlnxr_mod_unload(void) +{ + int ret; + + ret = qlnx_rdma_deregister_if(&qlnxr_drv); + return (ret); +} + +static int +qlnxr_event_handler(module_t mod, int event, void *arg) +{ + + int ret = 0; + + switch (event) { + + case MOD_LOAD: + ret = qlnxr_mod_load(); + break; + + case MOD_UNLOAD: + ret = qlnxr_mod_unload(); + break; + + default: + break; + } + + return (ret); +} + +static moduledata_t qlnxr_mod_info = { + .name = "qlnxr", + .evhand = qlnxr_event_handler, +}; + +MODULE_VERSION(qlnxr, 1); +MODULE_DEPEND(qlnxr, if_qlnxe, 1, 1, 1); +MODULE_DEPEND(qlnxr, ibcore, 1, 1, 1); + +#if __FreeBSD_version >= 1100000 +MODULE_DEPEND(qlnxr, linuxkpi, 1, 1, 1); +#endif /* #if __FreeBSD_version >= 1100000 */ + +DECLARE_MODULE(qlnxr, qlnxr_mod_info, SI_SUB_LAST, SI_ORDER_ANY); + diff --git a/sys/dev/qlnx/qlnxr/qlnxr_roce.h b/sys/dev/qlnx/qlnxr/qlnxr_roce.h new file mode 100644 index 000000000000..9a39cb5d18db --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_roce.h 
@@ -0,0 +1,675 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef __QLNXR_ROCE_H__ +#define __QLNXR_ROCE_H__ + + +/* + * roce completion notification queue element + */ +struct roce_cnqe { + struct regpair cq_handle; +}; + + +struct roce_cqe_responder { + struct regpair srq_wr_id; + struct regpair qp_handle; + __le32 imm_data_or_inv_r_Key; + __le32 length; + __le32 reserved0; + __le16 rq_cons; + u8 flags; +#define ROCE_CQE_RESPONDER_TOGGLE_BIT_MASK 0x1 +#define ROCE_CQE_RESPONDER_TOGGLE_BIT_SHIFT 0 +#define ROCE_CQE_RESPONDER_TYPE_MASK 0x3 +#define ROCE_CQE_RESPONDER_TYPE_SHIFT 1 +#define ROCE_CQE_RESPONDER_INV_FLG_MASK 0x1 +#define ROCE_CQE_RESPONDER_INV_FLG_SHIFT 3 +#define ROCE_CQE_RESPONDER_IMM_FLG_MASK 0x1 +#define ROCE_CQE_RESPONDER_IMM_FLG_SHIFT 4 +#define ROCE_CQE_RESPONDER_RDMA_FLG_MASK 0x1 +#define ROCE_CQE_RESPONDER_RDMA_FLG_SHIFT 5 +#define ROCE_CQE_RESPONDER_RESERVED2_MASK 0x3 +#define ROCE_CQE_RESPONDER_RESERVED2_SHIFT 6 + u8 status; +}; + +struct roce_cqe_requester { + __le16 sq_cons; + __le16 reserved0; + __le32 reserved1; + struct regpair qp_handle; + struct regpair reserved2; + __le32 reserved3; + __le16 reserved4; + u8 flags; +#define ROCE_CQE_REQUESTER_TOGGLE_BIT_MASK 0x1 +#define ROCE_CQE_REQUESTER_TOGGLE_BIT_SHIFT 0 +#define ROCE_CQE_REQUESTER_TYPE_MASK 0x3 +#define ROCE_CQE_REQUESTER_TYPE_SHIFT 1 +#define ROCE_CQE_REQUESTER_RESERVED5_MASK 0x1F +#define ROCE_CQE_REQUESTER_RESERVED5_SHIFT 3 + u8 status; +}; + +struct roce_cqe_common { + struct regpair reserved0; + struct regpair qp_handle; + __le16 reserved1[7]; + u8 flags; +#define ROCE_CQE_COMMON_TOGGLE_BIT_MASK 0x1 +#define ROCE_CQE_COMMON_TOGGLE_BIT_SHIFT 0 +#define ROCE_CQE_COMMON_TYPE_MASK 0x3 +#define ROCE_CQE_COMMON_TYPE_SHIFT 1 +#define ROCE_CQE_COMMON_RESERVED2_MASK 0x1F +#define ROCE_CQE_COMMON_RESERVED2_SHIFT 3 + u8 status; +}; + +/* + * roce completion queue element + */ +union roce_cqe { + struct roce_cqe_responder resp; + struct roce_cqe_requester req; + struct roce_cqe_common cmn; +}; + + + + +/* + * CQE requester status enumeration + */ +enum roce_cqe_requester_status_enum { + ROCE_CQE_REQ_STS_OK, + ROCE_CQE_REQ_STS_BAD_RESPONSE_ERR, + 
ROCE_CQE_REQ_STS_LOCAL_LENGTH_ERR, + ROCE_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR, + ROCE_CQE_REQ_STS_LOCAL_PROTECTION_ERR, + ROCE_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR, + ROCE_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR, + ROCE_CQE_REQ_STS_REMOTE_ACCESS_ERR, + ROCE_CQE_REQ_STS_REMOTE_OPERATION_ERR, + ROCE_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR, + ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR, + ROCE_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR, + MAX_ROCE_CQE_REQUESTER_STATUS_ENUM +}; + + + +/* + * CQE responder status enumeration + */ +enum roce_cqe_responder_status_enum { + ROCE_CQE_RESP_STS_OK, + ROCE_CQE_RESP_STS_LOCAL_ACCESS_ERR, + ROCE_CQE_RESP_STS_LOCAL_LENGTH_ERR, + ROCE_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR, + ROCE_CQE_RESP_STS_LOCAL_PROTECTION_ERR, + ROCE_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR, + ROCE_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR, + ROCE_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR, + MAX_ROCE_CQE_RESPONDER_STATUS_ENUM +}; + + +/* + * CQE type enumeration + */ +enum roce_cqe_type { + ROCE_CQE_TYPE_REQUESTER, + ROCE_CQE_TYPE_RESPONDER_RQ, + ROCE_CQE_TYPE_RESPONDER_SRQ, + ROCE_CQE_TYPE_INVALID, + MAX_ROCE_CQE_TYPE +}; + + +/* + * memory window type enumeration + */ +enum roce_mw_type { + ROCE_MW_TYPE_1, + ROCE_MW_TYPE_2A, + MAX_ROCE_MW_TYPE +}; + + +struct roce_rq_sge { + struct regpair addr; + __le32 length; + __le32 flags; +#define ROCE_RQ_SGE_L_KEY_MASK 0x3FFFFFF +#define ROCE_RQ_SGE_L_KEY_SHIFT 0 +#define ROCE_RQ_SGE_NUM_SGES_MASK 0x7 +#define ROCE_RQ_SGE_NUM_SGES_SHIFT 26 +#define ROCE_RQ_SGE_RESERVED0_MASK 0x7 +#define ROCE_RQ_SGE_RESERVED0_SHIFT 29 +}; + + +struct roce_sq_atomic_wqe { + struct regpair remote_va; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define ROCE_SQ_ATOMIC_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_ATOMIC_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_ATOMIC_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_ATOMIC_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_ATOMIC_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_ATOMIC_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_ATOMIC_WQE_RESERVED0_SHIFT 5 + u8 reserved1; + u8 prev_wqe_size; + struct regpair swap_data; + __le32 r_key; + __le32 reserved2; + struct regpair cmp_data; + struct regpair reserved3; +}; + + +/* + * First element (16 bytes) of atomic wqe + */ +struct roce_sq_atomic_wqe_1st { + struct regpair remote_va; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define ROCE_SQ_ATOMIC_WQE_1ST_COMP_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_1ST_COMP_FLG_SHIFT 0 +#define ROCE_SQ_ATOMIC_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_ATOMIC_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_ATOMIC_WQE_1ST_SE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_1ST_SE_FLG_SHIFT 3 +#define ROCE_SQ_ATOMIC_WQE_1ST_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_1ST_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_ATOMIC_WQE_1ST_RESERVED0_MASK 0x7 +#define ROCE_SQ_ATOMIC_WQE_1ST_RESERVED0_SHIFT 5 + u8 reserved1; + u8 prev_wqe_size; +}; + + +/* + * Second element (16 bytes) of atomic wqe + */ +struct roce_sq_atomic_wqe_2nd { + struct regpair swap_data; + __le32 r_key; + __le32 reserved2; +}; + + +/* + * Third element (16 bytes) of atomic wqe + */ +struct roce_sq_atomic_wqe_3rd { + struct regpair cmp_data; + struct regpair reserved3; +}; + + +struct roce_sq_bind_wqe { + 
struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define ROCE_SQ_BIND_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_BIND_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_BIND_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_BIND_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_BIND_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_BIND_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_BIND_WQE_RESERVED0_SHIFT 5 + u8 access_ctrl; +#define ROCE_SQ_BIND_WQE_REMOTE_READ_MASK 0x1 +#define ROCE_SQ_BIND_WQE_REMOTE_READ_SHIFT 0 +#define ROCE_SQ_BIND_WQE_REMOTE_WRITE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_REMOTE_WRITE_SHIFT 1 +#define ROCE_SQ_BIND_WQE_ENABLE_ATOMIC_MASK 0x1 +#define ROCE_SQ_BIND_WQE_ENABLE_ATOMIC_SHIFT 2 +#define ROCE_SQ_BIND_WQE_LOCAL_READ_MASK 0x1 +#define ROCE_SQ_BIND_WQE_LOCAL_READ_SHIFT 3 +#define ROCE_SQ_BIND_WQE_LOCAL_WRITE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_LOCAL_WRITE_SHIFT 4 +#define ROCE_SQ_BIND_WQE_RESERVED1_MASK 0x7 +#define ROCE_SQ_BIND_WQE_RESERVED1_SHIFT 5 + u8 prev_wqe_size; + u8 bind_ctrl; +#define ROCE_SQ_BIND_WQE_ZERO_BASED_MASK 0x1 +#define ROCE_SQ_BIND_WQE_ZERO_BASED_SHIFT 0 +#define ROCE_SQ_BIND_WQE_MW_TYPE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_MW_TYPE_SHIFT 1 +#define ROCE_SQ_BIND_WQE_RESERVED2_MASK 0x3F +#define ROCE_SQ_BIND_WQE_RESERVED2_SHIFT 2 + u8 reserved3[2]; + u8 length_hi; + __le32 length_lo; + __le32 parent_l_key; + __le32 reserved6; +}; + + +/* + * First element (16 bytes) of bind wqe + */ +struct roce_sq_bind_wqe_1st { + struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define ROCE_SQ_BIND_WQE_1ST_COMP_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_COMP_FLG_SHIFT 0 +#define ROCE_SQ_BIND_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_BIND_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_BIND_WQE_1ST_SE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_SE_FLG_SHIFT 3 +#define ROCE_SQ_BIND_WQE_1ST_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_BIND_WQE_1ST_RESERVED0_MASK 0x7 +#define ROCE_SQ_BIND_WQE_1ST_RESERVED0_SHIFT 5 + u8 access_ctrl; +#define ROCE_SQ_BIND_WQE_1ST_REMOTE_READ_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_REMOTE_READ_SHIFT 0 +#define ROCE_SQ_BIND_WQE_1ST_REMOTE_WRITE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_REMOTE_WRITE_SHIFT 1 +#define ROCE_SQ_BIND_WQE_1ST_ENABLE_ATOMIC_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_ENABLE_ATOMIC_SHIFT 2 +#define ROCE_SQ_BIND_WQE_1ST_LOCAL_READ_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_LOCAL_READ_SHIFT 3 +#define ROCE_SQ_BIND_WQE_1ST_LOCAL_WRITE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_LOCAL_WRITE_SHIFT 4 +#define ROCE_SQ_BIND_WQE_1ST_RESERVED1_MASK 0x7 +#define ROCE_SQ_BIND_WQE_1ST_RESERVED1_SHIFT 5 + u8 prev_wqe_size; +}; + + +/* + * Second element (16 bytes) of bind wqe + */ +struct roce_sq_bind_wqe_2nd { + u8 bind_ctrl; +#define ROCE_SQ_BIND_WQE_2ND_ZERO_BASED_MASK 0x1 +#define ROCE_SQ_BIND_WQE_2ND_ZERO_BASED_SHIFT 0 +#define ROCE_SQ_BIND_WQE_2ND_MW_TYPE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_2ND_MW_TYPE_SHIFT 1 +#define ROCE_SQ_BIND_WQE_2ND_RESERVED2_MASK 0x3F +#define ROCE_SQ_BIND_WQE_2ND_RESERVED2_SHIFT 2 + u8 reserved3[2]; + u8 length_hi; + __le32 length_lo; + __le32 parent_l_key; + __le32 reserved6; +}; + + +/* + * Structure with only the SQ WQE common fields. 
Size is of one SQ element (16B) + */ +struct roce_sq_common_wqe { + __le32 reserved1[3]; + u8 req_type; + u8 flags; +#define ROCE_SQ_COMMON_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_COMMON_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_COMMON_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_COMMON_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_COMMON_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_COMMON_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_COMMON_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_COMMON_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_COMMON_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_COMMON_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_COMMON_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_COMMON_WQE_RESERVED0_SHIFT 5 + u8 reserved2; + u8 prev_wqe_size; +}; + + +struct roce_sq_fmr_wqe { + struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define ROCE_SQ_FMR_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_FMR_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_FMR_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_FMR_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_FMR_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_FMR_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_FMR_WQE_RESERVED0_SHIFT 5 + u8 access_ctrl; +#define ROCE_SQ_FMR_WQE_REMOTE_READ_MASK 0x1 +#define ROCE_SQ_FMR_WQE_REMOTE_READ_SHIFT 0 +#define ROCE_SQ_FMR_WQE_REMOTE_WRITE_MASK 0x1 +#define ROCE_SQ_FMR_WQE_REMOTE_WRITE_SHIFT 1 +#define ROCE_SQ_FMR_WQE_ENABLE_ATOMIC_MASK 0x1 +#define ROCE_SQ_FMR_WQE_ENABLE_ATOMIC_SHIFT 2 +#define ROCE_SQ_FMR_WQE_LOCAL_READ_MASK 0x1 +#define ROCE_SQ_FMR_WQE_LOCAL_READ_SHIFT 3 +#define ROCE_SQ_FMR_WQE_LOCAL_WRITE_MASK 0x1 +#define ROCE_SQ_FMR_WQE_LOCAL_WRITE_SHIFT 4 +#define ROCE_SQ_FMR_WQE_RESERVED1_MASK 0x7 +#define ROCE_SQ_FMR_WQE_RESERVED1_SHIFT 5 + u8 prev_wqe_size; + u8 fmr_ctrl; +#define ROCE_SQ_FMR_WQE_PAGE_SIZE_LOG_MASK 0x1F +#define ROCE_SQ_FMR_WQE_PAGE_SIZE_LOG_SHIFT 0 +#define ROCE_SQ_FMR_WQE_ZERO_BASED_MASK 0x1 +#define ROCE_SQ_FMR_WQE_ZERO_BASED_SHIFT 5 +#define ROCE_SQ_FMR_WQE_BIND_EN_MASK 0x1 +#define ROCE_SQ_FMR_WQE_BIND_EN_SHIFT 6 +#define ROCE_SQ_FMR_WQE_RESERVED2_MASK 0x1 +#define ROCE_SQ_FMR_WQE_RESERVED2_SHIFT 7 + u8 reserved3[2]; + u8 length_hi; + __le32 length_lo; + struct regpair pbl_addr; +}; + + +/* + * First element (16 bytes) of fmr wqe + */ +struct roce_sq_fmr_wqe_1st { + struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define ROCE_SQ_FMR_WQE_1ST_COMP_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_COMP_FLG_SHIFT 0 +#define ROCE_SQ_FMR_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_FMR_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_FMR_WQE_1ST_SE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_SE_FLG_SHIFT 3 +#define ROCE_SQ_FMR_WQE_1ST_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_FMR_WQE_1ST_RESERVED0_MASK 0x7 +#define ROCE_SQ_FMR_WQE_1ST_RESERVED0_SHIFT 5 + u8 access_ctrl; +#define ROCE_SQ_FMR_WQE_1ST_REMOTE_READ_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_REMOTE_READ_SHIFT 0 +#define ROCE_SQ_FMR_WQE_1ST_REMOTE_WRITE_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_REMOTE_WRITE_SHIFT 1 +#define ROCE_SQ_FMR_WQE_1ST_ENABLE_ATOMIC_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_ENABLE_ATOMIC_SHIFT 2 +#define ROCE_SQ_FMR_WQE_1ST_LOCAL_READ_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_LOCAL_READ_SHIFT 3 +#define 
ROCE_SQ_FMR_WQE_1ST_LOCAL_WRITE_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_LOCAL_WRITE_SHIFT 4 +#define ROCE_SQ_FMR_WQE_1ST_RESERVED1_MASK 0x7 +#define ROCE_SQ_FMR_WQE_1ST_RESERVED1_SHIFT 5 + u8 prev_wqe_size; +}; + + +/* + * Second element (16 bytes) of fmr wqe + */ +struct roce_sq_fmr_wqe_2nd { + u8 fmr_ctrl; +#define ROCE_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG_MASK 0x1F +#define ROCE_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG_SHIFT 0 +#define ROCE_SQ_FMR_WQE_2ND_ZERO_BASED_MASK 0x1 +#define ROCE_SQ_FMR_WQE_2ND_ZERO_BASED_SHIFT 5 +#define ROCE_SQ_FMR_WQE_2ND_BIND_EN_MASK 0x1 +#define ROCE_SQ_FMR_WQE_2ND_BIND_EN_SHIFT 6 +#define ROCE_SQ_FMR_WQE_2ND_RESERVED2_MASK 0x1 +#define ROCE_SQ_FMR_WQE_2ND_RESERVED2_SHIFT 7 + u8 reserved3[2]; + u8 length_hi; + __le32 length_lo; + struct regpair pbl_addr; +}; + + +struct roce_sq_local_inv_wqe { + struct regpair reserved; + __le32 inv_l_key; + u8 req_type; + u8 flags; +#define ROCE_SQ_LOCAL_INV_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_LOCAL_INV_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_LOCAL_INV_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_LOCAL_INV_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_LOCAL_INV_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_LOCAL_INV_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_LOCAL_INV_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_LOCAL_INV_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_LOCAL_INV_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_LOCAL_INV_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_LOCAL_INV_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_LOCAL_INV_WQE_RESERVED0_SHIFT 5 + u8 reserved1; + u8 prev_wqe_size; +}; + + +struct roce_sq_rdma_wqe { + __le32 imm_data; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define ROCE_SQ_RDMA_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_RDMA_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_RDMA_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_RDMA_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_RDMA_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_RDMA_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_RDMA_WQE_RESERVED0_SHIFT 5 + u8 wqe_size; + u8 prev_wqe_size; + struct regpair remote_va; + __le32 r_key; + __le32 reserved1; +}; + + +/* + * First element (16 bytes) of rdma wqe + */ +struct roce_sq_rdma_wqe_1st { + __le32 imm_data; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define ROCE_SQ_RDMA_WQE_1ST_COMP_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_1ST_COMP_FLG_SHIFT 0 +#define ROCE_SQ_RDMA_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_RDMA_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_RDMA_WQE_1ST_SE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_1ST_SE_FLG_SHIFT 3 +#define ROCE_SQ_RDMA_WQE_1ST_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_1ST_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_RDMA_WQE_1ST_RESERVED0_MASK 0x7 +#define ROCE_SQ_RDMA_WQE_1ST_RESERVED0_SHIFT 5 + u8 wqe_size; + u8 prev_wqe_size; +}; + + +/* + * Second element (16 bytes) of rdma wqe + */ +struct roce_sq_rdma_wqe_2nd { + struct regpair remote_va; + __le32 r_key; + __le32 reserved1; +}; + + +/* + * SQ WQE req type enumeration + */ +enum roce_sq_req_type { + ROCE_SQ_REQ_TYPE_SEND, + ROCE_SQ_REQ_TYPE_SEND_WITH_IMM, + ROCE_SQ_REQ_TYPE_SEND_WITH_INVALIDATE, + ROCE_SQ_REQ_TYPE_RDMA_WR, + ROCE_SQ_REQ_TYPE_RDMA_WR_WITH_IMM, + ROCE_SQ_REQ_TYPE_RDMA_RD, + ROCE_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP, + 
ROCE_SQ_REQ_TYPE_ATOMIC_ADD, + ROCE_SQ_REQ_TYPE_LOCAL_INVALIDATE, + ROCE_SQ_REQ_TYPE_FAST_MR, + ROCE_SQ_REQ_TYPE_BIND, + ROCE_SQ_REQ_TYPE_INVALID, + MAX_ROCE_SQ_REQ_TYPE +}; + + +struct roce_sq_send_wqe { + __le32 inv_key_or_imm_data; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define ROCE_SQ_SEND_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_SEND_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_SEND_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_SEND_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_SEND_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_SEND_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_SEND_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_SEND_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_SEND_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_SEND_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_SEND_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_SEND_WQE_RESERVED0_SHIFT 5 + u8 wqe_size; + u8 prev_wqe_size; +}; + + +struct roce_sq_sge { + __le32 length; + struct regpair addr; + __le32 l_key; +}; + + +struct roce_srq_prod { + __le16 prod; +}; + + +struct roce_srq_sge { + struct regpair addr; + __le32 length; + __le32 l_key; + struct regpair wr_id; + u8 flags; +#define ROCE_SRQ_SGE_NUM_SGES_MASK 0x3 +#define ROCE_SRQ_SGE_NUM_SGES_SHIFT 0 +#define ROCE_SRQ_SGE_RESERVED0_MASK 0x3F +#define ROCE_SRQ_SGE_RESERVED0_SHIFT 2 + u8 reserved1; + __le16 reserved2; + __le32 reserved3; +}; + + +/* + * RoCE doorbell data for SQ and RQ + */ +struct roce_pwm_val16_data { + __le16 icid; + __le16 prod_val; +}; + + +union roce_pwm_val16_data_union { + struct roce_pwm_val16_data as_struct; + __le32 as_dword; +}; + + +/* + * RoCE doorbell data for CQ + */ +struct roce_pwm_val32_data { + __le16 icid; + u8 agg_flags; + u8 params; +#define ROCE_PWM_VAL32_DATA_AGG_CMD_MASK 0x3 +#define ROCE_PWM_VAL32_DATA_AGG_CMD_SHIFT 0 +#define ROCE_PWM_VAL32_DATA_BYPASS_EN_MASK 0x1 +#define ROCE_PWM_VAL32_DATA_BYPASS_EN_SHIFT 2 +#define ROCE_PWM_VAL32_DATA_RESERVED_MASK 0x1F +#define ROCE_PWM_VAL32_DATA_RESERVED_SHIFT 3 + __le32 cq_cons_val; +}; + + +union roce_pwm_val32_data_union { + struct roce_pwm_val32_data as_struct; + struct regpair as_repair; +}; + +#endif /* __QLNXR_ROCE_H__ */ diff --git a/sys/dev/qlnx/qlnxr/qlnxr_user.h b/sys/dev/qlnx/qlnxr/qlnxr_user.h new file mode 100644 index 000000000000..ac6755188468 --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_user.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef __QLNXR_USER_H__ +#define __QLNXR_USER_H__ + +#define QLNXR_ABI_VERSION (7) +#define QLNXR_BE_ROCE_ABI_VERSION (1) + +/* user kernel communication data structures. */ + +struct qlnxr_alloc_ucontext_resp { + u64 db_pa; + u32 db_size; + + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_srq_wr; + uint32_t sges_per_send_wr; + uint32_t sges_per_recv_wr; + uint32_t sges_per_srq_wr; + int max_cqes; + uint8_t dpm_enabled; + uint8_t wids_enabled; + uint16_t wid_count; +}; + +struct qlnxr_alloc_pd_ureq { + u64 rsvd1; +}; + +struct qlnxr_alloc_pd_uresp { + u32 pd_id; +}; + +struct qlnxr_create_cq_ureq { + uint64_t addr; /* user space virtual address of CQ buffer */ + size_t len; /* size of CQ buffer */ +}; + +struct qlnxr_create_cq_uresp { + u32 db_offset; + u16 icid; +}; + +struct qlnxr_create_qp_ureq { + u32 qp_handle_hi; + u32 qp_handle_lo; + + /* SQ */ + uint64_t sq_addr; /* user space virtual address of SQ buffer */ + size_t sq_len; /* length of SQ buffer */ + + /* RQ */ + uint64_t rq_addr; /* user space virtual address of RQ buffer */ + size_t rq_len; /* length of RQ buffer */ +}; + +struct qlnxr_create_qp_uresp { + u32 qp_id; + int atomic_supported; + + /* SQ*/ + u32 sq_db_offset; + u16 sq_icid; + + /* RQ */ + u32 rq_db_offset; + u16 rq_icid; + + u32 rq_db2_offset; +}; + +struct qlnxr_create_srq_ureq { + /* user space virtual address of producer pair */ + uint64_t prod_pair_addr; + uint64_t srq_addr; /* user space virtual address of SQ buffer */ + size_t srq_len; /* length of SQ buffer */ +}; + +struct qlnxr_create_srq_uresp { + u16 srq_id; +}; + +#endif /* #ifndef __QLNXR_USER_H__ */ diff --git a/sys/dev/qlnx/qlnxr/qlnxr_verbs.c b/sys/dev/qlnx/qlnxr/qlnxr_verbs.c new file mode 100644 index 000000000000..54ca86977fd0 --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_verbs.c @@ -0,0 +1,7283 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +/* + * File: qlnxr_verbs.c + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "qlnxr_def.h" +#include "rdma_common.h" +#include "qlnxr_roce.h" +#include "qlnxr_cm.h" + +#define upper_32_bits(x) (uint32_t)(x >> 32) +#define lower_32_bits(x) (uint32_t)(x) +#define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo)) + +#define TYPEPTR_ADDR_SET(type_ptr, field, vaddr) \ + do { \ + (type_ptr)->field.hi = cpu_to_le32(upper_32_bits(vaddr));\ + (type_ptr)->field.lo = cpu_to_le32(lower_32_bits(vaddr));\ + } while (0) + + +#define RQ_SGE_SET(sge, vaddr, vlength, vflags) \ + do { \ + TYPEPTR_ADDR_SET(sge, addr, vaddr); \ + (sge)->length = cpu_to_le32(vlength); \ + (sge)->flags = cpu_to_le32(vflags); \ + } while (0) + +#define SRQ_HDR_SET(hdr, vwr_id, num_sge) \ + do { \ + TYPEPTR_ADDR_SET(hdr, wr_id, vwr_id); \ + (hdr)->num_sges = num_sge; \ + } while (0) + +#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \ + do { \ + TYPEPTR_ADDR_SET(sge, addr, vaddr); \ + (sge)->length = cpu_to_le32(vlength); \ + (sge)->l_key = cpu_to_le32(vlkey); \ + } while (0) + +#define NIPQUAD(addr) \ + ((unsigned char *)&addr)[0], \ + ((unsigned char *)&addr)[1], \ + ((unsigned char *)&addr)[2], \ + ((unsigned char *)&addr)[3] + +static int +qlnxr_check_srq_params(struct ib_pd *ibpd, + struct qlnxr_dev *dev, + struct ib_srq_init_attr *attrs); + +static int +qlnxr_init_srq_user_params(struct ib_ucontext *ib_ctx, + struct qlnxr_srq *srq, + struct qlnxr_create_srq_ureq *ureq, + int access, int dmasync); + +static int +qlnxr_alloc_srq_kernel_params(struct qlnxr_srq *srq, + struct qlnxr_dev *dev, + struct ib_srq_init_attr *init_attr); + + +static int +qlnxr_copy_srq_uresp(struct qlnxr_dev *dev, + struct qlnxr_srq *srq, + struct ib_udata *udata); + +static void +qlnxr_free_srq_user_params(struct qlnxr_srq *srq); + +static void +qlnxr_free_srq_kernel_params(struct qlnxr_srq *srq); + + +static u32 +qlnxr_srq_elem_left(struct qlnxr_srq_hwq_info *hw_srq); + +int +qlnxr_iw_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *sgid) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(sgid->raw, 0, sizeof(sgid->raw)); + + memcpy(sgid->raw, dev->ha->primary_mac, sizeof (dev->ha->primary_mac)); + + QL_DPRINT12(ha, "exit\n"); + + return 0; +} + +int +qlnxr_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *sgid) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + QL_DPRINT12(ha, "enter index: %d\n", index); +#if 0 + int ret = 0; + /* @@@: if DEFINE_ROCE_GID_TABLE to be used here */ + //if (!rdma_cap_roce_gid_table(ibdev, port)) { + if (!(rdma_protocol_roce(ibdev, port) && + ibdev->add_gid && ibdev->del_gid)) { + QL_DPRINT11(ha, "acquire gid failed\n"); + return -ENODEV; + } + + ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL); + if (ret == -EAGAIN) { + memcpy(sgid, &zgid, sizeof(*sgid)); + return 0; + } +#endif + if 
((index >= QLNXR_MAX_SGID) || (index < 0)) { + QL_DPRINT12(ha, "invalid gid index %d\n", index); + memset(sgid, 0, sizeof(*sgid)); + return -EINVAL; + } + memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid)); + + QL_DPRINT12(ha, "exit : %p\n", sgid); + + return 0; +} + +struct ib_srq * +qlnxr_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + struct ecore_rdma_destroy_srq_in_params destroy_in_params; + struct ecore_rdma_create_srq_out_params out_params; + struct ecore_rdma_create_srq_in_params in_params; + u64 pbl_base_addr, phy_prod_pair_addr; + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); + struct ib_ucontext *ib_ctx = NULL; + struct qlnxr_srq_hwq_info *hw_srq; + struct qlnxr_ucontext *ctx = NULL; + struct qlnxr_create_srq_ureq ureq; + u32 page_cnt, page_size; + struct qlnxr_srq *srq; + int ret = 0; + + dev = get_qlnxr_dev((ibpd->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + ret = qlnxr_check_srq_params(ibpd, dev, init_attr); + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) { + QL_DPRINT11(ha, "cannot allocate memory for srq\n"); + return NULL; //@@@ : TODO what to return here? + } + + srq->dev = dev; + hw_srq = &srq->hw_srq; + spin_lock_init(&srq->lock); + memset(&in_params, 0, sizeof(in_params)); + + if (udata && ibpd->uobject && ibpd->uobject->context) { + ib_ctx = ibpd->uobject->context; + ctx = get_qlnxr_ucontext(ib_ctx); + + memset(&ureq, 0, sizeof(ureq)); + if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen))) { + QL_DPRINT11(ha, "problem" + " copying data from user space\n"); + goto err0; + } + + ret = qlnxr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0); + if (ret) + goto err0; + + page_cnt = srq->usrq.pbl_info.num_pbes; + pbl_base_addr = srq->usrq.pbl_tbl->pa; + phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; + // @@@ : if DEFINE_IB_UMEM_PAGE_SHIFT + // page_size = BIT(srq->usrq.umem->page_shift); + // else + page_size = srq->usrq.umem->page_size; + } else { + struct ecore_chain *pbl; + ret = qlnxr_alloc_srq_kernel_params(srq, dev, init_attr); + if (ret) + goto err0; + pbl = &hw_srq->pbl; + + page_cnt = ecore_chain_get_page_cnt(pbl); + pbl_base_addr = ecore_chain_get_pbl_phys(pbl); + phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; + page_size = pbl->elem_per_page << 4; + } + + in_params.pd_id = pd->pd_id; + in_params.pbl_base_addr = pbl_base_addr; + in_params.prod_pair_addr = phy_prod_pair_addr; + in_params.num_pages = page_cnt; + in_params.page_size = page_size; + + ret = ecore_rdma_create_srq(dev->rdma_ctx, &in_params, &out_params); + if (ret) + goto err1; + + srq->srq_id = out_params.srq_id; + + if (udata) { + ret = qlnxr_copy_srq_uresp(dev, srq, udata); + if (ret) + goto err2; + } + + QL_DPRINT12(ha, "created srq with srq_id = 0x%0x\n", srq->srq_id); + return &srq->ibsrq; +err2: + memset(&in_params, 0, sizeof(in_params)); + destroy_in_params.srq_id = srq->srq_id; + ecore_rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params); + +err1: + if (udata) + qlnxr_free_srq_user_params(srq); + else + qlnxr_free_srq_kernel_params(srq); + +err0: + kfree(srq); + return ERR_PTR(-EFAULT); +} + +int +qlnxr_destroy_srq(struct ib_srq *ibsrq) +{ + struct qlnxr_dev *dev; + struct qlnxr_srq *srq; + qlnx_host_t *ha; + struct ecore_rdma_destroy_srq_in_params in_params; + + srq = get_qlnxr_srq(ibsrq); + dev = srq->dev; + ha = dev->ha; + + memset(&in_params, 0, sizeof(in_params)); + in_params.srq_id = srq->srq_id; + + ecore_rdma_destroy_srq(dev->rdma_ctx, &in_params); + + 
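+	/* Release the host-side SRQ resources the same way qlnxr_create_srq()
+	 * allocated them: the user-space and kernel paths are torn down by
+	 * the matching helper below.
+	 */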
if (ibsrq->pd->uobject && ibsrq->pd->uobject->context) + qlnxr_free_srq_user_params(srq); + else + qlnxr_free_srq_kernel_params(srq); + + QL_DPRINT12(ha, "destroyed srq_id=0x%0x\n", srq->srq_id); + kfree(srq); + return 0; +} + +int +qlnxr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct qlnxr_dev *dev; + struct qlnxr_srq *srq; + qlnx_host_t *ha; + struct ecore_rdma_modify_srq_in_params in_params; + int ret = 0; + + srq = get_qlnxr_srq(ibsrq); + dev = srq->dev; + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + if (attr_mask & IB_SRQ_MAX_WR) { + QL_DPRINT12(ha, "invalid attribute mask=0x%x" + " specified for %p\n", attr_mask, srq); + return -EINVAL; + } + + if (attr_mask & IB_SRQ_LIMIT) { + if (attr->srq_limit >= srq->hw_srq.max_wr) { + QL_DPRINT12(ha, "invalid srq_limit=0x%x" + " (max_srq_limit = 0x%x)\n", + attr->srq_limit, srq->hw_srq.max_wr); + return -EINVAL; + } + memset(&in_params, 0, sizeof(in_params)); + in_params.srq_id = srq->srq_id; + in_params.wqe_limit = attr->srq_limit; + ret = ecore_rdma_modify_srq(dev->rdma_ctx, &in_params); + if (ret) + return ret; + } + + QL_DPRINT12(ha, "modified srq with srq_id = 0x%0x\n", srq->srq_id); + return 0; +} + +int +qlnxr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct qlnxr_dev *dev; + struct qlnxr_srq *srq; + qlnx_host_t *ha; + struct ecore_rdma_device *qattr; + srq = get_qlnxr_srq(ibsrq); + dev = srq->dev; + ha = dev->ha; + //qattr = &dev->attr; + qattr = ecore_rdma_query_device(dev->rdma_ctx); + QL_DPRINT12(ha, "enter\n"); + + if (!dev->rdma_ctx) { + QL_DPRINT12(ha, "called with invalid params" + " rdma_ctx is NULL\n"); + return -EINVAL; + } + + srq_attr->srq_limit = qattr->max_srq; + srq_attr->max_wr = qattr->max_srq_wr; + srq_attr->max_sge = qattr->max_sge; + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +/* Increment srq wr producer by one */ +static +void qlnxr_inc_srq_wr_prod (struct qlnxr_srq_hwq_info *info) +{ + info->wr_prod_cnt++; +} + +/* Increment srq wr consumer by one */ +static +void qlnxr_inc_srq_wr_cons(struct qlnxr_srq_hwq_info *info) +{ + info->wr_cons_cnt++; +} + +/* get_port_immutable verb is not available in FreeBSD */ +#if 0 +int +qlnxr_roce_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "entered but not implemented!!!\n"); +} +#endif + +int +qlnxr_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qlnxr_dev *dev; + struct qlnxr_srq *srq; + qlnx_host_t *ha; + struct qlnxr_srq_hwq_info *hw_srq; + struct ecore_chain *pbl; + unsigned long flags; + int status = 0; + u32 num_sge, offset; + + srq = get_qlnxr_srq(ibsrq); + dev = srq->dev; + ha = dev->ha; + hw_srq = &srq->hw_srq; + + QL_DPRINT12(ha, "enter\n"); + spin_lock_irqsave(&srq->lock, flags); + + pbl = &srq->hw_srq.pbl; + while (wr) { + struct rdma_srq_wqe_header *hdr; + int i; + + if (!qlnxr_srq_elem_left(hw_srq) || + wr->num_sge > srq->hw_srq.max_sges) { + QL_DPRINT11(ha, "WR cannot be posted" + " (%d, %d) || (%d > %d)\n", + hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + wr->num_sge, srq->hw_srq.max_sges); + status = -ENOMEM; + *bad_wr = wr; + break; + } + + hdr = ecore_chain_produce(pbl); + num_sge = wr->num_sge; + /* Set number of sge and WR id in header */ + SRQ_HDR_SET(hdr, wr->wr_id, num_sge); + + /* PBL is maintained in case of WR granularity. 
+ * So increment WR producer in case we post a WR. + */ + qlnxr_inc_srq_wr_prod(hw_srq); + hw_srq->wqe_prod++; + hw_srq->sge_prod++; + + QL_DPRINT12(ha, "SRQ WR : SGEs: %d with wr_id[%d] = %llx\n", + wr->num_sge, hw_srq->wqe_prod, wr->wr_id); + + for (i = 0; i < wr->num_sge; i++) { + struct rdma_srq_sge *srq_sge = + ecore_chain_produce(pbl); + /* Set SGE length, lkey and address */ + SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr, + wr->sg_list[i].length, wr->sg_list[i].lkey); + + QL_DPRINT12(ha, "[%d]: len %d, key %x, addr %x:%x\n", + i, srq_sge->length, srq_sge->l_key, + srq_sge->addr.hi, srq_sge->addr.lo); + hw_srq->sge_prod++; + } + wmb(); + /* + * SRQ prod is 8 bytes. Need to update SGE prod in index + * in first 4 bytes and need to update WQE prod in next + * 4 bytes. + */ + *(srq->hw_srq.virt_prod_pair_addr) = hw_srq->sge_prod; + offset = offsetof(struct rdma_srq_producers, wqe_prod); + *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = + hw_srq->wqe_prod; + /* Flush prod after updating it */ + wmb(); + wr = wr->next; + } + + QL_DPRINT12(ha, "Elements in SRQ: %d\n", + ecore_chain_get_elem_left(pbl)); + + spin_unlock_irqrestore(&srq->lock, flags); + QL_DPRINT12(ha, "exit\n"); + return status; +} + +int +#if __FreeBSD_version < 1102000 +qlnxr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) +#else +qlnxr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, + struct ib_udata *udata) +#endif /* #if __FreeBSD_version < 1102000 */ + +{ + struct qlnxr_dev *dev; + struct ecore_rdma_device *qattr; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + +#if __FreeBSD_version > 1102000 + if (udata->inlen || udata->outlen) + return -EINVAL; +#endif /* #if __FreeBSD_version > 1102000 */ + + if (dev->rdma_ctx == NULL) { + return -EINVAL; + } + + qattr = ecore_rdma_query_device(dev->rdma_ctx); + + memset(attr, 0, sizeof *attr); + + attr->fw_ver = qattr->fw_ver; + attr->sys_image_guid = qattr->sys_image_guid; + attr->max_mr_size = qattr->max_mr_size; + attr->page_size_cap = qattr->page_size_caps; + attr->vendor_id = qattr->vendor_id; + attr->vendor_part_id = qattr->vendor_part_id; + attr->hw_ver = qattr->hw_ver; + attr->max_qp = qattr->max_qp; + attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | + IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_MGT_EXTENSIONS; + + attr->max_sge = qattr->max_sge; + attr->max_sge_rd = qattr->max_sge; + attr->max_cq = qattr->max_cq; + attr->max_cqe = qattr->max_cqe; + attr->max_mr = qattr->max_mr; + attr->max_mw = qattr->max_mw; + attr->max_pd = qattr->max_pd; + attr->atomic_cap = dev->atomic_cap; + attr->max_fmr = qattr->max_fmr; + attr->max_map_per_fmr = 16; /* TBD: FMR */ + + /* There is an implicit assumption in some of the ib_xxx apps that the + * qp_rd_atom is smaller than the qp_init_rd_atom. Specifically, in + * communication the qp_rd_atom is passed to the other side and used as + * init_rd_atom without check device capabilities for init_rd_atom. + * for this reason, we set the qp_rd_atom to be the minimum between the + * two...There is an additional assumption in mlx4 driver that the + * values are power of two, fls is performed on the value - 1, which + * in fact gives a larger power of two for values which are not a power + * of two. This should be fixed in mlx4 driver, but until then -> + * we provide a value that is a power of two in our code. 
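+	 * For example, if max_qp_req_rd_atomic_resc is 24, fls(24) = 5 and
+	 * 1 << (5 - 1) = 16, i.e. the reported max_qp_init_rd_atom is the
+	 * capability rounded down to the nearest power of two.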
+ */ + attr->max_qp_init_rd_atom = + 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1); + attr->max_qp_rd_atom = + min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1), + attr->max_qp_init_rd_atom); + + attr->max_srq = qattr->max_srq; + attr->max_srq_sge = qattr->max_srq_sge; + attr->max_srq_wr = qattr->max_srq_wr; + + /* TODO: R&D to more properly configure the following */ + attr->local_ca_ack_delay = qattr->dev_ack_delay; + attr->max_fast_reg_page_list_len = qattr->max_mr/8; + attr->max_pkeys = QLNXR_ROCE_PKEY_MAX; + attr->max_ah = qattr->max_ah; + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +static inline void +get_link_speed_and_width(int speed, uint8_t *ib_speed, uint8_t *ib_width) +{ + switch (speed) { + case 1000: + *ib_speed = IB_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + break; + case 10000: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_1X; + break; + + case 20000: + *ib_speed = IB_SPEED_DDR; + *ib_width = IB_WIDTH_4X; + break; + + case 25000: + *ib_speed = IB_SPEED_EDR; + *ib_width = IB_WIDTH_1X; + break; + + case 40000: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_4X; + break; + + case 50000: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_4X; // TODO doesn't add up to 50... + break; + + case 100000: + *ib_speed = IB_SPEED_EDR; + *ib_width = IB_WIDTH_4X; + break; + + default: + /* Unsupported */ + *ib_speed = IB_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + } + return; +} + +int +qlnxr_query_port(struct ib_device *ibdev, uint8_t port, + struct ib_port_attr *attr) +{ + struct qlnxr_dev *dev; + struct ecore_rdma_port *rdma_port; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (port > 1) { + QL_DPRINT12(ha, "port [%d] > 1 \n", port); + return -EINVAL; + } + + if (dev->rdma_ctx == NULL) { + QL_DPRINT12(ha, "rdma_ctx == NULL\n"); + return -EINVAL; + } + + rdma_port = ecore_rdma_query_port(dev->rdma_ctx); + memset(attr, 0, sizeof *attr); + + if (rdma_port->port_state == ECORE_RDMA_PORT_UP) { + attr->state = IB_PORT_ACTIVE; + attr->phys_state = 5; + } else { + attr->state = IB_PORT_DOWN; + attr->phys_state = 3; + } + + attr->max_mtu = IB_MTU_4096; + attr->active_mtu = iboe_get_mtu(dev->ha->ifp->if_mtu); + attr->lid = 0; + attr->lmc = 0; + attr->sm_lid = 0; + attr->sm_sl = 0; + attr->port_cap_flags = 0; + + if (QLNX_IS_IWARP(dev)) { + attr->gid_tbl_len = 1; + attr->pkey_tbl_len = 1; + } else { + attr->gid_tbl_len = QLNXR_MAX_SGID; + attr->pkey_tbl_len = QLNXR_ROCE_PKEY_TABLE_LEN; + } + + attr->bad_pkey_cntr = rdma_port->pkey_bad_counter; + attr->qkey_viol_cntr = 0; + + get_link_speed_and_width(rdma_port->link_speed, + &attr->active_speed, &attr->active_width); + + attr->max_msg_sz = rdma_port->max_msg_size; + attr->max_vl_num = 4; /* TODO -> figure this one out... 
*/ + + QL_DPRINT12(ha, "state = %d phys_state = %d " + " link_speed = %d active_speed = %d active_width = %d" + " attr->gid_tbl_len = %d attr->pkey_tbl_len = %d" + " max_msg_sz = 0x%x max_vl_num = 0x%x \n", + attr->state, attr->phys_state, + rdma_port->link_speed, attr->active_speed, + attr->active_width, attr->gid_tbl_len, attr->pkey_tbl_len, + attr->max_msg_sz, attr->max_vl_num); + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +int +qlnxr_modify_port(struct ib_device *ibdev, uint8_t port, int mask, + struct ib_port_modify *props) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (port > 1) { + QL_DPRINT12(ha, "port (%d) > 1\n", port); + return -EINVAL; + } + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +enum rdma_link_layer +qlnxr_link_layer(struct ib_device *ibdev, uint8_t port_num) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "ibdev = %p port_num = 0x%x\n", ibdev, port_num); + + return IB_LINK_LAYER_ETHERNET; +} + +struct ib_pd * +qlnxr_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct qlnxr_pd *pd = NULL; + u16 pd_id; + int rc; + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "ibdev = %p context = %p" + " udata = %p enter\n", ibdev, context, udata); + + if (dev->rdma_ctx == NULL) { + QL_DPRINT11(ha, "dev->rdma_ctx = NULL\n"); + rc = -1; + goto err; + } + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) { + rc = -ENOMEM; + QL_DPRINT11(ha, "kzalloc(pd) = NULL\n"); + goto err; + } + + rc = ecore_rdma_alloc_pd(dev->rdma_ctx, &pd_id); + if (rc) { + QL_DPRINT11(ha, "ecore_rdma_alloc_pd failed\n"); + goto err; + } + + pd->pd_id = pd_id; + + if (udata && context) { + + rc = ib_copy_to_udata(udata, &pd->pd_id, sizeof(pd->pd_id)); + if (rc) { + QL_DPRINT11(ha, "ib_copy_to_udata failed\n"); + ecore_rdma_free_pd(dev->rdma_ctx, pd_id); + goto err; + } + + pd->uctx = get_qlnxr_ucontext(context); + pd->uctx->pd = pd; + } + + atomic_add_rel_32(&dev->pd_count, 1); + QL_DPRINT12(ha, "exit [pd, pd_id, pd_count] = [%p, 0x%x, %d]\n", + pd, pd_id, dev->pd_count); + + return &pd->ibpd; + +err: + kfree(pd); + QL_DPRINT12(ha, "exit -1\n"); + return ERR_PTR(rc); +} + +int +qlnxr_dealloc_pd(struct ib_pd *ibpd) +{ + struct qlnxr_pd *pd; + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + pd = get_qlnxr_pd(ibpd); + dev = get_qlnxr_dev((ibpd->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (pd == NULL) { + QL_DPRINT11(ha, "pd = NULL\n"); + } else { + ecore_rdma_free_pd(dev->rdma_ctx, pd->pd_id); + kfree(pd); + atomic_subtract_rel_32(&dev->pd_count, 1); + QL_DPRINT12(ha, "exit [pd, pd_id, pd_count] = [%p, 0x%x, %d]\n", + pd, pd->pd_id, dev->pd_count); + } + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +#define ROCE_WQE_ELEM_SIZE sizeof(struct rdma_sq_sge) +#define RDMA_MAX_SGE_PER_SRQ (4) /* Should be part of HSI */ +/* Should be part of HSI */ +#define RDMA_MAX_SRQ_WQE_SIZE (RDMA_MAX_SGE_PER_SRQ + 1) /* +1 for header */ +#define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) + +static void qlnxr_cleanup_user(struct qlnxr_dev *, struct qlnxr_qp *); +static void qlnxr_cleanup_kernel(struct qlnxr_dev *, struct qlnxr_qp *); + +int +qlnxr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter 
index = 0x%x\n", index); + + if (index > QLNXR_ROCE_PKEY_TABLE_LEN) + return -EINVAL; + + *pkey = QLNXR_ROCE_PKEY_DEFAULT; + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + + +static inline bool +qlnxr_get_vlan_id_qp(qlnx_host_t *ha, struct ib_qp_attr *attr, int attr_mask, + u16 *vlan_id) +{ + bool ret = false; + + QL_DPRINT12(ha, "enter \n"); + + *vlan_id = 0; + +#if __FreeBSD_version >= 1100000 + u16 tmp_vlan_id; + +#if __FreeBSD_version >= 1102000 + union ib_gid *dgid; + + dgid = &attr->ah_attr.grh.dgid; + tmp_vlan_id = (dgid->raw[11] << 8) | dgid->raw[12]; + + if (!(tmp_vlan_id & ~EVL_VLID_MASK)) { + *vlan_id = tmp_vlan_id; + ret = true; + } +#else + tmp_vlan_id = attr->vlan_id; + + if ((attr_mask & IB_QP_VID) && (!(tmp_vlan_id & ~EVL_VLID_MASK))) { + *vlan_id = tmp_vlan_id; + ret = true; + } + +#endif /* #if __FreeBSD_version > 1102000 */ + +#else + ret = true; + +#endif /* #if __FreeBSD_version >= 1100000 */ + + QL_DPRINT12(ha, "exit vlan_id = 0x%x ret = %d \n", *vlan_id, ret); + + return (ret); +} + +static inline void +get_gid_info(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, + struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ecore_rdma_modify_qp_in_params *qp_params) +{ + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memcpy(&qp_params->sgid.bytes[0], + &dev->sgid_tbl[qp->sgid_idx].raw[0], + sizeof(qp_params->sgid.bytes)); + memcpy(&qp_params->dgid.bytes[0], + &attr->ah_attr.grh.dgid.raw[0], + sizeof(qp_params->dgid)); + + qlnxr_get_vlan_id_qp(ha, attr, attr_mask, &qp_params->vlan_id); + + for (i = 0; i < (sizeof(qp_params->sgid.dwords)/sizeof(uint32_t)); i++) { + qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]); + qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]); + } + + QL_DPRINT12(ha, "exit\n"); + return; +} + + + +static int +qlnxr_add_mmap(struct qlnxr_ucontext *uctx, u64 phy_addr, unsigned long len) +{ + struct qlnxr_mm *mm; + qlnx_host_t *ha; + + ha = uctx->dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + mm = kzalloc(sizeof(*mm), GFP_KERNEL); + if (mm == NULL) { + QL_DPRINT11(ha, "mm = NULL\n"); + return -ENOMEM; + } + + mm->key.phy_addr = phy_addr; + + /* This function might be called with a length which is not a multiple + * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel + * forces this granularity by increasing the requested size if needed. + * When qedr_mmap is called, it will search the list with the updated + * length as a key. To prevent search failures, the length is rounded up + * in advance to PAGE_SIZE. 
+ */ + mm->key.len = roundup(len, PAGE_SIZE); + INIT_LIST_HEAD(&mm->entry); + + mutex_lock(&uctx->mm_list_lock); + list_add(&mm->entry, &uctx->mm_head); + mutex_unlock(&uctx->mm_list_lock); + + QL_DPRINT12(ha, "added (addr=0x%llx,len=0x%lx) for ctx=%p\n", + (unsigned long long)mm->key.phy_addr, + (unsigned long)mm->key.len, uctx); + + return 0; +} + +static bool +qlnxr_search_mmap(struct qlnxr_ucontext *uctx, u64 phy_addr, unsigned long len) +{ + bool found = false; + struct qlnxr_mm *mm; + qlnx_host_t *ha; + + ha = uctx->dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + mutex_lock(&uctx->mm_list_lock); + list_for_each_entry(mm, &uctx->mm_head, entry) { + if (len != mm->key.len || phy_addr != mm->key.phy_addr) + continue; + + found = true; + break; + } + mutex_unlock(&uctx->mm_list_lock); + + QL_DPRINT12(ha, + "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, found=%d\n", + mm->key.phy_addr, mm->key.len, uctx, found); + + return found; +} + +struct +ib_ucontext *qlnxr_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + int rc; + struct qlnxr_ucontext *ctx; + struct qlnxr_alloc_ucontext_resp uresp; + struct qlnxr_dev *dev = get_qlnxr_dev(ibdev); + qlnx_host_t *ha = dev->ha; + struct ecore_rdma_add_user_out_params oparams; + + if (!udata) { + return ERR_PTR(-EFAULT); + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + rc = ecore_rdma_add_user(dev->rdma_ctx, &oparams); + if (rc) { + QL_DPRINT12(ha, + "Failed to allocate a DPI for a new RoCE application " + ",rc = %d. To overcome this, consider to increase " + "the number of DPIs, increase the doorbell BAR size " + "or just close unnecessary RoCE applications. In " + "order to increase the number of DPIs consult the " + "README\n", rc); + goto err; + } + + ctx->dpi = oparams.dpi; + ctx->dpi_addr = oparams.dpi_addr; + ctx->dpi_phys_addr = oparams.dpi_phys_addr; + ctx->dpi_size = oparams.dpi_size; + INIT_LIST_HEAD(&ctx->mm_head); + mutex_init(&ctx->mm_list_lock); + + memset(&uresp, 0, sizeof(uresp)); + uresp.dpm_enabled = offsetof(struct qlnxr_alloc_ucontext_resp, dpm_enabled) + < udata->outlen ? dev->user_dpm_enabled : 0; //TODO: figure this out + uresp.wids_enabled = offsetof(struct qlnxr_alloc_ucontext_resp, wids_enabled) + < udata->outlen ? 1 : 0; //TODO: figure this out + uresp.wid_count = offsetof(struct qlnxr_alloc_ucontext_resp, wid_count) + < udata->outlen ? 
oparams.wid_count : 0; //TODO: figure this out + uresp.db_pa = ctx->dpi_phys_addr; + uresp.db_size = ctx->dpi_size; + uresp.max_send_wr = dev->attr.max_sqe; + uresp.max_recv_wr = dev->attr.max_rqe; + uresp.max_srq_wr = dev->attr.max_srq_wr; + uresp.sges_per_send_wr = QLNXR_MAX_SQE_ELEMENTS_PER_SQE; + uresp.sges_per_recv_wr = QLNXR_MAX_RQE_ELEMENTS_PER_RQE; + uresp.sges_per_srq_wr = dev->attr.max_srq_sge; + uresp.max_cqes = QLNXR_MAX_CQES; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rc) + goto err; + + ctx->dev = dev; + + rc = qlnxr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size); + if (rc) + goto err; + QL_DPRINT12(ha, "Allocated user context %p\n", + &ctx->ibucontext); + + return &ctx->ibucontext; +err: + kfree(ctx); + return ERR_PTR(rc); +} + +int +qlnxr_dealloc_ucontext(struct ib_ucontext *ibctx) +{ + struct qlnxr_ucontext *uctx = get_qlnxr_ucontext(ibctx); + struct qlnxr_dev *dev = uctx->dev; + qlnx_host_t *ha = dev->ha; + struct qlnxr_mm *mm, *tmp; + int status = 0; + + QL_DPRINT12(ha, "Deallocating user context %p\n", + uctx); + + if (dev) { + ecore_rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi); + } + + list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { + QL_DPRINT12(ha, "deleted addr= 0x%llx, len = 0x%lx for" + " ctx=%p\n", + mm->key.phy_addr, mm->key.len, uctx); + list_del(&mm->entry); + kfree(mm); + } + kfree(uctx); + return status; +} + +int +qlnxr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct qlnxr_ucontext *ucontext = get_qlnxr_ucontext(context); + struct qlnxr_dev *dev = get_qlnxr_dev((context->device)); + unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; + u64 unmapped_db; + unsigned long len = (vma->vm_end - vma->vm_start); + int rc = 0; + bool found; + qlnx_host_t *ha; + + ha = dev->ha; + +#if __FreeBSD_version > 1102000 + unmapped_db = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size); +#else + unmapped_db = dev->db_phys_addr; +#endif /* #if __FreeBSD_version > 1102000 */ + + QL_DPRINT12(ha, "qedr_mmap enter vm_page=0x%lx" + " vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n", + vm_page, vma->vm_pgoff, unmapped_db, + dev->db_size, len); + + if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { + QL_DPRINT11(ha, "Vma_start not page aligned " + "vm_start = %ld vma_end = %ld\n", vma->vm_start, + vma->vm_end); + return -EINVAL; + } + + found = qlnxr_search_mmap(ucontext, vm_page, len); + if (!found) { + QL_DPRINT11(ha, "Vma_pgoff not found in mapped array = %ld\n", + vma->vm_pgoff); + return -EINVAL; + } + + QL_DPRINT12(ha, "Mapping doorbell bar\n"); + +#if __FreeBSD_version > 1102000 + + if ((vm_page < unmapped_db) || + ((vm_page + len) > (unmapped_db + ucontext->dpi_size))) { + QL_DPRINT11(ha, "failed pages are outside of dpi;" + "page address=0x%lx, unmapped_db=0x%lx, dpi_size=0x%x\n", + vm_page, unmapped_db, ucontext->dpi_size); + return -EINVAL; + } + + if (vma->vm_flags & VM_READ) { + QL_DPRINT11(ha, "failed mmap, cannot map doorbell bar for read\n"); + return -EINVAL; + } + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, + vma->vm_page_prot); + +#else + + if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db + + dev->db_size))) { + + QL_DPRINT12(ha, "Mapping doorbell bar\n"); + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + PAGE_SIZE, vma->vm_page_prot); + } else { + QL_DPRINT12(ha, "Mapping chains\n"); + rc = 
io_remap_pfn_range(vma, vma->vm_start, + vma->vm_pgoff, len, vma->vm_page_prot); + } + +#endif /* #if __FreeBSD_version > 1102000 */ + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +struct ib_mr * +qlnxr_get_dma_mr(struct ib_pd *ibpd, int acc) +{ + struct qlnxr_mr *mr; + struct qlnxr_dev *dev = get_qlnxr_dev((ibpd->device)); + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (acc & IB_ACCESS_MW_BIND) { + QL_DPRINT12(ha, "Unsupported access flags received for dma mr\n"); + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + rc = -ENOMEM; + QL_DPRINT12(ha, "kzalloc(mr) failed %d\n", rc); + goto err0; + } + + mr->type = QLNXR_MR_DMA; + + rc = ecore_rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + if (rc) { + QL_DPRINT12(ha, "ecore_rdma_alloc_tid failed %d\n", rc); + goto err1; + } + + /* index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = ECORE_RDMA_TID_REGISTERED_MR; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hw_mr.dma_mr = true; + + rc = ecore_rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + QL_DPRINT12(ha, "ecore_rdma_register_tid failed %d\n", rc); + goto err2; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + + if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || + mr->hw_mr.remote_atomic) { + mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + } + + QL_DPRINT12(ha, "lkey = %x\n", mr->ibmr.lkey); + + return &mr->ibmr; + +err2: + ecore_rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + kfree(mr); +err0: + QL_DPRINT12(ha, "exit [%d]\n", rc); + + return ERR_PTR(rc); +} + +static void +qlnxr_free_pbl(struct qlnxr_dev *dev, struct qlnxr_pbl_info *pbl_info, + struct qlnxr_pbl *pbl) +{ + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + for (i = 0; i < pbl_info->num_pbls; i++) { + if (!pbl[i].va) + continue; + qlnx_dma_free_coherent(&dev->ha->cdev, pbl[i].va, pbl[i].pa, + pbl_info->pbl_size); + } + kfree(pbl); + + QL_DPRINT12(ha, "exit\n"); + return; +} + +#define MIN_FW_PBL_PAGE_SIZE (4*1024) +#define MAX_FW_PBL_PAGE_SIZE (64*1024) + +#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64)) +#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE) +#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE*MAX_PBES_ON_PAGE) + +static struct qlnxr_pbl * +qlnxr_alloc_pbl_tbl(struct qlnxr_dev *dev, + struct qlnxr_pbl_info *pbl_info, gfp_t flags) +{ + void *va; + dma_addr_t pa; + dma_addr_t *pbl_main_tbl; + struct qlnxr_pbl *pbl_table; + int i, rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + pbl_table = kzalloc(sizeof(*pbl_table) * pbl_info->num_pbls, flags); + + if (!pbl_table) { + QL_DPRINT12(ha, "pbl_table = NULL\n"); + return NULL; + } + + for (i = 0; i < pbl_info->num_pbls; i++) { + va = qlnx_dma_alloc_coherent(&dev->ha->cdev, &pa, pbl_info->pbl_size); + if (!va) { + QL_DPRINT11(ha, "Failed to allocate pbl#%d\n", i); + rc = -ENOMEM; + goto err; + } + memset(va, 0, pbl_info->pbl_size); + pbl_table[i].va = va; + pbl_table[i].pa = pa; + } + + /* Two-Layer PBLs, if we have more than one pbl we need to initialize + * the first one with physical pointers to all of the rest + */ + pbl_main_tbl = (dma_addr_t 
*)pbl_table[0].va; + for (i = 0; i < pbl_info->num_pbls - 1; i++) + pbl_main_tbl[i] = pbl_table[i + 1].pa; + + QL_DPRINT12(ha, "exit\n"); + return pbl_table; + +err: + qlnxr_free_pbl(dev, pbl_info, pbl_table); + + QL_DPRINT12(ha, "exit with error\n"); + return NULL; +} + +static int +qlnxr_prepare_pbl_tbl(struct qlnxr_dev *dev, + struct qlnxr_pbl_info *pbl_info, + u32 num_pbes, + int two_layer_capable) +{ + u32 pbl_capacity; + u32 pbl_size; + u32 num_pbls; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) { + if (num_pbes > MAX_PBES_TWO_LAYER) { + QL_DPRINT11(ha, "prepare pbl table: too many pages %d\n", + num_pbes); + return -EINVAL; + } + + /* calculate required pbl page size */ + pbl_size = MIN_FW_PBL_PAGE_SIZE; + pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) * + NUM_PBES_ON_PAGE(pbl_size); + + while (pbl_capacity < num_pbes) { + pbl_size *= 2; + pbl_capacity = pbl_size / sizeof(u64); + pbl_capacity = pbl_capacity * pbl_capacity; + } + + num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size)); + num_pbls++; /* One for the layer0 ( points to the pbls) */ + pbl_info->two_layered = true; + } else { + /* One layered PBL */ + num_pbls = 1; + pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE, \ + roundup_pow_of_two((num_pbes * sizeof(u64)))); + pbl_info->two_layered = false; + } + + pbl_info->num_pbls = num_pbls; + pbl_info->pbl_size = pbl_size; + pbl_info->num_pbes = num_pbes; + + QL_DPRINT12(ha, "prepare pbl table: num_pbes=%d, num_pbls=%d pbl_size=%d\n", + pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size); + + return 0; +} + +#define upper_32_bits(x) (uint32_t)(x >> 32) +#define lower_32_bits(x) (uint32_t)(x) + +static void +qlnxr_populate_pbls(struct qlnxr_dev *dev, struct ib_umem *umem, + struct qlnxr_pbl *pbl, struct qlnxr_pbl_info *pbl_info) +{ + struct regpair *pbe; + struct qlnxr_pbl *pbl_tbl; + struct scatterlist *sg; + int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0; + qlnx_host_t *ha; + +#ifdef DEFINE_IB_UMEM_WITH_CHUNK + int i; + struct ib_umem_chunk *chunk = NULL; +#else + int entry; +#endif + + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!pbl_info) { + QL_DPRINT11(ha, "PBL_INFO not initialized\n"); + return; + } + + if (!pbl_info->num_pbes) { + QL_DPRINT11(ha, "pbl_info->num_pbes == 0\n"); + return; + } + + /* If we have a two layered pbl, the first pbl points to the rest + * of the pbls and the first entry lays on the second pbl in the table + */ + if (pbl_info->two_layered) + pbl_tbl = &pbl[1]; + else + pbl_tbl = pbl; + + pbe = (struct regpair *)pbl_tbl->va; + if (!pbe) { + QL_DPRINT12(ha, "pbe is NULL\n"); + return; + } + + pbe_cnt = 0; + + shift = ilog2(umem->page_size); + +#ifndef DEFINE_IB_UMEM_WITH_CHUNK + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + +#else + list_for_each_entry(chunk, &umem->chunk_list, list) { + /* get all the dma regions from the chunk. 
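+	 * Each chunk carries chunk->nmap DMA-mapped scatterlist entries in
+	 * chunk->page_list[]; they are walked the same way the non-chunk
+	 * path walks the umem scatter/gather table.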
*/ + for (i = 0; i < chunk->nmap; i++) { + sg = &chunk->page_list[i]; +#endif + pages = sg_dma_len(sg) >> shift; + for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { + /* store the page address in pbe */ + pbe->lo = + cpu_to_le32(sg_dma_address(sg) + + (umem->page_size * pg_cnt)); + pbe->hi = + cpu_to_le32(upper_32_bits + ((sg_dma_address(sg) + + umem->page_size * pg_cnt))); + + QL_DPRINT12(ha, + "Populate pbl table:" + " pbe->addr=0x%x:0x%x " + " pbe_cnt = %d total_num_pbes=%d" + " pbe=%p\n", pbe->lo, pbe->hi, pbe_cnt, + total_num_pbes, pbe); + + pbe_cnt ++; + total_num_pbes ++; + pbe++; + + if (total_num_pbes == pbl_info->num_pbes) + return; + + /* if the given pbl is full storing the pbes, + * move to next pbl. + */ + if (pbe_cnt == + (pbl_info->pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct regpair *)pbl_tbl->va; + pbe_cnt = 0; + } + } +#ifdef DEFINE_IB_UMEM_WITH_CHUNK + } +#endif + } + QL_DPRINT12(ha, "exit\n"); + return; +} + +static void +free_mr_info(struct qlnxr_dev *dev, struct mr_info *info) +{ + struct qlnxr_pbl *pbl, *tmp; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (info->pbl_table) + list_add_tail(&info->pbl_table->list_entry, + &info->free_pbl_list); + + if (!list_empty(&info->inuse_pbl_list)) + list_splice(&info->inuse_pbl_list, &info->free_pbl_list); + + list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) { + list_del(&pbl->list_entry); + qlnxr_free_pbl(dev, &info->pbl_info, pbl); + } + QL_DPRINT12(ha, "exit\n"); + + return; +} + +static int +qlnxr_init_mr_info(struct qlnxr_dev *dev, struct mr_info *info, + size_t page_list_len, bool two_layered) +{ + int rc; + struct qlnxr_pbl *tmp; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + INIT_LIST_HEAD(&info->free_pbl_list); + INIT_LIST_HEAD(&info->inuse_pbl_list); + + rc = qlnxr_prepare_pbl_tbl(dev, &info->pbl_info, + page_list_len, two_layered); + if (rc) { + QL_DPRINT11(ha, "qlnxr_prepare_pbl_tbl [%d]\n", rc); + goto done; + } + + info->pbl_table = qlnxr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); + + if (!info->pbl_table) { + rc = -ENOMEM; + QL_DPRINT11(ha, "qlnxr_alloc_pbl_tbl returned NULL\n"); + goto done; + } + + QL_DPRINT12(ha, "pbl_table_pa = %pa\n", &info->pbl_table->pa); + + /* in usual case we use 2 PBLs, so we add one to free + * list and allocating another one + */ + tmp = qlnxr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); + + if (!tmp) { + QL_DPRINT11(ha, "Extra PBL is not allocated\n"); + goto done; /* it's OK if second allocation fails, so rc = 0*/ + } + + list_add_tail(&tmp->list_entry, &info->free_pbl_list); + + QL_DPRINT12(ha, "extra pbl_table_pa = %pa\n", &tmp->pa); + +done: + if (rc) + free_mr_info(dev, info); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + + return rc; +} + + +struct ib_mr * +#if __FreeBSD_version >= 1102000 +qlnxr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, + u64 usr_addr, int acc, struct ib_udata *udata) +#else +qlnxr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, + u64 usr_addr, int acc, struct ib_udata *udata, int mr_id) +#endif /* #if __FreeBSD_version >= 1102000 */ +{ + int rc = -ENOMEM; + struct qlnxr_dev *dev = get_qlnxr_dev((ibpd->device)); + struct qlnxr_mr *mr; + struct qlnxr_pd *pd; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + pd = get_qlnxr_pd(ibpd); + + QL_DPRINT12(ha, "qedr_register user mr pd = %d" + " start = %lld, len = %lld, usr_addr = %lld, acc = %d\n", + pd->pd_id, start, len, usr_addr, acc); + + if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & 
IB_ACCESS_LOCAL_WRITE)) { + QL_DPRINT11(ha, + "(acc & IB_ACCESS_REMOTE_WRITE &&" + " !(acc & IB_ACCESS_LOCAL_WRITE))\n"); + return ERR_PTR(-EINVAL); + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + QL_DPRINT11(ha, "kzalloc(mr) failed\n"); + return ERR_PTR(rc); + } + + mr->type = QLNXR_MR_USER; + + mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); + if (IS_ERR(mr->umem)) { + rc = -EFAULT; + QL_DPRINT11(ha, "ib_umem_get failed [%p]\n", mr->umem); + goto err0; + } + + rc = qlnxr_init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1); + if (rc) { + QL_DPRINT11(ha, + "qlnxr_init_mr_info failed [%d]\n", rc); + goto err1; + } + + qlnxr_populate_pbls(dev, mr->umem, mr->info.pbl_table, + &mr->info.pbl_info); + + rc = ecore_rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + + if (rc) { + QL_DPRINT11(ha, "roce alloc tid returned an error %d\n", rc); + goto err1; + } + + /* index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = ECORE_RDMA_TID_REGISTERED_MR; + mr->hw_mr.key = 0; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hw_mr.mw_bind = false; /* TBD MW BIND */ + mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa; + mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; + mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); + mr->hw_mr.page_size_log = ilog2(mr->umem->page_size); /* for the MR pages */ + +#if __FreeBSD_version >= 1102000 + mr->hw_mr.fbo = ib_umem_offset(mr->umem); +#else + mr->hw_mr.fbo = mr->umem->offset; +#endif + mr->hw_mr.length = len; + mr->hw_mr.vaddr = usr_addr; + mr->hw_mr.zbva = false; /* TBD figure when this should be true */ + mr->hw_mr.phy_mr = false; /* Fast MR - True, Regular Register False */ + mr->hw_mr.dma_mr = false; + + rc = ecore_rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + QL_DPRINT11(ha, "roce register tid returned an error %d\n", rc); + goto err2; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || + mr->hw_mr.remote_atomic) + mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + + QL_DPRINT12(ha, "register user mr lkey: %x\n", mr->ibmr.lkey); + + return (&mr->ibmr); + +err2: + ecore_rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + qlnxr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); +err0: + kfree(mr); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return (ERR_PTR(rc)); +} + +int +qlnxr_dereg_mr(struct ib_mr *ib_mr) +{ + struct qlnxr_mr *mr = get_qlnxr_mr(ib_mr); + struct qlnxr_dev *dev = get_qlnxr_dev((ib_mr->device)); + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if ((mr->type != QLNXR_MR_DMA) && (mr->type != QLNXR_MR_FRMR)) + qlnxr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + + /* it could be user registered memory. 
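+	 * If so, drop the umem reference taken by ib_umem_get() in
+	 * qlnxr_reg_user_mr(); DMA MRs never have an umem attached.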
*/ + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr->pages); + + kfree(mr); + + QL_DPRINT12(ha, "exit\n"); + return rc; +} + +static int +qlnxr_copy_cq_uresp(struct qlnxr_dev *dev, + struct qlnxr_cq *cq, struct ib_udata *udata) +{ + struct qlnxr_create_cq_uresp uresp; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(&uresp, 0, sizeof(uresp)); + + uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + uresp.icid = cq->icid; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + + if (rc) { + QL_DPRINT12(ha, "ib_copy_to_udata error cqid=0x%x[%d]\n", + cq->icid, rc); + } + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +static void +consume_cqe(struct qlnxr_cq *cq) +{ + + if (cq->latest_cqe == cq->toggle_cqe) + cq->pbl_toggle ^= RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT_MASK; + + cq->latest_cqe = ecore_chain_consume(&cq->pbl); +} + +static inline int +qlnxr_align_cq_entries(int entries) +{ + u64 size, aligned_size; + + /* We allocate an extra entry that we don't report to the FW. + * Why? + * The CQE size is 32 bytes but the FW writes in chunks of 64 bytes + * (for performance purposes). Allocating an extra entry and telling + * the FW we have less prevents overwriting the first entry in case of + * a wrap i.e. when the FW writes the last entry and the application + * hasn't read the first one. + */ + size = (entries + 1) * QLNXR_CQE_SIZE; + + /* We align to PAGE_SIZE. + * Why? + * Since the CQ is going to be mapped and the mapping is anyhow in whole + * kernel pages we benefit from the possibly extra CQEs. + */ + aligned_size = ALIGN(size, PAGE_SIZE); + + /* note: for CQs created in user space the result of this function + * should match the size mapped in user space + */ + return (aligned_size / QLNXR_CQE_SIZE); +} + +static inline int +qlnxr_init_user_queue(struct ib_ucontext *ib_ctx, struct qlnxr_dev *dev, + struct qlnxr_userq *q, u64 buf_addr, size_t buf_len, + int access, int dmasync, int alloc_and_init) +{ + int page_cnt; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + q->buf_addr = buf_addr; + q->buf_len = buf_len; + + QL_DPRINT12(ha, "buf_addr : %llx, buf_len : %x, access : %x" + " dmasync : %x\n", q->buf_addr, q->buf_len, + access, dmasync); + + q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync); + + if (IS_ERR(q->umem)) { + QL_DPRINT11(ha, "ib_umem_get failed [%lx]\n", PTR_ERR(q->umem)); + return PTR_ERR(q->umem); + } + + page_cnt = ib_umem_page_count(q->umem); + rc = qlnxr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, + 0 /* SQ and RQ don't support dual layer pbl. + * CQ may, but this is yet uncoded. 
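+				  * Passing 0 here therefore always yields a
+				  * single-layer PBL for user-space queues.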
+ */); + if (rc) { + QL_DPRINT11(ha, "qlnxr_prepare_pbl_tbl failed [%d]\n", rc); + goto err; + } + + if (alloc_and_init) { + q->pbl_tbl = qlnxr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL); + + if (!q->pbl_tbl) { + QL_DPRINT11(ha, "qlnxr_alloc_pbl_tbl failed\n"); + rc = -ENOMEM; + goto err; + } + + qlnxr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info); + } else { + q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL); + + if (!q->pbl_tbl) { + QL_DPRINT11(ha, "qlnxr_alloc_pbl_tbl failed\n"); + rc = -ENOMEM; + goto err; + } + } + + QL_DPRINT12(ha, "exit\n"); + return 0; + +err: + ib_umem_release(q->umem); + q->umem = NULL; + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +#if __FreeBSD_version >= 1102000 + +struct ib_cq * +qlnxr_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata) + +#else + +#if __FreeBSD_version >= 1100000 + +struct ib_cq * +qlnxr_create_cq(struct ib_device *ibdev, + struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata) + +#else + +struct ib_cq * +qlnxr_create_cq(struct ib_device *ibdev, + int entries, + int vector, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata) +#endif /* #if __FreeBSD_version >= 1100000 */ + +#endif /* #if __FreeBSD_version >= 1102000 */ +{ + struct qlnxr_ucontext *ctx; + struct ecore_rdma_destroy_cq_out_params destroy_oparams; + struct ecore_rdma_destroy_cq_in_params destroy_iparams; + struct qlnxr_dev *dev; + struct ecore_rdma_create_cq_in_params params; + struct qlnxr_create_cq_ureq ureq; + +#if __FreeBSD_version >= 1100000 + int vector = attr->comp_vector; + int entries = attr->cqe; +#endif + struct qlnxr_cq *cq; + int chain_entries, rc, page_cnt; + u64 pbl_ptr; + u16 icid; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "called from %s. entries = %d, " + "vector = %d\n", + (udata ? "User Lib" : "Kernel"), entries, vector); + + memset(¶ms, 0, sizeof(struct ecore_rdma_create_cq_in_params)); + memset(&destroy_iparams, 0, sizeof(struct ecore_rdma_destroy_cq_in_params)); + memset(&destroy_oparams, 0, sizeof(struct ecore_rdma_destroy_cq_out_params)); + + if (entries > QLNXR_MAX_CQES) { + QL_DPRINT11(ha, + "the number of entries %d is too high. 
" + "Must be equal or below %d.\n", + entries, QLNXR_MAX_CQES); + return ERR_PTR(-EINVAL); + } + chain_entries = qlnxr_align_cq_entries(entries); + chain_entries = min_t(int, chain_entries, QLNXR_MAX_CQES); + + cq = qlnx_zalloc((sizeof(struct qlnxr_cq))); + + if (!cq) + return ERR_PTR(-ENOMEM); + + if (udata) { + memset(&ureq, 0, sizeof(ureq)); + + if (ib_copy_from_udata(&ureq, udata, + min(sizeof(ureq), udata->inlen))) { + QL_DPRINT11(ha, "ib_copy_from_udata failed\n"); + goto err0; + } + + if (!ureq.len) { + QL_DPRINT11(ha, "ureq.len == 0\n"); + goto err0; + } + + cq->cq_type = QLNXR_CQ_TYPE_USER; + + qlnxr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr, ureq.len, + IB_ACCESS_LOCAL_WRITE, 1, 1); + + pbl_ptr = cq->q.pbl_tbl->pa; + page_cnt = cq->q.pbl_info.num_pbes; + cq->ibcq.cqe = chain_entries; + } else { + cq->cq_type = QLNXR_CQ_TYPE_KERNEL; + + rc = ecore_chain_alloc(&dev->ha->cdev, + ECORE_CHAIN_USE_TO_CONSUME, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U32, + chain_entries, + sizeof(union roce_cqe), + &cq->pbl, NULL); + + if (rc) + goto err1; + + page_cnt = ecore_chain_get_page_cnt(&cq->pbl); + pbl_ptr = ecore_chain_get_pbl_phys(&cq->pbl); + cq->ibcq.cqe = cq->pbl.capacity; + } + + params.cq_handle_hi = upper_32_bits((uintptr_t)cq); + params.cq_handle_lo = lower_32_bits((uintptr_t)cq); + params.cnq_id = vector; + params.cq_size = chain_entries - 1; + params.pbl_num_pages = page_cnt; + params.pbl_ptr = pbl_ptr; + params.pbl_two_level = 0; + + if (ib_ctx != NULL) { + ctx = get_qlnxr_ucontext(ib_ctx); + params.dpi = ctx->dpi; + } else { + params.dpi = dev->dpi; + } + + rc = ecore_rdma_create_cq(dev->rdma_ctx, ¶ms, &icid); + if (rc) + goto err2; + + cq->icid = icid; + cq->sig = QLNXR_CQ_MAGIC_NUMBER; + spin_lock_init(&cq->cq_lock); + + if (ib_ctx) { + rc = qlnxr_copy_cq_uresp(dev, cq, udata); + if (rc) + goto err3; + } else { + /* Generate doorbell address. + * Configure bits 3-9 with DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT. + * TODO: consider moving to device scope as it is a function of + * the device. + * TODO: add ifdef if plan to support 16 bit. 
+ */ + cq->db_addr = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + cq->db.data.icid = cq->icid; + cq->db.data.params = DB_AGG_CMD_SET << + RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; + + /* point to the very last element, passing it we will toggle */ + cq->toggle_cqe = ecore_chain_get_last_elem(&cq->pbl); + cq->pbl_toggle = RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT_MASK; + + /* must be different from pbl_toggle */ + cq->latest_cqe = NULL; + consume_cqe(cq); + cq->cq_cons = ecore_chain_get_cons_idx_u32(&cq->pbl); + } + + QL_DPRINT12(ha, "exit icid = 0x%0x, addr = %p," + " number of entries = 0x%x\n", + cq->icid, cq, params.cq_size); + QL_DPRINT12(ha,"cq_addr = %p\n", cq); + return &cq->ibcq; + +err3: + destroy_iparams.icid = cq->icid; + ecore_rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams, &destroy_oparams); +err2: + if (udata) + qlnxr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); + else + ecore_chain_free(&dev->ha->cdev, &cq->pbl); +err1: + if (udata) + ib_umem_release(cq->q.umem); +err0: + kfree(cq); + + QL_DPRINT12(ha, "exit error\n"); + + return ERR_PTR(-EINVAL); +} + +int qlnxr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) +{ + int status = 0; + struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device)); + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter/exit\n"); + + return status; +} + +int +qlnxr_destroy_cq(struct ib_cq *ibcq) +{ + struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device)); + struct ecore_rdma_destroy_cq_out_params oparams; + struct ecore_rdma_destroy_cq_in_params iparams; + struct qlnxr_cq *cq = get_qlnxr_cq(ibcq); + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter cq_id = %d\n", cq->icid); + + cq->destroyed = 1; + + /* TODO: Syncronize irq of the CNQ the CQ belongs to for validation + * that all completions with notification are dealt with. 
The rest + * of the completions are not interesting + */ + + /* GSIs CQs are handled by driver, so they don't exist in the FW */ + + if (cq->cq_type != QLNXR_CQ_TYPE_GSI) { + + iparams.icid = cq->icid; + + rc = ecore_rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); + + if (rc) { + QL_DPRINT12(ha, "ecore_rdma_destroy_cq failed cq_id = %d\n", + cq->icid); + return rc; + } + + QL_DPRINT12(ha, "free cq->pbl cq_id = %d\n", cq->icid); + ecore_chain_free(&dev->ha->cdev, &cq->pbl); + } + + if (ibcq->uobject && ibcq->uobject->context) { + qlnxr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); + ib_umem_release(cq->q.umem); + } + + cq->sig = ~cq->sig; + + kfree(cq); + + QL_DPRINT12(ha, "exit cq_id = %d\n", cq->icid); + + return rc; +} + +static int +qlnxr_check_qp_attrs(struct ib_pd *ibpd, + struct qlnxr_dev *dev, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct ecore_rdma_device *qattr; + qlnx_host_t *ha; + + qattr = ecore_rdma_query_device(dev->rdma_ctx); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + QL_DPRINT12(ha, "attrs->sq_sig_type = %d\n", attrs->sq_sig_type); + QL_DPRINT12(ha, "attrs->qp_type = %d\n", attrs->qp_type); + QL_DPRINT12(ha, "attrs->create_flags = %d\n", attrs->create_flags); + +#if __FreeBSD_version < 1102000 + QL_DPRINT12(ha, "attrs->qpg_type = %d\n", attrs->qpg_type); +#endif + + QL_DPRINT12(ha, "attrs->port_num = %d\n", attrs->port_num); + QL_DPRINT12(ha, "attrs->cap.max_send_wr = 0x%x\n", attrs->cap.max_send_wr); + QL_DPRINT12(ha, "attrs->cap.max_recv_wr = 0x%x\n", attrs->cap.max_recv_wr); + QL_DPRINT12(ha, "attrs->cap.max_send_sge = 0x%x\n", attrs->cap.max_send_sge); + QL_DPRINT12(ha, "attrs->cap.max_recv_sge = 0x%x\n", attrs->cap.max_recv_sge); + QL_DPRINT12(ha, "attrs->cap.max_inline_data = 0x%x\n", + attrs->cap.max_inline_data); + +#if __FreeBSD_version < 1102000 + QL_DPRINT12(ha, "attrs->cap.qpg_tss_mask_sz = 0x%x\n", + attrs->cap.qpg_tss_mask_sz); +#endif + + QL_DPRINT12(ha, "\n\nqattr->vendor_id = 0x%x\n", qattr->vendor_id); + QL_DPRINT12(ha, "qattr->vendor_part_id = 0x%x\n", qattr->vendor_part_id); + QL_DPRINT12(ha, "qattr->hw_ver = 0x%x\n", qattr->hw_ver); + QL_DPRINT12(ha, "qattr->fw_ver = %p\n", (void *)qattr->fw_ver); + QL_DPRINT12(ha, "qattr->node_guid = %p\n", (void *)qattr->node_guid); + QL_DPRINT12(ha, "qattr->sys_image_guid = %p\n", + (void *)qattr->sys_image_guid); + QL_DPRINT12(ha, "qattr->max_cnq = 0x%x\n", qattr->max_cnq); + QL_DPRINT12(ha, "qattr->max_sge = 0x%x\n", qattr->max_sge); + QL_DPRINT12(ha, "qattr->max_srq_sge = 0x%x\n", qattr->max_srq_sge); + QL_DPRINT12(ha, "qattr->max_inline = 0x%x\n", qattr->max_inline); + QL_DPRINT12(ha, "qattr->max_wqe = 0x%x\n", qattr->max_wqe); + QL_DPRINT12(ha, "qattr->max_srq_wqe = 0x%x\n", qattr->max_srq_wqe); + QL_DPRINT12(ha, "qattr->max_qp_resp_rd_atomic_resc = 0x%x\n", + qattr->max_qp_resp_rd_atomic_resc); + QL_DPRINT12(ha, "qattr->max_qp_req_rd_atomic_resc = 0x%x\n", + qattr->max_qp_req_rd_atomic_resc); + QL_DPRINT12(ha, "qattr->max_dev_resp_rd_atomic_resc = 0x%x\n", + qattr->max_dev_resp_rd_atomic_resc); + QL_DPRINT12(ha, "qattr->max_cq = 0x%x\n", qattr->max_cq); + QL_DPRINT12(ha, "qattr->max_qp = 0x%x\n", qattr->max_qp); + QL_DPRINT12(ha, "qattr->max_srq = 0x%x\n", qattr->max_srq); + QL_DPRINT12(ha, "qattr->max_mr = 0x%x\n", qattr->max_mr); + QL_DPRINT12(ha, "qattr->max_mr_size = %p\n", (void *)qattr->max_mr_size); + QL_DPRINT12(ha, "qattr->max_cqe = 0x%x\n", qattr->max_cqe); + QL_DPRINT12(ha, "qattr->max_mw = 0x%x\n", qattr->max_mw); + QL_DPRINT12(ha, "qattr->max_fmr = 0x%x\n", 
qattr->max_fmr); + QL_DPRINT12(ha, "qattr->max_mr_mw_fmr_pbl = 0x%x\n", + qattr->max_mr_mw_fmr_pbl); + QL_DPRINT12(ha, "qattr->max_mr_mw_fmr_size = %p\n", + (void *)qattr->max_mr_mw_fmr_size); + QL_DPRINT12(ha, "qattr->max_pd = 0x%x\n", qattr->max_pd); + QL_DPRINT12(ha, "qattr->max_ah = 0x%x\n", qattr->max_ah); + QL_DPRINT12(ha, "qattr->max_pkey = 0x%x\n", qattr->max_pkey); + QL_DPRINT12(ha, "qattr->max_srq_wr = 0x%x\n", qattr->max_srq_wr); + QL_DPRINT12(ha, "qattr->max_stats_queues = 0x%x\n", + qattr->max_stats_queues); + //QL_DPRINT12(ha, "qattr->dev_caps = 0x%x\n", qattr->dev_caps); + QL_DPRINT12(ha, "qattr->page_size_caps = %p\n", + (void *)qattr->page_size_caps); + QL_DPRINT12(ha, "qattr->dev_ack_delay = 0x%x\n", qattr->dev_ack_delay); + QL_DPRINT12(ha, "qattr->reserved_lkey = 0x%x\n", qattr->reserved_lkey); + QL_DPRINT12(ha, "qattr->bad_pkey_counter = 0x%x\n", + qattr->bad_pkey_counter); + + if ((attrs->qp_type == IB_QPT_GSI) && udata) { + QL_DPRINT12(ha, "unexpected udata when creating GSI QP\n"); + return -EINVAL; + } + + if (udata && !(ibpd->uobject && ibpd->uobject->context)) { + QL_DPRINT12(ha, "called from user without context\n"); + return -EINVAL; + } + + /* QP0... attrs->qp_type == IB_QPT_GSI */ + if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) { + QL_DPRINT12(ha, "unsupported qp type=0x%x requested\n", + attrs->qp_type); + return -EINVAL; + } + if (attrs->qp_type == IB_QPT_GSI && attrs->srq) { + QL_DPRINT12(ha, "cannot create GSI qp with SRQ\n"); + return -EINVAL; + } + /* Skip the check for QP1 to support CM size of 128 */ + if (attrs->cap.max_send_wr > qattr->max_wqe) { + QL_DPRINT12(ha, "cannot create a SQ with %d elements " + " (max_send_wr=0x%x)\n", + attrs->cap.max_send_wr, qattr->max_wqe); + return -EINVAL; + } + if (!attrs->srq && (attrs->cap.max_recv_wr > qattr->max_wqe)) { + QL_DPRINT12(ha, "cannot create a RQ with %d elements" + " (max_recv_wr=0x%x)\n", + attrs->cap.max_recv_wr, qattr->max_wqe); + return -EINVAL; + } + if (attrs->cap.max_inline_data > qattr->max_inline) { + QL_DPRINT12(ha, + "unsupported inline data size=0x%x " + "requested (max_inline=0x%x)\n", + attrs->cap.max_inline_data, qattr->max_inline); + return -EINVAL; + } + if (attrs->cap.max_send_sge > qattr->max_sge) { + QL_DPRINT12(ha, + "unsupported send_sge=0x%x " + "requested (max_send_sge=0x%x)\n", + attrs->cap.max_send_sge, qattr->max_sge); + return -EINVAL; + } + if (attrs->cap.max_recv_sge > qattr->max_sge) { + QL_DPRINT12(ha, + "unsupported recv_sge=0x%x requested " + " (max_recv_sge=0x%x)\n", + attrs->cap.max_recv_sge, qattr->max_sge); + return -EINVAL; + } + /* unprivileged user space cannot create special QP */ + if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + QL_DPRINT12(ha, + "userspace can't create special QPs of type=0x%x\n", + attrs->qp_type); + return -EINVAL; + } + /* allow creating only one GSI type of QP */ + if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) { + QL_DPRINT12(ha, + "create qp: GSI special QPs already created.\n"); + return -EINVAL; + } + + /* verify consumer QPs are not trying to use GSI QP's CQ */ + if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) { + struct qlnxr_cq *send_cq = get_qlnxr_cq(attrs->send_cq); + struct qlnxr_cq *recv_cq = get_qlnxr_cq(attrs->recv_cq); + + if ((send_cq->cq_type == QLNXR_CQ_TYPE_GSI) || + (recv_cq->cq_type == QLNXR_CQ_TYPE_GSI)) { + QL_DPRINT11(ha, "consumer QP cannot use GSI CQs.\n"); + return -EINVAL; + } + } + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +static int 
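The check routine above reduces to one pattern: query the device limits once with ecore_rdma_query_device() and reject any requested capability that exceeds them before anything is allocated. A minimal standalone sketch of that pattern, with illustrative structure names rather than the driver's own:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative device limits and requested capabilities (not the driver's
 * real structures). */
struct dev_limits {
        uint32_t max_wqe;
        uint32_t max_sge;
        uint32_t max_inline;
};

struct qp_request {
        uint32_t max_send_wr;
        uint32_t max_recv_wr;
        uint32_t max_send_sge;
        uint32_t max_recv_sge;
        uint32_t max_inline_data;
};

/* Same shape as the checks above: compare each requested value against the
 * queried limit and fail with -EINVAL before allocating anything. */
static int
check_qp_request(const struct qp_request *req, const struct dev_limits *lim)
{
        if (req->max_send_wr > lim->max_wqe || req->max_recv_wr > lim->max_wqe)
                return (-EINVAL);
        if (req->max_send_sge > lim->max_sge || req->max_recv_sge > lim->max_sge)
                return (-EINVAL);
        if (req->max_inline_data > lim->max_inline)
                return (-EINVAL);
        return (0);
}

int
main(void)
{
        struct dev_limits lim = { 16384, 4, 256 };
        struct qp_request ok = { 128, 128, 4, 4, 128 };
        struct qp_request bad = { 128, 128, 8, 4, 128 };

        printf("ok:  %d\n", check_qp_request(&ok, &lim));
        printf("bad: %d\n", check_qp_request(&bad, &lim));
        return (0);
}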
+qlnxr_copy_srq_uresp(struct qlnxr_dev *dev, + struct qlnxr_srq *srq, + struct ib_udata *udata) +{ + struct qlnxr_create_srq_uresp uresp; + qlnx_host_t *ha; + int rc; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(&uresp, 0, sizeof(uresp)); + + uresp.srq_id = srq->srq_id; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +static void +qlnxr_copy_rq_uresp(struct qlnxr_dev *dev, + struct qlnxr_create_qp_uresp *uresp, + struct qlnxr_qp *qp) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + /* Return if QP is associated with SRQ instead of RQ */ + QL_DPRINT12(ha, "enter qp->srq = %p\n", qp->srq); + + if (qp->srq) + return; + + /* iWARP requires two doorbells per RQ. */ + if (QLNX_IS_IWARP(dev)) { + + uresp->rq_db_offset = + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); + uresp->rq_db2_offset = + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); + + QL_DPRINT12(ha, "uresp->rq_db_offset = 0x%x " + "uresp->rq_db2_offset = 0x%x\n", + uresp->rq_db_offset, uresp->rq_db2_offset); + } else { + uresp->rq_db_offset = + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); + } + uresp->rq_icid = qp->icid; + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static void +qlnxr_copy_sq_uresp(struct qlnxr_dev *dev, + struct qlnxr_create_qp_uresp *uresp, + struct qlnxr_qp *qp) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + + /* iWARP uses the same cid for rq and sq*/ + if (QLNX_IS_IWARP(dev)) { + uresp->sq_icid = qp->icid; + QL_DPRINT12(ha, "uresp->sq_icid = 0x%x\n", uresp->sq_icid); + } else + uresp->sq_icid = qp->icid + 1; + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static int +qlnxr_copy_qp_uresp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ib_udata *udata) +{ + int rc; + struct qlnxr_create_qp_uresp uresp; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter qp->icid =0x%x\n", qp->icid); + + memset(&uresp, 0, sizeof(uresp)); + qlnxr_copy_sq_uresp(dev, &uresp, qp); + qlnxr_copy_rq_uresp(dev, &uresp, qp); + + uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; + uresp.qp_id = qp->qp_id; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + + +static void +qlnxr_set_common_qp_params(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_pd *pd, + struct ib_qp_init_attr *attrs) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + spin_lock_init(&qp->q_lock); + + atomic_set(&qp->refcnt, 1); + qp->pd = pd; + qp->sig = QLNXR_QP_MAGIC_NUMBER; + qp->qp_type = attrs->qp_type; + qp->max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE; + qp->sq.max_sges = attrs->cap.max_send_sge; + qp->state = ECORE_ROCE_QP_STATE_RESET; + qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; + qp->sq_cq = get_qlnxr_cq(attrs->send_cq); + qp->rq_cq = get_qlnxr_cq(attrs->recv_cq); + qp->dev = dev; + + if (!attrs->srq) { + /* QP is associated with RQ instead of SRQ */ + qp->rq.max_sges = attrs->cap.max_recv_sge; + QL_DPRINT12(ha, "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", + qp->rq.max_sges, qp->rq_cq->icid); + } else { + qp->srq = get_qlnxr_srq(attrs->srq); + } + + QL_DPRINT12(ha, + "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d," + " state = %d, signaled = %d, use_srq=%d\n", + pd->pd_id, qp->qp_type, qp->max_inline_data, + qp->state, qp->signaled, ((attrs->srq) ? 
1 : 0)); + QL_DPRINT12(ha, "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n", + qp->sq.max_sges, qp->sq_cq->icid); + return; +} + +static int +qlnxr_check_srq_params(struct ib_pd *ibpd, + struct qlnxr_dev *dev, + struct ib_srq_init_attr *attrs) +{ + struct ecore_rdma_device *qattr; + qlnx_host_t *ha; + + ha = dev->ha; + qattr = ecore_rdma_query_device(dev->rdma_ctx); + + QL_DPRINT12(ha, "enter\n"); + + if (attrs->attr.max_wr > qattr->max_srq_wqe) { + QL_DPRINT12(ha, "unsupported srq_wr=0x%x" + " requested (max_srq_wr=0x%x)\n", + attrs->attr.max_wr, qattr->max_srq_wr); + return -EINVAL; + } + + if (attrs->attr.max_sge > qattr->max_sge) { + QL_DPRINT12(ha, + "unsupported sge=0x%x requested (max_srq_sge=0x%x)\n", + attrs->attr.max_sge, qattr->max_sge); + return -EINVAL; + } + + if (attrs->attr.srq_limit > attrs->attr.max_wr) { + QL_DPRINT12(ha, + "unsupported srq_limit=0x%x requested" + " (max_srq_limit=0x%x)\n", + attrs->attr.srq_limit, attrs->attr.srq_limit); + return -EINVAL; + } + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + + +static void +qlnxr_free_srq_user_params(struct qlnxr_srq *srq) +{ + struct qlnxr_dev *dev = srq->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + qlnxr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); + ib_umem_release(srq->usrq.umem); + ib_umem_release(srq->prod_umem); + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static void +qlnxr_free_srq_kernel_params(struct qlnxr_srq *srq) +{ + struct qlnxr_srq_hwq_info *hw_srq = &srq->hw_srq; + struct qlnxr_dev *dev = srq->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + ecore_chain_free(dev->cdev, &hw_srq->pbl); + + qlnx_dma_free_coherent(&dev->cdev, + hw_srq->virt_prod_pair_addr, + hw_srq->phy_prod_pair_addr, + sizeof(struct rdma_srq_producers)); + + QL_DPRINT12(ha, "exit\n"); + + return; +} + +static int +qlnxr_init_srq_user_params(struct ib_ucontext *ib_ctx, + struct qlnxr_srq *srq, + struct qlnxr_create_srq_ureq *ureq, + int access, int dmasync) +{ +#ifdef DEFINE_IB_UMEM_WITH_CHUNK + struct ib_umem_chunk *chunk; +#endif + struct scatterlist *sg; + int rc; + struct qlnxr_dev *dev = srq->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + rc = qlnxr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr, + ureq->srq_len, access, dmasync, 1); + if (rc) + return rc; + + srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr, + sizeof(struct rdma_srq_producers), + access, dmasync); + if (IS_ERR(srq->prod_umem)) { + + qlnxr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); + ib_umem_release(srq->usrq.umem); + + QL_DPRINT12(ha, "ib_umem_get failed for producer [%p]\n", + PTR_ERR(srq->prod_umem)); + + return PTR_ERR(srq->prod_umem); + } + +#ifdef DEFINE_IB_UMEM_WITH_CHUNK + chunk = container_of((&srq->prod_umem->chunk_list)->next, + typeof(*chunk), list); + sg = &chunk->page_list[0]; +#else + sg = srq->prod_umem->sg_head.sgl; +#endif + srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg); + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + + +static int +qlnxr_alloc_srq_kernel_params(struct qlnxr_srq *srq, + struct qlnxr_dev *dev, + struct ib_srq_init_attr *init_attr) +{ + struct qlnxr_srq_hwq_info *hw_srq = &srq->hw_srq; + dma_addr_t phy_prod_pair_addr; + u32 num_elems, max_wr; + void *va; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + va = qlnx_dma_alloc_coherent(&dev->cdev, + &phy_prod_pair_addr, + sizeof(struct rdma_srq_producers)); + if (!va) { + QL_DPRINT11(ha, 
"qlnx_dma_alloc_coherent failed for produceer\n"); + return -ENOMEM; + } + + hw_srq->phy_prod_pair_addr = phy_prod_pair_addr; + hw_srq->virt_prod_pair_addr = va; + + max_wr = init_attr->attr.max_wr; + + num_elems = max_wr * RDMA_MAX_SRQ_WQE_SIZE; + + rc = ecore_chain_alloc(dev->cdev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U32, + num_elems, + ECORE_RDMA_SRQ_WQE_ELEM_SIZE, + &hw_srq->pbl, NULL); + + if (rc) { + QL_DPRINT11(ha, "ecore_chain_alloc failed [%d]\n", rc); + goto err0; + } + + hw_srq->max_wr = max_wr; + hw_srq->num_elems = num_elems; + hw_srq->max_sges = RDMA_MAX_SGE_PER_SRQ; + + QL_DPRINT12(ha, "exit\n"); + return 0; + +err0: + qlnx_dma_free_coherent(&dev->cdev, va, phy_prod_pair_addr, + sizeof(struct rdma_srq_producers)); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +static inline void +qlnxr_init_common_qp_in_params(struct qlnxr_dev *dev, + struct qlnxr_pd *pd, + struct qlnxr_qp *qp, + struct ib_qp_init_attr *attrs, + bool fmr_and_reserved_lkey, + struct ecore_rdma_create_qp_in_params *params) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + /* QP handle to be written in an async event */ + params->qp_handle_async_lo = lower_32_bits((uintptr_t)qp); + params->qp_handle_async_hi = upper_32_bits((uintptr_t)qp); + + params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR); + params->fmr_and_reserved_lkey = fmr_and_reserved_lkey; + params->pd = pd->pd_id; + params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; + params->sq_cq_id = get_qlnxr_cq(attrs->send_cq)->icid; + params->stats_queue = 0; + + params->rq_cq_id = get_qlnxr_cq(attrs->recv_cq)->icid; + + if (qp->srq) { + /* QP is associated with SRQ instead of RQ */ + params->srq_id = qp->srq->srq_id; + params->use_srq = true; + QL_DPRINT11(ha, "exit srq_id = 0x%x use_srq = 0x%x\n", + params->srq_id, params->use_srq); + return; + } + + params->srq_id = 0; + params->use_srq = false; + + QL_DPRINT12(ha, "exit\n"); + return; +} + + +static inline void +qlnxr_qp_user_print( struct qlnxr_dev *dev, + struct qlnxr_qp *qp) +{ + QL_DPRINT12((dev->ha), "qp=%p. 
sq_addr=0x%llx, sq_len=%zd, " + "rq_addr=0x%llx, rq_len=%zd\n", + qp, qp->usq.buf_addr, qp->usq.buf_len, qp->urq.buf_addr, + qp->urq.buf_len); + return; +} + +static int +qlnxr_idr_add(struct qlnxr_dev *dev, void *ptr, u32 id) +{ + u32 newid; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!QLNX_IS_IWARP(dev)) + return 0; + + do { + if (!idr_pre_get(&dev->qpidr, GFP_KERNEL)) { + QL_DPRINT11(ha, "idr_pre_get failed\n"); + return -ENOMEM; + } + + mtx_lock(&dev->idr_lock); + + rc = idr_get_new_above(&dev->qpidr, ptr, id, &newid); + + mtx_unlock(&dev->idr_lock); + + } while (rc == -EAGAIN); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + + return rc; +} + +static void +qlnxr_idr_remove(struct qlnxr_dev *dev, u32 id) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!QLNX_IS_IWARP(dev)) + return; + + mtx_lock(&dev->idr_lock); + idr_remove(&dev->qpidr, id); + mtx_unlock(&dev->idr_lock); + + QL_DPRINT12(ha, "exit \n"); + + return; +} + +static inline void +qlnxr_iwarp_populate_user_qp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ecore_rdma_create_qp_out_params *out_params) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + qp->usq.pbl_tbl->va = out_params->sq_pbl_virt; + qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys; + + qlnxr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl, + &qp->usq.pbl_info); + + if (qp->srq) { + QL_DPRINT11(ha, "qp->srq = %p\n", qp->srq); + return; + } + + qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; + qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; + + qlnxr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl, + &qp->urq.pbl_info); + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static int +qlnxr_create_user_qp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ib_pd *ibpd, + struct ib_udata *udata, + struct ib_qp_init_attr *attrs) +{ + struct ecore_rdma_destroy_qp_out_params d_out_params; + struct ecore_rdma_create_qp_in_params in_params; + struct ecore_rdma_create_qp_out_params out_params; + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); + struct ib_ucontext *ib_ctx = NULL; + struct qlnxr_ucontext *ctx = NULL; + struct qlnxr_create_qp_ureq ureq; + int alloc_and_init = QLNX_IS_ROCE(dev); + int rc = -EINVAL; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + ib_ctx = ibpd->uobject->context; + ctx = get_qlnxr_ucontext(ib_ctx); + + memset(&ureq, 0, sizeof(ureq)); + rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); + + if (rc) { + QL_DPRINT11(ha, "ib_copy_from_udata failed [%d]\n", rc); + return rc; + } + + /* SQ - read access only (0), dma sync not required (0) */ + rc = qlnxr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr, + ureq.sq_len, 0, 0, + alloc_and_init); + if (rc) { + QL_DPRINT11(ha, "qlnxr_init_user_queue failed [%d]\n", rc); + return rc; + } + + if (!qp->srq) { + /* RQ - read access only (0), dma sync not required (0) */ + rc = qlnxr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, + ureq.rq_len, 0, 0, + alloc_and_init); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_init_user_queue failed [%d]\n", rc); + return rc; + } + } + + memset(&in_params, 0, sizeof(in_params)); + qlnxr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params); + in_params.qp_handle_lo = ureq.qp_handle_lo; + in_params.qp_handle_hi = ureq.qp_handle_hi; + in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; + in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; + + if (!qp->srq) { + in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; + in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; 
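Several of the paths above hand the firmware a host pointer as two 32-bit halves, for example cq_handle_hi/lo from the qlnxr_cq pointer and qp_handle_async_lo/hi from the qlnxr_qp pointer; the completion and async-event paths later reassemble the original pointer. A self-contained illustration of that round trip, using stand-ins for the kernel's lower_32_bits()/upper_32_bits() helpers:

#include <stdint.h>
#include <stdio.h>

static uint32_t lo32(uint64_t v) { return ((uint32_t)v); }
static uint32_t hi32(uint64_t v) { return ((uint32_t)(v >> 32)); }

int
main(void)
{
        int obj;                        /* stand-in for a qlnxr_qp/qlnxr_cq */
        uint64_t handle = (uint64_t)(uintptr_t)&obj;

        uint32_t handle_lo = lo32(handle);      /* -> qp_handle_lo */
        uint32_t handle_hi = hi32(handle);      /* -> qp_handle_hi */

        /* The completion path rebuilds the original pointer. */
        uint64_t restored = ((uint64_t)handle_hi << 32) | handle_lo;

        printf("restored matches: %s\n", restored == handle ? "yes" : "no");
        return (0);
}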
+ } + + qp->ecore_qp = ecore_rdma_create_qp(dev->rdma_ctx, &in_params, &out_params); + + if (!qp->ecore_qp) { + rc = -ENOMEM; + QL_DPRINT11(ha, "ecore_rdma_create_qp failed\n"); + goto err1; + } + + if (QLNX_IS_IWARP(dev)) + qlnxr_iwarp_populate_user_qp(dev, qp, &out_params); + + qp->qp_id = out_params.qp_id; + qp->icid = out_params.icid; + + rc = qlnxr_copy_qp_uresp(dev, qp, udata); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_copy_qp_uresp failed\n"); + goto err; + } + + qlnxr_qp_user_print(dev, qp); + + QL_DPRINT12(ha, "exit\n"); + return 0; +err: + rc = ecore_rdma_destroy_qp(dev->rdma_ctx, qp->ecore_qp, &d_out_params); + + if (rc) + QL_DPRINT12(ha, "fatal fault\n"); + +err1: + qlnxr_cleanup_user(dev, qp); + + QL_DPRINT12(ha, "exit[%d]\n", rc); + return rc; +} + +static void +qlnxr_set_roce_db_info(struct qlnxr_dev *dev, + struct qlnxr_qp *qp) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter qp = %p qp->srq %p\n", qp, qp->srq); + + qp->sq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + qp->sq.db_data.data.icid = qp->icid + 1; + + if (!qp->srq) { + qp->rq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); + qp->rq.db_data.data.icid = qp->icid; + } + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static void +qlnxr_set_iwarp_db_info(struct qlnxr_dev *dev, + struct qlnxr_qp *qp) + +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter qp = %p qp->srq %p\n", qp, qp->srq); + + qp->sq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + qp->sq.db_data.data.icid = qp->icid; + + if (!qp->srq) { + qp->rq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); + qp->rq.db_data.data.icid = qp->icid; + + qp->rq.iwarp_db2 = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); + qp->rq.iwarp_db2_data.data.icid = qp->icid; + qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD; + } + + QL_DPRINT12(ha, + "qp->sq.db = %p qp->sq.db_data.data.icid =0x%x\n" + "\t\t\tqp->rq.db = %p qp->rq.db_data.data.icid =0x%x\n" + "\t\t\tqp->rq.iwarp_db2 = %p qp->rq.iwarp_db2.data.icid =0x%x" + " qp->rq.iwarp_db2.data.prod_val =0x%x\n", + qp->sq.db, qp->sq.db_data.data.icid, + qp->rq.db, qp->rq.db_data.data.icid, + qp->rq.iwarp_db2, qp->rq.iwarp_db2_data.data.icid, + qp->rq.iwarp_db2_data.data.value); + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static int +qlnxr_roce_create_kernel_qp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ecore_rdma_create_qp_in_params *in_params, + u32 n_sq_elems, + u32 n_rq_elems) +{ + struct ecore_rdma_create_qp_out_params out_params; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + rc = ecore_chain_alloc( + dev->cdev, + ECORE_CHAIN_USE_TO_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U32, + n_sq_elems, + QLNXR_SQE_ELEMENT_SIZE, + &qp->sq.pbl, + NULL); + + if (rc) { + QL_DPRINT11(ha, "ecore_chain_alloc qp->sq.pbl failed[%d]\n", rc); + return rc; + } + + in_params->sq_num_pages = ecore_chain_get_page_cnt(&qp->sq.pbl); + in_params->sq_pbl_ptr = ecore_chain_get_pbl_phys(&qp->sq.pbl); + + if (!qp->srq) { + + rc = ecore_chain_alloc( + dev->cdev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U32, + n_rq_elems, + QLNXR_RQE_ELEMENT_SIZE, + &qp->rq.pbl, + NULL); + + if (rc) { + QL_DPRINT11(ha, + "ecore_chain_alloc qp->rq.pbl failed[%d]\n", rc); + return rc; + } + + in_params->rq_num_pages = ecore_chain_get_page_cnt(&qp->rq.pbl); + in_params->rq_pbl_ptr = ecore_chain_get_pbl_phys(&qp->rq.pbl); + } + + 
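The qlnxr_set_roce_db_info() and qlnxr_set_iwarp_db_info() helpers above compute each queue's doorbell register as the device doorbell base plus a shifted per-queue offset. A hedged sketch of that address arithmetic; the shift and offsets below are made up for illustration, the real values come from DB_ADDR_SHIFT() and the DQ_PWM_* constants:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for DB_ADDR_SHIFT(). */
#define EXAMPLE_DB_ADDR_SHIFT(offset)   ((offset) << 2)

int
main(void)
{
        uint64_t db_base = 0xfe000000ULL;       /* example doorbell BAR base */
        uint32_t sq_prod_off = 0x10;            /* made-up PWM offsets */
        uint32_t rq_prod_off = 0x11;

        uint64_t sq_db = db_base + EXAMPLE_DB_ADDR_SHIFT(sq_prod_off);
        uint64_t rq_db = db_base + EXAMPLE_DB_ADDR_SHIFT(rq_prod_off);

        printf("sq doorbell %#" PRIx64 ", rq doorbell %#" PRIx64 "\n",
            sq_db, rq_db);
        return (0);
}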
qp->ecore_qp = ecore_rdma_create_qp(dev->rdma_ctx, in_params, &out_params); + + if (!qp->ecore_qp) { + QL_DPRINT11(ha, "qp->ecore_qp == NULL\n"); + return -EINVAL; + } + + qp->qp_id = out_params.qp_id; + qp->icid = out_params.icid; + + qlnxr_set_roce_db_info(dev, qp); + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +static int +qlnxr_iwarp_create_kernel_qp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ecore_rdma_create_qp_in_params *in_params, + u32 n_sq_elems, + u32 n_rq_elems) +{ + struct ecore_rdma_destroy_qp_out_params d_out_params; + struct ecore_rdma_create_qp_out_params out_params; + struct ecore_chain_ext_pbl ext_pbl; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + in_params->sq_num_pages = ECORE_CHAIN_PAGE_CNT(n_sq_elems, + QLNXR_SQE_ELEMENT_SIZE, + ECORE_CHAIN_MODE_PBL); + in_params->rq_num_pages = ECORE_CHAIN_PAGE_CNT(n_rq_elems, + QLNXR_RQE_ELEMENT_SIZE, + ECORE_CHAIN_MODE_PBL); + + QL_DPRINT12(ha, "n_sq_elems = 0x%x" + " n_rq_elems = 0x%x in_params\n" + "\t\t\tqp_handle_lo\t\t= 0x%08x\n" + "\t\t\tqp_handle_hi\t\t= 0x%08x\n" + "\t\t\tqp_handle_async_lo\t\t= 0x%08x\n" + "\t\t\tqp_handle_async_hi\t\t= 0x%08x\n" + "\t\t\tuse_srq\t\t\t= 0x%x\n" + "\t\t\tsignal_all\t\t= 0x%x\n" + "\t\t\tfmr_and_reserved_lkey\t= 0x%x\n" + "\t\t\tpd\t\t\t= 0x%x\n" + "\t\t\tdpi\t\t\t= 0x%x\n" + "\t\t\tsq_cq_id\t\t\t= 0x%x\n" + "\t\t\tsq_num_pages\t\t= 0x%x\n" + "\t\t\tsq_pbl_ptr\t\t= %p\n" + "\t\t\tmax_sq_sges\t\t= 0x%x\n" + "\t\t\trq_cq_id\t\t\t= 0x%x\n" + "\t\t\trq_num_pages\t\t= 0x%x\n" + "\t\t\trq_pbl_ptr\t\t= %p\n" + "\t\t\tsrq_id\t\t\t= 0x%x\n" + "\t\t\tstats_queue\t\t= 0x%x\n", + n_sq_elems, n_rq_elems, + in_params->qp_handle_lo, + in_params->qp_handle_hi, + in_params->qp_handle_async_lo, + in_params->qp_handle_async_hi, + in_params->use_srq, + in_params->signal_all, + in_params->fmr_and_reserved_lkey, + in_params->pd, + in_params->dpi, + in_params->sq_cq_id, + in_params->sq_num_pages, + (void *)in_params->sq_pbl_ptr, + in_params->max_sq_sges, + in_params->rq_cq_id, + in_params->rq_num_pages, + (void *)in_params->rq_pbl_ptr, + in_params->srq_id, + in_params->stats_queue ); + + memset(&out_params, 0, sizeof (struct ecore_rdma_create_qp_out_params)); + memset(&ext_pbl, 0, sizeof (struct ecore_chain_ext_pbl)); + + qp->ecore_qp = ecore_rdma_create_qp(dev->rdma_ctx, in_params, &out_params); + + if (!qp->ecore_qp) { + QL_DPRINT11(ha, "ecore_rdma_create_qp failed\n"); + return -EINVAL; + } + + /* Now we allocate the chain */ + ext_pbl.p_pbl_virt = out_params.sq_pbl_virt; + ext_pbl.p_pbl_phys = out_params.sq_pbl_phys; + + QL_DPRINT12(ha, "ext_pbl.p_pbl_virt = %p " + "ext_pbl.p_pbl_phys = %p\n", + ext_pbl.p_pbl_virt, ext_pbl.p_pbl_phys); + + rc = ecore_chain_alloc( + dev->cdev, + ECORE_CHAIN_USE_TO_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U32, + n_sq_elems, + QLNXR_SQE_ELEMENT_SIZE, + &qp->sq.pbl, + &ext_pbl); + + if (rc) { + QL_DPRINT11(ha, + "ecore_chain_alloc qp->sq.pbl failed rc = %d\n", rc); + goto err; + } + + ext_pbl.p_pbl_virt = out_params.rq_pbl_virt; + ext_pbl.p_pbl_phys = out_params.rq_pbl_phys; + + QL_DPRINT12(ha, "ext_pbl.p_pbl_virt = %p " + "ext_pbl.p_pbl_phys = %p\n", + ext_pbl.p_pbl_virt, ext_pbl.p_pbl_phys); + + if (!qp->srq) { + + rc = ecore_chain_alloc( + dev->cdev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U32, + n_rq_elems, + QLNXR_RQE_ELEMENT_SIZE, + &qp->rq.pbl, + &ext_pbl); + + if (rc) { + QL_DPRINT11(ha,, "ecore_chain_alloc qp->rq.pbl" + " failed rc = %d\n", rc); + goto err; + } 
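Note the ordering difference between the two kernel-QP paths: qlnxr_roce_create_kernel_qp() allocates the SQ/RQ chains first and passes their PBL addresses into ecore_rdma_create_qp(), while qlnxr_iwarp_create_kernel_qp() creates the firmware QP first and then builds the chains into the external PBL (ext_pbl) that the firmware returned. A stubbed sketch of the two orderings, illustrative only:

#include <stdio.h>

/* Stubs that only record the order of the steps. */
static void alloc_chain(const char *which) { printf("  alloc %s chain\n", which); }
static void fw_create_qp(const char *mode) { printf("  firmware create_qp (%s)\n", mode); }

static void
roce_kernel_qp_order(void)
{
        /* RoCE: rings exist before the firmware object. */
        printf("roce:\n");
        alloc_chain("sq");
        alloc_chain("rq");
        fw_create_qp("roce");
}

static void
iwarp_kernel_qp_order(void)
{
        /* iWARP: the firmware object comes first; rings are built into the
         * PBL locations it returned. */
        printf("iwarp:\n");
        fw_create_qp("iwarp");
        alloc_chain("sq into ext_pbl");
        alloc_chain("rq into ext_pbl");
}

int
main(void)
{
        roce_kernel_qp_order();
        iwarp_kernel_qp_order();
        return (0);
}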
+ } + + QL_DPRINT12(ha, "qp_id = 0x%x icid =0x%x\n", + out_params.qp_id, out_params.icid); + + qp->qp_id = out_params.qp_id; + qp->icid = out_params.icid; + + qlnxr_set_iwarp_db_info(dev, qp); + + QL_DPRINT12(ha, "exit\n"); + return 0; + +err: + ecore_rdma_destroy_qp(dev->rdma_ctx, qp->ecore_qp, &d_out_params); + + QL_DPRINT12(ha, "exit rc = %d\n", rc); + return rc; +} + +static int +qlnxr_create_kernel_qp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ib_pd *ibpd, + struct ib_qp_init_attr *attrs) +{ + struct ecore_rdma_create_qp_in_params in_params; + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); + int rc = -EINVAL; + u32 n_rq_elems; + u32 n_sq_elems; + u32 n_sq_entries; + struct ecore_rdma_device *qattr = ecore_rdma_query_device(dev->rdma_ctx); + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(&in_params, 0, sizeof(in_params)); + + /* A single work request may take up to MAX_SQ_WQE_SIZE elements in + * the ring. The ring should allow at least a single WR, even if the + * user requested none, due to allocation issues. + * We should add an extra WR since the prod and cons indices of + * wqe_wr_id are managed in such a way that the WQ is considered full + * when (prod+1)%max_wr==cons. We currently don't do that because we + * double the number of entries due to an iSER issue that pushes far more + * WRs than indicated. If we decline its ib_post_send() then we get + * error prints in the dmesg we'd like to avoid. + */ + qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier, + qattr->max_wqe); + + qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id), + GFP_KERNEL); + if (!qp->wqe_wr_id) { + QL_DPRINT11(ha, "failed SQ shadow memory allocation\n"); + return -ENOMEM; + } + + /* QP handle to be written in CQE */ + in_params.qp_handle_lo = lower_32_bits((uintptr_t)qp); + in_params.qp_handle_hi = upper_32_bits((uintptr_t)qp); + + /* A single work request may take up to MAX_RQ_WQE_SIZE elements in + * the ring. The ring should allow at least a single WR, even if the + * user requested none, due to allocation issues. + */ + qp->rq.max_wr = (u16)max_t(u32, attrs->cap.max_recv_wr, 1); + + /* Allocate driver internal RQ array */ + if (!qp->srq) { + qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id), + GFP_KERNEL); + if (!qp->rqe_wr_id) { + QL_DPRINT11(ha, "failed RQ shadow memory allocation\n"); + kfree(qp->wqe_wr_id); + return -ENOMEM; + } + } + + //qlnxr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params); + + in_params.qp_handle_async_lo = lower_32_bits((uintptr_t)qp); + in_params.qp_handle_async_hi = upper_32_bits((uintptr_t)qp); + + in_params.signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR); + in_params.fmr_and_reserved_lkey = true; + in_params.pd = pd->pd_id; + in_params.dpi = pd->uctx ?
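The sizing comment above relies on the usual circular-ring convention, also used by qlnxr_wq_is_full() later in this file: the queue counts as full when advancing the producer would land on the consumer, i.e. (prod + 1) % max_wr == cons, so one slot is always left unused. A small standalone illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same convention as qlnxr_wq_is_full(): one slot is sacrificed so that a
 * full ring and an empty ring remain distinguishable. */
static bool
wq_is_full(uint16_t prod, uint16_t cons, uint16_t max_wr)
{
        return (((prod + 1) % max_wr) == cons);
}

int
main(void)
{
        uint16_t max_wr = 4;

        printf("prod=0 cons=0 full? %d\n", wq_is_full(0, 0, max_wr)); /* empty */
        printf("prod=3 cons=0 full? %d\n", wq_is_full(3, 0, max_wr)); /* full  */
        printf("prod=1 cons=2 full? %d\n", wq_is_full(1, 2, max_wr)); /* full, wrapped */
        return (0);
}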
pd->uctx->dpi : dev->dpi; + in_params.sq_cq_id = get_qlnxr_cq(attrs->send_cq)->icid; + in_params.stats_queue = 0; + + in_params.rq_cq_id = get_qlnxr_cq(attrs->recv_cq)->icid; + + if (qp->srq) { + /* QP is associated with SRQ instead of RQ */ + in_params.srq_id = qp->srq->srq_id; + in_params.use_srq = true; + QL_DPRINT11(ha, "exit srq_id = 0x%x use_srq = 0x%x\n", + in_params.srq_id, in_params.use_srq); + } else { + in_params.srq_id = 0; + in_params.use_srq = false; + } + + n_sq_entries = attrs->cap.max_send_wr; + n_sq_entries = min_t(u32, n_sq_entries, qattr->max_wqe); + n_sq_entries = max_t(u32, n_sq_entries, 1); + n_sq_elems = n_sq_entries * QLNXR_MAX_SQE_ELEMENTS_PER_SQE; + + n_rq_elems = qp->rq.max_wr * QLNXR_MAX_RQE_ELEMENTS_PER_RQE; + + if (QLNX_IS_ROCE(dev)) { + rc = qlnxr_roce_create_kernel_qp(dev, qp, &in_params, + n_sq_elems, n_rq_elems); + } else { + rc = qlnxr_iwarp_create_kernel_qp(dev, qp, &in_params, + n_sq_elems, n_rq_elems); + } + + if (rc) + qlnxr_cleanup_kernel(dev, qp); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +struct ib_qp * +qlnxr_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct qlnxr_dev *dev = get_qlnxr_dev(ibpd->device); + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); + struct qlnxr_qp *qp; + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + rc = qlnxr_check_qp_attrs(ibpd, dev, attrs, udata); + if (rc) { + QL_DPRINT11(ha, "qlnxr_check_qp_attrs failed [%d]\n", rc); + return ERR_PTR(rc); + } + + QL_DPRINT12(ha, "called from %s, event_handle=%p," + " eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n", + (udata ? "user library" : "kernel"), + attrs->event_handler, pd, + get_qlnxr_cq(attrs->send_cq), + get_qlnxr_cq(attrs->send_cq)->icid, + get_qlnxr_cq(attrs->recv_cq), + get_qlnxr_cq(attrs->recv_cq)->icid); + + qp = qlnx_zalloc(sizeof(struct qlnxr_qp)); + + if (!qp) { + QL_DPRINT11(ha, "kzalloc(qp) failed\n"); + return ERR_PTR(-ENOMEM); + } + + qlnxr_set_common_qp_params(dev, qp, pd, attrs); + + if (attrs->qp_type == IB_QPT_GSI) { + QL_DPRINT11(ha, "calling qlnxr_create_gsi_qp\n"); + return qlnxr_create_gsi_qp(dev, attrs, qp); + } + + if (udata) { + rc = qlnxr_create_user_qp(dev, qp, ibpd, udata, attrs); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_create_user_qp failed\n"); + goto err; + } + } else { + rc = qlnxr_create_kernel_qp(dev, qp, ibpd, attrs); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_create_kernel_qp failed\n"); + goto err; + } + } + + qp->ibqp.qp_num = qp->qp_id; + + rc = qlnxr_idr_add(dev, qp, qp->qp_id); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_idr_add failed\n"); + goto err; + } + + QL_DPRINT12(ha, "exit [%p]\n", &qp->ibqp); + + return &qp->ibqp; +err: + kfree(qp); + + QL_DPRINT12(ha, "failed exit\n"); + return ERR_PTR(-EFAULT); +} + + +static enum ib_qp_state +qlnxr_get_ibqp_state(enum ecore_roce_qp_state qp_state) +{ + enum ib_qp_state state = IB_QPS_ERR; + + switch (qp_state) { + case ECORE_ROCE_QP_STATE_RESET: + state = IB_QPS_RESET; + break; + + case ECORE_ROCE_QP_STATE_INIT: + state = IB_QPS_INIT; + break; + + case ECORE_ROCE_QP_STATE_RTR: + state = IB_QPS_RTR; + break; + + case ECORE_ROCE_QP_STATE_RTS: + state = IB_QPS_RTS; + break; + + case ECORE_ROCE_QP_STATE_SQD: + state = IB_QPS_SQD; + break; + + case ECORE_ROCE_QP_STATE_ERR: + state = IB_QPS_ERR; + break; + + case ECORE_ROCE_QP_STATE_SQE: + state = IB_QPS_SQE; + break; + } + return state; +} + +static enum ecore_roce_qp_state +qlnxr_get_state_from_ibqp( enum ib_qp_state qp_state) +{ + enum 
ecore_roce_qp_state ecore_qp_state; + + ecore_qp_state = ECORE_ROCE_QP_STATE_ERR; + + switch (qp_state) { + case IB_QPS_RESET: + ecore_qp_state = ECORE_ROCE_QP_STATE_RESET; + break; + + case IB_QPS_INIT: + ecore_qp_state = ECORE_ROCE_QP_STATE_INIT; + break; + + case IB_QPS_RTR: + ecore_qp_state = ECORE_ROCE_QP_STATE_RTR; + break; + + case IB_QPS_RTS: + ecore_qp_state = ECORE_ROCE_QP_STATE_RTS; + break; + + case IB_QPS_SQD: + ecore_qp_state = ECORE_ROCE_QP_STATE_SQD; + break; + + case IB_QPS_ERR: + ecore_qp_state = ECORE_ROCE_QP_STATE_ERR; + break; + + default: + ecore_qp_state = ECORE_ROCE_QP_STATE_ERR; + break; + } + + return (ecore_qp_state); +} + +static void +qlnxr_reset_qp_hwq_info(struct qlnxr_qp_hwq_info *qph) +{ + ecore_chain_reset(&qph->pbl); + qph->prod = qph->cons = 0; + qph->wqe_cons = 0; + qph->db_data.data.value = cpu_to_le16(0); + + return; +} + +static int +qlnxr_update_qp_state(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + enum ecore_roce_qp_state new_state) +{ + int status = 0; + uint32_t reg_addr; + struct ecore_dev *cdev; + qlnx_host_t *ha; + + ha = dev->ha; + cdev = &ha->cdev; + + QL_DPRINT12(ha, "enter qp = %p new_state = 0x%x qp->state = 0x%x\n", + qp, new_state, qp->state); + + if (new_state == qp->state) { + return 0; + } + + switch (qp->state) { + case ECORE_ROCE_QP_STATE_RESET: + switch (new_state) { + case ECORE_ROCE_QP_STATE_INIT: + qp->prev_wqe_size = 0; + qlnxr_reset_qp_hwq_info(&qp->sq); + if (!(qp->srq)) + qlnxr_reset_qp_hwq_info(&qp->rq); + break; + default: + status = -EINVAL; + break; + }; + break; + case ECORE_ROCE_QP_STATE_INIT: + /* INIT->XXX */ + switch (new_state) { + case ECORE_ROCE_QP_STATE_RTR: + /* Update doorbell (in case post_recv was done before move to RTR) */ + if (qp->srq) + break; + wmb(); + //writel(qp->rq.db_data.raw, qp->rq.db); + //if (QLNX_IS_IWARP(dev)) + // writel(qp->rq.iwarp_db2_data.raw, + // qp->rq.iwarp_db2); + + reg_addr = (uint32_t)((uint8_t *)qp->rq.db - + (uint8_t *)cdev->doorbells); + + bus_write_4(ha->pci_dbells, reg_addr, qp->rq.db_data.raw); + bus_barrier(ha->pci_dbells, 0, 0, BUS_SPACE_BARRIER_READ); + + if (QLNX_IS_IWARP(dev)) { + reg_addr = (uint32_t)((uint8_t *)qp->rq.iwarp_db2 - + (uint8_t *)cdev->doorbells); + bus_write_4(ha->pci_dbells, reg_addr,\ + qp->rq.iwarp_db2_data.raw); + bus_barrier(ha->pci_dbells, 0, 0,\ + BUS_SPACE_BARRIER_READ); + } + + + mmiowb(); + break; + case ECORE_ROCE_QP_STATE_ERR: + /* TBD:flush qps... */ + break; + default: + /* invalid state change. */ + status = -EINVAL; + break; + }; + break; + case ECORE_ROCE_QP_STATE_RTR: + /* RTR->XXX */ + switch (new_state) { + case ECORE_ROCE_QP_STATE_RTS: + break; + case ECORE_ROCE_QP_STATE_ERR: + break; + default: + /* invalid state change. */ + status = -EINVAL; + break; + }; + break; + case ECORE_ROCE_QP_STATE_RTS: + /* RTS->XXX */ + switch (new_state) { + case ECORE_ROCE_QP_STATE_SQD: + break; + case ECORE_ROCE_QP_STATE_ERR: + break; + default: + /* invalid state change. */ + status = -EINVAL; + break; + }; + break; + case ECORE_ROCE_QP_STATE_SQD: + /* SQD->XXX */ + switch (new_state) { + case ECORE_ROCE_QP_STATE_RTS: + case ECORE_ROCE_QP_STATE_ERR: + break; + default: + /* invalid state change. 
*/ + status = -EINVAL; + break; + }; + break; + case ECORE_ROCE_QP_STATE_ERR: + /* ERR->XXX */ + switch (new_state) { + case ECORE_ROCE_QP_STATE_RESET: + if ((qp->rq.prod != qp->rq.cons) || + (qp->sq.prod != qp->sq.cons)) { + QL_DPRINT11(ha, + "Error->Reset with rq/sq " + "not empty rq.prod=0x%x rq.cons=0x%x" + " sq.prod=0x%x sq.cons=0x%x\n", + qp->rq.prod, qp->rq.cons, + qp->sq.prod, qp->sq.cons); + status = -EINVAL; + } + break; + default: + status = -EINVAL; + break; + }; + break; + default: + status = -EINVAL; + break; + }; + + QL_DPRINT12(ha, "exit\n"); + return status; +} + +int +qlnxr_modify_qp(struct ib_qp *ibqp, + struct ib_qp_attr *attr, + int attr_mask, + struct ib_udata *udata) +{ + int rc = 0; + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + struct qlnxr_dev *dev = get_qlnxr_dev(&qp->dev->ibdev); + struct ecore_rdma_modify_qp_in_params qp_params = { 0 }; + enum ib_qp_state old_qp_state, new_qp_state; + struct ecore_rdma_device *qattr = ecore_rdma_query_device(dev->rdma_ctx); + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, + "enter qp = %p attr_mask = 0x%x, state = %d udata = %p\n", + qp, attr_mask, attr->qp_state, udata); + + old_qp_state = qlnxr_get_ibqp_state(qp->state); + if (attr_mask & IB_QP_STATE) + new_qp_state = attr->qp_state; + else + new_qp_state = old_qp_state; + + if (QLNX_IS_ROCE(dev)) { +#if __FreeBSD_version >= 1100000 + if (!ib_modify_qp_is_ok(old_qp_state, + new_qp_state, + ibqp->qp_type, + attr_mask, + IB_LINK_LAYER_ETHERNET)) { + QL_DPRINT12(ha, + "invalid attribute mask=0x%x" + " specified for qpn=0x%x of type=0x%x \n" + " old_qp_state=0x%x, new_qp_state=0x%x\n", + attr_mask, qp->qp_id, ibqp->qp_type, + old_qp_state, new_qp_state); + rc = -EINVAL; + goto err; + } +#else + if (!ib_modify_qp_is_ok(old_qp_state, + new_qp_state, + ibqp->qp_type, + attr_mask )) { + QL_DPRINT12(ha, + "invalid attribute mask=0x%x" + " specified for qpn=0x%x of type=0x%x \n" + " old_qp_state=0x%x, new_qp_state=0x%x\n", + attr_mask, qp->qp_id, ibqp->qp_type, + old_qp_state, new_qp_state); + rc = -EINVAL; + goto err; + } + +#endif /* #if __FreeBSD_version >= 1100000 */ + } + /* translate the masks... */ + if (attr_mask & IB_QP_STATE) { + SET_FIELD(qp_params.modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_NEW_STATE, 1); + qp_params.new_state = qlnxr_get_state_from_ibqp(attr->qp_state); + } + + // TBD consider changing ecore to be a flag as well... + if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) + qp_params.sqd_async = true; + + if (attr_mask & IB_QP_PKEY_INDEX) { + SET_FIELD(qp_params.modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_PKEY, + 1); + if (attr->pkey_index >= QLNXR_ROCE_PKEY_TABLE_LEN) { + rc = -EINVAL; + goto err; + } + + qp_params.pkey = QLNXR_ROCE_PKEY_DEFAULT; + } + + if (attr_mask & IB_QP_QKEY) { + qp->qkey = attr->qkey; + } + + /* tbd consider splitting in ecore.. 
*/ + if (attr_mask & IB_QP_ACCESS_FLAGS) { + SET_FIELD(qp_params.modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1); + qp_params.incoming_rdma_read_en = + attr->qp_access_flags & IB_ACCESS_REMOTE_READ; + qp_params.incoming_rdma_write_en = + attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE; + qp_params.incoming_atomic_en = + attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC; + } + + if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) { + if (attr_mask & IB_QP_PATH_MTU) { + if (attr->path_mtu < IB_MTU_256 || + attr->path_mtu > IB_MTU_4096) { + + QL_DPRINT12(ha, + "Only MTU sizes of 256, 512, 1024," + " 2048 and 4096 are supported " + " attr->path_mtu = [%d]\n", + attr->path_mtu); + + rc = -EINVAL; + goto err; + } + qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu), + ib_mtu_enum_to_int( + iboe_get_mtu(dev->ha->ifp->if_mtu))); + } + + if (qp->mtu == 0) { + qp->mtu = ib_mtu_enum_to_int( + iboe_get_mtu(dev->ha->ifp->if_mtu)); + QL_DPRINT12(ha, "fixing zeroed MTU to qp->mtu = %d\n", + qp->mtu); + } + + SET_FIELD(qp_params.modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, + 1); + + qp_params.traffic_class_tos = attr->ah_attr.grh.traffic_class; + qp_params.flow_label = attr->ah_attr.grh.flow_label; + qp_params.hop_limit_ttl = attr->ah_attr.grh.hop_limit; + + qp->sgid_idx = attr->ah_attr.grh.sgid_index; + + get_gid_info(ibqp, attr, attr_mask, dev, qp, &qp_params); + + rc = qlnxr_get_dmac(dev, &attr->ah_attr, qp_params.remote_mac_addr); + if (rc) + return rc; + + qp_params.use_local_mac = true; + memcpy(qp_params.local_mac_addr, dev->ha->primary_mac, ETH_ALEN); + + QL_DPRINT12(ha, "dgid=0x%x:0x%x:0x%x:0x%x\n", + qp_params.dgid.dwords[0], qp_params.dgid.dwords[1], + qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]); + QL_DPRINT12(ha, "sgid=0x%x:0x%x:0x%x:0x%x\n", + qp_params.sgid.dwords[0], qp_params.sgid.dwords[1], + qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]); + QL_DPRINT12(ha, + "remote_mac=[0x%x:0x%x:0x%x:0x%x:0x%x:0x%x]\n", + qp_params.remote_mac_addr[0], + qp_params.remote_mac_addr[1], + qp_params.remote_mac_addr[2], + qp_params.remote_mac_addr[3], + qp_params.remote_mac_addr[4], + qp_params.remote_mac_addr[5]); + + qp_params.mtu = qp->mtu; + } + + if (qp_params.mtu == 0) { + /* stay with current MTU */ + if (qp->mtu) { + qp_params.mtu = qp->mtu; + } else { + qp_params.mtu = ib_mtu_enum_to_int( + iboe_get_mtu(dev->ha->ifp->if_mtu)); + } + } + + if (attr_mask & IB_QP_TIMEOUT) { + SET_FIELD(qp_params.modify_flags, \ + ECORE_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1); + + qp_params.ack_timeout = attr->timeout; + if (attr->timeout) { + u32 temp; + + /* 12.7.34 LOCAL ACK TIMEOUT + * Value representing the transport (ACK) timeout for + * use by the remote, expressed as 4.096 usec * 2^(Local ACK + * Timeout) + */ + /* We use 1UL since the temporary value may overflow + * 32 bits + */ + temp = 4096 * (1UL << attr->timeout) / 1000 / 1000; + qp_params.ack_timeout = temp; /* FW requires [msec] */ + } + else + qp_params.ack_timeout = 0; /* infinite */ + } + if (attr_mask & IB_QP_RETRY_CNT) { + SET_FIELD(qp_params.modify_flags,\ + ECORE_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1); + qp_params.retry_cnt = attr->retry_cnt; + } + + if (attr_mask & IB_QP_RNR_RETRY) { + SET_FIELD(qp_params.modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, + 1); + qp_params.rnr_retry_cnt = attr->rnr_retry; + } + + if (attr_mask & IB_QP_RQ_PSN) { + SET_FIELD(qp_params.modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_RQ_PSN, + 1); + qp_params.rq_psn = attr->rq_psn; + qp->rq_psn = attr->rq_psn; + } + + if (attr_mask &
IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic > qattr->max_qp_req_rd_atomic_resc) { + rc = -EINVAL; + QL_DPRINT12(ha, + "unsupported max_rd_atomic=%d, supported=%d\n", + attr->max_rd_atomic, + qattr->max_qp_req_rd_atomic_resc); + goto err; + } + + SET_FIELD(qp_params.modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, + 1); + qp_params.max_rd_atomic_req = attr->max_rd_atomic; + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + SET_FIELD(qp_params.modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, + 1); + qp_params.min_rnr_nak_timer = attr->min_rnr_timer; + } + + if (attr_mask & IB_QP_SQ_PSN) { + SET_FIELD(qp_params.modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_SQ_PSN, + 1); + qp_params.sq_psn = attr->sq_psn; + qp->sq_psn = attr->sq_psn; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + if (attr->max_dest_rd_atomic > + qattr->max_qp_resp_rd_atomic_resc) { + QL_DPRINT12(ha, + "unsupported max_dest_rd_atomic=%d, " + "supported=%d\n", + attr->max_dest_rd_atomic, + qattr->max_qp_resp_rd_atomic_resc); + + rc = -EINVAL; + goto err; + } + + SET_FIELD(qp_params.modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, + 1); + qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic; + } + + if (attr_mask & IB_QP_DEST_QPN) { + SET_FIELD(qp_params.modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_DEST_QP, + 1); + + qp_params.dest_qp = attr->dest_qp_num; + qp->dest_qp_num = attr->dest_qp_num; + } + + /* + * Update the QP state before the actual ramrod to prevent a race with + * fast path. Modifying the QP state to error will cause the device to + * flush the CQEs and while polling the flushed CQEs will considered as + * a potential issue if the QP isn't in error state. + */ + if ((attr_mask & IB_QP_STATE) && (qp->qp_type != IB_QPT_GSI) && + (!udata) && (qp_params.new_state == ECORE_ROCE_QP_STATE_ERR)) + qp->state = ECORE_ROCE_QP_STATE_ERR; + + if (qp->qp_type != IB_QPT_GSI) + rc = ecore_rdma_modify_qp(dev->rdma_ctx, qp->ecore_qp, &qp_params); + + if (attr_mask & IB_QP_STATE) { + if ((qp->qp_type != IB_QPT_GSI) && (!udata)) + rc = qlnxr_update_qp_state(dev, qp, qp_params.new_state); + qp->state = qp_params.new_state; + } + +err: + QL_DPRINT12(ha, "exit\n"); + return rc; +} + +static int +qlnxr_to_ib_qp_acc_flags(struct ecore_rdma_query_qp_out_params *params) +{ + int ib_qp_acc_flags = 0; + + if (params->incoming_rdma_write_en) + ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE; + if (params->incoming_rdma_read_en) + ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ; + if (params->incoming_atomic_en) + ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC; + if (true) /* FIXME -> local write ?? 
*/ + ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE; + + return ib_qp_acc_flags; +} + +static enum ib_mtu +qlnxr_mtu_int_to_enum(u16 mtu) +{ + enum ib_mtu ib_mtu_size; + + switch (mtu) { + case 256: + ib_mtu_size = IB_MTU_256; + break; + + case 512: + ib_mtu_size = IB_MTU_512; + break; + + case 1024: + ib_mtu_size = IB_MTU_1024; + break; + + case 2048: + ib_mtu_size = IB_MTU_2048; + break; + + case 4096: + ib_mtu_size = IB_MTU_4096; + break; + + default: + ib_mtu_size = IB_MTU_1024; + break; + } + return (ib_mtu_size); +} + +int +qlnxr_query_qp(struct ib_qp *ibqp, + struct ib_qp_attr *qp_attr, + int attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + int rc = 0; + struct ecore_rdma_query_qp_out_params params; + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + struct qlnxr_dev *dev = qp->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(&params, 0, sizeof(params)); + + rc = ecore_rdma_query_qp(dev->rdma_ctx, qp->ecore_qp, &params); + if (rc) + goto err; + + memset(qp_attr, 0, sizeof(*qp_attr)); + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + + qp_attr->qp_state = qlnxr_get_ibqp_state(params.state); + qp_attr->cur_qp_state = qlnxr_get_ibqp_state(params.state); + + /* In some cases in iWARP qelr will ask for the state only */ + if (QLNX_IS_IWARP(dev) && (attr_mask == IB_QP_STATE)) { + QL_DPRINT11(ha, "only state requested\n"); + return 0; + } + + qp_attr->path_mtu = qlnxr_mtu_int_to_enum(params.mtu); + qp_attr->path_mig_state = IB_MIG_MIGRATED; + qp_attr->rq_psn = params.rq_psn; + qp_attr->sq_psn = params.sq_psn; + qp_attr->dest_qp_num = params.dest_qp; + + qp_attr->qp_access_flags = qlnxr_to_ib_qp_acc_flags(&params); + + QL_DPRINT12(ha, "qp_state = 0x%x cur_qp_state = 0x%x " + "path_mtu = %d qp_access_flags = 0x%x\n", + qp_attr->qp_state, qp_attr->cur_qp_state, qp_attr->path_mtu, + qp_attr->qp_access_flags); + + qp_attr->cap.max_send_wr = qp->sq.max_wr; + qp_attr->cap.max_recv_wr = qp->rq.max_wr; + qp_attr->cap.max_send_sge = qp->sq.max_sges; + qp_attr->cap.max_recv_sge = qp->rq.max_sges; + qp_attr->cap.max_inline_data = qp->max_inline_data; + qp_init_attr->cap = qp_attr->cap; + + memcpy(&qp_attr->ah_attr.grh.dgid.raw[0], &params.dgid.bytes[0], + sizeof(qp_attr->ah_attr.grh.dgid.raw)); + + qp_attr->ah_attr.grh.flow_label = params.flow_label; + qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx; + qp_attr->ah_attr.grh.hop_limit = params.hop_limit_ttl; + qp_attr->ah_attr.grh.traffic_class = params.traffic_class_tos; + + qp_attr->ah_attr.ah_flags = IB_AH_GRH; + qp_attr->ah_attr.port_num = 1; /* FIXME -> check this */ + qp_attr->ah_attr.sl = 0;/* FIXME -> check this */ + qp_attr->timeout = params.timeout; + qp_attr->rnr_retry = params.rnr_retry; + qp_attr->retry_cnt = params.retry_cnt; + qp_attr->min_rnr_timer = params.min_rnr_nak_timer; + qp_attr->pkey_index = params.pkey_index; + qp_attr->port_num = 1; /* FIXME -> check this */ + qp_attr->ah_attr.src_path_bits = 0; + qp_attr->ah_attr.static_rate = 0; + qp_attr->alt_pkey_index = 0; + qp_attr->alt_port_num = 0; + qp_attr->alt_timeout = 0; + memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr)); + + qp_attr->sq_draining = (params.state == ECORE_ROCE_QP_STATE_SQD) ? 1 : 0; + qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic; + qp_attr->max_rd_atomic = params.max_rd_atomic; + qp_attr->en_sqd_async_notify = (params.sqd_async)?
1 : 0; + + QL_DPRINT12(ha, "max_inline_data=%d\n", + qp_attr->cap.max_inline_data); + +err: + QL_DPRINT12(ha, "exit\n"); + return rc; +} + + +static void +qlnxr_cleanup_user(struct qlnxr_dev *dev, struct qlnxr_qp *qp) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (qp->usq.umem) + ib_umem_release(qp->usq.umem); + + qp->usq.umem = NULL; + + if (qp->urq.umem) + ib_umem_release(qp->urq.umem); + + qp->urq.umem = NULL; + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static void +qlnxr_cleanup_kernel(struct qlnxr_dev *dev, struct qlnxr_qp *qp) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (qlnxr_qp_has_sq(qp)) { + QL_DPRINT12(ha, "freeing SQ\n"); + ha->qlnxr_debug = 1; +// ecore_chain_free(dev->cdev, &qp->sq.pbl); + ha->qlnxr_debug = 0; + kfree(qp->wqe_wr_id); + } + + if (qlnxr_qp_has_rq(qp)) { + QL_DPRINT12(ha, "freeing RQ\n"); + ha->qlnxr_debug = 1; + // ecore_chain_free(dev->cdev, &qp->rq.pbl); + ha->qlnxr_debug = 0; + kfree(qp->rqe_wr_id); + } + + QL_DPRINT12(ha, "exit\n"); + return; +} + +int +qlnxr_free_qp_resources(struct qlnxr_dev *dev, + struct qlnxr_qp *qp) +{ + int rc = 0; + qlnx_host_t *ha; + struct ecore_rdma_destroy_qp_out_params d_out_params; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + +#if 0 + if (qp->qp_type != IB_QPT_GSI) { + rc = ecore_rdma_destroy_qp(dev->rdma_ctx, qp->ecore_qp, + &d_out_params); + if (rc) + return rc; + } + + if (qp->ibqp.uobject && qp->ibqp.uobject->context) + qlnxr_cleanup_user(dev, qp); + else + qlnxr_cleanup_kernel(dev, qp); +#endif + + if (qp->ibqp.uobject && qp->ibqp.uobject->context) + qlnxr_cleanup_user(dev, qp); + else + qlnxr_cleanup_kernel(dev, qp); + + if (qp->qp_type != IB_QPT_GSI) { + rc = ecore_rdma_destroy_qp(dev->rdma_ctx, qp->ecore_qp, + &d_out_params); + if (rc) + return rc; + } + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +int +qlnxr_destroy_qp(struct ib_qp *ibqp) +{ + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + struct qlnxr_dev *dev = qp->dev; + int rc = 0; + struct ib_qp_attr attr; + int attr_mask = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter qp = %p, qp_type=%d\n", qp, qp->qp_type); + + qp->destroyed = 1; + + if (QLNX_IS_ROCE(dev) && (qp->state != (ECORE_ROCE_QP_STATE_RESET | + ECORE_ROCE_QP_STATE_ERR | + ECORE_ROCE_QP_STATE_INIT))) { + + attr.qp_state = IB_QPS_ERR; + attr_mask |= IB_QP_STATE; + + /* change the QP state to ERROR */ + qlnxr_modify_qp(ibqp, &attr, attr_mask, NULL); + } + + if (qp->qp_type == IB_QPT_GSI) + qlnxr_destroy_gsi_qp(dev); + + qp->sig = ~qp->sig; + + qlnxr_free_qp_resources(dev, qp); + + if (atomic_dec_and_test(&qp->refcnt)) { + /* TODO: only for iWARP? 
*/ + qlnxr_idr_remove(dev, qp->qp_id); + kfree(qp); + } + + QL_DPRINT12(ha, "exit\n"); + return rc; +} + +static inline int +qlnxr_wq_is_full(struct qlnxr_qp_hwq_info *wq) +{ + return (((wq->prod + 1) % wq->max_wr) == wq->cons); +} + +static int +sge_data_len(struct ib_sge *sg_list, int num_sge) +{ + int i, len = 0; + for (i = 0; i < num_sge; i++) + len += sg_list[i].length; + return len; +} + +static void +swap_wqe_data64(u64 *p) +{ + int i; + + for (i = 0; i < QLNXR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++) + *p = cpu_to_be64(cpu_to_le64(*p)); +} + + +static u32 +qlnxr_prepare_sq_inline_data(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + u8 *wqe_size, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr, + u8 *bits, + u8 bit) +{ + int i, seg_siz; + char *seg_prt, *wqe; + u32 data_size = sge_data_len(wr->sg_list, wr->num_sge); + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter[%d]\n", data_size); + + if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) { + QL_DPRINT12(ha, + "Too much inline data in WR:[%d, %d]\n", + data_size, ROCE_REQ_MAX_INLINE_DATA_SIZE); + *bad_wr = wr; + return 0; + } + + if (!data_size) + return data_size; + + /* set the bit */ + *bits |= bit; + + seg_prt = wqe = NULL; + seg_siz = 0; + + /* copy data inline */ + for (i = 0; i < wr->num_sge; i++) { + u32 len = wr->sg_list[i].length; + void *src = (void *)(uintptr_t)wr->sg_list[i].addr; + + while (len > 0) { + u32 cur; + + /* new segment required */ + if (!seg_siz) { + wqe = (char *)ecore_chain_produce(&qp->sq.pbl); + seg_prt = wqe; + seg_siz = sizeof(struct rdma_sq_common_wqe); + (*wqe_size)++; + } + + /* calculate currently allowed length */ + cur = MIN(len, seg_siz); + + memcpy(seg_prt, src, cur); + + /* update segment variables */ + seg_prt += cur; + seg_siz -= cur; + /* update sge variables */ + src += cur; + len -= cur; + + /* swap fully-completed segments */ + if (!seg_siz) + swap_wqe_data64((u64 *)wqe); + } + } + + /* swap last not completed segment */ + if (seg_siz) + swap_wqe_data64((u64 *)wqe); + + QL_DPRINT12(ha, "exit\n"); + return data_size; +} + +static u32 +qlnxr_prepare_sq_sges(struct qlnxr_dev *dev, struct qlnxr_qp *qp, + u8 *wqe_size, struct ib_send_wr *wr) +{ + int i; + u32 data_size = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter wr->num_sge = %d \n", wr->num_sge); + + for (i = 0; i < wr->num_sge; i++) { + struct rdma_sq_sge *sge = ecore_chain_produce(&qp->sq.pbl); + + TYPEPTR_ADDR_SET(sge, addr, wr->sg_list[i].addr); + sge->l_key = cpu_to_le32(wr->sg_list[i].lkey); + sge->length = cpu_to_le32(wr->sg_list[i].length); + data_size += wr->sg_list[i].length; + } + + if (wqe_size) + *wqe_size += wr->num_sge; + + QL_DPRINT12(ha, "exit data_size = %d\n", data_size); + return data_size; +} + +static u32 +qlnxr_prepare_sq_rdma_data(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct rdma_sq_rdma_wqe_1st *rwqe, + struct rdma_sq_rdma_wqe_2nd *rwqe2, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + qlnx_host_t *ha; + u32 ret = 0; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey); + TYPEPTR_ADDR_SET(rwqe2, remote_va, rdma_wr(wr)->remote_addr); + + if (wr->send_flags & IB_SEND_INLINE) { + u8 flags = 0; + SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1); + return qlnxr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, + wr, bad_wr, &rwqe->flags, flags); + } + + ret = qlnxr_prepare_sq_sges(dev, qp, &rwqe->wqe_size, wr); + + QL_DPRINT12(ha, "exit ret = 0x%x\n", ret); + + return (ret); +} + +static u32 
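qlnxr_prepare_sq_inline_data() above copies the scatter/gather payload straight into successive WQE segments, opening a new segment whenever the current one fills and byte-swapping each completed segment via swap_wqe_data64(). The chunking logic in isolation, with a made-up segment size:

#include <stdio.h>
#include <string.h>

#define SEG_SIZE 16     /* stand-in for sizeof(struct rdma_sq_common_wqe) */

int
main(void)
{
        const char src[] = "inline payload spanning several WQE segments";
        char segs[8][SEG_SIZE];
        size_t len = sizeof(src);
        size_t off = 0;
        size_t seg_left = 0;
        int nseg = 0;

        while (len > 0) {
                if (seg_left == 0) {            /* new segment required */
                        memset(segs[nseg], 0, SEG_SIZE);
                        seg_left = SEG_SIZE;
                        nseg++;
                }
                size_t cur = (len < seg_left) ? len : seg_left;

                memcpy(segs[nseg - 1] + (SEG_SIZE - seg_left), src + off, cur);
                off += cur;
                len -= cur;
                seg_left -= cur;
                /* a fully written segment would be byte-swapped here, as
                 * swap_wqe_data64() does for the real WQE */
        }
        printf("copied %zu bytes into %d segments\n", off, nseg);
        return (0);
}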
+qlnxr_prepare_sq_send_data(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct rdma_sq_send_wqe *swqe, + struct rdma_sq_send_wqe *swqe2, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + qlnx_host_t *ha; + u32 ret = 0; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(swqe2, 0, sizeof(*swqe2)); + + if (wr->send_flags & IB_SEND_INLINE) { + u8 flags = 0; + SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1); + return qlnxr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, + wr, bad_wr, &swqe->flags, flags); + } + + ret = qlnxr_prepare_sq_sges(dev, qp, &swqe->wqe_size, wr); + + QL_DPRINT12(ha, "exit ret = 0x%x\n", ret); + + return (ret); +} + +static void +qlnx_handle_completed_mrs(struct qlnxr_dev *dev, struct mr_info *info) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + int work = info->completed - info->completed_handled - 1; + + QL_DPRINT12(ha, "enter [%d]\n", work); + + while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) { + struct qlnxr_pbl *pbl; + + /* Free all the page list that are possible to be freed + * (all the ones that were invalidated), under the assumption + * that if an FMR was completed successfully that means that + * if there was an invalidate operation before it also ended + */ + pbl = list_first_entry(&info->inuse_pbl_list, + struct qlnxr_pbl, + list_entry); + list_del(&pbl->list_entry); + list_add_tail(&pbl->list_entry, &info->free_pbl_list); + info->completed_handled++; + } + + QL_DPRINT12(ha, "exit\n"); + return; +} + +#if __FreeBSD_version >= 1102000 + +static int qlnxr_prepare_reg(struct qlnxr_qp *qp, + struct rdma_sq_fmr_wqe_1st *fwqe1, + struct ib_reg_wr *wr) +{ + struct qlnxr_mr *mr = get_qlnxr_mr(wr->mr); + struct rdma_sq_fmr_wqe_2nd *fwqe2; + + fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)ecore_chain_produce(&qp->sq.pbl); + fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova); + fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova); + fwqe1->l_key = wr->key; + + fwqe2->access_ctrl = 0; + + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ, + !!(wr->access & IB_ACCESS_REMOTE_READ)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE, + !!(wr->access & IB_ACCESS_REMOTE_WRITE)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC, + !!(wr->access & IB_ACCESS_REMOTE_ATOMIC)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE, + !!(wr->access & IB_ACCESS_LOCAL_WRITE)); + fwqe2->fmr_ctrl = 0; + + SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG, + ilog2(mr->ibmr.page_size) - 12); + + fwqe2->length_hi = 0; /* TODO - figure out why length is only 32bit.. 
*/ + fwqe2->length_lo = mr->ibmr.length; + fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa); + fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa); + + qp->wqe_wr_id[qp->sq.prod].mr = mr; + + return 0; +} + +#else + +static void +build_frmr_pbes(struct qlnxr_dev *dev, struct ib_send_wr *wr, + struct mr_info *info) +{ + int i; + u64 buf_addr = 0; + int num_pbes, total_num_pbes = 0; + struct regpair *pbe; + struct qlnxr_pbl *pbl_tbl = info->pbl_table; + struct qlnxr_pbl_info *pbl_info = &info->pbl_info; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + pbe = (struct regpair *)pbl_tbl->va; + num_pbes = 0; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + buf_addr = wr->wr.fast_reg.page_list->page_list[i]; + pbe->lo = cpu_to_le32((u32)buf_addr); + pbe->hi = cpu_to_le32((u32)upper_32_bits(buf_addr)); + + num_pbes += 1; + pbe++; + total_num_pbes++; + + if (total_num_pbes == pbl_info->num_pbes) + return; + + /* if the given pbl is full storing the pbes, + * move to next pbl. + */ + if (num_pbes == + (pbl_info->pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct regpair *)pbl_tbl->va; + num_pbes = 0; + } + } + QL_DPRINT12(ha, "exit\n"); + + return; +} + +static int +qlnxr_prepare_safe_pbl(struct qlnxr_dev *dev, struct mr_info *info) +{ + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (info->completed == 0) { + //DP_VERBOSE(dev, QLNXR_MSG_MR, "First FMR\n"); + /* first fmr */ + return 0; + } + + qlnx_handle_completed_mrs(dev, info); + + list_add_tail(&info->pbl_table->list_entry, &info->inuse_pbl_list); + + if (list_empty(&info->free_pbl_list)) { + info->pbl_table = qlnxr_alloc_pbl_tbl(dev, &info->pbl_info, + GFP_ATOMIC); + } else { + info->pbl_table = list_first_entry(&info->free_pbl_list, + struct qlnxr_pbl, + list_entry); + list_del(&info->pbl_table->list_entry); + } + + if (!info->pbl_table) + rc = -ENOMEM; + + QL_DPRINT12(ha, "exit\n"); + return rc; +} + +static inline int +qlnxr_prepare_fmr(struct qlnxr_qp *qp, + struct rdma_sq_fmr_wqe_1st *fwqe1, + struct ib_send_wr *wr) +{ + struct qlnxr_dev *dev = qp->dev; + u64 fbo; + struct qlnxr_fast_reg_page_list *frmr_list = + get_qlnxr_frmr_list(wr->wr.fast_reg.page_list); + struct rdma_sq_fmr_wqe *fwqe2 = + (struct rdma_sq_fmr_wqe *)ecore_chain_produce(&qp->sq.pbl); + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (wr->wr.fast_reg.page_list_len == 0) + BUG(); + + rc = qlnxr_prepare_safe_pbl(dev, &frmr_list->info); + if (rc) + return rc; + + fwqe1->addr.hi = upper_32_bits(wr->wr.fast_reg.iova_start); + fwqe1->addr.lo = lower_32_bits(wr->wr.fast_reg.iova_start); + fwqe1->l_key = wr->wr.fast_reg.rkey; + + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_REMOTE_READ, + !!(wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_REMOTE_WRITE, + !!(wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_ENABLE_ATOMIC, + !!(wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_ATOMIC)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_LOCAL_READ, 1); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_LOCAL_WRITE, + !!(wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE)); + + fwqe2->fmr_ctrl = 0; + + SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG, + ilog2(1 << wr->wr.fast_reg.page_shift) - 12); + SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_ZERO_BASED, 0); + + fwqe2->length_hi = 0; /* Todo - figure this out... 
why length is only 32bit.. */ + fwqe2->length_lo = wr->wr.fast_reg.length; + fwqe2->pbl_addr.hi = upper_32_bits(frmr_list->info.pbl_table->pa); + fwqe2->pbl_addr.lo = lower_32_bits(frmr_list->info.pbl_table->pa); + + /* produce another wqe for fwqe3 */ + ecore_chain_produce(&qp->sq.pbl); + + fbo = wr->wr.fast_reg.iova_start - + (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK); + + QL_DPRINT12(ha, "wr.fast_reg.iova_start = %p rkey=%x addr=%x:%x" + " length = %x pbl_addr %x:%x\n", + wr->wr.fast_reg.iova_start, wr->wr.fast_reg.rkey, + fwqe1->addr.hi, fwqe1->addr.lo, fwqe2->length_lo, + fwqe2->pbl_addr.hi, fwqe2->pbl_addr.lo); + + build_frmr_pbes(dev, wr, &frmr_list->info); + + qp->wqe_wr_id[qp->sq.prod].frmr = frmr_list; + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +#endif /* #if __FreeBSD_version >= 1102000 */ + +static enum ib_wc_opcode +qlnxr_ib_to_wc_opcode(enum ib_wr_opcode opcode) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + return IB_WC_RDMA_WRITE; + case IB_WR_SEND_WITH_IMM: + case IB_WR_SEND: + case IB_WR_SEND_WITH_INV: + return IB_WC_SEND; + case IB_WR_RDMA_READ: + return IB_WC_RDMA_READ; + case IB_WR_ATOMIC_CMP_AND_SWP: + return IB_WC_COMP_SWAP; + case IB_WR_ATOMIC_FETCH_AND_ADD: + return IB_WC_FETCH_ADD; + +#if __FreeBSD_version >= 1102000 + case IB_WR_REG_MR: + return IB_WC_REG_MR; +#else + case IB_WR_FAST_REG_MR: + return IB_WC_FAST_REG_MR; +#endif /* #if __FreeBSD_version >= 1102000 */ + + case IB_WR_LOCAL_INV: + return IB_WC_LOCAL_INV; + default: + return IB_WC_SEND; + } +} +static inline bool +qlnxr_can_post_send(struct qlnxr_qp *qp, struct ib_send_wr *wr) +{ + int wq_is_full, err_wr, pbl_is_full; + struct qlnxr_dev *dev = qp->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter[qp, wr] = [%p,%p]\n", qp, wr); + + /* prevent SQ overflow and/or processing of a bad WR */ + err_wr = wr->num_sge > qp->sq.max_sges; + wq_is_full = qlnxr_wq_is_full(&qp->sq); + pbl_is_full = ecore_chain_get_elem_left_u32(&qp->sq.pbl) < + QLNXR_MAX_SQE_ELEMENTS_PER_SQE; + if (wq_is_full || err_wr || pbl_is_full) { + if (wq_is_full && + !(qp->err_bitmap & QLNXR_QP_ERR_SQ_FULL)) { + + qp->err_bitmap |= QLNXR_QP_ERR_SQ_FULL; + + QL_DPRINT12(ha, + "error: WQ is full. Post send on QP failed" + " (this error appears only once) " + "[qp, wr, qp->err_bitmap]=[%p, %p, 0x%x]\n", + qp, wr, qp->err_bitmap); + } + + if (err_wr && + !(qp->err_bitmap & QLNXR_QP_ERR_BAD_SR)) { + + qp->err_bitmap |= QLNXR_QP_ERR_BAD_SR; + + QL_DPRINT12(ha, + "error: WQ is bad. Post send on QP failed" + " (this error appears only once) " + "[qp, wr, qp->err_bitmap]=[%p, %p, 0x%x]\n", + qp, wr, qp->err_bitmap); + } + + if (pbl_is_full && + !(qp->err_bitmap & QLNXR_QP_ERR_SQ_PBL_FULL)) { + + qp->err_bitmap |= QLNXR_QP_ERR_SQ_PBL_FULL; + + QL_DPRINT12(ha, + "error: WQ PBL is full. 
Post send on QP failed" + " (this error appears only once) " + "[qp, wr, qp->err_bitmap]=[%p, %p, 0x%x]\n", + qp, wr, qp->err_bitmap); + } + return false; + } + QL_DPRINT12(ha, "exit[qp, wr] = [%p,%p]\n", qp, wr); + return true; +} + +int +qlnxr_post_send(struct ib_qp *ibqp, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct qlnxr_dev *dev = get_qlnxr_dev(ibqp->device); + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + unsigned long flags; + int status = 0, rc = 0; + bool comp; + qlnx_host_t *ha; + uint32_t reg_addr; + + *bad_wr = NULL; + ha = dev->ha; + + QL_DPRINT12(ha, "exit[ibqp, wr, bad_wr] = [%p, %p, %p]\n", + ibqp, wr, bad_wr); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + if (qp->qp_type == IB_QPT_GSI) + return qlnxr_gsi_post_send(ibqp, wr, bad_wr); + + spin_lock_irqsave(&qp->q_lock, flags); + + if (QLNX_IS_ROCE(dev) && (qp->state != ECORE_ROCE_QP_STATE_RTS) && + (qp->state != ECORE_ROCE_QP_STATE_ERR) && + (qp->state != ECORE_ROCE_QP_STATE_SQD)) { + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + QL_DPRINT11(ha, "QP in wrong state! QP icid=0x%x state %d\n", + qp->icid, qp->state); + return -EINVAL; + } + + if (!wr) { + QL_DPRINT11(ha, "Got an empty post send???\n"); + } + + while (wr) { + struct rdma_sq_common_wqe *wqe; + struct rdma_sq_send_wqe *swqe; + struct rdma_sq_send_wqe *swqe2; + struct rdma_sq_rdma_wqe_1st *rwqe; + struct rdma_sq_rdma_wqe_2nd *rwqe2; + struct rdma_sq_local_inv_wqe *iwqe; + struct rdma_sq_atomic_wqe *awqe1; + struct rdma_sq_atomic_wqe *awqe2; + struct rdma_sq_atomic_wqe *awqe3; + struct rdma_sq_fmr_wqe_1st *fwqe1; + + if (!qlnxr_can_post_send(qp, wr)) { + status = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe = ecore_chain_produce(&qp->sq.pbl); + + qp->wqe_wr_id[qp->sq.prod].signaled = + !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled; + + /* common fields */ + wqe->flags = 0; + wqe->flags |= (RDMA_SQ_SEND_WQE_COMP_FLG_MASK << + RDMA_SQ_SEND_WQE_COMP_FLG_SHIFT); + + SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG, \ + !!(wr->send_flags & IB_SEND_SOLICITED)); + + comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || + (qp->signaled); + + SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp); + SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG, \ + !!(wr->send_flags & IB_SEND_FENCE)); + + wqe->prev_wqe_size = qp->prev_wqe_size; + + qp->wqe_wr_id[qp->sq.prod].opcode = qlnxr_ib_to_wc_opcode(wr->opcode); + + + switch (wr->opcode) { + + case IB_WR_SEND_WITH_IMM: + + wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM; + swqe = (struct rdma_sq_send_wqe *)wqe; + swqe->wqe_size = 2; + swqe2 = (struct rdma_sq_send_wqe *) + ecore_chain_produce(&qp->sq.pbl); + swqe->inv_key_or_imm_data = + cpu_to_le32(wr->ex.imm_data); + swqe->length = cpu_to_le32( + qlnxr_prepare_sq_send_data(dev, + qp, swqe, swqe2, wr, + bad_wr)); + + qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; + qp->prev_wqe_size = swqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; + + QL_DPRINT12(ha, "SEND w/ IMM length = %d imm data=%x\n", + swqe->length, wr->ex.imm_data); + + break; + + case IB_WR_SEND: + + wqe->req_type = RDMA_SQ_REQ_TYPE_SEND; + swqe = (struct rdma_sq_send_wqe *)wqe; + + swqe->wqe_size = 2; + swqe2 = (struct rdma_sq_send_wqe *) + ecore_chain_produce(&qp->sq.pbl); + swqe->length = cpu_to_le32( + qlnxr_prepare_sq_send_data(dev, + qp, swqe, swqe2, wr, + bad_wr)); + qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; + qp->prev_wqe_size = swqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; + + 
QL_DPRINT12(ha, "SEND w/o IMM length = %d\n", + swqe->length); + + break; + + case IB_WR_SEND_WITH_INV: + + wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE; + swqe = (struct rdma_sq_send_wqe *)wqe; + swqe2 = (struct rdma_sq_send_wqe *) + ecore_chain_produce(&qp->sq.pbl); + swqe->wqe_size = 2; + swqe->inv_key_or_imm_data = + cpu_to_le32(wr->ex.invalidate_rkey); + swqe->length = cpu_to_le32(qlnxr_prepare_sq_send_data(dev, + qp, swqe, swqe2, wr, bad_wr)); + qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; + qp->prev_wqe_size = swqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; + + QL_DPRINT12(ha, "SEND w INVALIDATE length = %d\n", + swqe->length); + break; + + case IB_WR_RDMA_WRITE_WITH_IMM: + + wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM; + rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; + + rwqe->wqe_size = 2; + rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data)); + rwqe2 = (struct rdma_sq_rdma_wqe_2nd *) + ecore_chain_produce(&qp->sq.pbl); + rwqe->length = cpu_to_le32(qlnxr_prepare_sq_rdma_data(dev, + qp, rwqe, rwqe2, wr, bad_wr)); + qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; + qp->prev_wqe_size = rwqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; + + QL_DPRINT12(ha, + "RDMA WRITE w/ IMM length = %d imm data=%x\n", + rwqe->length, rwqe->imm_data); + + break; + + case IB_WR_RDMA_WRITE: + + wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR; + rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; + + rwqe->wqe_size = 2; + rwqe2 = (struct rdma_sq_rdma_wqe_2nd *) + ecore_chain_produce(&qp->sq.pbl); + rwqe->length = cpu_to_le32(qlnxr_prepare_sq_rdma_data(dev, + qp, rwqe, rwqe2, wr, bad_wr)); + qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; + qp->prev_wqe_size = rwqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; + + QL_DPRINT12(ha, + "RDMA WRITE w/o IMM length = %d\n", + rwqe->length); + + break; + + case IB_WR_RDMA_READ_WITH_INV: + + QL_DPRINT12(ha, + "RDMA READ WITH INVALIDATE not supported\n"); + + *bad_wr = wr; + rc = -EINVAL; + + break; + + case IB_WR_RDMA_READ: + + wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD; + rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; + + rwqe->wqe_size = 2; + rwqe2 = (struct rdma_sq_rdma_wqe_2nd *) + ecore_chain_produce(&qp->sq.pbl); + rwqe->length = cpu_to_le32(qlnxr_prepare_sq_rdma_data(dev, + qp, rwqe, rwqe2, wr, bad_wr)); + + qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; + qp->prev_wqe_size = rwqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; + + QL_DPRINT12(ha, "RDMA READ length = %d\n", + rwqe->length); + + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + + QL_DPRINT12(ha, + "ATOMIC operation = %s\n", + ((wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) ? 
+ "IB_WR_ATOMIC_CMP_AND_SWP" : + "IB_WR_ATOMIC_FETCH_AND_ADD")); + + awqe1 = (struct rdma_sq_atomic_wqe *)wqe; + awqe1->prev_wqe_size = 4; + + awqe2 = (struct rdma_sq_atomic_wqe *) + ecore_chain_produce(&qp->sq.pbl); + + TYPEPTR_ADDR_SET(awqe2, remote_va, \ + atomic_wr(wr)->remote_addr); + + awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey); + + awqe3 = (struct rdma_sq_atomic_wqe *) + ecore_chain_produce(&qp->sq.pbl); + + if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD; + TYPEPTR_ADDR_SET(awqe3, swap_data, + atomic_wr(wr)->compare_add); + } else { + wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP; + TYPEPTR_ADDR_SET(awqe3, swap_data, + atomic_wr(wr)->swap); + TYPEPTR_ADDR_SET(awqe3, cmp_data, + atomic_wr(wr)->compare_add); + } + + qlnxr_prepare_sq_sges(dev, qp, NULL, wr); + + qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->prev_wqe_size; + qp->prev_wqe_size = awqe1->prev_wqe_size; + + break; + + case IB_WR_LOCAL_INV: + + QL_DPRINT12(ha, + "INVALIDATE length (IB_WR_LOCAL_INV)\n"); + + iwqe = (struct rdma_sq_local_inv_wqe *)wqe; + iwqe->prev_wqe_size = 1; + + iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE; + iwqe->inv_l_key = wr->ex.invalidate_rkey; + qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->prev_wqe_size; + qp->prev_wqe_size = iwqe->prev_wqe_size; + + break; + +#if __FreeBSD_version >= 1102000 + + case IB_WR_REG_MR: + + QL_DPRINT12(ha, "IB_WR_REG_MR\n"); + + wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR; + fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe; + fwqe1->wqe_size = 2; + + rc = qlnxr_prepare_reg(qp, fwqe1, reg_wr(wr)); + if (rc) { + QL_DPRINT11(ha, "IB_WR_REG_MR failed rc=%d\n", rc); + *bad_wr = wr; + break; + } + + qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size; + qp->prev_wqe_size = fwqe1->wqe_size; + + break; +#else + case IB_WR_FAST_REG_MR: + + QL_DPRINT12(ha, "FAST_MR (IB_WR_FAST_REG_MR)\n"); + + wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR; + fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe; + fwqe1->prev_wqe_size = 3; + + rc = qlnxr_prepare_fmr(qp, fwqe1, wr); + + if (rc) { + QL_DPRINT12(ha, + "FAST_MR (IB_WR_FAST_REG_MR) failed" + " rc = %d\n", rc); + *bad_wr = wr; + break; + } + + qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->prev_wqe_size; + qp->prev_wqe_size = fwqe1->prev_wqe_size; + + break; +#endif /* #if __FreeBSD_version >= 1102000 */ + + default: + + QL_DPRINT12(ha, "Invalid Opcode 0x%x!\n", wr->opcode); + + rc = -EINVAL; + *bad_wr = wr; + break; + } + + if (*bad_wr) { + /* + * restore prod to its position before this WR was processed + */ + ecore_chain_set_prod(&qp->sq.pbl, + le16_to_cpu(qp->sq.db_data.data.value), + wqe); + /* restore prev_wqe_size */ + qp->prev_wqe_size = wqe->prev_wqe_size; + status = rc; + + QL_DPRINT12(ha, "failed *bad_wr = %p\n", *bad_wr); + break; /* out of the loop */ + } + + qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; + + qlnxr_inc_sw_prod(&qp->sq); + + qp->sq.db_data.data.value++; + + wr = wr->next; + } + + /* Trigger doorbell + * If there was a failure in the first WR then it will be triggered in + * vane. However this is not harmful (as long as the producer value is + * unchanged). For performance reasons we avoid checking for this + * redundant doorbell. 
+ */ + wmb(); + //writel(qp->sq.db_data.raw, qp->sq.db); + + reg_addr = (uint32_t)((uint8_t *)qp->sq.db - (uint8_t *)ha->cdev.doorbells); + bus_write_4(ha->pci_dbells, reg_addr, qp->sq.db_data.raw); + bus_barrier(ha->pci_dbells, 0, 0, BUS_SPACE_BARRIER_READ); + + mmiowb(); + + spin_unlock_irqrestore(&qp->q_lock, flags); + + QL_DPRINT12(ha, "exit[ibqp, wr, bad_wr] = [%p, %p, %p]\n", + ibqp, wr, bad_wr); + + return status; +} + +static u32 +qlnxr_srq_elem_left(struct qlnxr_srq_hwq_info *hw_srq) +{ + u32 used; + + /* Calculate number of elements used based on producer + * count and consumer count and subtract it from max + * work request supported so that we get elements left. + */ + used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; + + return hw_srq->max_wr - used; +} + + +int +qlnxr_post_recv(struct ib_qp *ibqp, + struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + struct qlnxr_dev *dev = qp->dev; + unsigned long flags; + int status = 0; + qlnx_host_t *ha; + uint32_t reg_addr; + + ha = dev->ha; + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + QL_DPRINT12(ha, "enter\n"); + + if (qp->qp_type == IB_QPT_GSI) { + QL_DPRINT12(ha, "(qp->qp_type = IB_QPT_GSI)\n"); + return qlnxr_gsi_post_recv(ibqp, wr, bad_wr); + } + + if (qp->srq) { + QL_DPRINT11(ha, "qp->srq [%p]" + " QP is associated with SRQ, cannot post RQ buffers\n", + qp->srq); + return -EINVAL; + } + + spin_lock_irqsave(&qp->q_lock, flags); + + if (qp->state == ECORE_ROCE_QP_STATE_RESET) { + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + + QL_DPRINT11(ha, "qp->qp_type = ECORE_ROCE_QP_STATE_RESET\n"); + + return -EINVAL; + } + + while (wr) { + int i; + + if ((ecore_chain_get_elem_left_u32(&qp->rq.pbl) < + QLNXR_MAX_RQE_ELEMENTS_PER_RQE) || + (wr->num_sge > qp->rq.max_sges)) { + status = -ENOMEM; + *bad_wr = wr; + break; + } + for (i = 0; i < wr->num_sge; i++) { + u32 flags = 0; + struct rdma_rq_sge *rqe = ecore_chain_produce(&qp->rq.pbl); + + /* first one must include the number of SGE in the list */ + if (!i) + SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, wr->num_sge); + + SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, wr->sg_list[i].lkey); + + RQ_SGE_SET(rqe, wr->sg_list[i].addr, \ + wr->sg_list[i].length, flags); + } + /* Special case of no sges. FW requires between 1-4 sges... + * in this case we need to post 1 sge with length zero. this is + * because rdma write with immediate consumes an RQ. 
*/ + if (!wr->num_sge) { + u32 flags = 0; + struct rdma_rq_sge *rqe = ecore_chain_produce(&qp->rq.pbl); + + /* first one must include the number of SGE in the list */ + SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0); + SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1); + + //RQ_SGE_SET(rqe, 0, 0, flags); + rqe->addr.hi = 0; + rqe->addr.lo = 0; + + rqe->length = 0; + rqe->flags = cpu_to_le32(flags); + + i = 1; + } + + qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id; + qp->rqe_wr_id[qp->rq.prod].wqe_size = i; + + qlnxr_inc_sw_prod(&qp->rq); + + wmb(); + + qp->rq.db_data.data.value++; + + // writel(qp->rq.db_data.raw, qp->rq.db); + mmiowb(); + // if (QLNX_IS_IWARP(dev)) { + // writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2); + // mmiowb(); /* for second doorbell */ + // } + + reg_addr = (uint32_t)((uint8_t *)qp->rq.db - + (uint8_t *)ha->cdev.doorbells); + + bus_write_4(ha->pci_dbells, reg_addr, qp->rq.db_data.raw); + bus_barrier(ha->pci_dbells, 0, 0, BUS_SPACE_BARRIER_READ); + + if (QLNX_IS_IWARP(dev)) { + reg_addr = (uint32_t)((uint8_t *)qp->rq.iwarp_db2 - + (uint8_t *)ha->cdev.doorbells); + bus_write_4(ha->pci_dbells, reg_addr, \ + qp->rq.iwarp_db2_data.raw); + bus_barrier(ha->pci_dbells, 0, 0, \ + BUS_SPACE_BARRIER_READ); + } + + wr = wr->next; + } + + spin_unlock_irqrestore(&qp->q_lock, flags); + + QL_DPRINT12(ha, "exit status = 0x%x\n", status); + + return status; +} + +/* In fmr we need to increase the number of fmr completed counter for the fmr + * algorithm determining whether we can free a pbl or not. + * we need to perform this whether the work request was signaled or not. for + * this purpose we call this function from the condition that checks if a wr + * should be skipped, to make sure we don't miss it ( possibly this fmr + * operation was not signalted) + */ +static inline void +qlnxr_chk_if_fmr(struct qlnxr_qp *qp) +{ +#if __FreeBSD_version >= 1102000 + + if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR) + qp->wqe_wr_id[qp->sq.cons].mr->info.completed++; +#else + if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_FAST_REG_MR) + qp->wqe_wr_id[qp->sq.cons].frmr->info.completed++; + +#endif /* #if __FreeBSD_version >= 1102000 */ +} + +static int +process_req(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + int num_entries, + struct ib_wc *wc, + u16 hw_cons, + enum ib_wc_status status, + int force) +{ + u16 cnt = 0; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + while (num_entries && qp->sq.wqe_cons != hw_cons) { + if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) { + qlnxr_chk_if_fmr(qp); + /* skip WC */ + goto next_cqe; + } + + /* fill WC */ + wc->status = status; + wc->vendor_err = 0; + wc->wc_flags = 0; + wc->src_qp = qp->id; + wc->qp = &qp->ibqp; + + // common section + wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id; + wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode; + + switch (wc->opcode) { + + case IB_WC_RDMA_WRITE: + + wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len; + + QL_DPRINT12(ha, + "opcode = IB_WC_RDMA_WRITE bytes = %d\n", + qp->wqe_wr_id[qp->sq.cons].bytes_len); + break; + + case IB_WC_COMP_SWAP: + case IB_WC_FETCH_ADD: + wc->byte_len = 8; + break; + +#if __FreeBSD_version >= 1102000 + case IB_WC_REG_MR: + qp->wqe_wr_id[qp->sq.cons].mr->info.completed++; + break; +#else + case IB_WC_FAST_REG_MR: + qp->wqe_wr_id[qp->sq.cons].frmr->info.completed++; + break; +#endif /* #if __FreeBSD_version >= 1102000 */ + + case IB_WC_RDMA_READ: + case IB_WC_SEND: + + QL_DPRINT12(ha, "opcode = 0x%x \n", wc->opcode); + break; + default: + ;//DP_ERR("TBD 
ERROR"); + } + + num_entries--; + wc++; + cnt++; +next_cqe: + while (qp->wqe_wr_id[qp->sq.cons].wqe_size--) + ecore_chain_consume(&qp->sq.pbl); + qlnxr_inc_sw_cons(&qp->sq); + } + + QL_DPRINT12(ha, "exit cnt = 0x%x\n", cnt); + return cnt; +} + +static int +qlnxr_poll_cq_req(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + int num_entries, + struct ib_wc *wc, + struct rdma_cqe_requester *req) +{ + int cnt = 0; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter req->status = 0x%x\n", req->status); + + switch (req->status) { + + case RDMA_CQE_REQ_STS_OK: + + cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, + IB_WC_SUCCESS, 0); + break; + + case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR: + + if (qp->state != ECORE_ROCE_QP_STATE_ERR) + cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, + IB_WC_WR_FLUSH_ERR, 1); + break; + + default: /* other errors case */ + + /* process all WQE before the cosumer */ + qp->state = ECORE_ROCE_QP_STATE_ERR; + cnt = process_req(dev, qp, cq, num_entries, wc, + req->sq_cons - 1, IB_WC_SUCCESS, 0); + wc += cnt; + /* if we have extra WC fill it with actual error info */ + + if (cnt < num_entries) { + enum ib_wc_status wc_status; + + switch (req->status) { + case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR: + wc_status = IB_WC_BAD_RESP_ERR; + break; + case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR: + wc_status = IB_WC_LOC_LEN_ERR; + break; + case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR: + wc_status = IB_WC_LOC_QP_OP_ERR; + break; + case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR: + wc_status = IB_WC_LOC_PROT_ERR; + break; + case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR: + wc_status = IB_WC_MW_BIND_ERR; + break; + case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR: + wc_status = IB_WC_REM_INV_REQ_ERR; + break; + case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR: + wc_status = IB_WC_REM_ACCESS_ERR; + break; + case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR: + wc_status = IB_WC_REM_OP_ERR; + break; + case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR: + wc_status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR: + wc_status = IB_WC_RETRY_EXC_ERR; + break; + default: + wc_status = IB_WC_GENERAL_ERR; + } + + cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons, + wc_status, 1 /* force use of WC */); + } + } + + QL_DPRINT12(ha, "exit cnt = %d\n", cnt); + return cnt; +} + +static void +__process_resp_one(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + struct ib_wc *wc, + struct rdma_cqe_responder *resp, + u64 wr_id) +{ + enum ib_wc_status wc_status = IB_WC_SUCCESS; +#if __FreeBSD_version < 1102000 + u8 flags; +#endif + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter qp = %p resp->status = 0x%x\n", + qp, resp->status); + + wc->opcode = IB_WC_RECV; + wc->wc_flags = 0; + + switch (resp->status) { + + case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR: + wc_status = IB_WC_LOC_ACCESS_ERR; + break; + + case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR: + wc_status = IB_WC_LOC_LEN_ERR; + break; + + case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR: + wc_status = IB_WC_LOC_QP_OP_ERR; + break; + + case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR: + wc_status = IB_WC_LOC_PROT_ERR; + break; + + case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR: + wc_status = IB_WC_MW_BIND_ERR; + break; + + case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR: + wc_status = IB_WC_REM_INV_RD_REQ_ERR; + break; + + case RDMA_CQE_RESP_STS_OK: + +#if __FreeBSD_version >= 1102000 + if (resp->flags & QLNXR_RESP_IMM) { + wc->ex.imm_data = + 
le32_to_cpu(resp->imm_data_or_inv_r_Key); + wc->wc_flags |= IB_WC_WITH_IMM; + + if (resp->flags & QLNXR_RESP_RDMA) + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + + if (resp->flags & QLNXR_RESP_INV) { + QL_DPRINT11(ha, + "Invalid flags QLNXR_RESP_INV [0x%x]" + "qp = %p qp->id = 0x%x cq = %p" + " cq->icid = 0x%x\n", + resp->flags, qp, qp->id, cq, cq->icid ); + } + } else if (resp->flags & QLNXR_RESP_INV) { + wc->ex.imm_data = + le32_to_cpu(resp->imm_data_or_inv_r_Key); + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + + if (resp->flags & QLNXR_RESP_RDMA) { + QL_DPRINT11(ha, + "Invalid flags QLNXR_RESP_RDMA [0x%x]" + "qp = %p qp->id = 0x%x cq = %p" + " cq->icid = 0x%x\n", + resp->flags, qp, qp->id, cq, cq->icid ); + } + } else if (resp->flags & QLNXR_RESP_RDMA) { + QL_DPRINT11(ha, "Invalid flags QLNXR_RESP_RDMA [0x%x]" + "qp = %p qp->id = 0x%x cq = %p cq->icid = 0x%x\n", + resp->flags, qp, qp->id, cq, cq->icid ); + } +#else + wc_status = IB_WC_SUCCESS; + wc->byte_len = le32_to_cpu(resp->length); + + flags = resp->flags & QLNXR_RESP_RDMA_IMM; + + switch (flags) { + + case QLNXR_RESP_RDMA_IMM: + /* update opcode */ + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + /* fall to set imm data */ + case QLNXR_RESP_IMM: + wc->ex.imm_data = + le32_to_cpu(resp->imm_data_or_inv_r_Key); + wc->wc_flags |= IB_WC_WITH_IMM; + break; + case QLNXR_RESP_RDMA: + QL_DPRINT11(ha, "Invalid flags QLNXR_RESP_RDMA [0x%x]" + "qp = %p qp->id = 0x%x cq = %p cq->icid = 0x%x\n", + resp->flags, qp, qp->id, cq, cq->icid ); + break; + default: + /* valid configuration, but nothing todo here */ + ; + } +#endif /* #if __FreeBSD_version >= 1102000 */ + + break; + default: + wc_status = IB_WC_GENERAL_ERR; + } + + /* fill WC */ + wc->status = wc_status; + wc->vendor_err = 0; + wc->src_qp = qp->id; + wc->qp = &qp->ibqp; + wc->wr_id = wr_id; + + QL_DPRINT12(ha, "exit status = 0x%x\n", wc_status); + + return; +} + +static int +process_resp_one_srq(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + struct ib_wc *wc, + struct rdma_cqe_responder *resp) +{ + struct qlnxr_srq *srq = qp->srq; + u64 wr_id; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + wr_id = HILO_U64(resp->srq_wr_id.hi, resp->srq_wr_id.lo); + + if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = 0; + wc->wr_id = wr_id; + wc->byte_len = 0; + wc->src_qp = qp->id; + wc->qp = &qp->ibqp; + wc->wr_id = wr_id; + } else { + __process_resp_one(dev, qp, cq, wc, resp, wr_id); + } + + /* PBL is maintained in case of WR granularity. 
+ * So increment WR consumer after consuming WR + */ + srq->hw_srq.wr_cons_cnt++; + + QL_DPRINT12(ha, "exit\n"); + return 1; +} + +static int +process_resp_one(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + struct ib_wc *wc, + struct rdma_cqe_responder *resp) +{ + qlnx_host_t *ha = dev->ha; + u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; + + QL_DPRINT12(ha, "enter\n"); + + __process_resp_one(dev, qp, cq, wc, resp, wr_id); + + while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) + ecore_chain_consume(&qp->rq.pbl); + qlnxr_inc_sw_cons(&qp->rq); + + QL_DPRINT12(ha, "exit\n"); + return 1; +} + +static int +process_resp_flush(struct qlnxr_qp *qp, + int num_entries, + struct ib_wc *wc, + u16 hw_cons) +{ + u16 cnt = 0; + qlnx_host_t *ha = qp->dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + while (num_entries && qp->rq.wqe_cons != hw_cons) { + /* fill WC */ + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = 0; + wc->wc_flags = 0; + wc->src_qp = qp->id; + wc->byte_len = 0; + wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; + wc->qp = &qp->ibqp; + num_entries--; + wc++; + cnt++; + while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) + ecore_chain_consume(&qp->rq.pbl); + qlnxr_inc_sw_cons(&qp->rq); + } + + QL_DPRINT12(ha, "exit cnt = 0x%x\n", cnt); + return cnt; +} + +static void +try_consume_resp_cqe(struct qlnxr_cq *cq, + struct qlnxr_qp *qp, + struct rdma_cqe_responder *resp, + int *update) +{ + if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) { + consume_cqe(cq); + *update |= 1; + } +} + +static int +qlnxr_poll_cq_resp_srq(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + int num_entries, + struct ib_wc *wc, + struct rdma_cqe_responder *resp, + int *update) +{ + int cnt; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + cnt = process_resp_one_srq(dev, qp, cq, wc, resp); + consume_cqe(cq); + *update |= 1; + + QL_DPRINT12(ha, "exit cnt = 0x%x\n", cnt); + return cnt; +} + +static int +qlnxr_poll_cq_resp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + int num_entries, + struct ib_wc *wc, + struct rdma_cqe_responder *resp, + int *update) +{ + int cnt; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { + cnt = process_resp_flush(qp, num_entries, wc, + resp->rq_cons); + try_consume_resp_cqe(cq, qp, resp, update); + } else { + cnt = process_resp_one(dev, qp, cq, wc, resp); + consume_cqe(cq); + *update |= 1; + } + + QL_DPRINT12(ha, "exit cnt = 0x%x\n", cnt); + return cnt; +} + +static void +try_consume_req_cqe(struct qlnxr_cq *cq, struct qlnxr_qp *qp, + struct rdma_cqe_requester *req, int *update) +{ + if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) { + consume_cqe(cq); + *update |= 1; + } +} + +static void +doorbell_cq(struct qlnxr_dev *dev, struct qlnxr_cq *cq, u32 cons, u8 flags) +{ + uint64_t reg_addr; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + wmb(); + cq->db.data.agg_flags = flags; + cq->db.data.value = cpu_to_le32(cons); + + reg_addr = (uint64_t)((uint8_t *)cq->db_addr - + (uint8_t *)(ha->cdev.doorbells)); + + bus_write_8(ha->pci_dbells, reg_addr, cq->db.raw); + bus_barrier(ha->pci_dbells, 0, 0, BUS_SPACE_BARRIER_READ); + + QL_DPRINT12(ha, "exit\n"); + return; + +//#ifdef __LP64__ +// writeq(cq->db.raw, cq->db_addr); +//#else + /* Note that since the FW allows 64 bit write only, in 32bit systems + * the value of db_addr must be low enough. This is currently not + * enforced. 
+ */ +// writel(cq->db.raw & 0xffffffff, cq->db_addr); +// mmiowb(); +//#endif +} + + +static int +is_valid_cqe(struct qlnxr_cq *cq, union rdma_cqe *cqe) +{ + struct rdma_cqe_requester *resp_cqe = &cqe->req; + return (resp_cqe->flags & RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT_MASK) == + cq->pbl_toggle; +} + +int +qlnxr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct qlnxr_cq *cq = get_qlnxr_cq(ibcq); + struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device)); + int done = 0; + union rdma_cqe *cqe = cq->latest_cqe; + int update = 0; + u32 old_cons, new_cons; + unsigned long flags; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + if (cq->destroyed) { + QL_DPRINT11(ha, "called after destroy for cq %p (icid=%d)\n", + cq, cq->icid); + return 0; + } + + if (cq->cq_type == QLNXR_CQ_TYPE_GSI) + return qlnxr_gsi_poll_cq(ibcq, num_entries, wc); + + spin_lock_irqsave(&cq->cq_lock, flags); + + old_cons = ecore_chain_get_cons_idx_u32(&cq->pbl); + + while (num_entries && is_valid_cqe(cq, cqe)) { + int cnt = 0; + struct qlnxr_qp *qp; + struct rdma_cqe_requester *resp_cqe; + enum rdma_cqe_type cqe_type; + + /* prevent speculative reads of any field of CQE */ + rmb(); + + resp_cqe = &cqe->req; + qp = (struct qlnxr_qp *)(uintptr_t)HILO_U64(resp_cqe->qp_handle.hi, + resp_cqe->qp_handle.lo); + + if (!qp) { + QL_DPRINT11(ha, "qp = NULL\n"); + break; + } + + wc->qp = &qp->ibqp; + + cqe_type = GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE); + + switch (cqe_type) { + case RDMA_CQE_TYPE_REQUESTER: + cnt = qlnxr_poll_cq_req(dev, qp, cq, num_entries, + wc, &cqe->req); + try_consume_req_cqe(cq, qp, &cqe->req, &update); + break; + case RDMA_CQE_TYPE_RESPONDER_RQ: + cnt = qlnxr_poll_cq_resp(dev, qp, cq, num_entries, + wc, &cqe->resp, &update); + break; + case RDMA_CQE_TYPE_RESPONDER_SRQ: + cnt = qlnxr_poll_cq_resp_srq(dev, qp, cq, num_entries, + wc, &cqe->resp, &update); + break; + case RDMA_CQE_TYPE_INVALID: + default: + QL_DPRINT11(ha, "cqe type [0x%x] invalid\n", cqe_type); + break; + } + num_entries -= cnt; + wc += cnt; + done += cnt; + + cqe = cq->latest_cqe; + } + new_cons = ecore_chain_get_cons_idx_u32(&cq->pbl); + + cq->cq_cons += new_cons - old_cons; + + if (update) { + /* doorbell notifies abount latest VALID entry, + * but chain already point to the next INVALID one + */ + doorbell_cq(dev, cq, cq->cq_cons - 1, cq->arm_flags); + QL_DPRINT12(ha, "cq = %p cons = 0x%x " + "arm_flags = 0x%x db.icid = 0x%x\n", cq, + (cq->cq_cons - 1), cq->arm_flags, cq->db.data.icid); + } + + spin_unlock_irqrestore(&cq->cq_lock, flags); + + QL_DPRINT12(ha, "exit\n"); + + return done; +} + + +int +qlnxr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct qlnxr_cq *cq = get_qlnxr_cq(ibcq); + unsigned long sflags; + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev((ibcq->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "enter ibcq = %p flags = 0x%x " + "cp = %p cons = 0x%x cq_type = 0x%x\n", ibcq, + flags, cq, cq->cq_cons, cq->cq_type); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + if (cq->destroyed) { + QL_DPRINT11(ha, "cq was already destroyed cq = %p icid=%d\n", + cq, cq->icid); + return -EINVAL; + } + + if (cq->cq_type == QLNXR_CQ_TYPE_GSI) { + return 0; + } + + spin_lock_irqsave(&cq->cq_lock, sflags); + + cq->arm_flags = 0; + + if (flags & IB_CQ_SOLICITED) { + cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD; + } + if (flags & IB_CQ_NEXT_COMP) { + cq->arm_flags 
|= DQ_UCM_ROCE_CQ_ARM_CF_CMD; + } + + doorbell_cq(dev, cq, (cq->cq_cons - 1), cq->arm_flags); + + spin_unlock_irqrestore(&cq->cq_lock, sflags); + + QL_DPRINT12(ha, "exit ibcq = %p flags = 0x%x\n", ibcq, flags); + return 0; +} + + +static struct qlnxr_mr * +__qlnxr_alloc_mr(struct ib_pd *ibpd, int max_page_list_len) +{ + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); + struct qlnxr_dev *dev = get_qlnxr_dev((ibpd->device)); + struct qlnxr_mr *mr; + int rc = -ENOMEM; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter ibpd = %p pd = %p " + " pd_id = %d max_page_list_len = %d\n", + ibpd, pd, pd->pd_id, max_page_list_len); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + QL_DPRINT11(ha, "kzalloc(mr) failed\n"); + return ERR_PTR(rc); + } + + mr->dev = dev; + mr->type = QLNXR_MR_FRMR; + + rc = qlnxr_init_mr_info(dev, &mr->info, max_page_list_len, + 1 /* allow dual layer pbl */); + if (rc) { + QL_DPRINT11(ha, "qlnxr_init_mr_info failed\n"); + goto err0; + } + + rc = ecore_rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + if (rc) { + QL_DPRINT11(ha, "ecore_rdma_alloc_tid failed\n"); + goto err0; + } + + /* index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = ECORE_RDMA_TID_FMR; + mr->hw_mr.key = 0; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = 0; + mr->hw_mr.remote_read = 0; + mr->hw_mr.remote_write = 0; + mr->hw_mr.remote_atomic = 0; + mr->hw_mr.mw_bind = false; /* TBD MW BIND */ + mr->hw_mr.pbl_ptr = 0; /* Will be supplied during post */ + mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; + mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); + mr->hw_mr.fbo = 0; + mr->hw_mr.length = 0; + mr->hw_mr.vaddr = 0; + mr->hw_mr.zbva = false; /* TBD figure when this should be true */ + mr->hw_mr.phy_mr = true; /* Fast MR - True, Regular Register False */ + mr->hw_mr.dma_mr = false; + + rc = ecore_rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + QL_DPRINT11(ha, "ecore_rdma_register_tid failed\n"); + goto err1; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + mr->ibmr.rkey = mr->ibmr.lkey; + + QL_DPRINT12(ha, "exit mr = %p mr->ibmr.lkey = 0x%x\n", + mr, mr->ibmr.lkey); + + return mr; + +err1: + ecore_rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err0: + kfree(mr); + + QL_DPRINT12(ha, "exit\n"); + + return ERR_PTR(rc); +} + +#if __FreeBSD_version >= 1102000 + +struct ib_mr * +qlnxr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, u32 max_num_sg) +{ + struct qlnxr_dev *dev; + struct qlnxr_mr *mr; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibpd->device); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + mr = __qlnxr_alloc_mr(ibpd, max_num_sg); + + if (IS_ERR(mr)) + return ERR_PTR(-EINVAL); + + QL_DPRINT12(ha, "exit mr = %p &mr->ibmr = %p\n", mr, &mr->ibmr); + + return &mr->ibmr; +} + +static int +qlnxr_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct qlnxr_mr *mr = get_qlnxr_mr(ibmr); + struct qlnxr_pbl *pbl_table; + struct regpair *pbe; + struct qlnxr_dev *dev; + qlnx_host_t *ha; + u32 pbes_in_page; + + dev = mr->dev; + ha = dev->ha; + + if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) { + QL_DPRINT12(ha, "fails mr->npages %d\n", mr->npages); + return -ENOMEM; + } + + QL_DPRINT12(ha, "mr->npages %d addr = %p enter\n", mr->npages, + ((void *)addr)); + + pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64); + pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page); + pbe = (struct regpair *)pbl_table->va; 
+ pbe += mr->npages % pbes_in_page; + pbe->lo = cpu_to_le32((u32)addr); + pbe->hi = cpu_to_le32((u32)upper_32_bits(addr)); + + mr->npages++; + + QL_DPRINT12(ha, "mr->npages %d addr = %p exit \n", mr->npages, + ((void *)addr)); + return 0; +} + +int +qlnxr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) +{ + int ret; + struct qlnxr_mr *mr = get_qlnxr_mr(ibmr); + qlnx_host_t *ha; + + if (mr == NULL) + return (-1); + + if (mr->dev == NULL) + return (-1); + + ha = mr->dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + mr->npages = 0; + qlnx_handle_completed_mrs(mr->dev, &mr->info); + + ret = ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qlnxr_set_page); + + QL_DPRINT12(ha, "exit ret = %d\n", ret); + + return (ret); +} + +#else + +struct ib_mr * +qlnxr_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) +{ + struct qlnxr_dev *dev; + struct qlnxr_mr *mr; + qlnx_host_t *ha; + struct ib_mr *ibmr = NULL; + + dev = get_qlnxr_dev((ibpd->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + mr = __qlnxr_alloc_mr(ibpd, max_page_list_len); + + if (IS_ERR(mr)) { + ibmr = ERR_PTR(-EINVAL); + } else { + ibmr = &mr->ibmr; + } + + QL_DPRINT12(ha, "exit %p\n", ibmr); + return (ibmr); +} + +void +qlnxr_free_frmr_page_list(struct ib_fast_reg_page_list *page_list) +{ + struct qlnxr_fast_reg_page_list *frmr_list; + + frmr_list = get_qlnxr_frmr_list(page_list); + + free_mr_info(frmr_list->dev, &frmr_list->info); + + kfree(frmr_list->ibfrpl.page_list); + kfree(frmr_list); + + return; +} + +struct ib_fast_reg_page_list * +qlnxr_alloc_frmr_page_list(struct ib_device *ibdev, int page_list_len) +{ + struct qlnxr_fast_reg_page_list *frmr_list = NULL; + struct qlnxr_dev *dev; + int size = page_list_len * sizeof(u64); + int rc = -ENOMEM; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + frmr_list = kzalloc(sizeof(*frmr_list), GFP_KERNEL); + if (!frmr_list) { + QL_DPRINT11(ha, "kzalloc(frmr_list) failed\n"); + goto err; + } + + frmr_list->dev = dev; + frmr_list->ibfrpl.page_list = kzalloc(size, GFP_KERNEL); + if (!frmr_list->ibfrpl.page_list) { + QL_DPRINT11(ha, "frmr_list->ibfrpl.page_list = NULL failed\n"); + goto err0; + } + + rc = qlnxr_init_mr_info(dev, &frmr_list->info, page_list_len, + 1 /* allow dual layer pbl */); + if (rc) + goto err1; + + QL_DPRINT12(ha, "exit %p\n", &frmr_list->ibfrpl); + + return &frmr_list->ibfrpl; + +err1: + kfree(frmr_list->ibfrpl.page_list); +err0: + kfree(frmr_list); +err: + QL_DPRINT12(ha, "exit with error\n"); + + return ERR_PTR(rc); +} + +static int +qlnxr_validate_phys_buf_list(qlnx_host_t *ha, struct ib_phys_buf *buf_list, + int buf_cnt, uint64_t *total_size) +{ + u64 size = 0; + + *total_size = 0; + + if (!buf_cnt || buf_list == NULL) { + QL_DPRINT11(ha, + "failed buf_list = %p buf_cnt = %d\n", buf_list, buf_cnt); + return (-1); + } + + size = buf_list->size; + + if (!size) { + QL_DPRINT11(ha, + "failed buf_list = %p buf_cnt = %d" + " buf_list->size = 0\n", buf_list, buf_cnt); + return (-1); + } + + while (buf_cnt) { + + *total_size += buf_list->size; + + if (buf_list->size != size) { + QL_DPRINT11(ha, + "failed buf_list = %p buf_cnt = %d" + " all buffers should have same size\n", + buf_list, buf_cnt); + return (-1); + } + + buf_list++; + buf_cnt--; + } + return (0); +} + +static size_t +qlnxr_get_num_pages(qlnx_host_t *ha, struct ib_phys_buf *buf_list, + int buf_cnt) +{ + int i; + size_t num_pages = 0; + u64 size; + + for (i = 0; i < buf_cnt; i++) { + + size = 0; + while (size < 
buf_list->size) { + size += PAGE_SIZE; + num_pages++; + } + buf_list++; + } + return (num_pages); +} + +static void +qlnxr_populate_phys_mem_pbls(struct qlnxr_dev *dev, + struct ib_phys_buf *buf_list, int buf_cnt, + struct qlnxr_pbl *pbl, struct qlnxr_pbl_info *pbl_info) +{ + struct regpair *pbe; + struct qlnxr_pbl *pbl_tbl; + int pg_cnt, pages, pbe_cnt, total_num_pbes = 0; + qlnx_host_t *ha; + int i; + u64 pbe_addr; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!pbl_info) { + QL_DPRINT11(ha, "PBL_INFO not initialized\n"); + return; + } + + if (!pbl_info->num_pbes) { + QL_DPRINT11(ha, "pbl_info->num_pbes == 0\n"); + return; + } + + /* If we have a two layered pbl, the first pbl points to the rest + * of the pbls and the first entry lays on the second pbl in the table + */ + if (pbl_info->two_layered) + pbl_tbl = &pbl[1]; + else + pbl_tbl = pbl; + + pbe = (struct regpair *)pbl_tbl->va; + if (!pbe) { + QL_DPRINT12(ha, "pbe is NULL\n"); + return; + } + + pbe_cnt = 0; + + for (i = 0; i < buf_cnt; i++) { + + pages = buf_list->size >> PAGE_SHIFT; + + for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { + /* store the page address in pbe */ + + pbe_addr = buf_list->addr + (PAGE_SIZE * pg_cnt); + + pbe->lo = cpu_to_le32((u32)pbe_addr); + pbe->hi = cpu_to_le32(((u32)(pbe_addr >> 32))); + + QL_DPRINT12(ha, "Populate pbl table:" + " pbe->addr=0x%x:0x%x " + " pbe_cnt = %d total_num_pbes=%d" + " pbe=%p\n", pbe->lo, pbe->hi, pbe_cnt, + total_num_pbes, pbe); + + pbe_cnt ++; + total_num_pbes ++; + pbe++; + + if (total_num_pbes == pbl_info->num_pbes) + return; + + /* if the given pbl is full storing the pbes, + * move to next pbl. */ + + if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct regpair *)pbl_tbl->va; + pbe_cnt = 0; + } + } + buf_list++; + } + QL_DPRINT12(ha, "exit\n"); + return; +} + +struct ib_mr * +qlnxr_reg_kernel_mr(struct ib_pd *ibpd, + struct ib_phys_buf *buf_list, + int buf_cnt, int acc, u64 *iova_start) +{ + int rc = -ENOMEM; + struct qlnxr_dev *dev = get_qlnxr_dev((ibpd->device)); + struct qlnxr_mr *mr; + struct qlnxr_pd *pd; + qlnx_host_t *ha; + size_t num_pages = 0; + uint64_t length; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + pd = get_qlnxr_pd(ibpd); + + QL_DPRINT12(ha, "pd = %d buf_list = %p, buf_cnt = %d," + " iova_start = %p, acc = %d\n", + pd->pd_id, buf_list, buf_cnt, iova_start, acc); + + //if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) { + // QL_DPRINT11(ha, "(acc & IB_ACCESS_REMOTE_WRITE &&" + // " !(acc & IB_ACCESS_LOCAL_WRITE))\n"); + // return ERR_PTR(-EINVAL); + //} + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + QL_DPRINT11(ha, "kzalloc(mr) failed\n"); + return ERR_PTR(rc); + } + + mr->type = QLNXR_MR_KERNEL; + mr->iova_start = iova_start; + + rc = qlnxr_validate_phys_buf_list(ha, buf_list, buf_cnt, &length); + if (rc) + goto err0; + + num_pages = qlnxr_get_num_pages(ha, buf_list, buf_cnt); + if (!num_pages) + goto err0; + + rc = qlnxr_init_mr_info(dev, &mr->info, num_pages, 1); + if (rc) { + QL_DPRINT11(ha, + "qlnxr_init_mr_info failed [%d]\n", rc); + goto err1; + } + + qlnxr_populate_phys_mem_pbls(dev, buf_list, buf_cnt, mr->info.pbl_table, + &mr->info.pbl_info); + + rc = ecore_rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + + if (rc) { + QL_DPRINT11(ha, "roce alloc tid returned an error %d\n", rc); + goto err1; + } + + /* index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = ECORE_RDMA_TID_REGISTERED_MR; + mr->hw_mr.key = 0; + mr->hw_mr.pd = pd->pd_id; + 
mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hw_mr.mw_bind = false; /* TBD MW BIND */ + mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa; + mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; + mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); + mr->hw_mr.page_size_log = ilog2(PAGE_SIZE); /* for the MR pages */ + + mr->hw_mr.fbo = 0; + + mr->hw_mr.length = length; + mr->hw_mr.vaddr = (uint64_t)iova_start; + mr->hw_mr.zbva = false; /* TBD figure when this should be true */ + mr->hw_mr.phy_mr = false; /* Fast MR - True, Regular Register False */ + mr->hw_mr.dma_mr = false; + + rc = ecore_rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + QL_DPRINT11(ha, "roce register tid returned an error %d\n", rc); + goto err2; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || + mr->hw_mr.remote_atomic) + mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + + QL_DPRINT12(ha, "lkey: %x\n", mr->ibmr.lkey); + + return (&mr->ibmr); + +err2: + ecore_rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + qlnxr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); +err0: + kfree(mr); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return (ERR_PTR(rc)); +} + +#endif /* #if __FreeBSD_version >= 1102000 */ + +struct ib_ah * +#if __FreeBSD_version >= 1102000 +qlnxr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) +#else +qlnxr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +#endif /* #if __FreeBSD_version >= 1102000 */ +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + struct qlnxr_ah *ah; + + dev = get_qlnxr_dev((ibpd->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "in create_ah\n"); + + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); + if (!ah) { + QL_DPRINT12(ha, "no address handle can be allocated\n"); + return ERR_PTR(-ENOMEM); + } + + ah->attr = *attr; + + return &ah->ibah; +} + +int +qlnxr_destroy_ah(struct ib_ah *ibah) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + struct qlnxr_ah *ah = get_qlnxr_ah(ibah); + + dev = get_qlnxr_dev((ibah->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "in destroy_ah\n"); + + kfree(ah); + return 0; +} + +int +qlnxr_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev((ibah->device)); + ha = dev->ha; + QL_DPRINT12(ha, "Query AH not supported\n"); + return -EINVAL; +} + +int +qlnxr_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev((ibah->device)); + ha = dev->ha; + QL_DPRINT12(ha, "Modify AH not supported\n"); + return -ENOSYS; +} + +#if __FreeBSD_version >= 1102000 +int +qlnxr_process_mad(struct ib_device *ibdev, + int process_mad_flags, + u8 port_num, + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *mad_hdr, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index) + +#else + +int +qlnxr_process_mad(struct ib_device *ibdev, + int process_mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad) + +#endif /* #if __FreeBSD_version >= 1102000 */ +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); 
+ ha = dev->ha; + QL_DPRINT12(ha, "process mad not supported\n"); + + return -ENOSYS; +// QL_DPRINT12(ha, "qlnxr_process_mad in_mad %x %x %x %x %x %x %x %x\n", +// in_mad->mad_hdr.attr_id, in_mad->mad_hdr.base_version, +// in_mad->mad_hdr.attr_mod, in_mad->mad_hdr.class_specific, +// in_mad->mad_hdr.class_version, in_mad->mad_hdr.method, +// in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.status); + +// return IB_MAD_RESULT_SUCCESS; +} + + +#if __FreeBSD_version >= 1102000 +int +qlnxr_get_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + struct ib_port_attr attr; + int err; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + err = qlnxr_query_port(ibdev, port_num, &attr); + if (err) + return err; + + if (QLNX_IS_IWARP(dev)) { + immutable->pkey_tbl_len = 1; + immutable->gid_tbl_len = 1; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + immutable->max_mad_size = 0; + } else { + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + } + + QL_DPRINT12(ha, "exit\n"); + return 0; +} +#endif /* #if __FreeBSD_version > 1102000 */ + + +/***** iWARP related functions *************/ + + +static void +qlnxr_iw_mpa_request(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_iw_listener *listener = (struct qlnxr_iw_listener *)context; + struct qlnxr_dev *dev = listener->dev; + struct qlnxr_iw_ep *ep; + struct iw_cm_event event; + struct sockaddr_in *laddr; + struct sockaddr_in *raddr; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (params->cm_info->ip_version != ECORE_TCP_IPV4) { + QL_DPRINT11(ha, "only IPv4 supported [0x%x]\n", + params->cm_info->ip_version); + return; + } + + ep = kzalloc(sizeof(*ep), GFP_ATOMIC); + + if (!ep) { + QL_DPRINT11(ha, "kzalloc{ep) failed\n"); + return; + } + + ep->dev = dev; + ep->ecore_context = params->ep_context; + + memset(&event, 0, sizeof(event)); + + event.event = IW_CM_EVENT_CONNECT_REQUEST; + event.status = params->status; + + laddr = (struct sockaddr_in *)&event.local_addr; + raddr = (struct sockaddr_in *)&event.remote_addr; + + laddr->sin_family = AF_INET; + raddr->sin_family = AF_INET; + + laddr->sin_port = htons(params->cm_info->local_port); + raddr->sin_port = htons(params->cm_info->remote_port); + + laddr->sin_addr.s_addr = htonl(params->cm_info->local_ip[0]); + raddr->sin_addr.s_addr = htonl(params->cm_info->remote_ip[0]); + + event.provider_data = (void *)ep; + event.private_data = (void *)params->cm_info->private_data; + event.private_data_len = (u8)params->cm_info->private_data_len; + +#if __FreeBSD_version >= 1100000 + event.ord = params->cm_info->ord; + event.ird = params->cm_info->ird; +#endif /* #if __FreeBSD_version >= 1100000 */ + + listener->cm_id->event_handler(listener->cm_id, &event); + + QL_DPRINT12(ha, "exit\n"); + + return; +} + +static void +qlnxr_iw_issue_event(void *context, + struct ecore_iwarp_cm_event_params *params, + enum iw_cm_event_type event_type, + char *str) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + struct iw_cm_event event; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(&event, 0, sizeof(event)); + event.status = params->status; + event.event = event_type; + + if (params->cm_info != NULL) { +#if __FreeBSD_version >= 1100000 + 
event.ird = params->cm_info->ird; + event.ord = params->cm_info->ord; + QL_DPRINT12(ha, "ord=[%d] \n", event.ord); + QL_DPRINT12(ha, "ird=[%d] \n", event.ird); +#endif /* #if __FreeBSD_version >= 1100000 */ + + event.private_data_len = params->cm_info->private_data_len; + event.private_data = (void *)params->cm_info->private_data; + QL_DPRINT12(ha, "private_data_len=[%d] \n", + event.private_data_len); + } + + QL_DPRINT12(ha, "event=[%d] %s\n", event.event, str); + QL_DPRINT12(ha, "status=[%d] \n", event.status); + + if (ep) { + if (ep->cm_id) + ep->cm_id->event_handler(ep->cm_id, &event); + else + QL_DPRINT11(ha, "ep->cm_id == NULL \n"); + } else { + QL_DPRINT11(ha, "ep == NULL \n"); + } + + QL_DPRINT12(ha, "exit\n"); + + return; +} + +static void +qlnxr_iw_close_event(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (ep->cm_id) { + qlnxr_iw_issue_event(context, + params, + IW_CM_EVENT_CLOSE, + "IW_CM_EVENT_EVENT_CLOSE"); + ep->cm_id->rem_ref(ep->cm_id); + ep->cm_id = NULL; + } + + QL_DPRINT12(ha, "exit\n"); + + return; +} + +#if __FreeBSD_version >= 1102000 + +static void +qlnxr_iw_passive_complete(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + /* We will only reach the following state if MPA_REJECT was called on + * passive. In this case there will be no associated QP. + */ + if ((params->status == -ECONNREFUSED) && (ep->qp == NULL)) { + QL_DPRINT11(ha, "PASSIVE connection refused releasing ep...\n"); + kfree(ep); + return; + } + + /* We always issue an established event, however, ofed does not look + * at event code for established. So if there was a failure, we follow + * with close... + */ + qlnxr_iw_issue_event(context, + params, + IW_CM_EVENT_ESTABLISHED, + "IW_CM_EVENT_ESTABLISHED"); + + if (params->status < 0) { + qlnxr_iw_close_event(context, params); + } + + return; +} + +struct qlnxr_discon_work { + struct work_struct work; + struct qlnxr_iw_ep *ep; + enum ecore_iwarp_event_type event; + int status; +}; + +static void +qlnxr_iw_disconnect_worker(struct work_struct *work) +{ + struct qlnxr_discon_work *dwork = + container_of(work, struct qlnxr_discon_work, work); + struct ecore_rdma_modify_qp_in_params qp_params = { 0 }; + struct qlnxr_iw_ep *ep = dwork->ep; + struct qlnxr_dev *dev = ep->dev; + struct qlnxr_qp *qp = ep->qp; + struct iw_cm_event event; + + if (qp->destroyed) { + kfree(dwork); + qlnxr_iw_qp_rem_ref(&qp->ibqp); + return; + } + + memset(&event, 0, sizeof(event)); + event.status = dwork->status; + event.event = IW_CM_EVENT_DISCONNECT; + + /* Success means graceful disconnect was requested. modifying + * to SQD is translated to graceful disconnect. 
O/w reset is sent + */ + if (dwork->status) + qp_params.new_state = ECORE_ROCE_QP_STATE_ERR; + else + qp_params.new_state = ECORE_ROCE_QP_STATE_SQD; + + kfree(dwork); + + if (ep->cm_id) + ep->cm_id->event_handler(ep->cm_id, &event); + + SET_FIELD(qp_params.modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_NEW_STATE, 1); + + ecore_rdma_modify_qp(dev->rdma_ctx, qp->ecore_qp, &qp_params); + + qlnxr_iw_qp_rem_ref(&qp->ibqp); + + return; +} + +void +qlnxr_iw_disconnect_event(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_discon_work *work; + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + struct qlnxr_qp *qp = ep->qp; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + qlnxr_iw_qp_add_ref(&qp->ibqp); + work->ep = ep; + work->event = params->event; + work->status = params->status; + + INIT_WORK(&work->work, qlnxr_iw_disconnect_worker); + queue_work(dev->iwarp_wq, &work->work); + + return; +} + +#endif /* #if __FreeBSD_version >= 1102000 */ + +static int +qlnxr_iw_mpa_reply(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + struct ecore_iwarp_send_rtr_in rtr_in; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + bzero(&rtr_in, sizeof(struct ecore_iwarp_send_rtr_in)); + rtr_in.ep_context = params->ep_context; + + rc = ecore_iwarp_send_rtr(dev->rdma_ctx, &rtr_in); + + QL_DPRINT12(ha, "exit rc = %d\n", rc); + return rc; +} + + +void +qlnxr_iw_qp_event(void *context, + struct ecore_iwarp_cm_event_params *params, + enum ib_event_type ib_event, + char *str) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + struct ib_qp *ibqp = &(ep->qp->ibqp); + struct ib_event event; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, + "[context, event, event_handler] = [%p, 0x%x, %s, %p] enter\n", + context, params->event, str, ibqp->event_handler); + + if (ibqp->event_handler) { + event.event = ib_event; + event.device = ibqp->device; + event.element.qp = ibqp; + ibqp->event_handler(&event, ibqp->qp_context); + } + + return; +} + +int +qlnxr_iw_event_handler(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "[context, event] = [%p, 0x%x] " + "enter\n", context, params->event); + + switch (params->event) { + + /* Passive side request received */ + case ECORE_IWARP_EVENT_MPA_REQUEST: + qlnxr_iw_mpa_request(context, params); + break; + + case ECORE_IWARP_EVENT_ACTIVE_MPA_REPLY: + qlnxr_iw_mpa_reply(context, params); + break; + + /* Passive side established ( ack on mpa response ) */ + case ECORE_IWARP_EVENT_PASSIVE_COMPLETE: + +#if __FreeBSD_version >= 1102000 + + ep->during_connect = 0; + qlnxr_iw_passive_complete(context, params); + +#else + qlnxr_iw_issue_event(context, + params, + IW_CM_EVENT_ESTABLISHED, + "IW_CM_EVENT_ESTABLISHED"); +#endif /* #if __FreeBSD_version >= 1102000 */ + break; + + /* Active side reply received */ + case ECORE_IWARP_EVENT_ACTIVE_COMPLETE: + ep->during_connect = 0; + qlnxr_iw_issue_event(context, + params, + IW_CM_EVENT_CONNECT_REPLY, + "IW_CM_EVENT_CONNECT_REPLY"); + if (params->status < 0) { + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + + 
ep->cm_id->rem_ref(ep->cm_id); + ep->cm_id = NULL; + } + break; + + case ECORE_IWARP_EVENT_DISCONNECT: + +#if __FreeBSD_version >= 1102000 + qlnxr_iw_disconnect_event(context, params); +#else + qlnxr_iw_issue_event(context, + params, + IW_CM_EVENT_DISCONNECT, + "IW_CM_EVENT_DISCONNECT"); + qlnxr_iw_close_event(context, params); +#endif /* #if __FreeBSD_version >= 1102000 */ + break; + + case ECORE_IWARP_EVENT_CLOSE: + ep->during_connect = 0; + qlnxr_iw_close_event(context, params); + break; + + case ECORE_IWARP_EVENT_RQ_EMPTY: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "IWARP_EVENT_RQ_EMPTY"); + break; + + case ECORE_IWARP_EVENT_IRQ_FULL: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "IWARP_EVENT_IRQ_FULL"); + break; + + case ECORE_IWARP_EVENT_LLP_TIMEOUT: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "IWARP_EVENT_LLP_TIMEOUT"); + break; + + case ECORE_IWARP_EVENT_REMOTE_PROTECTION_ERROR: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_ACCESS_ERR, + "IWARP_EVENT_REMOTE_PROTECTION_ERROR"); + break; + + case ECORE_IWARP_EVENT_CQ_OVERFLOW: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "QED_IWARP_EVENT_CQ_OVERFLOW"); + break; + + case ECORE_IWARP_EVENT_QP_CATASTROPHIC: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "QED_IWARP_EVENT_QP_CATASTROPHIC"); + break; + + case ECORE_IWARP_EVENT_LOCAL_ACCESS_ERROR: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_ACCESS_ERR, + "IWARP_EVENT_LOCAL_ACCESS_ERROR"); + break; + + case ECORE_IWARP_EVENT_REMOTE_OPERATION_ERROR: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "IWARP_EVENT_REMOTE_OPERATION_ERROR"); + break; + + case ECORE_IWARP_EVENT_TERMINATE_RECEIVED: + QL_DPRINT12(ha, "Got terminate message" + " ECORE_IWARP_EVENT_TERMINATE_RECEIVED\n"); + break; + + default: + QL_DPRINT12(ha, + "Unknown event [0x%x] received \n", params->event); + break; + }; + + QL_DPRINT12(ha, "[context, event] = [%p, 0x%x] " + "exit\n", context, params->event); + return 0; +} + +static int +qlnxr_addr4_resolve(struct qlnxr_dev *dev, + struct sockaddr_in *src_in, + struct sockaddr_in *dst_in, + u8 *dst_mac) +{ + int rc; + +#if __FreeBSD_version >= 1100000 + rc = arpresolve(dev->ha->ifp, 0, NULL, (struct sockaddr *)dst_in, + dst_mac, NULL, NULL); +#else + struct llentry *lle; + + rc = arpresolve(dev->ha->ifp, NULL, NULL, (struct sockaddr *)dst_in, + dst_mac, &lle); +#endif + + QL_DPRINT12(dev->ha, "rc = %d " + "sa_len = 0x%x sa_family = 0x%x IP Address = %d.%d.%d.%d " + "Dest MAC %02x:%02x:%02x:%02x:%02x:%02x\n", rc, + dst_in->sin_len, dst_in->sin_family, + NIPQUAD((dst_in->sin_addr.s_addr)), + dst_mac[0], dst_mac[1], dst_mac[2], + dst_mac[3], dst_mac[4], dst_mac[5]); + + return rc; +} + +int +qlnxr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + struct qlnxr_dev *dev; + struct ecore_iwarp_connect_out out_params; + struct ecore_iwarp_connect_in in_params; + struct qlnxr_iw_ep *ep; + struct qlnxr_qp *qp; + struct sockaddr_in *laddr; + struct sockaddr_in *raddr; + int rc = 0; + qlnx_host_t *ha; + + dev = get_qlnxr_dev((cm_id->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "[cm_id, conn_param] = [%p, %p] " + "enter \n", cm_id, conn_param); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + qp = idr_find(&dev->qpidr, conn_param->qpn); + + laddr = (struct sockaddr_in *)&cm_id->local_addr; + raddr = (struct sockaddr_in *)&cm_id->remote_addr; + + QL_DPRINT12(ha, + "local = [%d.%d.%d.%d, %d] remote = [%d.%d.%d.%d, %d]\n", + NIPQUAD((laddr->sin_addr.s_addr)), 
laddr->sin_port, + NIPQUAD((raddr->sin_addr.s_addr)), raddr->sin_port); + + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) { + QL_DPRINT11(ha, "struct qlnxr_iw_ep " + "alloc memory failed\n"); + return -ENOMEM; + } + + ep->dev = dev; + ep->qp = qp; + cm_id->add_ref(cm_id); + ep->cm_id = cm_id; + + memset(&in_params, 0, sizeof (struct ecore_iwarp_connect_in)); + memset(&out_params, 0, sizeof (struct ecore_iwarp_connect_out)); + + in_params.event_cb = qlnxr_iw_event_handler; + in_params.cb_context = ep; + + in_params.cm_info.ip_version = ECORE_TCP_IPV4; + + in_params.cm_info.remote_ip[0] = ntohl(raddr->sin_addr.s_addr); + in_params.cm_info.local_ip[0] = ntohl(laddr->sin_addr.s_addr); + in_params.cm_info.remote_port = ntohs(raddr->sin_port); + in_params.cm_info.local_port = ntohs(laddr->sin_port); + in_params.cm_info.vlan = 0; + in_params.mss = dev->ha->ifp->if_mtu - 40; + + QL_DPRINT12(ha, "remote_ip = [%d.%d.%d.%d] " + "local_ip = [%d.%d.%d.%d] remote_port = %d local_port = %d " + "vlan = %d\n", + NIPQUAD((in_params.cm_info.remote_ip[0])), + NIPQUAD((in_params.cm_info.local_ip[0])), + in_params.cm_info.remote_port, in_params.cm_info.local_port, + in_params.cm_info.vlan); + + rc = qlnxr_addr4_resolve(dev, laddr, raddr, (u8 *)in_params.remote_mac_addr); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_addr4_resolve failed\n"); + goto err; + } + + QL_DPRINT12(ha, "ord = %d ird=%d private_data=%p" + " private_data_len=%d rq_psn=%d\n", + conn_param->ord, conn_param->ird, conn_param->private_data, + conn_param->private_data_len, qp->rq_psn); + + in_params.cm_info.ord = conn_param->ord; + in_params.cm_info.ird = conn_param->ird; + in_params.cm_info.private_data = conn_param->private_data; + in_params.cm_info.private_data_len = conn_param->private_data_len; + in_params.qp = qp->ecore_qp; + + memcpy(in_params.local_mac_addr, dev->ha->primary_mac, ETH_ALEN); + + rc = ecore_iwarp_connect(dev->rdma_ctx, &in_params, &out_params); + + if (rc) { + QL_DPRINT12(ha, "ecore_iwarp_connect failed\n"); + goto err; + } + + QL_DPRINT12(ha, "exit\n"); + + return rc; + +err: + cm_id->rem_ref(cm_id); + kfree(ep); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +int +qlnxr_iw_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + struct qlnxr_dev *dev; + struct qlnxr_iw_listener *listener; + struct ecore_iwarp_listen_in iparams; + struct ecore_iwarp_listen_out oparams; + struct sockaddr_in *laddr; + qlnx_host_t *ha; + int rc; + + dev = get_qlnxr_dev((cm_id->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + laddr = (struct sockaddr_in *)&cm_id->local_addr; + + listener = kzalloc(sizeof(*listener), GFP_KERNEL); + + if (listener == NULL) { + QL_DPRINT11(ha, "listener memory alloc failed\n"); + return -ENOMEM; + } + + listener->dev = dev; + cm_id->add_ref(cm_id); + listener->cm_id = cm_id; + listener->backlog = backlog; + + memset(&iparams, 0, sizeof (struct ecore_iwarp_listen_in)); + memset(&oparams, 0, sizeof (struct ecore_iwarp_listen_out)); + + iparams.cb_context = listener; + iparams.event_cb = qlnxr_iw_event_handler; + iparams.max_backlog = backlog; + + iparams.ip_version = ECORE_TCP_IPV4; + + iparams.ip_addr[0] = ntohl(laddr->sin_addr.s_addr); + iparams.port = ntohs(laddr->sin_port); + iparams.vlan = 0; + + QL_DPRINT12(ha, "[%d.%d.%d.%d, %d] iparamsport=%d\n", + NIPQUAD((laddr->sin_addr.s_addr)), + laddr->sin_port, iparams.port); + + rc = ecore_iwarp_create_listen(dev->rdma_ctx, &iparams, &oparams); + if (rc) { + QL_DPRINT11(ha, + 
"ecore_iwarp_create_listen failed rc = %d\n", rc); + goto err; + } + + listener->ecore_handle = oparams.handle; + cm_id->provider_data = listener; + + QL_DPRINT12(ha, "exit\n"); + return rc; + +err: + cm_id->rem_ref(cm_id); + kfree(listener); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +void +qlnxr_iw_destroy_listen(struct iw_cm_id *cm_id) +{ + struct qlnxr_iw_listener *listener = cm_id->provider_data; + struct qlnxr_dev *dev = get_qlnxr_dev((cm_id->device)); + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (listener->ecore_handle) + rc = ecore_iwarp_destroy_listen(dev->rdma_ctx, + listener->ecore_handle); + + cm_id->rem_ref(cm_id); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return; +} + +int +qlnxr_iw_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)cm_id->provider_data; + struct qlnxr_dev *dev = ep->dev; + struct qlnxr_qp *qp; + struct ecore_iwarp_accept_in params; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter qpid=%d\n", conn_param->qpn); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + qp = idr_find(&dev->qpidr, conn_param->qpn); + if (!qp) { + QL_DPRINT11(ha, "idr_find failed invalid qpn = %d\n", + conn_param->qpn); + return -EINVAL; + } + ep->qp = qp; + qp->ep = ep; + cm_id->add_ref(cm_id); + ep->cm_id = cm_id; + + params.ep_context = ep->ecore_context; + params.cb_context = ep; + params.qp = ep->qp->ecore_qp; + params.private_data = conn_param->private_data; + params.private_data_len = conn_param->private_data_len; + params.ird = conn_param->ird; + params.ord = conn_param->ord; + + rc = ecore_iwarp_accept(dev->rdma_ctx, ¶ms); + if (rc) { + QL_DPRINT11(ha, "ecore_iwarp_accept failed %d\n", rc); + goto err; + } + + QL_DPRINT12(ha, "exit\n"); + return 0; +err: + cm_id->rem_ref(cm_id); + QL_DPRINT12(ha, "exit rc = %d\n", rc); + return rc; +} + +int +qlnxr_iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ +#if __FreeBSD_version >= 1102000 + + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)cm_id->provider_data; + struct qlnxr_dev *dev = ep->dev; + struct ecore_iwarp_reject_in params; + int rc; + + params.ep_context = ep->ecore_context; + params.cb_context = ep; + params.private_data = pdata; + params.private_data_len = pdata_len; + ep->qp = NULL; + + rc = ecore_iwarp_reject(dev->rdma_ctx, ¶ms); + + return rc; + +#else + + printf("iWARP reject_cr not implemented\n"); + return -EINVAL; + +#endif /* #if __FreeBSD_version >= 1102000 */ +} + +void +qlnxr_iw_qp_add_ref(struct ib_qp *ibqp) +{ + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + qlnx_host_t *ha; + + ha = qp->dev->ha; + + QL_DPRINT12(ha, "enter ibqp = %p\n", ibqp); + + atomic_inc(&qp->refcnt); + + QL_DPRINT12(ha, "exit \n"); + return; +} + +void +qlnxr_iw_qp_rem_ref(struct ib_qp *ibqp) +{ + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + qlnx_host_t *ha; + + ha = qp->dev->ha; + + QL_DPRINT12(ha, "enter ibqp = %p qp = %p\n", ibqp, qp); + + if (atomic_dec_and_test(&qp->refcnt)) { + qlnxr_idr_remove(qp->dev, qp->qp_id); + kfree(qp); + } + + QL_DPRINT12(ha, "exit \n"); + return; +} + +struct ib_qp * +qlnxr_iw_get_qp(struct ib_device *ibdev, int qpn) +{ + struct qlnxr_dev *dev = get_qlnxr_dev(ibdev); + struct ib_qp *qp; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter dev = %p ibdev = %p qpn = %d\n", dev, ibdev, qpn); + + qp = idr_find(&dev->qpidr, qpn); + + QL_DPRINT12(ha, "exit qp = %p\n", qp); + + return (qp); +} diff --git 
a/sys/dev/qlnx/qlnxr/qlnxr_verbs.h b/sys/dev/qlnx/qlnxr/qlnxr_verbs.h new file mode 100644 index 000000000000..c202f4457b85 --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_verbs.h @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + + +#ifndef __QLNXR_VERBS_H__ +#define __QLNXR_VERBS_H__ + +extern int qlnxr_iw_query_gid(struct ib_device *, + uint8_t port, + int index, + union ib_gid *gid); + +extern int qlnxr_query_gid(struct ib_device *, + u8 port, + int index, + union ib_gid *gid); + +extern struct ib_srq *qlnxr_create_srq(struct ib_pd *, + struct ib_srq_init_attr *, + struct ib_udata *); + +extern int qlnxr_destroy_srq(struct ib_srq *); + + +extern int qlnxr_modify_srq(struct ib_srq *, + struct ib_srq_attr *, + enum ib_srq_attr_mask, + struct ib_udata *); + +extern int qlnxr_query_srq(struct ib_srq *, + struct ib_srq_attr *); + +extern int qlnxr_post_srq_recv(struct ib_srq *, + struct ib_recv_wr *, + struct ib_recv_wr **bad_recv_wr); + +#if __FreeBSD_version < 1102000 +extern int qlnxr_query_device(struct ib_device *, struct ib_device_attr *); +#else +extern int qlnxr_query_device(struct ib_device *, struct ib_device_attr *, + struct ib_udata *); +extern int qlnxr_get_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable); +#endif + +extern int qlnxr_query_port(struct ib_device *, + u8 port, + struct ib_port_attr *props); + +extern int qlnxr_modify_port(struct ib_device *, + u8 port, + int mask, + struct ib_port_modify *props); + +extern enum rdma_link_layer qlnxr_link_layer(struct ib_device *device, + uint8_t port_num); + +struct ib_pd *qlnxr_alloc_pd(struct ib_device *, + struct ib_ucontext *, + struct ib_udata *); + +extern int qlnxr_dealloc_pd(struct ib_pd *pd); + +#if __FreeBSD_version >= 1102000 +extern struct ib_cq *qlnxr_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata); +#else +#if __FreeBSD_version >= 1100000 +extern struct ib_cq *qlnxr_create_cq(struct ib_device *ibdev, + struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata); +#else +extern struct ib_cq *qlnxr_create_cq(struct 
ib_device *ibdev, + int cqe, + int comp_vector, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata); +#endif +#endif /* #if __FreeBSD_version >= 1102000 */ + +extern int qlnxr_destroy_cq(struct ib_cq *); + +extern int qlnxr_resize_cq(struct ib_cq *, + int cqe, + struct ib_udata *); + +extern int qlnxr_poll_cq(struct ib_cq *, + int num_entries, + struct ib_wc *wc); + + +extern struct ib_qp *qlnxr_create_qp(struct ib_pd *, + struct ib_qp_init_attr *attrs, + struct ib_udata *); + +extern int qlnxr_modify_qp(struct ib_qp *, + struct ib_qp_attr *attr, + int attr_mask, + struct ib_udata *udata); + +extern int qlnxr_query_qp(struct ib_qp *, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *); + +extern int qlnxr_destroy_qp(struct ib_qp *); + +extern int qlnxr_query_pkey(struct ib_device *, + u8 port, + u16 index, + u16 *pkey); + +#if __FreeBSD_version >= 1102000 +extern struct ib_ah *qlnxr_create_ah(struct ib_pd *ibpd, + struct ib_ah_attr *attr, struct ib_udata *udata); +#else +extern struct ib_ah *qlnxr_create_ah(struct ib_pd *ibpd, + struct ib_ah_attr *attr); +#endif /* #if __FreeBSD_version >= 1102000 */ + +extern int qlnxr_destroy_ah(struct ib_ah *ibah); + +extern int qlnxr_query_ah(struct ib_ah *ibah, + struct ib_ah_attr *attr); + +extern int qlnxr_modify_ah(struct ib_ah *ibah, + struct ib_ah_attr *attr); + +#if __FreeBSD_version >= 1102000 +extern int qlnxr_process_mad(struct ib_device *ibdev, + int process_mad_flags, + u8 port_num, + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *mad_hdr, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index); +#else +extern int qlnxr_process_mad(struct ib_device *ibdev, + int process_mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); +#endif /* #if __FreeBSD_version >= 1102000 */ + +extern int qlnxr_post_send(struct ib_qp *, + struct ib_send_wr *, + struct ib_send_wr **bad_wr); + +extern int qlnxr_post_recv(struct ib_qp *, + struct ib_recv_wr *, + struct ib_recv_wr **bad_wr); + +extern int qlnxr_arm_cq(struct ib_cq *, + enum ib_cq_notify_flags flags); + +extern struct ib_mr *qlnxr_get_dma_mr(struct ib_pd *, + int acc); + +#if __FreeBSD_version < 1102000 +extern struct ib_mr *qlnxr_reg_kernel_mr(struct ib_pd *, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + int acc, + u64 *iova_start); +#endif /* #if __FreeBSD_version < 1102000 */ + +extern int qlnxr_dereg_mr(struct ib_mr *); + +#if __FreeBSD_version >= 1102000 +extern struct ib_mr *qlnxr_reg_user_mr(struct ib_pd *, + u64 start, + u64 length, + u64 virt, + int acc, + struct ib_udata *); +#else +extern struct ib_mr *qlnxr_reg_user_mr(struct ib_pd *, + u64 start, + u64 length, + u64 virt, + int acc, + struct ib_udata *, + int mr_id); +#endif /* #if __FreeBSD_version >= 1102000 */ + +#if __FreeBSD_version >= 1102000 + +extern struct ib_mr *qlnxr_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, u32 max_num_sg); +extern int qlnxr_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset); +#else + +extern struct ib_mr *qlnxr_alloc_frmr(struct ib_pd *pd, + int max_page_list_len); + + +extern struct ib_fast_reg_page_list *qlnxr_alloc_frmr_page_list( + struct ib_device *ibdev, + int page_list_len); + +extern void qlnxr_free_frmr_page_list(struct ib_fast_reg_page_list *page_list); + +#endif /* #if __FreeBSD_version >= 1102000 */ + +extern struct ib_ucontext 
*qlnxr_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata); + +extern int qlnxr_dealloc_ucontext(struct ib_ucontext *ibctx); + + +extern int qlnxr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); + +extern int qlnxr_iw_connect(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + +extern int qlnxr_iw_create_listen(struct iw_cm_id *cm_id, int backlog); + +void qlnxr_iw_destroy_listen(struct iw_cm_id *cm_id); + +extern int qlnxr_iw_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + +extern int qlnxr_iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); + +extern void qlnxr_iw_qp_add_ref(struct ib_qp *qp); + +extern void qlnxr_iw_qp_rem_ref(struct ib_qp *qp); + +extern struct ib_qp *qlnxr_iw_get_qp(struct ib_device *dev, int qpn); + +#endif /* #ifndef __QLNXR_VERBS_H__ */ + diff --git a/sys/dev/sound/usb/uaudio.c b/sys/dev/sound/usb/uaudio.c index df1d131f874e..ba2fff153a91 100644 --- a/sys/dev/sound/usb/uaudio.c +++ b/sys/dev/sound/usb/uaudio.c @@ -6285,3 +6285,4 @@ MODULE_DEPEND(uaudio, usb, 1, 1, 1); MODULE_DEPEND(uaudio, sound, SOUND_MINVER, SOUND_PREFVER, SOUND_MAXVER); MODULE_VERSION(uaudio, 1); USB_PNP_HOST_INFO(uaudio_devs); +USB_PNP_HOST_INFO(uaudio_vendor_midi); diff --git a/sys/dev/usb/wlan/if_rsu.c b/sys/dev/usb/wlan/if_rsu.c index 6a294a0c7111..0d2e531657cc 100644 --- a/sys/dev/usb/wlan/if_rsu.c +++ b/sys/dev/usb/wlan/if_rsu.c @@ -2755,7 +2755,7 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, struct ieee80211_frame *wh; struct ieee80211_key *k = NULL; struct r92s_tx_desc *txd; - uint8_t rate, ridx, type, cipher; + uint8_t rate, ridx, type, cipher, qos; int prio = 0; uint8_t which; int hasqos; @@ -2804,12 +2804,14 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, prio = M_WME_GETAC(m0); which = rsu_wme_ac_xfer_map[prio]; hasqos = 1; + qos = ((const struct ieee80211_qosframe *)wh)->i_qos[0]; } else { /* Non-QoS TID */ /* XXX TODO: tid=0 for non-qos TID? */ which = rsu_wme_ac_xfer_map[WME_AC_BE]; hasqos = 0; prio = 0; + qos = 0; } qid = rsu_ac2qid[prio]; @@ -2868,6 +2870,12 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, if (ismcast) txd->txdw2 |= htole32(R92S_TXDW2_BMCAST); + if (!ismcast && (!qos || (qos & IEEE80211_QOS_ACKPOLICY) != + IEEE80211_QOS_ACKPOLICY_NOACK)) { + txd->txdw2 |= htole32(R92S_TXDW2_RTY_LMT_ENA); + txd->txdw2 |= htole32(SM(R92S_TXDW2_RTY_LMT, tp->maxretry)); + } + /* Force mgmt / mcast / ucast rate if needed. */ if (rate != 0) { /* Data rate fallback limit (max). 
*/ diff --git a/sys/dev/usb/wlan/if_rsureg.h b/sys/dev/usb/wlan/if_rsureg.h index b1cd36ea3f0f..a6ab170be7ce 100644 --- a/sys/dev/usb/wlan/if_rsureg.h +++ b/sys/dev/usb/wlan/if_rsureg.h @@ -688,6 +688,9 @@ struct r92s_tx_desc { #define R92S_TXDW1_HWPC 0x80000000 uint32_t txdw2; +#define R92S_TXDW2_RTY_LMT_M 0x0000003f +#define R92S_TXDW2_RTY_LMT_S 0 +#define R92S_TXDW2_RTY_LMT_ENA 0x00000040 #define R92S_TXDW2_BMCAST 0x00000080 #define R92S_TXDW2_AGGEN 0x20000000 #define R92S_TXDW2_BK 0x40000000 diff --git a/sys/dev/usb/wlan/if_run.c b/sys/dev/usb/wlan/if_run.c index d92fdcf3368e..658a3e65f869 100644 --- a/sys/dev/usb/wlan/if_run.c +++ b/sys/dev/usb/wlan/if_run.c @@ -2029,7 +2029,8 @@ run_read_eeprom(struct run_softc *sc) static struct ieee80211_node * run_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { - return malloc(sizeof (struct run_node), M_DEVBUF, M_NOWAIT | M_ZERO); + return malloc(sizeof (struct run_node), M_80211_NODE, + M_NOWAIT | M_ZERO); } static int diff --git a/sys/dev/vmware/vmxnet3/if_vmx.c b/sys/dev/vmware/vmxnet3/if_vmx.c index 38b61d3a7e8a..456d935abdef 100644 --- a/sys/dev/vmware/vmxnet3/if_vmx.c +++ b/sys/dev/vmware/vmxnet3/if_vmx.c @@ -398,7 +398,7 @@ vmxnet3_attach_pre(if_ctx_t ctx) /* * Configure the softc context to attempt to configure the interrupt * mode now indicated by intr_config. iflib will follow the usual - * fallback path MSIX -> MSI -> LEGACY, starting at the configured + * fallback path MSI-X -> MSI -> LEGACY, starting at the configured * starting mode. */ switch (intr_config & 0x03) { @@ -620,19 +620,18 @@ static void vmxnet3_free_resources(struct vmxnet3_softc *sc) { device_t dev; - int rid; dev = sc->vmx_dev; if (sc->vmx_res0 != NULL) { - rid = PCIR_BAR(0); - bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0); + bus_release_resource(dev, SYS_RES_MEMORY, + rman_get_rid(sc->vmx_res0), sc->vmx_res0); sc->vmx_res0 = NULL; } if (sc->vmx_res1 != NULL) { - rid = PCIR_BAR(1); - bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1); + bus_release_resource(dev, SYS_RES_MEMORY, + rman_get_rid(sc->vmx_res1), sc->vmx_res1); sc->vmx_res1 = NULL; } } @@ -677,14 +676,16 @@ vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc) scctx = sc->vmx_scctx; /* - * There is either one interrupt, or there is one interrupt per - * receive queue. If there is one interrupt, then all interrupt - * indexes are zero. If there is one interrupt per receive queue, - * the transmit queue interrupt indexes are assigned the receive - * queue interrupt indexesin round-robin fashion. - * - * The event interrupt is always the last interrupt index. + * There is always one interrupt per receive queue, assigned + * starting with the first interrupt. When there is only one + * interrupt available, the event interrupt shares the receive queue + * interrupt, otherwise it uses the interrupt following the last + * receive queue interrupt. Transmit queues are not assigned + * interrupts, so they are given indexes beyond the indexes that + * correspond to the real interrupts. */ + + /* The event interrupt is always the last vector. */ sc->vmx_event_intr_idx = scctx->isc_vectors - 1; intr_idx = 0; @@ -1074,14 +1075,14 @@ vmxnet3_init_shared_data(struct vmxnet3_softc *sc) ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO; /* * Total number of interrupt indexes we are using in the shared - * config data, even though we don't actually allocate MSIX + * config data, even though we don't actually allocate interrupt * resources for the tx queues. 
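/*
 * Illustrative sketch, not part of the diff above: the interrupt-index
 * layout described in the vmxnet3_set_interrupt_idx() comment, printed for
 * a hypothetical configuration.  The function name, the printf output and
 * the assumption that nvec == nrxq + 1 in the multi-vector case are for
 * demonstration only.
 */
#include <stdio.h>

static void
example_vmx_intr_layout(int nrxq, int ntxq, int nvec)
{
	int i;

	/* One interrupt per receive queue, assigned starting at index 0. */
	for (i = 0; i < nrxq; i++)
		printf("rxq%d  -> intr index %d\n", i, i);

	/* Transmit queues get indexes beyond the real vectors; these
	 * interrupts are never actually enabled. */
	for (i = 0; i < ntxq; i++)
		printf("txq%d  -> intr index %d\n", i, nvec + i);

	/* The event interrupt is always the last real vector; with a single
	 * vector it therefore shares index 0 with the receive queue. */
	printf("event -> intr index %d\n", nvec - 1);
}

int
main(void)
{
	example_vmx_intr_layout(1, 1, 1);	/* single shared vector */
	example_vmx_intr_layout(4, 4, 5);	/* MSI-X: 4 RX vectors + event */
	return (0);
}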
Some versions of the device will * fail to initialize successfully if interrupt indexes are used in * the shared config that exceed the number of interrupts configured * here. */ ds->nintr = (scctx->isc_vectors == 1) ? - 1 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1); + 2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1); ds->evintr = sc->vmx_event_intr_idx; ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL; diff --git a/sys/dev/wtap/if_wtap.c b/sys/dev/wtap/if_wtap.c index 7316263143c8..fe0a610c6058 100644 --- a/sys/dev/wtap/if_wtap.c +++ b/sys/dev/wtap/if_wtap.c @@ -373,7 +373,7 @@ wtap_vap_delete(struct ieee80211vap *vap) destroy_dev(avp->av_dev); callout_stop(&avp->av_swba); ieee80211_vap_detach(vap); - free((struct wtap_vap*) vap, M_80211_VAP); + free(avp, M_80211_VAP); } static void @@ -602,6 +602,8 @@ wtap_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) ni = malloc(sizeof(struct ieee80211_node), M_80211_NODE, M_NOWAIT|M_ZERO); + if (ni == NULL) + return (NULL); ni->ni_txrate = 130; return ni; diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index f352b9eb5d42..0536fd43c621 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -220,10 +220,14 @@ device ppi # Parallel port interface device device puc # Multi I/O cards and multi-channel UARTs +# PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure +device iflib +device em # Intel PRO/1000 Gigabit Ethernet Family +device vmx # VMware VMXNET3 Ethernet + # PCI Ethernet NICs. device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device de # DEC/Intel DC21x4x (``Tulip'') -device em # Intel PRO/1000 Gigabit Ethernet Family device le # AMD Am7900 LANCE and Am79C9xx PCnet device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') @@ -362,9 +366,6 @@ device hyperv # HyperV drivers options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver -# VMware support -device vmx # VMware VMXNET3 Ethernet - # evdev interface options EVDEV_SUPPORT # evdev support in legacy drivers device evdev # input event device support diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index d7324f977e4b..c46dc81ea6a5 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -2005,13 +2005,15 @@ physmap_done: Maxmem = atop(physmap[physmap_idx + 1]); /* - * By default enable the memory test on real hardware, and disable - * it if we appear to be running in a VM. This avoids touching all - * pages unnecessarily, which doesn't matter on real hardware but is - * bad for shared VM hosts. Use a general name so that - * one could eventually do more with the code than just disable it. + * The boot memory test is disabled by default, as it takes a + * significant amount of time on large-memory systems, and is + * unfriendly to virtual machines as it unnecessarily touches all + * pages. + * + * A general name is used as the code may be extended to support + * additional tests beyond the current "page present" test. */ - memtest = (vm_guest > VM_GUEST_NO) ? 
0 : 1; + memtest = 0; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && diff --git a/sys/i386/i386/minidump_machdep_base.c b/sys/i386/i386/minidump_machdep_base.c index c2a3852edc1e..17b197f26d2b 100644 --- a/sys/i386/i386/minidump_machdep_base.c +++ b/sys/i386/i386/minidump_machdep_base.c @@ -52,9 +52,6 @@ CTASSERT(sizeof(struct kerneldumpheader) == 512); #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) #define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE) -extern uint32_t *vm_page_dump; -extern int vm_page_dump_size; - static struct kerneldumpheader kdh; /* Handle chunked writes. */ @@ -64,7 +61,6 @@ static uint64_t counter, progress; CTASSERT(sizeof(*vm_page_dump) == 4); - static int is_dumpable(vm_paddr_t pa) { diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c index 9339590be7ec..b929106f6132 100644 --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -847,7 +847,8 @@ mb_free_ext(struct mbuf *m) */ if (m->m_flags & M_NOFREE) { freembuf = 0; - KASSERT(m->m_ext.ext_type == EXT_EXTREF, + KASSERT(m->m_ext.ext_type == EXT_EXTREF || + m->m_ext.ext_type == EXT_RXRING, ("%s: no-free mbuf %p has wrong type", __func__, m)); } else freembuf = 1; @@ -891,6 +892,10 @@ mb_free_ext(struct mbuf *m) ("%s: ext_free not set", __func__)); m->m_ext.ext_free(m); break; + case EXT_RXRING: + KASSERT(m->m_ext.ext_free == NULL, + ("%s: ext_free is set", __func__)); + break; default: KASSERT(m->m_ext.ext_type == 0, ("%s: unknown ext_type", __func__)); diff --git a/sys/kern/makesyscalls.sh b/sys/kern/makesyscalls.sh index c794a5e87fbc..7d18fcabddf7 100644 --- a/sys/kern/makesyscalls.sh +++ b/sys/kern/makesyscalls.sh @@ -526,8 +526,7 @@ sed -e ' if (!flag("NOPROTO") && !flag("NODEF")) { if (funcname == "nosys" || funcname == "lkmnosys" || funcname == "sysarch" || funcname ~ /^freebsd/ || - funcname ~ /^linux/ || funcname ~ /^xenix/ || - funcname ~ /^cloudabi/) { + funcname ~ /^linux/ || funcname ~ /^cloudabi/) { printf("%s\t%s(struct thread *, struct %s *)", rettype, funcname, argalias) > sysdcl } else { @@ -546,8 +545,7 @@ sed -e ' } else { if (funcname == "nosys" || funcname == "sysarch" || funcname == "lkmnosys" || funcname ~ /^freebsd/ || - funcname ~ /^linux/ || funcname ~ /^xenix/ || - funcname ~ /^cloudabi/) { + funcname ~ /^linux/ || funcname ~ /^cloudabi/) { printf("%s, %s, NULL, 0, 0, %s, %s },", funcname, auditev, flags, thr_flag) > sysent column = column + length(funcname) + length(auditev) + length(flags) + 3 } else { diff --git a/sys/kern/subr_disk.c b/sys/kern/subr_disk.c index 328111337c93..ea364a2a1dfa 100644 --- a/sys/kern/subr_disk.c +++ b/sys/kern/subr_disk.c @@ -259,6 +259,17 @@ bioq_disksort(struct bio_queue_head *head, struct bio *bp) return; } + /* + * We should only sort requests of types that have concept of offset. + * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree + * of ordering even if strict ordering is not requested explicitly. 
+ */ + if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE && + bp->bio_cmd != BIO_DELETE) { + bioq_insert_tail(head, bp); + return; + } + if (bioq_batchsize > 0 && head->batched > bioq_batchsize) { bioq_insert_tail(head, bp); return; diff --git a/sys/kern/subr_kobj.c b/sys/kern/subr_kobj.c index 1a68c8d5aef7..8cf8d549dfb1 100644 --- a/sys/kern/subr_kobj.c +++ b/sys/kern/subr_kobj.c @@ -125,35 +125,40 @@ kobj_class_compile_common(kobj_class_t cls, kobj_ops_t ops) cls->ops = ops; } -void -kobj_class_compile(kobj_class_t cls) +static int +kobj_class_compile1(kobj_class_t cls, int mflags) { kobj_ops_t ops; KOBJ_ASSERT(MA_NOTOWNED); - /* - * Allocate space for the compiled ops table. - */ - ops = malloc(sizeof(struct kobj_ops), M_KOBJ, M_NOWAIT); - if (!ops) - panic("%s: out of memory", __func__); + ops = malloc(sizeof(struct kobj_ops), M_KOBJ, mflags); + if (ops == NULL) + return (ENOMEM); - KOBJ_LOCK(); - /* * We may have lost a race for kobj_class_compile here - check * to make sure someone else hasn't already compiled this * class. */ + KOBJ_LOCK(); if (cls->ops) { KOBJ_UNLOCK(); free(ops, M_KOBJ); - return; + return (0); } - kobj_class_compile_common(cls, ops); KOBJ_UNLOCK(); + return (0); +} + +void +kobj_class_compile(kobj_class_t cls) +{ + int error; + + error = kobj_class_compile1(cls, M_WAITOK); + KASSERT(error == 0, ("kobj_class_compile1 returned %d", error)); } void @@ -254,24 +259,6 @@ kobj_class_free(kobj_class_t cls) free(ops, M_KOBJ); } -kobj_t -kobj_create(kobj_class_t cls, - struct malloc_type *mtype, - int mflags) -{ - kobj_t obj; - - /* - * Allocate and initialise the new object. - */ - obj = malloc(cls->size, mtype, mflags | M_ZERO); - if (!obj) - return NULL; - kobj_init(obj, cls); - - return obj; -} - static void kobj_init_common(kobj_t obj, kobj_class_t cls) { @@ -280,30 +267,52 @@ kobj_init_common(kobj_t obj, kobj_class_t cls) cls->refs++; } -void -kobj_init(kobj_t obj, kobj_class_t cls) +static int +kobj_init1(kobj_t obj, kobj_class_t cls, int mflags) { - KOBJ_ASSERT(MA_NOTOWNED); - retry: - KOBJ_LOCK(); + int error; - /* - * Consider compiling the class' method table. - */ - if (!cls->ops) { + KOBJ_LOCK(); + while (cls->ops == NULL) { /* * kobj_class_compile doesn't want the lock held * because of the call to malloc - we drop the lock * and re-try. 
*/ KOBJ_UNLOCK(); - kobj_class_compile(cls); - goto retry; + error = kobj_class_compile1(cls, mflags); + if (error != 0) + return (error); + KOBJ_LOCK(); } - kobj_init_common(obj, cls); - KOBJ_UNLOCK(); + return (0); +} + +kobj_t +kobj_create(kobj_class_t cls, struct malloc_type *mtype, int mflags) +{ + kobj_t obj; + + obj = malloc(cls->size, mtype, mflags | M_ZERO); + if (obj == NULL) + return (NULL); + if (kobj_init1(obj, cls, mflags) != 0) { + free(obj, mtype); + return (NULL); + } + return (obj); +} + +void +kobj_init(kobj_t obj, kobj_class_t cls) +{ + int error; + + error = kobj_init1(obj, cls, M_NOWAIT); + if (error != 0) + panic("kobj_init1 failed: error %d", error); } void diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index d570f0a50fbc..196d13491a98 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -972,7 +972,7 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) uarg[1] = p->namelen; /* u_int */ uarg[2] = (intptr_t) p->old; /* void * */ uarg[3] = (intptr_t) p->oldlenp; /* size_t * */ - uarg[4] = (intptr_t) p->new; /* void * */ + uarg[4] = (intptr_t) p->new; /* const void * */ uarg[5] = p->newlen; /* size_t */ *n_args = 6; break; @@ -4867,7 +4867,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) p = "userland size_t *"; break; case 4: - p = "userland void *"; + p = "userland const void *"; break; case 5: p = "size_t"; diff --git a/sys/mips/conf/OCTEON1 b/sys/mips/conf/OCTEON1 index 4a61198952b0..89bcf2cc7ff1 100644 --- a/sys/mips/conf/OCTEON1 +++ b/sys/mips/conf/OCTEON1 @@ -155,6 +155,8 @@ device octm # physical port, but may eventually provide support for DSA or similar instead. #device mv88e61xxphy # Marvell 88E61XX +device iflib + # PCI Ethernet NICs. 
device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel PRO/10GbE PF PCIE Ethernet Family diff --git a/sys/mips/conf/std.XLP b/sys/mips/conf/std.XLP index 753c18b0c8e3..979b8ad5501d 100644 --- a/sys/mips/conf/std.XLP +++ b/sys/mips/conf/std.XLP @@ -75,6 +75,7 @@ device ether device xlpge #device re device msk +device iflib device em # Disks diff --git a/sys/modules/Makefile b/sys/modules/Makefile index e94995825a86..d34d260e9fa8 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -169,6 +169,7 @@ SUBDIR= \ if_tun \ if_vlan \ if_vxlan \ + iflib \ ${_iir} \ imgact_binmisc \ ${_intelspi} \ diff --git a/sys/modules/iflib/Makefile b/sys/modules/iflib/Makefile new file mode 100644 index 000000000000..c8311dd19e3c --- /dev/null +++ b/sys/modules/iflib/Makefile @@ -0,0 +1,13 @@ +# $FreeBSD$ + +.PATH: ${SRCTOP}/sys/net + +KMOD= iflib +SRCS= \ + iflib.c \ + iflib_clone.c \ + mp_ring.c +SRCS+= ifdi_if.c +SRCS+= device_if.h bus_if.h pci_if.h pci_iov_if.h ifdi_if.h + +.include <bsd.kmod.mk> diff --git a/sys/modules/nvdimm/Makefile b/sys/modules/nvdimm/Makefile index 5381345a9486..77fa6453ecbc 100644 --- a/sys/modules/nvdimm/Makefile +++ b/sys/modules/nvdimm/Makefile @@ -4,6 +4,7 @@ KMOD= nvdimm SRCS= nvdimm.c \ + nvdimm_nfit.c \ nvdimm_spa.c SRCS+= acpi_if.h bus_if.h device_if.h diff --git a/sys/modules/qlnx/Makefile b/sys/modules/qlnx/Makefile index 0a846999d472..e5dbaa01ba76 100644 --- a/sys/modules/qlnx/Makefile +++ b/sys/modules/qlnx/Makefile @@ -37,5 +37,6 @@ SYSDIR?=${SRCTOP}/sys SUBDIR=qlnxe SUBDIR+=qlnxev +SUBDIR+=qlnxr .include <bsd.subdir.mk> diff --git a/sys/modules/qlnx/qlnxe/Makefile b/sys/modules/qlnx/qlnxe/Makefile index 9034b0eaa112..9bdd80ca153e 100644 --- a/sys/modules/qlnx/qlnxe/Makefile +++ b/sys/modules/qlnx/qlnxe/Makefile @@ -47,6 +47,17 @@ SRCS+=ecore_mng_tlv.c SRCS+=ecore_sriov.c SRCS+=ecore_vf.c +#roce/iwarp files. Compilation can be turned off roce/iwarp are not required. +# In other words if you don't need RDMA please comment out SRCS adds for +# ecore_rdma.c ecore_roce.c ecore_iwarp.c ecore_ooo.c ecore_ll2.c qlnx_rdma.c +SRCS+=ecore_rdma.c +SRCS+=ecore_roce.c +SRCS+=ecore_iwarp.c +SRCS+=ecore_ooo.c +SRCS+=ecore_ll2.c +SRCS+=qlnx_rdma.c + + SRCS+=qlnx_ioctl.c SRCS+=qlnx_os.c @@ -75,4 +86,13 @@ CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include CFLAGS += -DCONFIG_ECORE_SRIOV +# For roce/iwarp files. Compilation can be turned off if roce/iwarp are not required. +# In other words if you don't need RDMA please comment out the CFLAGS which define +# CONFIG_ECORE_LL2 CONFIG_ECORE_ROCE CONFIG_ECORE_IWARP QLNX_ENABLE_IWARP +CFLAGS += -DCONFIG_ECORE_LL2 +CFLAGS += -DCONFIG_ECORE_ROCE +CFLAGS += -DCONFIG_ECORE_IWARP +CFLAGS += -DCONFIG_ECORE_RDMA +CFLAGS += -DQLNX_ENABLE_IWARP + CWARNFLAGS+= -Wno-cast-qual diff --git a/sys/modules/qlnx/qlnxr/Makefile b/sys/modules/qlnx/qlnxr/Makefile new file mode 100644 index 000000000000..0f7486cbe155 --- /dev/null +++ b/sys/modules/qlnx/qlnxr/Makefile @@ -0,0 +1,87 @@ +#/* +# * Copyright (c) 2017-2018 Cavium, Inc. +# * All rights reserved. +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions +# * are met: +# * +# * 1. Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * 2. 
Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# * POSSIBILITY OF SUCH DAMAGE. +# */ +#/* +# * File : Makefile +# * Author : David C Somayajulu, Cavium, Inc., San Jose, CA 95131. +# */ +# +# $FreeBSD$ +# + +#.PATH: ${.CURDIR} +#OFEDDIR= /usr/src/sys +#ETHDRVR=${.CURDIR}/../qlnxe + +.PATH: ${SRCTOP}/sys/dev/qlnx/qlnxr +OFEDDIR=${SRCTOP}/sys +ETHDRVR=${SRCTOP}/sys/dev/qlnx/qlnxe + +KMOD= qlnxr +SRCS= device_if.h bus_if.h vnode_if.h pci_if.h \ + opt_inet.h opt_inet6.h \ + qlnxr_os.c\ + qlnxr_cm.c\ + qlnxr_verbs.c + +.include <bsd.kmod.mk> + +CFLAGS+= -I${.CURDIR} +CFLAGS+= -I${ETHDRVR} +CFLAGS+= -I${OFEDDIR}/ofed/include +CFLAGS+= -I${OFEDDIR}/ofed/include/uapi +CFLAGS+= -I${OFEDDIR}/compat/linuxkpi/common/include + +CFLAGS+= -DLINUX_TYPES_DEFINED +CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM +CFLAGS+= -DINET6 -DINET +#CFLAGS+= -DDEFINE_NO_IP_BASED_GIDS + +CWARNEXTRA += -Wno-cast-qual +CWARNEXTRA += -Wno-unused-function +.if ${COMPILER_TYPE} == "clang" +CWARNEXTRA += -Wno-gnu-variable-sized-type-not-at-end +.endif +CWARNEXTRA += -Wno-missing-prototypes +CWARNEXTRA += ${NO_WCONSTANT_CONVERSION} +CWARNEXTRA += -Wno-format + +CWARNEXTRA += ${NO_WSHIFT_COUNT_OVERFLOW} +CWARNEXTRA += -Wno-empty-body + +CFLAGS += -DQLNX_DEBUG +CFLAGS += -DECORE_PACKAGE +CFLAGS += -DCONFIG_ECORE_L2 +CFLAGS += -DCONFIG_ECORE_LL2 +CFLAGS += -DCONFIG_ECORE_ROCE +CFLAGS += -DCONFIG_ECORE_IWARP +CFLAGS += -DCONFIG_ECORE_RDMA +CFLAGS += -DECORE_CONFIG_DIRECT_HWFN +CFLAGS += -g -fno-inline +CFLAGS += -DQLNX_RDMA + +CFLAGS+= -Wno-cast-qual -Wno-pointer-arith + diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 578e0756498b..96dc046840d4 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -1970,9 +1970,9 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp) return; } - if (PFIL_HOOKED(&V_inet_pfil_hook) + if (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 - || PFIL_HOOKED(&V_inet6_pfil_hook) + || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif ) { if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) @@ -2230,9 +2230,9 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, ETHER_BPF_MTAP(ifp, m); /* run the packet filter */ - if (PFIL_HOOKED(&V_inet_pfil_hook) + if (PFIL_HOOKED_IN(V_inet_pfil_head) #ifdef INET6 - || PFIL_HOOKED(&V_inet6_pfil_hook) + || PFIL_HOOKED_IN(V_inet6_pfil_head) #endif ) { BRIDGE_UNLOCK(sc); @@ -2270,9 +2270,9 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, BRIDGE_UNLOCK(sc); - if (PFIL_HOOKED(&V_inet_pfil_hook) + if (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 - || PFIL_HOOKED(&V_inet6_pfil_hook) + || 
PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif ) { if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0) @@ -2409,7 +2409,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) #ifdef INET6 # define OR_PFIL_HOOKED_INET6 \ - || PFIL_HOOKED(&V_inet6_pfil_hook) + || PFIL_HOOKED_IN(V_inet6_pfil_head) #else # define OR_PFIL_HOOKED_INET6 #endif @@ -2427,7 +2427,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) if_inc_counter(iface, IFCOUNTER_IBYTES, m->m_pkthdr.len); \ /* Filter on the physical interface. */ \ if (V_pfil_local_phys && \ - (PFIL_HOOKED(&V_inet_pfil_hook) \ + (PFIL_HOOKED_IN(V_inet_pfil_head) \ OR_PFIL_HOOKED_INET6)) { \ if (bridge_pfil(&m, NULL, ifp, \ PFIL_IN) != 0 || m == NULL) { \ @@ -2517,9 +2517,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, } /* Filter on the bridge interface before broadcasting */ - if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook) + if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 - || PFIL_HOOKED(&V_inet6_pfil_hook) + || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif )) { if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0) @@ -2564,9 +2564,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, * pointer so we do not redundantly filter on the bridge for * each interface we broadcast on. */ - if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook) + if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 - || PFIL_HOOKED(&V_inet6_pfil_hook) + || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif )) { if (used == 0) { @@ -3101,6 +3101,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) struct ip *ip; struct llc llc1; u_int16_t ether_type; + pfil_return_t rv; snap = 0; error = -1; /* Default error if not error == 0 */ @@ -3172,14 +3173,14 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) } /* Run the packet through pfil before stripping link headers */ - if (PFIL_HOOKED(&V_link_pfil_hook) && V_pfil_ipfw != 0 && - dir == PFIL_OUT && ifp != NULL) { - - error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, 0, - NULL); - - if (*mp == NULL || error != 0) /* packet consumed by filter */ - return (error); + if (PFIL_HOOKED_OUT(V_link_pfil_head) && V_pfil_ipfw != 0 && + dir == PFIL_OUT && ifp != NULL) { + switch (pfil_run_hooks(V_link_pfil_head, mp, ifp, dir, NULL)) { + case PFIL_DROPPED: + return (EPERM); + case PFIL_CONSUMED: + return (0); + } } /* Strip off the Ethernet header and keep a copy. 
*/ @@ -3217,6 +3218,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) /* * Run the packet through pfil */ + rv = PFIL_PASS; switch (ether_type) { case ETHERTYPE_IP: /* @@ -3226,25 +3228,19 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) * Keep the order: * in_if -> bridge_if -> out_if */ - if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL) - error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp, - dir, 0, NULL); - - if (*mp == NULL || error != 0) /* filter may consume */ + if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv = + pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) != + PFIL_PASS) break; - if (V_pfil_member && ifp != NULL) - error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, - dir, 0, NULL); - - if (*mp == NULL || error != 0) /* filter may consume */ + if (V_pfil_member && ifp != NULL && (rv = + pfil_run_hooks(V_inet_pfil_head, mp, ifp, dir, NULL)) != + PFIL_PASS) break; - if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL) - error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp, - dir, 0, NULL); - - if (*mp == NULL || error != 0) /* filter may consume */ + if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv = + pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) != + PFIL_PASS) break; /* check if we need to fragment the packet */ @@ -3280,35 +3276,33 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) break; #ifdef INET6 case ETHERTYPE_IPV6: - if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL) - error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp, - dir, 0, NULL); - - if (*mp == NULL || error != 0) /* filter may consume */ + if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv = + pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) != + PFIL_PASS) break; - if (V_pfil_member && ifp != NULL) - error = pfil_run_hooks(&V_inet6_pfil_hook, mp, ifp, - dir, 0, NULL); - - if (*mp == NULL || error != 0) /* filter may consume */ + if (V_pfil_member && ifp != NULL && (rv = + pfil_run_hooks(V_inet6_pfil_head, mp, ifp, dir, NULL)) != + PFIL_PASS) break; - if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL) - error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp, - dir, 0, NULL); + if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv = + pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) != + PFIL_PASS) + break; break; #endif + } + + switch (rv) { + case PFIL_CONSUMED: + return (0); + case PFIL_DROPPED: + return (EPERM); default: - error = 0; break; } - if (*mp == NULL) - return (error); - if (error != 0) - goto bad; - error = -1; /* diff --git a/sys/net/if_enc.c b/sys/net/if_enc.c index 01e416535dbd..7bb196b672c1 100644 --- a/sys/net/if_enc.c +++ b/sys/net/if_enc.c @@ -285,24 +285,24 @@ enc_hhook(int32_t hhook_type, int32_t hhook_id, void *udata, void *ctx_data, switch (hhook_id) { #ifdef INET case AF_INET: - ph = &V_inet_pfil_hook; + ph = V_inet_pfil_head; break; #endif #ifdef INET6 case AF_INET6: - ph = &V_inet6_pfil_hook; + ph = V_inet6_pfil_head; break; #endif default: ph = NULL; } - if (ph == NULL || !PFIL_HOOKED(ph)) + if (ph == NULL || (pdir == PFIL_OUT && !PFIL_HOOKED_OUT(ph)) || + (pdir == PFIL_IN && !PFIL_HOOKED_IN(ph))) return (0); /* Make a packet looks like it was received on enc(4) */ rcvif = (*ctx->mp)->m_pkthdr.rcvif; (*ctx->mp)->m_pkthdr.rcvif = ifp; - if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, 0, ctx->inp) != 0 || - *ctx->mp == NULL) { + if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, ctx->inp) != PFIL_PASS) { *ctx->mp = NULL; /* consumed 
by filter */ return (EACCES); } } diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 628e32b17642..ff7705b2e21d 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -90,7 +90,7 @@ CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2); CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); #endif -VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */ +VNET_DEFINE(pfil_head_t, link_pfil_head); /* Packet filter hooks */ /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); @@ -457,7 +457,6 @@ ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp) int ether_output_frame(struct ifnet *ifp, struct mbuf *m) { - int error; uint8_t pcp; pcp = ifp->if_pcp; @@ -465,15 +464,14 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m) !ether_set_pcp(&m, ifp, pcp)) return (0); - if (PFIL_HOOKED(&V_link_pfil_hook)) { - error = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, - PFIL_OUT, 0, NULL); - if (error != 0) + if (PFIL_HOOKED_OUT(V_link_pfil_head)) + switch (pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_OUT, + NULL)) { + case PFIL_DROPPED: return (EACCES); - - if (m == NULL) + case PFIL_CONSUMED: return (0); - } + } #ifdef EXPERIMENTAL #if defined(INET6) && defined(INET) @@ -737,14 +735,14 @@ SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); static void vnet_ether_init(__unused void *arg) { - int i; + struct pfil_head_args args; + + args.pa_version = PFIL_VERSION; + args.pa_flags = PFIL_IN | PFIL_OUT; + args.pa_type = PFIL_TYPE_ETHERNET; + args.pa_headname = PFIL_ETHER_NAME; + V_link_pfil_head = pfil_head_register(&args); - /* Initialize packet filter hooks. */ - V_link_pfil_hook.ph_type = PFIL_TYPE_AF; - V_link_pfil_hook.ph_af = AF_LINK; - if ((i = pfil_head_register(&V_link_pfil_hook)) != 0) - printf("%s: WARNING: unable to register pfil link hook, " - "error %d\n", __func__, i); #ifdef VIMAGE netisr_register_vnet(&ether_nh); #endif @@ -756,11 +754,8 @@ VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, static void vnet_ether_pfil_destroy(__unused void *arg) { - int i; - if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0) - printf("%s: WARNING: unable to unregister pfil link hook, " - "error %d\n", __func__, i); + pfil_head_unregister(V_link_pfil_head); } VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY, vnet_ether_pfil_destroy, NULL); @@ -818,10 +813,8 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__)); /* Do not grab PROMISC frames in case we are re-entered. 
*/ - if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) { - i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, 0, - NULL); - + if (PFIL_HOOKED_IN(V_link_pfil_head) && !(m->m_flags & M_PROMISC)) { + i = pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_IN, NULL); if (i != 0 || m == NULL) return; } diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index a1e7908f55ed..3bea2e95d69b 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -514,10 +514,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; #ifdef RATELIMIT ifp->if_snd_tag_alloc = lagg_snd_tag_alloc; - ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS | IFCAP_TXRTLMT; -#else - ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; #endif + ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; /* * Attach as an ordinary ethernet device, children will be attached diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 42241e74fb81..22cac21b2136 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -95,8 +95,9 @@ CK_STAILQ_HEAD(ifmultihead, ifmultiaddr); CK_STAILQ_HEAD(ifgrouphead, ifg_group); #ifdef _KERNEL -VNET_DECLARE(struct pfil_head, link_pfil_hook); /* packet filter hooks */ -#define V_link_pfil_hook VNET(link_pfil_hook) +VNET_DECLARE(struct pfil_head *, link_pfil_head); +#define V_link_pfil_head VNET(link_pfil_head) +#define PFIL_ETHER_NAME "ethernet" #define HHOOK_IPSEC_INET 0 #define HHOOK_IPSEC_INET6 1 diff --git a/sys/net/iflib.c b/sys/net/iflib.c index f02b94de0ece..0a566753e9d6 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -4409,8 +4409,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ - device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", - scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); + device_printf(dev, "Using %d tx descriptors and %d rx descriptors\n", + scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ @@ -4472,9 +4472,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); /* - ** Now setup MSI or MSI/X, should - ** return us the number of supported - ** vectors. (Will be 1 for MSI) + ** Now set up MSI or MSI-X, should return us the number of supported + ** vectors (will be 1 for a legacy interrupt and MSI). */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; @@ -4671,8 +4670,8 @@ iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ - device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", - scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); + device_printf(dev, "Using %d tx descriptors and %d rx descriptors\n", + scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? 
*/ @@ -4928,15 +4927,15 @@ static void iflib_free_intr_mem(if_ctx_t ctx) { - if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { - pci_release_msi(ctx->ifc_dev); - } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } + if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { + pci_release_msi(ctx->ifc_dev); + } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, - ctx->ifc_softc_ctx.isc_msix_bar, ctx->ifc_msix_mem); + rman_get_rid(ctx->ifc_msix_mem), ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } } @@ -5763,11 +5762,13 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { + if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) - bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res); + bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, + rman_get_rid(irq->ii_res), irq->ii_res); } static int @@ -5961,7 +5962,9 @@ iflib_msix_init(if_ctx_t ctx) iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; - device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); + if (bootverbose) + device_printf(dev, "msix_init qsets capped at %d\n", + imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; @@ -5969,30 +5972,27 @@ iflib_msix_init(if_ctx_t ctx) if (scctx->isc_disable_msix) goto msi; + /* First try MSI-X */ + if ((msgs = pci_msix_count(dev)) == 0) { + if (bootverbose) + device_printf(dev, "MSI-X not supported or disabled\n"); + goto msi; + } /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack - * allows shoddy garbage to use msix in this framework. + * allows shoddy garbage to use MSI-X in this framework. 
*/ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { - /* May not be enabled */ - device_printf(dev, "Unable to map MSIX table \n"); + device_printf(dev, "Unable to map MSI-X table\n"); goto msi; } } - /* First try MSI/X */ - if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */ - device_printf(dev, "System has MSIX disabled \n"); - bus_release_resource(dev, SYS_RES_MEMORY, - bar, ctx->ifc_msix_mem); - ctx->ifc_msix_mem = NULL; - goto msi; - } #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); @@ -6005,8 +6005,10 @@ iflib_msix_init(if_ctx_t ctx) queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); - device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n", - CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); + if (bootverbose) + device_printf(dev, + "intr CPUs: %d queue msgs: %d admincnt: %d\n", + CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) @@ -6042,11 +6044,13 @@ iflib_msix_init(if_ctx_t ctx) rx_queues = min(rx_queues, tx_queues); } - device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); + device_printf(dev, "Using %d rx queues %d tx queues\n", + rx_queues, tx_queues); vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { - device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); + device_printf(dev, "Using MSI-X interrupts with %d vectors\n", + vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; @@ -6055,7 +6059,8 @@ iflib_msix_init(if_ctx_t ctx) return (vectors); } else { device_printf(dev, - "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); + "failed to allocate %d MSI-X vectors, err: %d - using MSI\n", + vectors, err); bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; @@ -6191,7 +6196,7 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) "permit #txq != #rxq"); SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, - "disable MSIX (default 0)"); + "disable MSI-X (default 0)"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the rx budget"); diff --git a/sys/net/netmap.h b/sys/net/netmap.h index 098d369b07d6..bb38c748f840 100644 --- a/sys/net/netmap.h +++ b/sys/net/netmap.h @@ -540,7 +540,8 @@ enum { enum { /* On NETMAP_REQ_REGISTER, ask netmap to use memory allocated - * from user-space allocated memory pools (e.g. hugepages). */ + * from user-space allocated memory pools (e.g. hugepages). + */ NETMAP_REQ_OPT_EXTMEM = 1, /* ON NETMAP_REQ_SYNC_KLOOP_START, ask netmap to use eventfd-based @@ -551,8 +552,15 @@ enum { /* On NETMAP_REQ_REGISTER, ask netmap to work in CSB mode, where * head, cur and tail pointers are not exchanged through the * struct netmap_ring header, but rather using an user-provided - * memory area (see struct nm_csb_atok and struct nm_csb_ktoa). */ + * memory area (see struct nm_csb_atok and struct nm_csb_ktoa). + */ NETMAP_REQ_OPT_CSB, + + /* An extension to NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS, which specifies + * if the TX and/or RX rings are synced in the context of the VM exit. + * This requires the 'ioeventfd' fields to be valid (cannot be < 0). 
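/*
 * Hypothetical usage sketch for the sync-kloop mode option defined just
 * below.  Only names visible in this hunk are used; filling in the common
 * nmreq_option header and chaining the option onto the
 * NETMAP_REQ_SYNC_KLOOP_START request are left out because that plumbing is
 * not part of this diff.  The include path for the header is assumed.
 */
#include <string.h>
#include <net/netmap.h>

static void
example_prepare_kloop_mode(struct nmreq_opt_sync_kloop_mode *kmode)
{
	memset(kmode, 0, sizeof(*kmode));
	/* Ask the kernel loop to sync TX and RX rings directly in the
	 * VM-exit (ioeventfd) context instead of deferring to the loop. */
	kmode->mode = NM_OPT_SYNC_KLOOP_DIRECT_TX | NM_OPT_SYNC_KLOOP_DIRECT_RX;
}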
+ */ + NETMAP_REQ_OPT_SYNC_KLOOP_MODE, }; /* @@ -877,6 +885,12 @@ struct nmreq_opt_sync_kloop_eventfds { * their order must agree with the CSB arrays passed in the * NETMAP_REQ_OPT_CSB option. Each entry contains a file descriptor * backed by an eventfd. + * + * If any of the 'ioeventfd' entries is < 0, the event loop uses + * the sleeping synchronization strategy (according to sleep_us), + * and keeps kern_need_kick always disabled. + * Each 'irqfd' can be < 0, and in that case the corresponding queue + * is never notified. */ struct { /* Notifier for the application --> kernel loop direction. */ @@ -886,6 +900,13 @@ struct nmreq_opt_sync_kloop_eventfds { } eventfds[0]; }; +struct nmreq_opt_sync_kloop_mode { + struct nmreq_option nro_opt; /* common header */ +#define NM_OPT_SYNC_KLOOP_DIRECT_TX (1 << 0) +#define NM_OPT_SYNC_KLOOP_DIRECT_RX (1 << 1) + uint32_t mode; +}; + struct nmreq_opt_extmem { struct nmreq_option nro_opt; /* common header */ uint64_t nro_usrptr; /* (in) ptr to usr memory */ diff --git a/sys/net/pfil.c b/sys/net/pfil.c index 19b930a32e35..b46992148bd0 100644 --- a/sys/net/pfil.c +++ b/sys/net/pfil.c @@ -4,6 +4,7 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * + * Copyright (c) 2019 Gleb Smirnoff <glebius@FreeBSD.org> * Copyright (c) 1996 Matthew R. Green * All rights reserved. * @@ -32,443 +33,631 @@ */ #include <sys/param.h> +#include <sys/conf.h> #include <sys/kernel.h> +#include <sys/epoch.h> #include <sys/errno.h> #include <sys/lock.h> #include <sys/malloc.h> -#include <sys/rmlock.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/systm.h> -#include <sys/condvar.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/queue.h> +#include <sys/ucred.h> +#include <sys/jail.h> #include <net/if.h> #include <net/if_var.h> #include <net/pfil.h> -static struct mtx pfil_global_lock; - -MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock", - MTX_DEF); - -static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *); -static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int); -static int pfil_chain_remove(pfil_chain_t *, void *, void *); -static int pfil_add_hook_priv(void *, void *, int, struct pfil_head *, bool); +static MALLOC_DEFINE(M_PFIL, "pfil", "pfil(9) packet filter hooks"); + +static int pfil_ioctl(struct cdev *, u_long, caddr_t, int, struct thread *); +static struct cdevsw pfil_cdevsw = { + .d_ioctl = pfil_ioctl, + .d_name = PFILDEV, + .d_version = D_VERSION, +}; +static struct cdev *pfil_dev; + +static struct mtx pfil_lock; +MTX_SYSINIT(pfil_mtxinit, &pfil_lock, "pfil(9) lock", MTX_DEF); +#define PFIL_LOCK() mtx_lock(&pfil_lock) +#define PFIL_UNLOCK() mtx_unlock(&pfil_lock) +#define PFIL_LOCK_ASSERT() mtx_assert(&pfil_lock, MA_OWNED) + +#define PFIL_EPOCH net_epoch_preempt +#define PFIL_EPOCH_ENTER(et) epoch_enter_preempt(net_epoch_preempt, &(et)) +#define PFIL_EPOCH_EXIT(et) epoch_exit_preempt(net_epoch_preempt, &(et)) + +struct pfil_hook { + pfil_func_t hook_func; + void *hook_ruleset; + int hook_flags; + int hook_links; + enum pfil_types hook_type; + const char *hook_modname; + const char *hook_rulname; + LIST_ENTRY(pfil_hook) hook_list; +}; + +struct pfil_link { + CK_STAILQ_ENTRY(pfil_link) link_chain; + pfil_func_t link_func; + void *link_ruleset; + int link_flags; + struct pfil_hook *link_hook; + struct epoch_context link_epoch_ctx; +}; + +typedef CK_STAILQ_HEAD(pfil_chain, pfil_link) pfil_chain_t; +struct pfil_head { + int head_nhooksin; + int head_nhooksout; + 
pfil_chain_t head_in; + pfil_chain_t head_out; + int head_flags; + enum pfil_types head_type; + LIST_ENTRY(pfil_head) head_list; + const char *head_name; +}; LIST_HEAD(pfilheadhead, pfil_head); -VNET_DEFINE(struct pfilheadhead, pfil_head_list); +VNET_DEFINE_STATIC(struct pfilheadhead, pfil_head_list) = + LIST_HEAD_INITIALIZER(pfil_head_list); #define V_pfil_head_list VNET(pfil_head_list) -VNET_DEFINE(struct rmlock, pfil_lock); - -#define PFIL_LOCK_INIT_REAL(l, t) \ - rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE) -#define PFIL_LOCK_DESTROY_REAL(l) \ - rm_destroy(l) -#define PFIL_LOCK_INIT(p) do { \ - if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) { \ - PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private"); \ - (p)->ph_plock = &(p)->ph_lock; \ - } else \ - (p)->ph_plock = &V_pfil_lock; \ -} while (0) -#define PFIL_LOCK_DESTROY(p) do { \ - if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) \ - PFIL_LOCK_DESTROY_REAL((p)->ph_plock); \ -} while (0) - -#define PFIL_TRY_RLOCK(p, t) rm_try_rlock((p)->ph_plock, (t)) -#define PFIL_RLOCK(p, t) rm_rlock((p)->ph_plock, (t)) -#define PFIL_WLOCK(p) rm_wlock((p)->ph_plock) -#define PFIL_RUNLOCK(p, t) rm_runlock((p)->ph_plock, (t)) -#define PFIL_WUNLOCK(p) rm_wunlock((p)->ph_plock) -#define PFIL_WOWNED(p) rm_wowned((p)->ph_plock) - -#define PFIL_HEADLIST_LOCK() mtx_lock(&pfil_global_lock) -#define PFIL_HEADLIST_UNLOCK() mtx_unlock(&pfil_global_lock) -/* - * pfil_run_hooks() runs the specified packet filter hook chain. - */ -int -pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp) +LIST_HEAD(pfilhookhead, pfil_hook); +VNET_DEFINE_STATIC(struct pfilhookhead, pfil_hook_list) = + LIST_HEAD_INITIALIZER(pfil_hook_list); +#define V_pfil_hook_list VNET(pfil_hook_list) + +static struct pfil_link *pfil_link_remove(pfil_chain_t *, pfil_hook_t ); +static void pfil_link_free(epoch_context_t); + +static __noinline int +pfil_fake_mbuf(pfil_func_t func, void *mem, struct ifnet *ifp, int flags, + void *ruleset, struct inpcb *inp) { - struct rm_priotracker rmpt; - struct packet_filter_hook *pfh; - struct mbuf *m = *mp; - int rv = 0; - - PFIL_RLOCK(ph, &rmpt); - KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0")); - for (pfh = pfil_chain_get(dir, ph); pfh != NULL; - pfh = TAILQ_NEXT(pfh, pfil_chain)) { - if (pfh->pfil_func_flags != NULL) { - rv = (*pfh->pfil_func_flags)(pfh->pfil_arg, &m, ifp, - dir, flags, inp); - if (rv != 0 || m == NULL) - break; - } - if (pfh->pfil_func != NULL) { - rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir, - inp); - if (rv != 0 || m == NULL) - break; - } + struct mbuf m, *mp; + pfil_return_t rv; + + (void)m_init(&m, M_NOWAIT, MT_DATA, M_NOFREE | M_PKTHDR); + m_extadd(&m, mem, PFIL_LENGTH(flags), NULL, NULL, NULL, 0, EXT_RXRING); + m.m_len = m.m_pkthdr.len = PFIL_LENGTH(flags); + mp = &m; + flags &= ~(PFIL_MEMPTR | PFIL_LENMASK); + + rv = func(&mp, ifp, flags, ruleset, inp); + if (rv == PFIL_PASS && mp != &m) { + /* + * Firewalls that need pfil_fake_mbuf() most likely don't + * know to return PFIL_REALLOCED. + */ + rv = PFIL_REALLOCED; + *(struct mbuf **)mem = mp; } - PFIL_RUNLOCK(ph, &rmpt); - *mp = m; + return (rv); } -static struct packet_filter_hook * -pfil_chain_get(int dir, struct pfil_head *ph) +/* + * pfil_run_hooks() runs the specified packet filter hook chain. 
+ */ +int +pfil_run_hooks(struct pfil_head *head, pfil_packet_t p, struct ifnet *ifp, + int flags, struct inpcb *inp) { - - if (dir == PFIL_IN) - return (TAILQ_FIRST(&ph->ph_in)); - else if (dir == PFIL_OUT) - return (TAILQ_FIRST(&ph->ph_out)); + struct epoch_tracker et; + pfil_chain_t *pch; + struct pfil_link *link; + pfil_return_t rv, rvi; + + if (PFIL_DIR(flags) == PFIL_IN) + pch = &head->head_in; + else if (__predict_true(PFIL_DIR(flags) == PFIL_OUT)) + pch = &head->head_out; else - return (NULL); + panic("%s: bogus flags %d", __func__, flags); + + rv = PFIL_PASS; + PFIL_EPOCH_ENTER(et); + CK_STAILQ_FOREACH(link, pch, link_chain) { + if ((flags & PFIL_MEMPTR) && !(link->link_flags & PFIL_MEMPTR)) + rvi = pfil_fake_mbuf(link->link_func, p.mem, ifp, + flags, link->link_ruleset, inp); + else + rvi = (*link->link_func)(p, ifp, flags, + link->link_ruleset, inp); + if (rvi == PFIL_DROPPED || rvi == PFIL_CONSUMED) { + rv = rvi; + break; + } else if (rv == PFIL_REALLOCED) { + flags &= ~(PFIL_MEMPTR | PFIL_LENMASK); + rv = rvi; + } + } + PFIL_EPOCH_EXIT(et); + return (rvi); } /* - * pfil_try_rlock() acquires rm reader lock for specified head - * if this is immediately possible. + * pfil_head_register() registers a pfil_head with the packet filter hook + * mechanism. */ -int -pfil_try_rlock(struct pfil_head *ph, struct rm_priotracker *tracker) +pfil_head_t +pfil_head_register(struct pfil_head_args *pa) { + struct pfil_head *head, *list; + + MPASS(pa->pa_version == PFIL_VERSION); - return (PFIL_TRY_RLOCK(ph, tracker)); + head = malloc(sizeof(struct pfil_head), M_PFIL, M_WAITOK); + + head->head_nhooksin = head->head_nhooksout = 0; + head->head_flags = pa->pa_flags; + head->head_type = pa->pa_type; + head->head_name = pa->pa_headname; + CK_STAILQ_INIT(&head->head_in); + CK_STAILQ_INIT(&head->head_out); + + PFIL_LOCK(); + LIST_FOREACH(list, &V_pfil_head_list, head_list) + if (strcmp(pa->pa_headname, list->head_name) == 0) { + printf("pfil: duplicate head \"%s\"\n", + pa->pa_headname); + } + LIST_INSERT_HEAD(&V_pfil_head_list, head, head_list); + PFIL_UNLOCK(); + + return (head); } /* - * pfil_rlock() acquires rm reader lock for specified head. + * pfil_head_unregister() removes a pfil_head from the packet filter hook + * mechanism. The producer of the hook promises that all outstanding + * invocations of the hook have completed before it unregisters the hook. */ void -pfil_rlock(struct pfil_head *ph, struct rm_priotracker *tracker) +pfil_head_unregister(pfil_head_t ph) { + struct pfil_link *link, *next; + + PFIL_LOCK(); + LIST_REMOVE(ph, head_list); - PFIL_RLOCK(ph, tracker); + CK_STAILQ_FOREACH_SAFE(link, &ph->head_in, link_chain, next) { + link->link_hook->hook_links--; + free(link, M_PFIL); + } + CK_STAILQ_FOREACH_SAFE(link, &ph->head_out, link_chain, next) { + link->link_hook->hook_links--; + free(link, M_PFIL); + } + PFIL_UNLOCK(); } -/* - * pfil_runlock() releases reader lock for specified head. 
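As a rough sketch of the intercept-point side of the new KPI (the example_* names and the "example" head name below are hypothetical, not part of this change): a protocol registers its head once, then runs the chain per packet and branches on the pfil_return_t value instead of checking an errno plus a NULL mbuf.

#include <sys/param.h>
#include <sys/mbuf.h>
#include <net/pfil.h>

static pfil_head_t example_pfil_head;	/* hypothetical inspection point */

static void
example_head_init(void)
{
	struct pfil_head_args args;

	args.pa_version = PFIL_VERSION;
	args.pa_flags = PFIL_IN | PFIL_OUT;
	args.pa_type = PFIL_TYPE_IP4;
	args.pa_headname = "example";
	example_pfil_head = pfil_head_register(&args);
}

static void
example_input(struct mbuf **mp, struct ifnet *ifp)
{

	/* Cheap inline test first; no function call when nothing is hooked. */
	if (PFIL_HOOKED_IN(example_pfil_head) &&
	    pfil_run_hooks(example_pfil_head, mp, ifp, PFIL_IN, NULL) !=
	    PFIL_PASS)
		return;		/* packet was dropped or consumed by a filter */
	/* ... continue normal input processing with *mp ... */
}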
- */ -void -pfil_runlock(struct pfil_head *ph, struct rm_priotracker *tracker) +pfil_hook_t +pfil_add_hook(struct pfil_hook_args *pa) { + struct pfil_hook *hook, *list; + + MPASS(pa->pa_version == PFIL_VERSION); + + hook = malloc(sizeof(struct pfil_hook), M_PFIL, M_WAITOK | M_ZERO); + hook->hook_func = pa->pa_func; + hook->hook_ruleset = pa->pa_ruleset; + hook->hook_flags = pa->pa_flags; + hook->hook_type = pa->pa_type; + hook->hook_modname = pa->pa_modname; + hook->hook_rulname = pa->pa_rulname; + + PFIL_LOCK(); + LIST_FOREACH(list, &V_pfil_hook_list, hook_list) + if (strcmp(pa->pa_modname, list->hook_modname) == 0 && + strcmp(pa->pa_rulname, list->hook_rulname) == 0) { + printf("pfil: duplicate hook \"%s:%s\"\n", + pa->pa_modname, pa->pa_rulname); + } + LIST_INSERT_HEAD(&V_pfil_hook_list, hook, hook_list); + PFIL_UNLOCK(); - PFIL_RUNLOCK(ph, tracker); + return (hook); } -/* - * pfil_wlock() acquires writer lock for specified head. - */ -void -pfil_wlock(struct pfil_head *ph) +static int +pfil_unlink(struct pfil_link_args *pa, pfil_head_t head, pfil_hook_t hook) { + struct pfil_link *in, *out; + + PFIL_LOCK_ASSERT(); + + if (pa->pa_flags & PFIL_IN) { + in = pfil_link_remove(&head->head_in, hook); + if (in != NULL) { + head->head_nhooksin--; + hook->hook_links--; + } + } else + in = NULL; + if (pa->pa_flags & PFIL_OUT) { + out = pfil_link_remove(&head->head_out, hook); + if (out != NULL) { + head->head_nhooksout--; + hook->hook_links--; + } + } else + out = NULL; + PFIL_UNLOCK(); + + if (in != NULL) + epoch_call(PFIL_EPOCH, &in->link_epoch_ctx, pfil_link_free); + if (out != NULL) + epoch_call(PFIL_EPOCH, &out->link_epoch_ctx, pfil_link_free); - PFIL_WLOCK(ph); + if (in == NULL && out == NULL) + return (ENOENT); + else + return (0); } -/* - * pfil_wunlock() releases writer lock for specified head. 
- */ -void -pfil_wunlock(struct pfil_head *ph) +int +pfil_link(struct pfil_link_args *pa) { + struct pfil_link *in, *out, *link; + struct pfil_head *head; + struct pfil_hook *hook; + int error; + + MPASS(pa->pa_version == PFIL_VERSION); + + if ((pa->pa_flags & (PFIL_IN | PFIL_UNLINK)) == PFIL_IN) + in = malloc(sizeof(*in), M_PFIL, M_WAITOK | M_ZERO); + else + in = NULL; + if ((pa->pa_flags & (PFIL_OUT | PFIL_UNLINK)) == PFIL_OUT) + out = malloc(sizeof(*out), M_PFIL, M_WAITOK | M_ZERO); + else + out = NULL; + + PFIL_LOCK(); + if (pa->pa_flags & PFIL_HEADPTR) + head = pa->pa_head; + else + LIST_FOREACH(head, &V_pfil_head_list, head_list) + if (strcmp(pa->pa_headname, head->head_name) == 0) + break; + if (pa->pa_flags & PFIL_HOOKPTR) + hook = pa->pa_hook; + else + LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) + if (strcmp(pa->pa_modname, hook->hook_modname) == 0 && + strcmp(pa->pa_rulname, hook->hook_rulname) == 0) + break; + if (head == NULL || hook == NULL) { + error = ENOENT; + goto fail; + } + + if (pa->pa_flags & PFIL_UNLINK) + return (pfil_unlink(pa, head, hook)); + + if (head->head_type != hook->hook_type || + ((hook->hook_flags & pa->pa_flags) & ~head->head_flags)) { + error = EINVAL; + goto fail; + } + + if (pa->pa_flags & PFIL_IN) + CK_STAILQ_FOREACH(link, &head->head_in, link_chain) + if (link->link_hook == hook) { + error = EEXIST; + goto fail; + } + if (pa->pa_flags & PFIL_OUT) + CK_STAILQ_FOREACH(link, &head->head_out, link_chain) + if (link->link_hook == hook) { + error = EEXIST; + goto fail; + } + + if (pa->pa_flags & PFIL_IN) { + in->link_hook = hook; + in->link_func = hook->hook_func; + in->link_flags = hook->hook_flags; + in->link_ruleset = hook->hook_ruleset; + if (pa->pa_flags & PFIL_APPEND) + CK_STAILQ_INSERT_TAIL(&head->head_in, in, link_chain); + else + CK_STAILQ_INSERT_HEAD(&head->head_in, in, link_chain); + hook->hook_links++; + head->head_nhooksin++; + } + if (pa->pa_flags & PFIL_OUT) { + out->link_hook = hook; + out->link_func = hook->hook_func; + out->link_flags = hook->hook_flags; + out->link_ruleset = hook->hook_ruleset; + if (pa->pa_flags & PFIL_APPEND) + CK_STAILQ_INSERT_HEAD(&head->head_out, out, link_chain); + else + CK_STAILQ_INSERT_TAIL(&head->head_out, out, link_chain); + hook->hook_links++; + head->head_nhooksout++; + } + PFIL_UNLOCK(); + + return (0); - PFIL_WUNLOCK(ph); +fail: + PFIL_UNLOCK(); + free(in, M_PFIL); + free(out, M_PFIL); + return (error); } -/* - * pfil_wowned() returns a non-zero value if the current thread owns - * an exclusive lock. - */ -int -pfil_wowned(struct pfil_head *ph) +static void +pfil_link_free(epoch_context_t ctx) { + struct pfil_link *link; - return (PFIL_WOWNED(ph)); + link = __containerof(ctx, struct pfil_link, link_epoch_ctx); + free(link, M_PFIL); } /* - * pfil_head_register() registers a pfil_head with the packet filter hook - * mechanism. + * pfil_remove_hook removes a filter from all filtering points. 
*/ -int -pfil_head_register(struct pfil_head *ph) +void +pfil_remove_hook(pfil_hook_t hook) { - struct pfil_head *lph; - - PFIL_HEADLIST_LOCK(); - LIST_FOREACH(lph, &V_pfil_head_list, ph_list) { - if (ph->ph_type == lph->ph_type && - ph->ph_un.phu_val == lph->ph_un.phu_val) { - PFIL_HEADLIST_UNLOCK(); - return (EEXIST); + struct pfil_head *head; + struct pfil_link *in, *out; + + PFIL_LOCK(); + LIST_FOREACH(head, &V_pfil_head_list, head_list) { +retry: + in = pfil_link_remove(&head->head_in, hook); + if (in != NULL) { + head->head_nhooksin--; + hook->hook_links--; + epoch_call(PFIL_EPOCH, &in->link_epoch_ctx, + pfil_link_free); + } + out = pfil_link_remove(&head->head_out, hook); + if (out != NULL) { + head->head_nhooksout--; + hook->hook_links--; + epoch_call(PFIL_EPOCH, &out->link_epoch_ctx, + pfil_link_free); } + if (in != NULL || out != NULL) + /* What if some stupid admin put same filter twice? */ + goto retry; } - PFIL_LOCK_INIT(ph); - ph->ph_nhooks = 0; - TAILQ_INIT(&ph->ph_in); - TAILQ_INIT(&ph->ph_out); - LIST_INSERT_HEAD(&V_pfil_head_list, ph, ph_list); - PFIL_HEADLIST_UNLOCK(); - return (0); + LIST_REMOVE(hook, hook_list); + PFIL_UNLOCK(); + MPASS(hook->hook_links == 0); + free(hook, M_PFIL); } /* - * pfil_head_unregister() removes a pfil_head from the packet filter hook - * mechanism. The producer of the hook promises that all outstanding - * invocations of the hook have completed before it unregisters the hook. + * Internal: Remove a pfil hook from a hook chain. */ -int -pfil_head_unregister(struct pfil_head *ph) +static struct pfil_link * +pfil_link_remove(pfil_chain_t *chain, pfil_hook_t hook) { - struct packet_filter_hook *pfh, *pfnext; - - PFIL_HEADLIST_LOCK(); - LIST_REMOVE(ph, ph_list); - PFIL_HEADLIST_UNLOCK(); - TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_chain, pfnext) - free(pfh, M_IFADDR); - TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_chain, pfnext) - free(pfh, M_IFADDR); - PFIL_LOCK_DESTROY(ph); - return (0); -} + struct pfil_link *link; -/* - * pfil_head_get() returns the pfil_head for a given key/dlt. - */ -struct pfil_head * -pfil_head_get(int type, u_long val) -{ - struct pfil_head *ph; + PFIL_LOCK_ASSERT(); - PFIL_HEADLIST_LOCK(); - LIST_FOREACH(ph, &V_pfil_head_list, ph_list) - if (ph->ph_type == type && ph->ph_un.phu_val == val) - break; - PFIL_HEADLIST_UNLOCK(); - return (ph); + CK_STAILQ_FOREACH(link, chain, link_chain) + if (link->link_hook == hook) { + CK_STAILQ_REMOVE(chain, link, pfil_link, link_chain); + return (link); + } + + return (NULL); } -/* - * pfil_add_hook_flags() adds a function to the packet filter hook. the - * flags are: - * PFIL_IN call me on incoming packets - * PFIL_OUT call me on outgoing packets - * PFIL_ALL call me on all of the above - * PFIL_WAITOK OK to call malloc with M_WAITOK. - */ -int -pfil_add_hook_flags(pfil_func_flags_t func, void *arg, int flags, - struct pfil_head *ph) +static void +pfil_init(const void *unused __unused) { - return (pfil_add_hook_priv(func, arg, flags, ph, true)); + struct make_dev_args args; + int error; + + make_dev_args_init(&args); + args.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME; + args.mda_devsw = &pfil_cdevsw; + args.mda_uid = UID_ROOT; + args.mda_gid = GID_WHEEL; + args.mda_mode = 0600; + error = make_dev_s(&args, &pfil_dev, PFILDEV); + KASSERT(error == 0, ("%s: failed to create dev: %d", __func__, error)); } +/* + * Make sure the pfil bits are first before any possible subsystem which + * might piggyback on the SI_SUB_PROTO_PFIL. 
+ */ +SYSINIT(pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, pfil_init, NULL); /* - * pfil_add_hook() adds a function to the packet filter hook. the - * flags are: - * PFIL_IN call me on incoming packets - * PFIL_OUT call me on outgoing packets - * PFIL_ALL call me on all of the above - * PFIL_WAITOK OK to call malloc with M_WAITOK. + * User control interface. */ -int -pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) +static int pfilioc_listheads(struct pfilioc_list *); +static int pfilioc_listhooks(struct pfilioc_list *); +static int pfilioc_link(struct pfilioc_link *); + +static int +pfil_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, + struct thread *td) { - return (pfil_add_hook_priv(func, arg, flags, ph, false)); + int error; + + CURVNET_SET(TD_TO_VNET(td)); + error = 0; + switch (cmd) { + case PFILIOC_LISTHEADS: + error = pfilioc_listheads((struct pfilioc_list *)addr); + break; + case PFILIOC_LISTHOOKS: + error = pfilioc_listhooks((struct pfilioc_list *)addr); + break; + case PFILIOC_LINK: + error = pfilioc_link((struct pfilioc_link *)addr); + break; + default: + error = EINVAL; + break; + } + CURVNET_RESTORE(); + return (error); } static int -pfil_add_hook_priv(void *func, void *arg, int flags, - struct pfil_head *ph, bool hasflags) +pfilioc_listheads(struct pfilioc_list *req) { - struct packet_filter_hook *pfh1 = NULL; - struct packet_filter_hook *pfh2 = NULL; - int err; - - if (flags & PFIL_IN) { - pfh1 = (struct packet_filter_hook *)malloc(sizeof(*pfh1), - M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT); - if (pfh1 == NULL) { - err = ENOMEM; - goto error; - } + struct pfil_head *head; + struct pfil_link *link; + struct pfilioc_head *iohead; + struct pfilioc_hook *iohook; + u_int nheads, nhooks, hd, hk; + int error; + + PFIL_LOCK(); +restart: + nheads = nhooks = 0; + LIST_FOREACH(head, &V_pfil_head_list, head_list) { + nheads++; + nhooks += head->head_nhooksin + head->head_nhooksout; } - if (flags & PFIL_OUT) { - pfh2 = (struct packet_filter_hook *)malloc(sizeof(*pfh1), - M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT); - if (pfh2 == NULL) { - err = ENOMEM; - goto error; - } - } - PFIL_WLOCK(ph); - if (flags & PFIL_IN) { - pfh1->pfil_func_flags = hasflags ? func : NULL; - pfh1->pfil_func = hasflags ? NULL : func; - pfh1->pfil_arg = arg; - err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT); - if (err) - goto locked_error; - ph->ph_nhooks++; + PFIL_UNLOCK(); + + if (req->pio_nheads < nheads || req->pio_nhooks < nhooks) { + req->pio_nheads = nheads; + req->pio_nhooks = nhooks; + return (0); } - if (flags & PFIL_OUT) { - pfh2->pfil_func_flags = hasflags ? func : NULL; - pfh2->pfil_func = hasflags ? NULL : func; - pfh2->pfil_arg = arg; - err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN); - if (err) { - if (flags & PFIL_IN) - pfil_chain_remove(&ph->ph_in, func, arg); - goto locked_error; + + iohead = malloc(sizeof(*iohead) * nheads, M_TEMP, M_WAITOK); + iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK); + + hd = hk = 0; + PFIL_LOCK(); + LIST_FOREACH(head, &V_pfil_head_list, head_list) { + if (hd + 1 > nheads || + hk + head->head_nhooksin + head->head_nhooksout > nhooks) { + /* Configuration changed during malloc(). 
*/ + free(iohead, M_TEMP); + free(iohook, M_TEMP); + goto restart; + } + strlcpy(iohead[hd].pio_name, head->head_name, + sizeof(iohead[0].pio_name)); + iohead[hd].pio_nhooksin = head->head_nhooksin; + iohead[hd].pio_nhooksout = head->head_nhooksout; + iohead[hd].pio_type = head->head_type; + CK_STAILQ_FOREACH(link, &head->head_in, link_chain) { + strlcpy(iohook[hk].pio_module, + link->link_hook->hook_modname, + sizeof(iohook[0].pio_module)); + strlcpy(iohook[hk].pio_ruleset, + link->link_hook->hook_rulname, + sizeof(iohook[0].pio_ruleset)); + hk++; + } + CK_STAILQ_FOREACH(link, &head->head_out, link_chain) { + strlcpy(iohook[hk].pio_module, + link->link_hook->hook_modname, + sizeof(iohook[0].pio_module)); + strlcpy(iohook[hk].pio_ruleset, + link->link_hook->hook_rulname, + sizeof(iohook[0].pio_ruleset)); + hk++; } - ph->ph_nhooks++; + hd++; } - PFIL_WUNLOCK(ph); - return (0); -locked_error: - PFIL_WUNLOCK(ph); -error: - if (pfh1 != NULL) - free(pfh1, M_IFADDR); - if (pfh2 != NULL) - free(pfh2, M_IFADDR); - return (err); -} + PFIL_UNLOCK(); -/* - * pfil_remove_hook_flags removes a specific function from the packet filter hook - * chain. - */ -int -pfil_remove_hook_flags(pfil_func_flags_t func, void *arg, int flags, - struct pfil_head *ph) -{ - return (pfil_remove_hook((pfil_func_t)func, arg, flags, ph)); -} + error = copyout(iohead, req->pio_heads, + sizeof(*iohead) * min(hd, req->pio_nheads)); + if (error == 0) + error = copyout(iohook, req->pio_hooks, + sizeof(*iohook) * min(req->pio_nhooks, hk)); -/* - * pfil_remove_hook removes a specific function from the packet filter hook - * chain. - */ -int -pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) -{ - int err = 0; + req->pio_nheads = hd; + req->pio_nhooks = hk; - PFIL_WLOCK(ph); - if (flags & PFIL_IN) { - err = pfil_chain_remove(&ph->ph_in, func, arg); - if (err == 0) - ph->ph_nhooks--; - } - if ((err == 0) && (flags & PFIL_OUT)) { - err = pfil_chain_remove(&ph->ph_out, func, arg); - if (err == 0) - ph->ph_nhooks--; - } - PFIL_WUNLOCK(ph); - return (err); -} + free(iohead, M_TEMP); + free(iohook, M_TEMP); -/* - * Internal: Add a new pfil hook into a hook chain. - */ -static int -pfil_chain_add(pfil_chain_t *chain, struct packet_filter_hook *pfh1, int flags) -{ - struct packet_filter_hook *pfh; - - /* - * First make sure the hook is not already there. - */ - TAILQ_FOREACH(pfh, chain, pfil_chain) - if (((pfh->pfil_func != NULL && pfh->pfil_func == pfh1->pfil_func) || - (pfh->pfil_func_flags != NULL && - pfh->pfil_func_flags == pfh1->pfil_func_flags)) && - pfh->pfil_arg == pfh1->pfil_arg) - return (EEXIST); - - /* - * Insert the input list in reverse order of the output list so that - * the same path is followed in or out of the kernel. - */ - if (flags & PFIL_IN) - TAILQ_INSERT_HEAD(chain, pfh1, pfil_chain); - else - TAILQ_INSERT_TAIL(chain, pfh1, pfil_chain); - return (0); + return (error); } -/* - * Internal: Remove a pfil hook from a hook chain. 
- */ static int -pfil_chain_remove(pfil_chain_t *chain, void *func, void *arg) +pfilioc_listhooks(struct pfilioc_list *req) { - struct packet_filter_hook *pfh; - - TAILQ_FOREACH(pfh, chain, pfil_chain) - if ((pfh->pfil_func == func || pfh->pfil_func_flags == func) && - pfh->pfil_arg == arg) { - TAILQ_REMOVE(chain, pfh, pfil_chain); - free(pfh, M_IFADDR); - return (0); + struct pfil_hook *hook; + struct pfilioc_hook *iohook; + u_int nhooks, hk; + int error; + + PFIL_LOCK(); +restart: + nhooks = 0; + LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) + nhooks++; + PFIL_UNLOCK(); + + if (req->pio_nhooks < nhooks) { + req->pio_nhooks = nhooks; + return (0); + } + + iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK); + + hk = 0; + PFIL_LOCK(); + LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) { + if (hk + 1 > nhooks) { + /* Configuration changed during malloc(). */ + free(iohook, M_TEMP); + goto restart; } - return (ENOENT); -} + strlcpy(iohook[hk].pio_module, hook->hook_modname, + sizeof(iohook[0].pio_module)); + strlcpy(iohook[hk].pio_ruleset, hook->hook_rulname, + sizeof(iohook[0].pio_ruleset)); + iohook[hk].pio_type = hook->hook_type; + iohook[hk].pio_flags = hook->hook_flags; + hk++; + } + PFIL_UNLOCK(); -/* - * Stuff that must be initialized for every instance (including the first of - * course). - */ -static void -vnet_pfil_init(const void *unused __unused) -{ + error = copyout(iohook, req->pio_hooks, + sizeof(*iohook) * min(req->pio_nhooks, hk)); + req->pio_nhooks = hk; + free(iohook, M_TEMP); - LIST_INIT(&V_pfil_head_list); - PFIL_LOCK_INIT_REAL(&V_pfil_lock, "shared"); + return (error); } -/* - * Called for the removal of each instance. - */ -static void -vnet_pfil_uninit(const void *unused __unused) +static int +pfilioc_link(struct pfilioc_link *req) { + struct pfil_link_args args; - KASSERT(LIST_EMPTY(&V_pfil_head_list), - ("%s: pfil_head_list %p not empty", __func__, &V_pfil_head_list)); - PFIL_LOCK_DESTROY_REAL(&V_pfil_lock); -} + if (req->pio_flags & ~(PFIL_IN | PFIL_OUT | PFIL_UNLINK | PFIL_APPEND)) + return (EINVAL); -/* - * Starting up. - * - * VNET_SYSINIT is called for each existing vnet and each new vnet. - * Make sure the pfil bits are first before any possible subsystem which - * might piggyback on the SI_SUB_PROTO_PFIL. - */ -VNET_SYSINIT(vnet_pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, - vnet_pfil_init, NULL); - -/* - * Closing up shop. These are done in REVERSE ORDER. Not called on reboot. - * - * VNET_SYSUNINIT is called for each exiting vnet as it exits. - */ -VNET_SYSUNINIT(vnet_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, - vnet_pfil_uninit, NULL); + args.pa_version = PFIL_VERSION; + args.pa_flags = req->pio_flags; + args.pa_headname = req->pio_name; + args.pa_modname = req->pio_module; + args.pa_rulname = req->pio_ruleset; + + return (pfil_link(&args)); +} diff --git a/sys/net/pfil.h b/sys/net/pfil.h index 8fdaf5a69119..13d78e6a277f 100644 --- a/sys/net/pfil.h +++ b/sys/net/pfil.h @@ -4,6 +4,7 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * + * Copyright (c) 2019 Gleb Smirnoff <glebius@FreeBSD.org> * Copyright (c) 1996 Matthew R. Green * All rights reserved. 
* @@ -34,98 +35,158 @@ #ifndef _NET_PFIL_H_ #define _NET_PFIL_H_ -#include <sys/systm.h> -#include <sys/queue.h> -#include <sys/_lock.h> -#include <sys/_mutex.h> -#include <sys/lock.h> -#include <sys/rmlock.h> -#include <net/vnet.h> +#include <sys/ioccom.h> +enum pfil_types { + PFIL_TYPE_IP4, + PFIL_TYPE_IP6, + PFIL_TYPE_ETHERNET, +}; + +#define MAXPFILNAME 64 + +struct pfilioc_head { + char pio_name[MAXPFILNAME]; + int pio_nhooksin; + int pio_nhooksout; + enum pfil_types pio_type; +}; + +struct pfilioc_hook { + char pio_module[MAXPFILNAME]; + char pio_ruleset[MAXPFILNAME]; + int pio_flags; + enum pfil_types pio_type; +}; + +struct pfilioc_list { + u_int pio_nheads; + u_int pio_nhooks; + struct pfilioc_head *pio_heads; + struct pfilioc_hook *pio_hooks; +}; + +struct pfilioc_link { + char pio_name[MAXPFILNAME]; + char pio_module[MAXPFILNAME]; + char pio_ruleset[MAXPFILNAME]; + int pio_flags; +}; + +#define PFILDEV "pfil" +#define PFILIOC_LISTHEADS _IOWR('P', 1, struct pfilioc_list) +#define PFILIOC_LISTHOOKS _IOWR('P', 2, struct pfilioc_list) +#define PFILIOC_LINK _IOW('P', 3, struct pfilioc_link) + +#define PFIL_IN 0x00010000 +#define PFIL_OUT 0x00020000 +#define PFIL_FWD 0x00040000 +#define PFIL_DIR(f) ((f) & (PFIL_IN|PFIL_OUT)) +#define PFIL_MEMPTR 0x00080000 +#define PFIL_HEADPTR 0x00100000 +#define PFIL_HOOKPTR 0x00200000 +#define PFIL_APPEND 0x00400000 +#define PFIL_UNLINK 0x00800000 +#define PFIL_LENMASK 0x0000ffff +#define PFIL_LENGTH(f) ((f) & PFIL_LENMASK) + +#ifdef _KERNEL struct mbuf; struct ifnet; struct inpcb; -typedef int (*pfil_func_t)(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *); -typedef int (*pfil_func_flags_t)(void *, struct mbuf **, struct ifnet *, - int, int, struct inpcb *); +typedef union { + struct mbuf **m; + void *mem; +} pfil_packet_t __attribute__((__transparent_union__)); + +typedef enum { + PFIL_PASS = 0, + PFIL_DROPPED, + PFIL_CONSUMED, + PFIL_REALLOCED, +} pfil_return_t; + +typedef pfil_return_t (*pfil_func_t)(pfil_packet_t, struct ifnet *, int, + void *, struct inpcb *); +/* + * A pfil head is created by a packet intercept point. + * + * A pfil hook is created by a packet filter. + * + * Hooks are chained on heads. Historically some hooking happens + * automatically, e.g. ipfw(4), pf(4) and ipfilter(4) would register + * themselves on IPv4 and IPv6 input/output. + */ + +typedef struct pfil_hook * pfil_hook_t; +typedef struct pfil_head * pfil_head_t; /* - * The packet filter hooks are designed for anything to call them to - * possibly intercept the packet. Multiple filter hooks are chained - * together and after each other in the specified order. + * Give us a chance to modify pfil_xxx_args structures in future. */ -struct packet_filter_hook { - TAILQ_ENTRY(packet_filter_hook) pfil_chain; - pfil_func_t pfil_func; - pfil_func_flags_t pfil_func_flags; - void *pfil_arg; +#define PFIL_VERSION 1 + +/* Argument structure used by packet filters to register themselves. */ +struct pfil_hook_args { + int pa_version; + int pa_flags; + enum pfil_types pa_type; + pfil_func_t pa_func; + void *pa_ruleset; + const char *pa_modname; + const char *pa_rulname; }; -#define PFIL_IN 0x00000001 -#define PFIL_OUT 0x00000002 -#define PFIL_WAITOK 0x00000004 -#define PFIL_FWD 0x00000008 -#define PFIL_ALL (PFIL_IN|PFIL_OUT) +/* Public functions for pfil hook management by packet filters. */ +pfil_hook_t pfil_add_hook(struct pfil_hook_args *); +void pfil_remove_hook(pfil_hook_t); + +/* Argument structure used by ioctl() and packet filters to set filters.
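Purely as an illustration of the user control interface declared above, a userland program could link an already registered hook to the "inet" head through /dev/pfil roughly as follows; the module and ruleset names here are only examples (they match what siftr(4) registers later in this change) and must correspond to a hook some filter has actually registered, and root privileges are assumed.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/pfil.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int
pfil_link_example(void)
{
	struct pfilioc_link req;
	int error, fd;

	if ((fd = open("/dev/" PFILDEV, O_RDWR)) < 0)
		return (-1);
	memset(&req, 0, sizeof(req));
	strlcpy(req.pio_name, "inet", sizeof(req.pio_name));
	strlcpy(req.pio_module, "siftr", sizeof(req.pio_module));
	strlcpy(req.pio_ruleset, "default", sizeof(req.pio_ruleset));
	req.pio_flags = PFIL_IN | PFIL_OUT;
	error = ioctl(fd, PFILIOC_LINK, &req);
	close(fd);
	return (error);
}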
*/ +struct pfil_link_args { + int pa_version; + int pa_flags; + union { + const char *pa_headname; + pfil_head_t pa_head; + }; + union { + struct { + const char *pa_modname; + const char *pa_rulname; + }; + pfil_hook_t pa_hook; + }; +}; -typedef TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t; +/* Public function to configure filter chains. Used by ioctl() and filters. */ +int pfil_link(struct pfil_link_args *); -#define PFIL_TYPE_AF 1 /* key is AF_* type */ -#define PFIL_TYPE_IFNET 2 /* key is ifnet pointer */ +/* Argument structure used by inspection points to register themselves. */ +struct pfil_head_args { + int pa_version; + int pa_flags; + enum pfil_types pa_type; + const char *pa_headname; +}; -#define PFIL_FLAG_PRIVATE_LOCK 0x01 /* Personal lock instead of global */ +/* Public functions for pfil head management by inspection points. */ +pfil_head_t pfil_head_register(struct pfil_head_args *); +void pfil_head_unregister(pfil_head_t); +/* Public functions to run the packet inspection by inspection points. */ +int pfil_run_hooks(struct pfil_head *, pfil_packet_t, struct ifnet *, int, + struct inpcb *inp); /* - * A pfil head is created by each protocol or packet intercept point. - * For packet is then run through the hook chain for inspection. + * Minimally exposed structure to avoid function call in case of absence + * of any filters by protocols and macros to do the check. */ -struct pfil_head { - pfil_chain_t ph_in; - pfil_chain_t ph_out; - int ph_type; - int ph_nhooks; -#if defined( __linux__ ) || defined( _WIN32 ) - rwlock_t ph_mtx; -#else - struct rmlock *ph_plock; /* Pointer to the used lock */ - struct rmlock ph_lock; /* Private lock storage */ - int flags; -#endif - union { - u_long phu_val; - void *phu_ptr; - } ph_un; -#define ph_af ph_un.phu_val -#define ph_ifnet ph_un.phu_ptr - LIST_ENTRY(pfil_head) ph_list; +struct _pfil_head { + int head_nhooksin; + int head_nhooksout; }; +#define PFIL_HOOKED_IN(p) (((struct _pfil_head *)(p))->head_nhooksin > 0) +#define PFIL_HOOKED_OUT(p) (((struct _pfil_head *)(p))->head_nhooksout > 0) -VNET_DECLARE(struct rmlock, pfil_lock); -#define V_pfil_lock VNET(pfil_lock) - -/* Public functions for pfil hook management by packet filters. */ -struct pfil_head *pfil_head_get(int, u_long); -int pfil_add_hook_flags(pfil_func_flags_t, void *, int, struct pfil_head *); -int pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *); -int pfil_remove_hook_flags(pfil_func_flags_t, void *, int, struct pfil_head *); -int pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *); -#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0) - -/* Public functions to run the packet inspection by protocols. */ -int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *, int, - int, struct inpcb *inp); - -/* Public functions for pfil head management by protocols. */ -int pfil_head_register(struct pfil_head *); -int pfil_head_unregister(struct pfil_head *); - -/* Public pfil locking functions for self managed locks by packet filters. 
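And a minimal sketch of the filter side of the same KPI, assuming a hypothetical module called "example": the filter registers one hook with pfil_add_hook() and attaches it to the IPv4 head by name with pfil_link(), mirroring the pattern that the siftr(4) and ipfw(4) conversions later in this change follow.

#include <sys/param.h>
#include <sys/mbuf.h>
#include <net/pfil.h>

static pfil_hook_t example_hook;

static pfil_return_t
example_chk(pfil_packet_t p, struct ifnet *ifp, int flags,
    void *ruleset __unused, struct inpcb *inp)
{

	return (PFIL_PASS);	/* let every packet through */
}

static void
example_hook_attach(void)
{
	struct pfil_hook_args pha;
	struct pfil_link_args pla;

	pha.pa_version = PFIL_VERSION;
	pha.pa_flags = PFIL_IN | PFIL_OUT;
	pha.pa_type = PFIL_TYPE_IP4;
	pha.pa_func = example_chk;
	pha.pa_ruleset = NULL;
	pha.pa_modname = "example";
	pha.pa_rulname = "default";
	example_hook = pfil_add_hook(&pha);

	/* Attach the new hook to the "inet" head, looked up by name. */
	pla.pa_version = PFIL_VERSION;
	pla.pa_flags = PFIL_IN | PFIL_OUT | PFIL_HOOKPTR;
	pla.pa_headname = "inet";
	pla.pa_hook = example_hook;
	(void)pfil_link(&pla);
}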
*/ -int pfil_try_rlock(struct pfil_head *, struct rm_priotracker *); -void pfil_rlock(struct pfil_head *, struct rm_priotracker *); -void pfil_runlock(struct pfil_head *, struct rm_priotracker *); -void pfil_wlock(struct pfil_head *); -void pfil_wunlock(struct pfil_head *); -int pfil_wowned(struct pfil_head *ph); - +#endif /* _KERNEL */ #endif /* _NET_PFIL_H_ */ diff --git a/sys/net80211/ieee80211_dfs.c b/sys/net80211/ieee80211_dfs.c index 0266769d9b0f..abe67576d711 100644 --- a/sys/net80211/ieee80211_dfs.c +++ b/sys/net80211/ieee80211_dfs.c @@ -156,8 +156,7 @@ cac_timeout(void *arg) /* XXX clobbers any existing desired channel */ /* NB: dfs->newchan may be NULL, that's ok */ vap->iv_des_chan = dfs->newchan; - /* XXX recursive lock need ieee80211_new_state_locked */ - ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); + ieee80211_new_state_locked(vap, IEEE80211_S_SCAN, 0); } else { if_printf(vap->iv_ifp, "CAC timer on channel %u (%u MHz) expired; " diff --git a/sys/net80211/ieee80211_freebsd.c b/sys/net80211/ieee80211_freebsd.c index 9e82e9e1ff83..251b6b8d3c96 100644 --- a/sys/net80211/ieee80211_freebsd.c +++ b/sys/net80211/ieee80211_freebsd.c @@ -68,8 +68,6 @@ SYSCTL_INT(_net_wlan, OID_AUTO, debug, CTLFLAG_RW, &ieee80211_debug, 0, "debugging printfs"); #endif -static MALLOC_DEFINE(M_80211_COM, "80211com", "802.11 com state"); - static const char wlanname[] = "wlan"; static struct if_clone *wlan_cloner; diff --git a/sys/net80211/ieee80211_proto.c b/sys/net80211/ieee80211_proto.c index c4a6c4ccac71..24f78e88c070 100644 --- a/sys/net80211/ieee80211_proto.c +++ b/sys/net80211/ieee80211_proto.c @@ -347,6 +347,9 @@ ieee80211_proto_vattach(struct ieee80211vap *vap) * driver and/or user applications. */ for (i = IEEE80211_MODE_11A; i < IEEE80211_MODE_MAX; i++) { + if (isclr(ic->ic_modecaps, i)) + continue; + const struct ieee80211_rateset *rs = &ic->ic_sup_rates[i]; vap->iv_txparms[i].ucastrate = IEEE80211_FIXED_RATE_NONE; diff --git a/sys/net80211/ieee80211_scan.c b/sys/net80211/ieee80211_scan.c index 97186ccff755..ee05d89efb5e 100644 --- a/sys/net80211/ieee80211_scan.c +++ b/sys/net80211/ieee80211_scan.c @@ -130,13 +130,21 @@ void ieee80211_scan_vattach(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; + int m; vap->iv_bgscanidle = (IEEE80211_BGSCAN_IDLE_DEFAULT*1000)/hz; vap->iv_bgscanintvl = IEEE80211_BGSCAN_INTVAL_DEFAULT*hz; vap->iv_scanvalid = IEEE80211_SCAN_VALID_DEFAULT*hz; vap->iv_roaming = IEEE80211_ROAMING_AUTO; - memcpy(vap->iv_roamparms, defroam, sizeof(defroam)); + + memset(vap->iv_roamparms, 0, sizeof(vap->iv_roamparms)); + for (m = IEEE80211_MODE_AUTO + 1; m < IEEE80211_MODE_MAX; m++) { + if (isclr(ic->ic_modecaps, m)) + continue; + + memcpy(&vap->iv_roamparms[m], &defroam[m], sizeof(defroam[m])); + } ic->ic_scan_methods->sc_vattach(vap); } diff --git a/sys/net80211/ieee80211_scan_sta.c b/sys/net80211/ieee80211_scan_sta.c index 1cada2927591..ecfdcd03c7c4 100644 --- a/sys/net80211/ieee80211_scan_sta.c +++ b/sys/net80211/ieee80211_scan_sta.c @@ -1354,12 +1354,14 @@ sta_roam_check(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) mode = ieee80211_chan2mode(ic->ic_bsschan); roamRate = vap->iv_roamparms[mode].rate; roamRssi = vap->iv_roamparms[mode].rssi; + KASSERT(roamRate != 0 && roamRssi != 0, ("iv_roamparms are not" + "initialized for %s mode!", ieee80211_phymode_name[mode])); + ucastRate = vap->iv_txparms[mode].ucastrate; /* NB: the most up to date rssi is in the node, not the scan cache */ curRssi = ic->ic_node_getrssi(ni); if (ucastRate == 
IEEE80211_FIXED_RATE_NONE) { curRate = ni->ni_txrate; - roamRate &= IEEE80211_RATE_VAL; IEEE80211_DPRINTF(vap, IEEE80211_MSG_ROAM, "%s: currssi %d currate %u roamrssi %d roamrate %u\n", __func__, curRssi, curRate, roamRssi, roamRate); diff --git a/sys/net80211/ieee80211_tdma.c b/sys/net80211/ieee80211_tdma.c index 6ea433ce54ca..361273a53664 100644 --- a/sys/net80211/ieee80211_tdma.c +++ b/sys/net80211/ieee80211_tdma.c @@ -127,6 +127,9 @@ static int tdma_process_params(struct ieee80211_node *ni, static void settxparms(struct ieee80211vap *vap, enum ieee80211_phymode mode, int rate) { + if (isclr(vap->iv_ic->ic_modecaps, mode)) + return; + vap->iv_txparms[mode].ucastrate = rate; vap->iv_txparms[mode].mcastrate = rate; } diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 643a75e2294b..77a08b1a8af1 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -90,11 +90,11 @@ __FBSDID("$FreeBSD$"); #include <sys/socket.h> #include <sys/sysctl.h> -#include <net/pfil.h> #include <net/if.h> #include <net/if_types.h> #include <net/if_var.h> #include <net/if_dl.h> +#include <net/pfil.h> #include <net/route.h> #include <net/vnet.h> @@ -228,12 +228,11 @@ ip_tryforward(struct mbuf *m) /* * Run through list of ipfilter hooks for input packets */ - if (!PFIL_HOOKED(&V_inet_pfil_hook)) + if (!PFIL_HOOKED_IN(V_inet_pfil_head)) goto passin; - if (pfil_run_hooks( - &V_inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, 0, NULL) || - m == NULL) + if (pfil_run_hooks(V_inet_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN, + NULL) != PFIL_PASS) goto drop; M_ASSERTVALID(m); @@ -321,13 +320,12 @@ passin: /* * Step 5: outgoing firewall packet processing */ - if (!PFIL_HOOKED(&V_inet_pfil_hook)) + if (!PFIL_HOOKED_OUT(V_inet_pfil_head)) goto passout; - if (pfil_run_hooks(&V_inet_pfil_hook, &m, nh.nh_ifp, PFIL_OUT, PFIL_FWD, - NULL) || m == NULL) { + if (pfil_run_hooks(V_inet_pfil_head, &m, nh.nh_ifp, + PFIL_OUT | PFIL_FWD, NULL) != PFIL_PASS) goto drop; - } M_ASSERTVALID(m); M_ASSERTPKTHDR(m); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index dd00d13a4d71..a1ec5935a826 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -57,11 +57,11 @@ __FBSDID("$FreeBSD$"); #include <sys/syslog.h> #include <sys/sysctl.h> -#include <net/pfil.h> #include <net/if.h> #include <net/if_types.h> #include <net/if_var.h> #include <net/if_dl.h> +#include <net/pfil.h> #include <net/route.h> #include <net/netisr.h> #include <net/rss_config.h> @@ -134,7 +134,7 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_checkinterface), 0, "Verify packet arrives on correct interface"); -VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ +VNET_DEFINE(pfil_head_t, inet_pfil_head); /* Packet filter hooks */ static struct netisr_handler ip_nh = { .nh_name = "ip", @@ -301,6 +301,7 @@ SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops, void ip_init(void) { + struct pfil_head_args args; struct protosw *pr; int i; @@ -311,11 +312,11 @@ ip_init(void) ipreass_init(); /* Initialize packet filter hooks. 
*/ - V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; - V_inet_pfil_hook.ph_af = AF_INET; - if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) - printf("%s: WARNING: unable to register pfil hook, " - "error %d\n", __func__, i); + args.pa_version = PFIL_VERSION; + args.pa_flags = PFIL_IN | PFIL_OUT; + args.pa_type = PFIL_TYPE_IP4; + args.pa_headname = PFIL_INET_NAME; + V_inet_pfil_head = pfil_head_register(&args); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, &V_ipsec_hhh_in[HHOOK_IPSEC_INET], @@ -377,10 +378,7 @@ ip_destroy(void *unused __unused) #endif netisr_unregister_vnet(&ip_nh); - if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0) - printf("%s: WARNING: unable to unregister pfil hook, " - "error %d\n", __func__, error); - + pfil_head_unregister(V_inet_pfil_head); error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); if (error != 0) { printf("%s: WARNING: unable to deregister input helper hook " @@ -599,11 +597,12 @@ tooshort: */ /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet_pfil_hook)) + if (!PFIL_HOOKED_IN(V_inet_pfil_head)) goto passin; odst = ip->ip_dst; - if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, 0, NULL) != 0) + if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) != + PFIL_PASS) return; if (m == NULL) /* consumed by filter */ return; diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 7595c3f90535..0c7a26503d07 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -121,11 +121,16 @@ ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp, /* Run through list of hooks for output packets. */ odst.s_addr = ip->ip_dst.s_addr; - *error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, 0, inp); - m = *mp; - if ((*error) != 0 || m == NULL) + switch (pfil_run_hooks(V_inet_pfil_head, mp, ifp, PFIL_OUT, inp)) { + case PFIL_DROPPED: + *error = EPERM; + /* FALLTHROUGH */ + case PFIL_CONSUMED: return 1; /* Finished */ - + case PFIL_PASS: + *error = 0; + } + m = *mp; ip = mtod(m, struct ip *); /* See if destination IP address was changed by packet filter. */ @@ -568,7 +573,7 @@ sendit: #endif /* IPSEC */ /* Jump over all PFIL processing if hooks are not active. 
*/ - if (PFIL_HOOKED(&V_inet_pfil_hook)) { + if (PFIL_HOOKED_OUT(V_inet_pfil_head)) { switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) { case 1: /* Finished */ goto done; diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 86615a15ad26..d55a18bba91d 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -241,8 +241,9 @@ extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *); extern void (*ip_rsvp_force_done)(struct socket *); extern int (*rsvp_input_p)(struct mbuf **, int *, int); -VNET_DECLARE(struct pfil_head, inet_pfil_hook); /* packet filter hooks */ -#define V_inet_pfil_hook VNET(inet_pfil_hook) +VNET_DECLARE(struct pfil_head *, inet_pfil_head); +#define V_inet_pfil_head VNET(inet_pfil_head) +#define PFIL_INET_NAME "inet" void in_delayed_cksum(struct mbuf *m); diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c index 217d41c44723..c251fb44c8bc 100644 --- a/sys/netinet/siftr.c +++ b/sys/netinet/siftr.c @@ -94,10 +94,12 @@ __FBSDID("$FreeBSD$"); #include <netinet/in_systm.h> #include <netinet/in_var.h> #include <netinet/ip.h> +#include <netinet/ip_var.h> #include <netinet/tcp_var.h> #ifdef SIFTR_IPV6 #include <netinet/ip6.h> +#include <netinet/ip6_var.h> #include <netinet6/in6_pcb.h> #endif /* SIFTR_IPV6 */ @@ -170,8 +172,11 @@ static MALLOC_DEFINE(M_SIFTR_HASHNODE, "siftr_hashnode", struct pkt_node { /* Timestamp of pkt as noted in the pfil hook. */ struct timeval tval; - /* Direction pkt is travelling; either PFIL_IN or PFIL_OUT. */ - uint8_t direction; + /* Direction pkt is travelling. */ + enum { + DIR_IN = 0, + DIR_OUT = 1, + } direction; /* IP version pkt_node relates to; either INP_IPV4 or INP_IPV6. */ uint8_t ipver; /* Hash of the pkt which triggered the log message. */ @@ -272,6 +277,7 @@ static volatile unsigned int siftr_exit_pkt_manager_thread = 0; static unsigned int siftr_enabled = 0; static unsigned int siftr_pkts_per_log = 1; static unsigned int siftr_generate_hashes = 0; +static uint16_t siftr_port_filter = 0; /* static unsigned int siftr_binary_log = 0; */ static char siftr_logfile[PATH_MAX] = "/var/log/siftr.log"; static char siftr_logfile_shadow[PATH_MAX] = "/var/log/siftr.log"; @@ -283,11 +289,7 @@ static struct alq *siftr_alq = NULL; static struct mtx siftr_pkt_queue_mtx; static struct mtx siftr_pkt_mgr_mtx; static struct thread *siftr_pkt_manager_thr = NULL; -/* - * pfil.h defines PFIL_IN as 1 and PFIL_OUT as 2, - * which we use as an index into this array. - */ -static char direction[3] = {'\0', 'i','o'}; +static char direction[2] = {'i','o'}; /* Required function prototypes. */ static int siftr_sysctl_enabled_handler(SYSCTL_HANDLER_ARGS); @@ -317,6 +319,10 @@ SYSCTL_UINT(_net_inet_siftr, OID_AUTO, genhashes, CTLFLAG_RW, &siftr_generate_hashes, 0, "enable packet hash generation"); +SYSCTL_U16(_net_inet_siftr, OID_AUTO, port_filter, CTLFLAG_RW, + &siftr_port_filter, 0, + "enable packet filter on a TCP port"); + /* XXX: TODO SYSCTL_UINT(_net_inet_siftr, OID_AUTO, binary, CTLFLAG_RW, &siftr_binary_log, 0, @@ -402,7 +408,7 @@ siftr_process_pkt(struct pkt_node * pkt_node) LIST_INSERT_HEAD(counter_list, hash_node, nodes); } else { /* Malloc failed. */ - if (pkt_node->direction == PFIL_IN) + if (pkt_node->direction == DIR_IN) ss->nskip_in_malloc++; else ss->nskip_out_malloc++; @@ -805,7 +811,7 @@ siftr_siftdata(struct pkt_node *pn, struct inpcb *inp, struct tcpcb *tp, INP_RUNLOCK(inp); pn->ipver = ipver; - pn->direction = dir; + pn->direction = (dir == PFIL_IN ? 
DIR_IN : DIR_OUT); /* * Significantly more accurate than using getmicrotime(), but slower! @@ -826,9 +832,9 @@ siftr_siftdata(struct pkt_node *pn, struct inpcb *inp, struct tcpcb *tp, * It's very important to use the M_NOWAIT flag with all function calls * that support it so that they won't sleep, otherwise you get a panic. */ -static int -siftr_chkpkt(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, - struct inpcb *inp) +static pfil_return_t +siftr_chkpkt(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { struct pkt_node *pn; struct ip *ip; @@ -836,9 +842,10 @@ siftr_chkpkt(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, struct tcpcb *tp; struct siftr_stats *ss; unsigned int ip_hl; - int inp_locally_locked; + int inp_locally_locked, dir; inp_locally_locked = 0; + dir = PFIL_DIR(flags); ss = DPCPU_PTR(ss); /* @@ -907,6 +914,16 @@ siftr_chkpkt(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, goto inp_unlock; } + /* + * Only pkts selected by the tcp port filter + * can be inserted into the pkt_queue + */ + if ((siftr_port_filter != 0) && + (siftr_port_filter != ntohs(inp->inp_lport)) && + (siftr_port_filter != ntohs(inp->inp_fport))) { + goto inp_unlock; + } + pn = malloc(sizeof(struct pkt_node), M_SIFTR_PKTNODE, M_NOWAIT|M_ZERO); if (pn == NULL) { @@ -992,15 +1009,13 @@ inp_unlock: INP_RUNLOCK(inp); ret: - /* Returning 0 ensures pfil will not discard the pkt */ - return (0); + return (PFIL_PASS); } #ifdef SIFTR_IPV6 static int -siftr_chkpkt6(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, - struct inpcb *inp) +siftr_chkpkt6(struct mbuf **m, struct ifnet *ifp, int flags, struct inpcb *inp) { struct pkt_node *pn; struct ip6_hdr *ip6; @@ -1008,9 +1023,10 @@ siftr_chkpkt6(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, struct tcpcb *tp; struct siftr_stats *ss; unsigned int ip6_hl; - int inp_locally_locked; + int inp_locally_locked, dir; inp_locally_locked = 0; + dir = PFIL_DIR(flags); ss = DPCPU_PTR(ss); /* @@ -1083,6 +1099,16 @@ siftr_chkpkt6(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, goto inp_unlock6; } + /* + * Only pkts selected by the tcp port filter + * can be inserted into the pkt_queue + */ + if ((siftr_port_filter != 0) && + (siftr_port_filter != ntohs(inp->inp_lport)) && + (siftr_port_filter != ntohs(inp->inp_fport))) { + goto inp_unlock6; + } + pn = malloc(sizeof(struct pkt_node), M_SIFTR_PKTNODE, M_NOWAIT|M_ZERO); if (pn == NULL) { @@ -1113,37 +1139,53 @@ ret6: } #endif /* #ifdef SIFTR_IPV6 */ - +VNET_DEFINE_STATIC(pfil_hook_t, siftr_inet_hook); +#define V_siftr_inet_hook VNET(siftr_inet_hook) +#ifdef INET6 +VNET_DEFINE_STATIC(pfil_hook_t, siftr_inet6_hook); +#define V_siftr_inet6_hook VNET(siftr_inet6_hook) +#endif static int siftr_pfil(int action) { - struct pfil_head *pfh_inet; -#ifdef SIFTR_IPV6 - struct pfil_head *pfh_inet6; -#endif + struct pfil_hook_args pha; + struct pfil_link_args pla; + + pha.pa_version = PFIL_VERSION; + pha.pa_flags = PFIL_IN | PFIL_OUT; + pha.pa_modname = "siftr"; + pha.pa_ruleset = NULL; + pha.pa_rulname = "default"; + + pla.pa_version = PFIL_VERSION; + pla.pa_flags = PFIL_IN | PFIL_OUT | + PFIL_HEADPTR | PFIL_HOOKPTR; + VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); -#ifdef SIFTR_IPV6 - pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); -#endif if (action == HOOK) { - pfil_add_hook(siftr_chkpkt, NULL, - PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet); + 
pha.pa_func = siftr_chkpkt; + pha.pa_type = PFIL_TYPE_IP4; + V_siftr_inet_hook = pfil_add_hook(&pha); + pla.pa_hook = V_siftr_inet_hook; + pla.pa_head = V_inet_pfil_head; + (void)pfil_link(&pla); #ifdef SIFTR_IPV6 - pfil_add_hook(siftr_chkpkt6, NULL, - PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet6); + pha.pa_func = siftr_chkpkt6; + pha.pa_type = PFIL_TYPE_IP6; + V_siftr_inet6_hook = pfil_add_hook(&pha); + pla.pa_hook = V_siftr_inet6_hook; + pla.pa_head = V_inet6_pfil_head; + (void)pfil_link(&pla); #endif } else if (action == UNHOOK) { - pfil_remove_hook(siftr_chkpkt, NULL, - PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet); + pfil_remove_hook(V_siftr_inet_hook); #ifdef SIFTR_IPV6 - pfil_remove_hook(siftr_chkpkt6, NULL, - PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet6); + pfil_remove_hook(V_siftr_inet6_hook); #endif } CURVNET_RESTORE(); diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 39351d897f25..b2c4f66da9f0 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -2385,8 +2385,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; - tp->snd_wnd = tiwin; } + tp->snd_wnd = tiwin; /* * Make transitions: * SYN-RECEIVED -> ESTABLISHED diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 158d2cb3abf8..608b6e7e15ad 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -5433,6 +5433,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } + tp->snd_wnd = tiwin; /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later @@ -5440,7 +5441,6 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((thflags & TH_ACK) == 0) { if (IS_FASTOPEN(tp->t_flags)) { - tp->snd_wnd = tiwin; cc_conn_init(tp); } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, @@ -5452,7 +5452,6 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; - tp->snd_wnd = tiwin; } /* * Make transitions: SYN-RECEIVED -> ESTABLISHED SYN-RECEIVED* -> diff --git a/sys/netinet6/ip6_fastfwd.c b/sys/netinet6/ip6_fastfwd.c index 8f8b176607c5..11eb0e7548d5 100644 --- a/sys/netinet6/ip6_fastfwd.c +++ b/sys/netinet6/ip6_fastfwd.c @@ -156,10 +156,10 @@ ip6_tryforward(struct mbuf *m) /* * Incoming packet firewall processing. */ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_IN(V_inet6_pfil_head)) goto passin; - if (pfil_run_hooks(&V_inet6_pfil_hook, &m, rcvif, PFIL_IN, 0, - NULL) != 0 || m == NULL) + if (pfil_run_hooks(V_inet6_pfil_head, &m, rcvif, PFIL_IN, NULL) != + PFIL_PASS) goto dropin; /* * If packet filter sets the M_FASTFWD_OURS flag, this means @@ -195,7 +195,7 @@ passin: in6_ifstat_inc(rcvif, ifs6_in_noroute); goto dropin; } - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) { + if (!PFIL_HOOKED_OUT(V_inet6_pfil_head)) { if (m->m_pkthdr.len > nh.nh_mtu) { in6_ifstat_inc(nh.nh_ifp, ifs6_in_toobig); icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu); @@ -208,8 +208,8 @@ passin: /* * Outgoing packet firewall processing. 
*/ - if (pfil_run_hooks(&V_inet6_pfil_hook, &m, nh.nh_ifp, PFIL_OUT, - PFIL_FWD, NULL) != 0 || m == NULL) + if (pfil_run_hooks(V_inet6_pfil_head, &m, nh.nh_ifp, PFIL_OUT | + PFIL_FWD, NULL) != PFIL_PASS) goto dropout; /* diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c index ed743f65c867..0676a58225a3 100644 --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -320,15 +320,14 @@ again2: in6_clearscope(&ip6->ip6_dst); /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_OUT(V_inet6_pfil_head)) goto pass; odst = ip6->ip6_dst; /* Run through list of hooks for forwarded packets. */ - error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, - PFIL_FWD, NULL); - if (error != 0 || m == NULL) - goto freecopy; /* consumed by filter */ + if (pfil_run_hooks(V_inet6_pfil_head, &m, rt->rt_ifp, PFIL_OUT | + PFIL_FWD, NULL) != PFIL_PASS) + goto freecopy; ip6 = mtod(m, struct ip6_hdr *); /* See if destination IP address was changed by packet filter. */ diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 712b9923d8e7..531cfff43f0e 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -191,7 +191,7 @@ SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen, #endif -VNET_DEFINE(struct pfil_head, inet6_pfil_hook); +VNET_DEFINE(pfil_head_t, inet6_pfil_head); VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat); VNET_PCPUSTAT_SYSINIT(ip6stat); @@ -214,6 +214,7 @@ static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); void ip6_init(void) { + struct pfil_head_args args; struct protosw *pr; int i; @@ -227,11 +228,11 @@ ip6_init(void) &V_in6_ifaddrhmask); /* Initialize packet filter hooks. */ - V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF; - V_inet6_pfil_hook.ph_af = AF_INET6; - if ((i = pfil_head_register(&V_inet6_pfil_hook)) != 0) - printf("%s: WARNING: unable to register pfil hook, " - "error %d\n", __func__, i); + args.pa_version = PFIL_VERSION; + args.pa_flags = PFIL_IN | PFIL_OUT; + args.pa_type = PFIL_TYPE_IP6; + args.pa_headname = PFIL_INET6_NAME; + V_inet6_pfil_head = pfil_head_register(&args); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6, &V_ipsec_hhh_in[HHOOK_IPSEC_INET6], @@ -359,9 +360,7 @@ ip6_destroy(void *unused __unused) #endif netisr_unregister_vnet(&ip6_nh); - if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0) - printf("%s: WARNING: unable to unregister pfil hook, " - "error %d\n", __func__, error); + pfil_head_unregister(V_inet6_pfil_head); error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET6]); if (error != 0) { printf("%s: WARNING: unable to deregister input helper hook " @@ -758,14 +757,12 @@ ip6_input(struct mbuf *m) */ /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_IN(V_inet6_pfil_head)) goto passin; odst = ip6->ip6_dst; - if (pfil_run_hooks(&V_inet6_pfil_hook, &m, - m->m_pkthdr.rcvif, PFIL_IN, 0, NULL)) - return; - if (m == NULL) /* consumed by filter */ + if (pfil_run_hooks(V_inet6_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN, + NULL) != PFIL_PASS) return; ip6 = mtod(m, struct ip6_hdr *); srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst); diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 741521abb8a1..e36beb355b38 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -792,16 +792,21 @@ again: } /* Jump over all PFIL processing if hooks are not active. 
*/ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_OUT(V_inet6_pfil_head)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ - error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, 0, inp); - if (error != 0 || m == NULL) + switch (pfil_run_hooks(V_inet6_pfil_head, &m, ifp, PFIL_OUT, inp)) { + case PFIL_PASS: + ip6 = mtod(m, struct ip6_hdr *); + break; + case PFIL_DROPPED: + error = EPERM; + /* FALLTHROUGH */ + case PFIL_CONSUMED: goto done; - /* adjust pointer */ - ip6 = mtod(m, struct ip6_hdr *); + } needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index f235572dd03e..bf15f833b326 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -346,8 +346,10 @@ VNET_DECLARE(int, ip6_use_defzone); /* Whether to use the default scope * zone when unspecified */ #define V_ip6_use_defzone VNET(ip6_use_defzone) -VNET_DECLARE (struct pfil_head, inet6_pfil_hook); /* packet filter hooks */ -#define V_inet6_pfil_hook VNET(inet6_pfil_hook) +VNET_DECLARE(struct pfil_head *, inet6_pfil_head); +#define V_inet6_pfil_head VNET(inet6_pfil_head) +#define PFIL_INET6_NAME "inet6" + #ifdef IPSTEALTH VNET_DECLARE(int, ip6stealth); #define V_ip6stealth VNET(ip6stealth) diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index 5c91b76c2fd6..833f6b6ecebf 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -1404,6 +1404,7 @@ ipfw_chk(struct ip_fw_args *args) int is_ipv4 = 0; int done = 0; /* flag to exit the outer loop */ + IPFW_RLOCK_TRACKER; if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) return (IP_FW_PASS); /* accept */ diff --git a/sys/netpfil/ipfw/ip_fw_dynamic.c b/sys/netpfil/ipfw/ip_fw_dynamic.c index d48af280f105..473db72454d4 100644 --- a/sys/netpfil/ipfw/ip_fw_dynamic.c +++ b/sys/netpfil/ipfw/ip_fw_dynamic.c @@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$"); #include <net/ethernet.h> #include <net/if.h> #include <net/if_var.h> -#include <net/pfil.h> #include <net/vnet.h> #include <netinet/in.h> diff --git a/sys/netpfil/ipfw/ip_fw_eaction.c b/sys/netpfil/ipfw/ip_fw_eaction.c index 05cc174cb283..1cb2f812936c 100644 --- a/sys/netpfil/ipfw/ip_fw_eaction.c +++ b/sys/netpfil/ipfw/ip_fw_eaction.c @@ -38,9 +38,9 @@ __FBSDID("$FreeBSD$"); #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/queue.h> -#include <net/pfil.h> #include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ +#include <net/pfil.h> #include <netinet/in.h> #include <netinet/ip_var.h> /* struct ipfw_rule_ref */ #include <netinet/ip_fw.h> diff --git a/sys/netpfil/ipfw/ip_fw_iface.c b/sys/netpfil/ipfw/ip_fw_iface.c index beb3b9115aac..b2aa7d3205db 100644 --- a/sys/netpfil/ipfw/ip_fw_iface.c +++ b/sys/netpfil/ipfw/ip_fw_iface.c @@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$"); #include <sys/eventhandler.h> #include <net/if.h> #include <net/if_var.h> -#include <net/pfil.h> #include <net/vnet.h> #include <netinet/in.h> diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c index 7cd1d33d5910..9702d9dadaae 100644 --- a/sys/netpfil/ipfw/ip_fw_nat.c +++ b/sys/netpfil/ipfw/ip_fw_nat.c @@ -45,7 +45,6 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/if_var.h> -#include <net/pfil.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_var.h> diff --git a/sys/netpfil/ipfw/ip_fw_pfil.c b/sys/netpfil/ipfw/ip_fw_pfil.c index feb4a20f9b69..25726bd4636e 100644 --- a/sys/netpfil/ipfw/ip_fw_pfil.c +++ 
b/sys/netpfil/ipfw/ip_fw_pfil.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <net/if.h> +#include <net/if_var.h> #include <net/route.h> #include <net/ethernet.h> #include <net/pfil.h> @@ -85,10 +86,6 @@ int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); /* Forward declarations. */ static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int); -int ipfw_check_packet(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *); -int ipfw_check_frame(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *); #ifdef SYSCTL_NODE @@ -120,16 +117,17 @@ SYSEND * dummynet, divert, netgraph or other modules. * The packet may be consumed. */ -int -ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, - struct inpcb *inp) +static pfil_return_t +ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int dir, + void *ruleset __unused, struct inpcb *inp) { struct ip_fw_args args; struct m_tag *tag; - int ipfw, ret; + pfil_return_t ret; + int ipfw; /* convert dir to IPFW values */ - dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT; + dir = (dir & PFIL_IN) ? DIR_IN : DIR_OUT; args.flags = 0; again: /* @@ -155,17 +153,15 @@ again: KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", __func__)); - /* breaking out of the switch means drop */ + ret = PFIL_PASS; switch (ipfw) { case IP_FW_PASS: /* next_hop may be set by ipfw_chk */ if ((args.flags & (IPFW_ARGS_NH4 | IPFW_ARGS_NH4PTR | - IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) == 0) { - ret = 0; + IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) == 0) break; - } #if (!defined(INET6) && !defined(INET)) - ret = EACCES; + ret = PFIL_DROPPED; #else { void *psa; @@ -210,8 +206,8 @@ again: tag = m_tag_get(PACKET_TAG_IPFORWARD, len, M_NOWAIT); if (tag == NULL) { - ret = EACCES; - break; /* i.e. drop */ + ret = PFIL_DROPPED; + break; } } if ((args.flags & IPFW_ARGS_NH6) == 0) @@ -238,7 +234,7 @@ again: * comparisons. */ if (sa6_embedscope(sa6, V_ip6_use_defzone) != 0) { - ret = EACCES; + ret = PFIL_DROPPED; break; } if (in6_localip(&sa6->sin6_addr)) @@ -250,20 +246,23 @@ again: break; case IP_FW_DENY: - ret = EACCES; - break; /* i.e. drop */ + ret = PFIL_DROPPED; + break; case IP_FW_DUMMYNET: - ret = EACCES; - if (ip_dn_io_ptr == NULL) - break; /* i.e. drop */ + if (ip_dn_io_ptr == NULL) { + ret = PFIL_DROPPED; + break; + } MPASS(args.flags & IPFW_ARGS_REF); if (mtod(*m0, struct ip *)->ip_v == 4) - ret = ip_dn_io_ptr(m0, dir, &args); + (void )ip_dn_io_ptr(m0, dir, &args); else if (mtod(*m0, struct ip *)->ip_v == 6) - ret = ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args); - else - break; /* drop it */ + (void )ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args); + else { + ret = PFIL_DROPPED; + break; + } /* * XXX should read the return value. * dummynet normally eats the packet and sets *m0=NULL @@ -273,41 +272,42 @@ again: */ if (*m0 != NULL) goto again; + ret = PFIL_CONSUMED; break; case IP_FW_TEE: case IP_FW_DIVERT: if (ip_divert_ptr == NULL) { - ret = EACCES; - break; /* i.e. drop */ + ret = PFIL_DROPPED; + break; } MPASS(args.flags & IPFW_ARGS_REF); - ret = ipfw_divert(m0, dir, &args.rule, + (void )ipfw_divert(m0, dir, &args.rule, (ipfw == IP_FW_TEE) ? 1 : 0); /* continue processing for the original packet (tee). */ if (*m0) goto again; + ret = PFIL_CONSUMED; break; case IP_FW_NGTEE: case IP_FW_NETGRAPH: if (ng_ipfw_input_p == NULL) { - ret = EACCES; - break; /* i.e. 
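The ipfw_check_packet() rework above also shows the new hook-side contract: hooks receive the direction as flag bits (hence dir & PFIL_IN rather than an equality test), return a pfil_return_t, and on PFIL_DROPPED must have freed the chain and set *m0 to NULL themselves. A minimal sketch of a conforming hook; example_should_drop() is a placeholder policy check, not a real API.

/* Placeholder policy: a real hook would inspect the packet here. */
static int
example_should_drop(struct mbuf *m __unused, struct ifnet *ifp __unused)
{
	return (0);
}

static pfil_return_t
example_check_packet(struct mbuf **m0, struct ifnet *ifp, int dir,
    void *ruleset __unused, struct inpcb *inp __unused)
{

	/* Direction is a flag now, not an enumeration. */
	if (dir & PFIL_IN) {
		if (example_should_drop(*m0, ifp)) {
			m_freem(*m0);
			*m0 = NULL;	/* required when reporting a drop */
			return (PFIL_DROPPED);
		}
	}
	return (PFIL_PASS);	/* *m0 is still owned by the caller */
}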
drop */ + ret = PFIL_DROPPED; + break; } MPASS(args.flags & IPFW_ARGS_REF); - ret = ng_ipfw_input_p(m0, dir, &args, + (void )ng_ipfw_input_p(m0, dir, &args, (ipfw == IP_FW_NGTEE) ? 1 : 0); if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */ goto again; /* continue with packet */ + ret = PFIL_CONSUMED; break; case IP_FW_NAT: /* honor one-pass in case of successful nat */ - if (V_fw_one_pass) { - ret = 0; + if (V_fw_one_pass) break; - } goto again; case IP_FW_REASS: @@ -317,7 +317,7 @@ again: KASSERT(0, ("%s: unknown retval", __func__)); } - if (ret != 0) { + if (ret != PFIL_PASS) { if (*m0) FREE_PKT(*m0); *m0 = NULL; @@ -329,16 +329,17 @@ again: /* * ipfw processing for ethernet packets (in and out). */ -int -ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, - struct inpcb *inp) +static pfil_return_t +ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int dir, + void *ruleset __unused, struct inpcb *inp) { struct ip_fw_args args; struct ether_header save_eh; struct ether_header *eh; struct m_tag *mtag; struct mbuf *m; - int i, ret; + pfil_return_t ret; + int i; args.flags = IPFW_ARGS_ETHER; again: @@ -367,7 +368,7 @@ again: m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */ args.m = m; /* the packet we are looking at */ - args.oif = dir == PFIL_OUT ? ifp: NULL; /* destination, if any */ + args.oif = dir & PFIL_OUT ? ifp: NULL; /* destination, if any */ args.eh = &save_eh; /* MAC header for bridged/MAC packets */ args.inp = inp; /* used by ipfw uid/gid/jail rules */ i = ipfw_chk(&args); @@ -388,46 +389,46 @@ again: } *m0 = m; - ret = 0; + ret = PFIL_PASS; /* Check result of ipfw_chk() */ switch (i) { case IP_FW_PASS: break; case IP_FW_DENY: - ret = EACCES; - break; /* i.e. drop */ + ret = PFIL_DROPPED; + break; case IP_FW_DUMMYNET: - ret = EACCES; - - if (ip_dn_io_ptr == NULL) - break; /* i.e. drop */ - + if (ip_dn_io_ptr == NULL) { + ret = PFIL_DROPPED; + break; + } *m0 = NULL; - dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT; + dir = (dir & PFIL_IN) ? DIR_IN : DIR_OUT; MPASS(args.flags & IPFW_ARGS_REF); ip_dn_io_ptr(&m, dir | PROTO_LAYER2, &args); - return 0; + return (PFIL_CONSUMED); case IP_FW_NGTEE: case IP_FW_NETGRAPH: if (ng_ipfw_input_p == NULL) { - ret = EACCES; - break; /* i.e. drop */ + ret = PFIL_DROPPED; + break; } MPASS(args.flags & IPFW_ARGS_REF); - ret = ng_ipfw_input_p(m0, (dir == PFIL_IN) ? DIR_IN : DIR_OUT, + (void )ng_ipfw_input_p(m0, (dir & PFIL_IN) ? DIR_IN : DIR_OUT, &args, (i == IP_FW_NGTEE) ? 1 : 0); if (i == IP_FW_NGTEE) /* ignore errors for NGTEE */ goto again; /* continue with packet */ + ret = PFIL_CONSUMED; break; default: KASSERT(0, ("%s: unknown retval", __func__)); } - if (ret != 0) { + if (ret != PFIL_PASS) { if (*m0) FREE_PKT(*m0); *m0 = NULL; @@ -531,20 +532,64 @@ ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule, /* * attach or detach hooks for a given protocol family */ +VNET_DEFINE_STATIC(pfil_hook_t, ipfw_inet_hook); +#define V_ipfw_inet_hook VNET(ipfw_inet_hook) +#ifdef INET6 +VNET_DEFINE_STATIC(pfil_hook_t, ipfw_inet6_hook); +#define V_ipfw_inet6_hook VNET(ipfw_inet6_hook) +#endif +VNET_DEFINE_STATIC(pfil_hook_t, ipfw_link_hook); +#define V_ipfw_link_hook VNET(ipfw_link_hook) + static int ipfw_hook(int onoff, int pf) { - struct pfil_head *pfh; - pfil_func_t hook_func; - - pfh = pfil_head_get(PFIL_TYPE_AF, pf); - if (pfh == NULL) - return ENOENT; - - hook_func = (pf == AF_LINK) ? 
ipfw_check_frame : ipfw_check_packet; + struct pfil_hook_args pha; + struct pfil_link_args pla; + pfil_hook_t *h; + + pha.pa_version = PFIL_VERSION; + pha.pa_flags = PFIL_IN | PFIL_OUT; + pha.pa_modname = "ipfw"; + pha.pa_ruleset = NULL; + + pla.pa_version = PFIL_VERSION; + pla.pa_flags = PFIL_IN | PFIL_OUT | + PFIL_HEADPTR | PFIL_HOOKPTR; + + switch (pf) { + case AF_INET: + pha.pa_func = ipfw_check_packet; + pha.pa_type = PFIL_TYPE_IP4; + pha.pa_rulname = "default"; + h = &V_ipfw_inet_hook; + pla.pa_head = V_inet_pfil_head; + break; +#ifdef INET6 + case AF_INET6: + pha.pa_func = ipfw_check_packet; + pha.pa_type = PFIL_TYPE_IP6; + pha.pa_rulname = "default6"; + h = &V_ipfw_inet6_hook; + pla.pa_head = V_inet6_pfil_head; + break; +#endif + case AF_LINK: + pha.pa_func = ipfw_check_frame; + pha.pa_type = PFIL_TYPE_ETHERNET; + pha.pa_rulname = "default-link"; + h = &V_ipfw_link_hook; + pla.pa_head = V_link_pfil_head; + break; + } - (void) (onoff ? pfil_add_hook : pfil_remove_hook) - (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh); + if (onoff) { + *h = pfil_add_hook(&pha); + pla.pa_hook = *h; + (void)pfil_link(&pla); + } else + if (*h != NULL) + pfil_remove_hook(*h); return 0; } diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h index dcd38eb8e8eb..e0e24122168b 100644 --- a/sys/netpfil/ipfw/ip_fw_private.h +++ b/sys/netpfil/ipfw/ip_fw_private.h @@ -312,6 +312,8 @@ struct ip_fw_chain { void **srvstate; /* runtime service mappings */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t rwmtx; +#else + struct rmlock rwmtx; #endif int static_len; /* total len of static rules (v0) */ uint32_t gencnt; /* NAT generation count */ @@ -452,23 +454,25 @@ struct ipfw_ifc { #define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #else /* FreeBSD */ #define IPFW_LOCK_INIT(_chain) do { \ + rm_init_flags(&(_chain)->rwmtx, "IPFW static rules", RM_RECURSE); \ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ } while (0) #define IPFW_LOCK_DESTROY(_chain) do { \ + rm_destroy(&(_chain)->rwmtx); \ rw_destroy(&(_chain)->uh_lock); \ } while (0) -#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_RLOCKED) -#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_WLOCKED) +#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_RLOCKED) +#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_WLOCKED) #define IPFW_RLOCK_TRACKER struct rm_priotracker _tracker -#define IPFW_RLOCK(p) rm_rlock(&V_pfil_lock, &_tracker) -#define IPFW_RUNLOCK(p) rm_runlock(&V_pfil_lock, &_tracker) -#define IPFW_WLOCK(p) rm_wlock(&V_pfil_lock) -#define IPFW_WUNLOCK(p) rm_wunlock(&V_pfil_lock) -#define IPFW_PF_RLOCK(p) -#define IPFW_PF_RUNLOCK(p) +#define IPFW_RLOCK(p) rm_rlock(&(p)->rwmtx, &_tracker) +#define IPFW_RUNLOCK(p) rm_runlock(&(p)->rwmtx, &_tracker) +#define IPFW_WLOCK(p) rm_wlock(&(p)->rwmtx) +#define IPFW_WUNLOCK(p) rm_wunlock(&(p)->rwmtx) +#define IPFW_PF_RLOCK(p) IPFW_RLOCK(p) +#define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #endif #define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED) diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index edbd96a91283..a83e75447633 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -60,7 +60,6 @@ __FBSDID("$FreeBSD$"); #include <sys/syslog.h> #include <sys/fnv_hash.h> #include <net/if.h> -#include <net/pfil.h> #include <net/route.h> #include <net/vnet.h> #include <vm/vm.h> diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c 
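The reworked ipfw_hook() above illustrates the two-step provider setup in the new pfil(9): pfil_add_hook() builds a hook object from a pfil_hook_args, and pfil_link() attaches it to a head, here by pointer via PFIL_HEADPTR and PFIL_HOOKPTR. A hedged sketch of the same sequence for the hypothetical hook from the earlier sketch; the module and rule names are illustrative, while V_inet_pfil_head is the real IPv4 head the diff links ipfw to.

VNET_DEFINE_STATIC(pfil_hook_t, example_hook);
#define	V_example_hook	VNET(example_hook)

static void
example_hook_attach(void)
{
	struct pfil_hook_args pha;
	struct pfil_link_args pla;

	pha.pa_version = PFIL_VERSION;
	pha.pa_flags = PFIL_IN | PFIL_OUT;
	pha.pa_type = PFIL_TYPE_IP4;
	pha.pa_func = example_check_packet;
	pha.pa_ruleset = NULL;
	pha.pa_modname = "example";	/* hypothetical module name */
	pha.pa_rulname = "default";
	V_example_hook = pfil_add_hook(&pha);

	pla.pa_version = PFIL_VERSION;
	pla.pa_flags = PFIL_IN | PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet_pfil_head;	/* the IPv4 head, as ipfw uses */
	pla.pa_hook = V_example_hook;
	(void)pfil_link(&pla);
}

static void
example_hook_detach(void)
{
	if (V_example_hook != NULL)
		pfil_remove_hook(V_example_hook);	/* detaches from any linked heads */
}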
index 67593aa9e2fc..7d34977c224f 100644 --- a/sys/netpfil/ipfw/ip_fw_table.c +++ b/sys/netpfil/ipfw/ip_fw_table.c @@ -55,7 +55,6 @@ __FBSDID("$FreeBSD$"); #include <sys/socketvar.h> #include <sys/queue.h> #include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ -#include <net/pfil.h> #include <netinet/in.h> #include <netinet/ip_var.h> /* struct ipfw_rule_ref */ diff --git a/sys/netpfil/ipfw/ip_fw_table_value.c b/sys/netpfil/ipfw/ip_fw_table_value.c index d60fc34f39ea..e92a5a979fb8 100644 --- a/sys/netpfil/ipfw/ip_fw_table_value.c +++ b/sys/netpfil/ipfw/ip_fw_table_value.c @@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$"); #include <sys/socketvar.h> #include <sys/queue.h> #include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ -#include <net/pfil.h> #include <netinet/in.h> #include <netinet/ip_var.h> /* struct ipfw_rule_ref */ diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index eba8a7f64a3c..fd946d158ba1 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -169,16 +169,16 @@ static void pf_tbladdr_copyout(struct pf_addr_wrap *); * Wrapper functions for pfil(9) hooks */ #ifdef INET -static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); -static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); +static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); +static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); #endif #ifdef INET6 -static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); -static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); +static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); +static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); #endif static int hook_pf(void); @@ -4002,46 +4002,54 @@ shutdown_pf(void) return (error); } +static pfil_return_t +pf_check_return(int chk, struct mbuf **m) +{ + + switch (chk) { + case PF_PASS: + if (*m == NULL) + return (PFIL_CONSUMED); + else + return (PFIL_PASS); + break; + default: + if (*m != NULL) { + m_freem(*m); + *m = NULL; + } + return (PFIL_DROPPED); + } +} + #ifdef INET -static int -pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; chk = pf_test(PF_IN, flags, ifp, m, inp); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - if (chk != PF_PASS) - return (EACCES); - return (0); + return (pf_check_return(chk, m)); } -static int -pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; chk = pf_test(PF_OUT, flags, ifp, m, inp); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - if (chk != PF_PASS) - return (EACCES); - return (0); + return (pf_check_return(chk, m)); } #endif #ifdef INET6 -static int -pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check6_in(struct mbuf 
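The ip_fw_private.h hunk a little above stops borrowing V_pfil_lock and gives each ip_fw_chain its own rmlock(9), which is what the IPFW_RLOCK/IPFW_WLOCK macros now expand to. A minimal sketch of that per-instance read-mostly lock pattern; the example_chain structure and its field are invented for illustration.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

struct example_chain {
	struct rmlock	rwmtx;		/* per-instance lock, as in ip_fw_chain */
	int		rule_count;	/* illustrative protected state */
};

static void
example_chain_init(struct example_chain *ch)
{
	rm_init_flags(&ch->rwmtx, "example chain", RM_RECURSE);
}

static int
example_chain_read(struct example_chain *ch)
{
	struct rm_priotracker tracker;	/* what IPFW_RLOCK_TRACKER declares */
	int count;

	rm_rlock(&ch->rwmtx, &tracker);
	count = ch->rule_count;
	rm_runlock(&ch->rwmtx, &tracker);
	return (count);
}

static void
example_chain_update(struct example_chain *ch, int count)
{
	rm_wlock(&ch->rwmtx);
	ch->rule_count = count;
	rm_wunlock(&ch->rwmtx);
}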
**m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; @@ -4053,67 +4061,89 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_IN, flags, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, inp); CURVNET_RESTORE(); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - if (chk != PF_PASS) - return (EACCES); - return (0); + + return (pf_check_return(chk, m)); } -static int -pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_OUT, flags, ifp, m, inp); CURVNET_RESTORE(); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - if (chk != PF_PASS) - return (EACCES); - return (0); + + return (pf_check_return(chk, m)); } #endif /* INET6 */ -static int -hook_pf(void) -{ #ifdef INET - struct pfil_head *pfh_inet; +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook); +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook); +#define V_pf_ip4_in_hook VNET(pf_ip4_in_hook) +#define V_pf_ip4_out_hook VNET(pf_ip4_out_hook) #endif #ifdef INET6 - struct pfil_head *pfh_inet6; +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_in_hook); +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_out_hook); +#define V_pf_ip6_in_hook VNET(pf_ip6_in_hook) +#define V_pf_ip6_out_hook VNET(pf_ip6_out_hook) #endif +static int +hook_pf(void) +{ + struct pfil_hook_args pha; + struct pfil_link_args pla; + if (V_pf_pfil_hooked) return (0); + pha.pa_version = PFIL_VERSION; + pha.pa_modname = "pf"; + pha.pa_ruleset = NULL; + + pla.pa_version = PFIL_VERSION; + #ifdef INET - pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); - if (pfh_inet == NULL) - return (ESRCH); /* XXX */ - pfil_add_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); - pfil_add_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); + pha.pa_type = PFIL_TYPE_IP4; + pha.pa_func = pf_check_in; + pha.pa_flags = PFIL_IN; + pha.pa_rulname = "default-in"; + V_pf_ip4_in_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet_pfil_head; + pla.pa_hook = V_pf_ip4_in_hook; + (void)pfil_link(&pla); + pha.pa_func = pf_check_out; + pha.pa_flags = PFIL_OUT; + pha.pa_rulname = "default-out"; + V_pf_ip4_out_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet_pfil_head; + pla.pa_hook = V_pf_ip4_out_hook; + (void)pfil_link(&pla); #endif #ifdef INET6 - pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); - if (pfh_inet6 == NULL) { -#ifdef INET - pfil_remove_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet); - pfil_remove_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet); -#endif - return (ESRCH); /* XXX */ - } - pfil_add_hook_flags(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); - pfil_add_hook_flags(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); + pha.pa_type = PFIL_TYPE_IP6; + pha.pa_func = pf_check6_in; + pha.pa_flags = PFIL_IN; + pha.pa_rulname = "default-in6"; + V_pf_ip6_in_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet6_pfil_head; + pla.pa_hook = V_pf_ip6_in_hook; + (void)pfil_link(&pla); + pha.pa_func = pf_check6_out; + pha.pa_rulname = "default-out6"; + pha.pa_flags = PFIL_OUT; + V_pf_ip6_out_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_OUT | 
PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet6_pfil_head; + pla.pa_hook = V_pf_ip6_out_hook; + (void)pfil_link(&pla); #endif V_pf_pfil_hooked = 1; @@ -4123,33 +4153,17 @@ hook_pf(void) static int dehook_pf(void) { -#ifdef INET - struct pfil_head *pfh_inet; -#endif -#ifdef INET6 - struct pfil_head *pfh_inet6; -#endif if (V_pf_pfil_hooked == 0) return (0); #ifdef INET - pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); - if (pfh_inet == NULL) - return (ESRCH); /* XXX */ - pfil_remove_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet); - pfil_remove_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet); + pfil_remove_hook(V_pf_ip4_in_hook); + pfil_remove_hook(V_pf_ip4_out_hook); #endif #ifdef INET6 - pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); - if (pfh_inet6 == NULL) - return (ESRCH); /* XXX */ - pfil_remove_hook_flags(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet6); - pfil_remove_hook_flags(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet6); + pfil_remove_hook(V_pf_ip6_in_hook); + pfil_remove_hook(V_pf_ip6_out_hook); #endif V_pf_pfil_hooked = 0; diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64 index 34a2f3636f85..05447f451e13 100644 --- a/sys/powerpc/conf/GENERIC64 +++ b/sys/powerpc/conf/GENERIC64 @@ -159,6 +159,8 @@ device scc device uart device uart_z8530 +device iflib + # Ethernet hardware device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel PRO/10GbE PCIE PF Ethernet Family diff --git a/sys/powerpc/conf/MPC85XX b/sys/powerpc/conf/MPC85XX index cdf48ddbb6f4..402107ba9868 100644 --- a/sys/powerpc/conf/MPC85XX +++ b/sys/powerpc/conf/MPC85XX @@ -74,6 +74,7 @@ device cryptodev device da device ds1307 device ds1553 +device iflib device em device alc device ether diff --git a/sys/powerpc/conf/MPC85XXSPE b/sys/powerpc/conf/MPC85XXSPE index 5d7dc3d5fd4e..08b1eee4342f 100644 --- a/sys/powerpc/conf/MPC85XXSPE +++ b/sys/powerpc/conf/MPC85XXSPE @@ -74,6 +74,7 @@ device cryptodev device da device ds1307 device ds1553 +device iflib device em device alc device ether diff --git a/sys/powerpc/conf/QORIQ64 b/sys/powerpc/conf/QORIQ64 index 57cd4437aff7..4493c30d5afa 100644 --- a/sys/powerpc/conf/QORIQ64 +++ b/sys/powerpc/conf/QORIQ64 @@ -81,6 +81,7 @@ device cryptodev device da device ds1307 device ds1553 +device iflib device em device alc device dpaa diff --git a/sys/powerpc/conf/dpaa/DPAA b/sys/powerpc/conf/dpaa/DPAA index d7e784731cbc..2cbc908ec120 100644 --- a/sys/powerpc/conf/dpaa/DPAA +++ b/sys/powerpc/conf/dpaa/DPAA @@ -74,6 +74,7 @@ device sdhci # Network devices device miibus # MII bus support +device iflib device em diff --git a/sys/powerpc/powernv/opal.h b/sys/powerpc/powernv/opal.h index fafeec390e68..043b61a291a3 100644 --- a/sys/powerpc/powernv/opal.h +++ b/sys/powerpc/powernv/opal.h @@ -82,6 +82,20 @@ int opal_call(uint64_t token, ...); #define OPAL_INT_SET_MFRR 125 #define OPAL_PCI_TCE_KILL 126 #define OPAL_XIVE_RESET 128 +#define OPAL_XIVE_GET_IRQ_INFO 129 +#define OPAL_XIVE_GET_IRQ_CONFIG 130 +#define OPAL_XIVE_SET_IRQ_CONFIG 131 +#define OPAL_XIVE_GET_QUEUE_INFO 132 +#define OPAL_XIVE_SET_QUEUE_INFO 133 +#define OPAL_XIVE_DONATE_PAGE 134 +#define OPAL_XIVE_ALLOCATE_VP_BLOCK 135 +#define OPAL_XIVE_FREE_VP_BLOCK 136 +#define OPAL_XIVE_GET_VP_INFO 137 +#define OPAL_XIVE_SET_VP_INFO 138 +#define OPAL_XIVE_ALLOCATE_IRQ 139 +#define OPAL_XIVE_FREE_IRQ 140 +#define OPAL_XIVE_SYNC 141 +#define OPAL_XIVE_DUMP 142 #define OPAL_SENSOR_GROUP_CLEAR 156 #define OPAL_SENSOR_READ_U64 162 #define 
OPAL_SENSOR_GROUP_ENABLE 163 @@ -114,6 +128,18 @@ int opal_call(uint64_t token, ...); #define OPAL_BUSY_EVENT -12 #define OPAL_ASYNC_COMPLETION -15 #define OPAL_EMPTY -16 +#define OPAL_XIVE_PROVISIONING -31 +#define OPAL_XIVE_FREE_ACTIVE -32 + +#define OPAL_XIVE_XICS_MODE_EMU 0 +#define OPAL_XIVE_XICS_MODE_EXP 1 + +#define OPAL_XIVE_VP_ENABLED 0x00000001 +#define OPAL_XIVE_VP_SINGLE_ESCALATION 0x00000002 + +#define OPAL_XIVE_EQ_ENABLED 0x00000001 +#define OPAL_XIVE_EQ_ALWAYS_NOTIFY 0x00000002 +#define OPAL_XIVE_EQ_ESCALATE 0x00000004 struct opal_msg { uint32_t msg_type; diff --git a/sys/powerpc/powernv/platform_powernv.c b/sys/powerpc/powernv/platform_powernv.c index afcb3d5b90b8..1291eb02e4e5 100644 --- a/sys/powerpc/powernv/platform_powernv.c +++ b/sys/powerpc/powernv/platform_powernv.c @@ -59,7 +59,8 @@ __FBSDID("$FreeBSD$"); extern void *ap_pcpu; #endif -extern void xicp_smp_cpu_startup(void); +void (*powernv_smp_ap_extra_init)(void); + static int powernv_probe(platform_t); static int powernv_attach(platform_t); void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz, @@ -473,7 +474,8 @@ static void powernv_smp_ap_init(platform_t platform) { - xicp_smp_cpu_startup(); + if (powernv_smp_ap_extra_init != NULL) + powernv_smp_ap_extra_init(); } static void diff --git a/sys/powerpc/powernv/xive.c b/sys/powerpc/powernv/xive.c new file mode 100644 index 000000000000..0c1406199ac5 --- /dev/null +++ b/sys/powerpc/powernv/xive.c @@ -0,0 +1,764 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright 2019 Justin Hibbits + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_platform.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/endian.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/smp.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/bus.h> +#include <machine/intr_machdep.h> +#include <machine/md_var.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#ifdef POWERNV +#include <powerpc/powernv/opal.h> +#endif + +#include "pic_if.h" + +#define XIVE_PRIORITY 7 /* Random non-zero number */ +#define MAX_XIVE_IRQS (1<<24) /* 24-bit XIRR field */ + +/* Registers */ +#define XIVE_TM_QW1_OS 0x010 /* Guest OS registers */ +#define XIVE_TM_QW2_HV_POOL 0x020 /* Hypervisor pool registers */ +#define XIVE_TM_QW3_HV 0x030 /* Hypervisor registers */ + +#define XIVE_TM_NSR 0x00 +#define XIVE_TM_CPPR 0x01 +#define XIVE_TM_IPB 0x02 +#define XIVE_TM_LSMFB 0x03 +#define XIVE_TM_ACK_CNT 0x04 +#define XIVE_TM_INC 0x05 +#define XIVE_TM_AGE 0x06 +#define XIVE_TM_PIPR 0x07 + +#define TM_WORD0 0x0 +#define TM_WORD2 0x8 +#define TM_QW2W2_VP 0x80000000 + +#define XIVE_TM_SPC_ACK 0x800 +#define TM_QW3NSR_HE_SHIFT 14 +#define TM_QW3_NSR_HE_NONE 0 +#define TM_QW3_NSR_HE_POOL 1 +#define TM_QW3_NSR_HE_PHYS 2 +#define TM_QW3_NSR_HE_LSI 3 +#define XIVE_TM_SPC_PULL_POOL_CTX 0x828 + +#define XIVE_IRQ_LOAD_EOI 0x000 +#define XIVE_IRQ_STORE_EOI 0x400 +#define XIVE_IRQ_PQ_00 0xc00 +#define XIVE_IRQ_PQ_01 0xd00 + +#define XIVE_IRQ_VAL_P 0x02 +#define XIVE_IRQ_VAL_Q 0x01 + +struct xive_softc; +struct xive_irq; + +extern void (*powernv_smp_ap_extra_init)(void); + +/* Private support */ +static void xive_setup_cpu(void); +static void xive_smp_cpu_startup(void); +static void xive_init_irq(struct xive_irq *irqd, u_int irq); +static struct xive_irq *xive_configure_irq(u_int irq); +static int xive_provision_page(struct xive_softc *sc); + + +/* Interfaces */ +static int xive_probe(device_t); +static int xive_attach(device_t); +static int xics_probe(device_t); +static int xics_attach(device_t); + +static void xive_bind(device_t, u_int, cpuset_t, void **); +static void xive_dispatch(device_t, struct trapframe *); +static void xive_enable(device_t, u_int, u_int, void **); +static void xive_eoi(device_t, u_int, void *); +static void xive_ipi(device_t, u_int); +static void xive_mask(device_t, u_int, void *); +static void xive_unmask(device_t, u_int, void *); +static void xive_translate_code(device_t dev, u_int irq, int code, + enum intr_trigger *trig, enum intr_polarity *pol); + +static device_method_t xive_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xive_probe), + DEVMETHOD(device_attach, xive_attach), + + /* PIC interface */ + DEVMETHOD(pic_bind, xive_bind), + DEVMETHOD(pic_dispatch, xive_dispatch), + DEVMETHOD(pic_enable, xive_enable), + DEVMETHOD(pic_eoi, xive_eoi), + DEVMETHOD(pic_ipi, xive_ipi), + DEVMETHOD(pic_mask, xive_mask), + DEVMETHOD(pic_unmask, xive_unmask), + DEVMETHOD(pic_translate_code, xive_translate_code), + + DEVMETHOD_END +}; + +static device_method_t xics_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xics_probe), + DEVMETHOD(device_attach, xics_attach), + + DEVMETHOD_END +}; + +struct xive_softc { + struct mtx sc_mtx; + struct resource *sc_mem; + vm_size_t sc_prov_page_size; + uint32_t sc_offset; +}; + +struct xive_queue { + uint32_t *q_page; + uint32_t *q_eoi_page; + uint32_t q_toggle; + uint32_t q_size; + uint32_t q_index; + 
uint32_t q_mask; +}; + +struct xive_irq { + uint32_t girq; + uint32_t lirq; + uint64_t vp; + uint64_t flags; +#define OPAL_XIVE_IRQ_EOI_VIA_FW 0x00000020 +#define OPAL_XIVE_IRQ_MASK_VIA_FW 0x00000010 +#define OPAL_XIVE_IRQ_SHIFT_BUG 0x00000008 +#define OPAL_XIVE_IRQ_LSI 0x00000004 +#define OPAL_XIVE_IRQ_STORE_EOI 0x00000002 +#define OPAL_XIVE_IRQ_TRIGGER_PAGE 0x00000001 + uint8_t prio; + vm_offset_t eoi_page; + vm_offset_t trig_page; + vm_size_t esb_size; + int chip; +}; + +struct xive_cpu { + uint64_t vp; + uint64_t flags; + struct xive_irq ipi_data; + struct xive_queue queue; /* We only use a single queue for now. */ + uint64_t cam; + uint32_t chip; +}; + +static driver_t xive_driver = { + "xive", + xive_methods, + sizeof(struct xive_softc) +}; + +static driver_t xics_driver = { + "xivevc", + xics_methods, + 0 +}; + +static devclass_t xive_devclass; +static devclass_t xics_devclass; + +EARLY_DRIVER_MODULE(xive, ofwbus, xive_driver, xive_devclass, 0, 0, + BUS_PASS_INTERRUPT-1); +EARLY_DRIVER_MODULE(xivevc, ofwbus, xics_driver, xics_devclass, 0, 0, + BUS_PASS_INTERRUPT); + +MALLOC_DEFINE(M_XIVE, "xive", "XIVE Memory"); + +DPCPU_DEFINE_STATIC(struct xive_cpu, xive_cpu_data); + +static int xive_ipi_vector = -1; + +/* + * XIVE Exploitation mode driver. + * + * The XIVE, present in the POWER9 CPU, can run in two modes: XICS emulation + * mode, and "Exploitation mode". XICS emulation mode is compatible with the + * POWER8 and earlier XICS interrupt controller, using OPAL calls to emulate + * hypervisor calls and memory accesses. Exploitation mode gives us raw access + * to the XIVE MMIO, improving performance significantly. + * + * The XIVE controller is a very bizarre interrupt controller. It uses queues + * in memory to pass interrupts around, and maps itself into 512GB of physical + * device address space, giving each interrupt in the system one or more pages + * of address space. An IRQ is tied to a virtual processor, which could be a + * physical CPU thread, or a guest CPU thread (LPAR running on a physical + * thread). Thus, the controller can route interrupts directly to guest OSes + * bypassing processing by the hypervisor, thereby improving performance of the + * guest OS. + * + * An IRQ, in addition to being tied to a virtual processor, has one or two + * page mappings: an EOI page, and an optional trigger page. The trigger page + * could be the same as the EOI page. Level-sensitive interrupts (LSIs) don't + * have a trigger page, as they're external interrupts controlled by physical + * lines. MSIs and IPIs have trigger pages. An IPI is really just another IRQ + * in the XIVE, which is triggered by software. + * + * An interesting behavior of the XIVE controller is that oftentimes the + * contents of an address location don't actually matter, but the direction of + * the action is the signifier (read vs write), and the address is significant. + * Hence, masking and unmasking an interrupt is done by reading different + * addresses in the EOI page, and triggering an interrupt consists of writing to + * the trigger page. + * + * Additionally, the MMIO region mapped is CPU-sensitive, just like the + * per-processor register space (private access) in OpenPIC. In order for a CPU + * to receive interrupts it must itself configure its CPPR (Current Processor + * Priority Register), it cannot be set by any other processor. This + * necessitates the xive_smp_cpu_startup() function. + * + * Queues are pages of memory, sized powers-of-two, that are shared with the + * XIVE. 
The XIVE writes into the queue with an alternating polarity bit, which + * flips when the queue wraps. + */ + +/* + * Offset-based read/write interfaces. + */ +static uint16_t +xive_read_2(struct xive_softc *sc, bus_size_t offset) +{ + + return (bus_read_2(sc->sc_mem, sc->sc_offset + offset)); +} + +static void +xive_write_1(struct xive_softc *sc, bus_size_t offset, uint8_t val) +{ + + bus_write_1(sc->sc_mem, sc->sc_offset + offset, val); +} + +/* EOI and Trigger page access interfaces. */ +static uint64_t +xive_read_mmap8(vm_offset_t addr) +{ + return (*(volatile uint64_t *)addr); +} + +static void +xive_write_mmap8(vm_offset_t addr, uint64_t val) +{ + *(uint64_t *)(addr) = val; +} + + +/* Device interfaces. */ +static int +xive_probe(device_t dev) +{ + + if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-pe")) + return (ENXIO); + + device_set_desc(dev, "External Interrupt Virtualization Engine"); + + /* Make sure we always win against the xicp driver. */ + return (BUS_PROBE_DEFAULT); +} + +static int +xics_probe(device_t dev) +{ + + if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-vc")) + return (ENXIO); + + device_set_desc(dev, "External Interrupt Virtualization Engine Root"); + return (BUS_PROBE_DEFAULT); +} + +static int +xive_attach(device_t dev) +{ + struct xive_softc *sc = device_get_softc(dev); + struct xive_cpu *xive_cpud; + phandle_t phandle = ofw_bus_get_node(dev); + int64_t vp_block; + int error; + int rid; + int i, order; + uint64_t vp_id; + int64_t ipi_irq; + + opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EXP); + + error = OF_getencprop(phandle, "ibm,xive-provision-page-size", + (pcell_t *)&sc->sc_prov_page_size, sizeof(sc->sc_prov_page_size)); + + rid = 1; /* Get the Hypervisor-level register set. */ + sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + sc->sc_offset = XIVE_TM_QW3_HV; + + mtx_init(&sc->sc_mtx, "XIVE", NULL, MTX_DEF); + + order = fls(mp_maxid + (mp_maxid - 1)) - 1; + + do { + vp_block = opal_call(OPAL_XIVE_ALLOCATE_VP_BLOCK, order); + if (vp_block == OPAL_BUSY) + DELAY(10); + else if (vp_block == OPAL_XIVE_PROVISIONING) + xive_provision_page(sc); + else + break; + } while (1); + + if (vp_block < 0) { + device_printf(dev, + "Unable to allocate VP block. Opal error %d\n", + (int)vp_block); + bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->sc_mem); + return (ENXIO); + } + + /* + * Set up the VPs. Try to do as much as we can in attach, to lessen + * what's needed at AP spawn time. + */ + CPU_FOREACH(i) { + vp_id = pcpu_find(i)->pc_hwref; + + xive_cpud = DPCPU_ID_PTR(i, xive_cpu_data); + xive_cpud->vp = vp_id + vp_block; + opal_call(OPAL_XIVE_GET_VP_INFO, xive_cpud->vp, NULL, + vtophys(&xive_cpud->cam), NULL, vtophys(&xive_cpud->chip)); + + /* Allocate the queue page and populate the queue state data. 
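xive_attach() above keeps retrying OPAL calls that come back OPAL_BUSY (and, for the VP block allocation, provisions extra pages when asked). A hedged sketch of the plain busy-retry idiom as a small helper inside the driver; the token and argument are placeholders and nothing beyond the OPAL_BUSY return code is assumed.

static int
example_opal_call_retry(uint64_t token, uint64_t arg)
{
	int rc;

	/* OPAL may ask us to back off and try again. */
	do {
		rc = opal_call(token, arg);
		if (rc == OPAL_BUSY)
			DELAY(10);	/* brief pause before retrying */
	} while (rc == OPAL_BUSY);

	return (rc);
}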
*/ + xive_cpud->queue.q_page = contigmalloc(PAGE_SIZE, M_XIVE, + M_ZERO | M_WAITOK, 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); + xive_cpud->queue.q_size = 1 << PAGE_SHIFT; + xive_cpud->queue.q_mask = + ((xive_cpud->queue.q_size / sizeof(int)) - 1); + xive_cpud->queue.q_toggle = 0; + xive_cpud->queue.q_index = 0; + do { + error = opal_call(OPAL_XIVE_SET_VP_INFO, xive_cpud->vp, + OPAL_XIVE_VP_ENABLED, 0); + } while (error == OPAL_BUSY); + error = opal_call(OPAL_XIVE_SET_QUEUE_INFO, vp_id, + XIVE_PRIORITY, vtophys(xive_cpud->queue.q_page), PAGE_SHIFT, + OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED); + + do { + ipi_irq = opal_call(OPAL_XIVE_ALLOCATE_IRQ, + xive_cpud->chip); + } while (ipi_irq == OPAL_BUSY); + + if (ipi_irq < 0) + device_printf(root_pic, + "Failed allocating IPI. OPAL error %d\n", + (int)ipi_irq); + else { + xive_init_irq(&xive_cpud->ipi_data, ipi_irq); + xive_cpud->ipi_data.vp = vp_id; + xive_cpud->ipi_data.lirq = MAX_XIVE_IRQS; + opal_call(OPAL_XIVE_SET_IRQ_CONFIG, ipi_irq, + xive_cpud->ipi_data.vp, XIVE_PRIORITY, + MAX_XIVE_IRQS); + } + } + + powerpc_register_pic(dev, OF_xref_from_node(phandle), MAX_XIVE_IRQS, + 1 /* Number of IPIs */, FALSE); + root_pic = dev; + + xive_setup_cpu(); + powernv_smp_ap_extra_init = xive_smp_cpu_startup; + + return (0); +} + +static int +xics_attach(device_t dev) +{ + phandle_t phandle = ofw_bus_get_node(dev); + + /* The XIVE (root PIC) will handle all our interrupts */ + powerpc_register_pic(root_pic, OF_xref_from_node(phandle), + MAX_XIVE_IRQS, 1 /* Number of IPIs */, FALSE); + + return (0); +} + +/* + * PIC I/F methods. + */ + +static void +xive_bind(device_t dev, u_int irq, cpuset_t cpumask, void **priv) +{ + struct xive_irq *irqd; + int cpu; + int ncpus, i, error; + + if (*priv == NULL) + *priv = xive_configure_irq(irq); + + irqd = *priv; + + /* + * This doesn't appear to actually support affinity groups, so pick a + * random CPU. + */ + ncpus = 0; + CPU_FOREACH(cpu) + if (CPU_ISSET(cpu, &cpumask)) ncpus++; + + i = mftb() % ncpus; + ncpus = 0; + CPU_FOREACH(cpu) { + if (!CPU_ISSET(cpu, &cpumask)) + continue; + if (ncpus == i) + break; + ncpus++; + } + + opal_call(OPAL_XIVE_SYNC); + + irqd->vp = pcpu_find(cpu)->pc_hwref; + error = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, irqd->vp, + XIVE_PRIORITY, irqd->lirq); + + if (error < 0) + panic("Cannot bind interrupt %d to CPU %d", irq, cpu); + + xive_eoi(dev, irq, irqd); +} + +/* Read the next entry in the queue page and update the index. */ +static int +xive_read_eq(struct xive_queue *q) +{ + uint32_t i = be32toh(q->q_page[q->q_index]); + + /* Check validity, using current queue polarity. 
*/ + if ((i >> 31) == q->q_toggle) + return (0); + + q->q_index = (q->q_index + 1) & q->q_mask; + + if (q->q_index == 0) + q->q_toggle ^= 1; + + return (i & 0x7fffffff); +} + +static void +xive_dispatch(device_t dev, struct trapframe *tf) +{ + struct xive_softc *sc; + struct xive_cpu *xive_cpud; + uint32_t vector; + uint16_t ack; + uint8_t cppr, he; + + sc = device_get_softc(dev); + + for (;;) { + ack = xive_read_2(sc, XIVE_TM_SPC_ACK); + cppr = (ack & 0xff); + + he = ack >> TM_QW3NSR_HE_SHIFT; + + if (he == TM_QW3_NSR_HE_NONE) + break; + switch (he) { + case TM_QW3_NSR_HE_NONE: + goto end; + case TM_QW3_NSR_HE_POOL: + case TM_QW3_NSR_HE_LSI: + device_printf(dev, + "Unexpected interrupt he type: %d\n", he); + goto end; + case TM_QW3_NSR_HE_PHYS: + break; + } + + xive_cpud = DPCPU_PTR(xive_cpu_data); + xive_write_1(sc, XIVE_TM_CPPR, cppr); + + for (;;) { + vector = xive_read_eq(&xive_cpud->queue); + + if (vector == 0) + break; + + if (vector == MAX_XIVE_IRQS) + vector = xive_ipi_vector; + + powerpc_dispatch_intr(vector, tf); + } + } +end: + xive_write_1(sc, XIVE_TM_CPPR, 0xff); +} + +static void +xive_enable(device_t dev, u_int irq, u_int vector, void **priv) +{ + struct xive_irq *irqd; + cell_t status, cpu; + + if (irq == MAX_XIVE_IRQS) { + if (xive_ipi_vector == -1) + xive_ipi_vector = vector; + return; + } + if (*priv == NULL) + *priv = xive_configure_irq(irq); + + irqd = *priv; + + /* Bind to this CPU to start */ + cpu = PCPU_GET(hwref); + irqd->lirq = vector; + + for (;;) { + status = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, cpu, + XIVE_PRIORITY, vector); + if (status != OPAL_BUSY) + break; + DELAY(10); + } + + if (status != 0) + panic("OPAL_SET_XIVE IRQ %d -> cpu %d failed: %d", irq, + cpu, status); + + xive_unmask(dev, irq, *priv); +} + +static void +xive_eoi(device_t dev, u_int irq, void *priv) +{ + struct xive_irq *rirq; + struct xive_cpu *cpud; + uint8_t eoi_val; + + if (irq == MAX_XIVE_IRQS) { + cpud = DPCPU_PTR(xive_cpu_data); + rirq = &cpud->ipi_data; + } else + rirq = priv; + + if (rirq->flags & OPAL_XIVE_IRQ_EOI_VIA_FW) + opal_call(OPAL_INT_EOI, irq); + else if (rirq->flags & OPAL_XIVE_IRQ_STORE_EOI) + xive_write_mmap8(rirq->eoi_page + XIVE_IRQ_STORE_EOI, 0); + else if (rirq->flags & OPAL_XIVE_IRQ_LSI) + xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_LOAD_EOI); + else { + eoi_val = xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00); + if ((eoi_val & XIVE_IRQ_VAL_Q) && rirq->trig_page != 0) + xive_write_mmap8(rirq->trig_page, 0); + } +} + +static void +xive_ipi(device_t dev, u_int cpu) +{ + struct xive_cpu *xive_cpud; + + xive_cpud = DPCPU_ID_PTR(cpu, xive_cpu_data); + + if (xive_cpud->ipi_data.trig_page == 0) + return; + xive_write_mmap8(xive_cpud->ipi_data.trig_page, 0); +} + +static void +xive_mask(device_t dev, u_int irq, void *priv) +{ + struct xive_irq *rirq; + + /* Never mask IPIs */ + if (irq == MAX_XIVE_IRQS) + return; + + rirq = priv; + + if (!(rirq->flags & OPAL_XIVE_IRQ_LSI)) + return; + xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_01); +} + +static void +xive_unmask(device_t dev, u_int irq, void *priv) +{ + struct xive_irq *rirq; + + rirq = priv; + + xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00); +} + +static void +xive_translate_code(device_t dev, u_int irq, int code, + enum intr_trigger *trig, enum intr_polarity *pol) +{ + switch (code) { + case 0: + /* L to H edge */ + *trig = INTR_TRIGGER_EDGE; + *pol = INTR_POLARITY_HIGH; + break; + case 1: + /* Active L level */ + *trig = INTR_TRIGGER_LEVEL; + *pol = INTR_POLARITY_LOW; + break; + default: + *trig = 
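xive_read_eq() above consumes the event queue with a polarity (toggle) bit: an entry is valid only while its top bit differs from the current toggle, and the toggle flips whenever the index wraps, so entries left over from the previous lap are never replayed. A small self-contained user-space sketch of the same technique; the queue contents are simulated and the 31-bit payload stands in for the interrupt number.

#include <stdint.h>
#include <stdio.h>

#define	QUEUE_ENTRIES	8		/* must be a power of two */

struct phase_queue {
	uint32_t entries[QUEUE_ENTRIES];
	uint32_t index;
	uint32_t toggle;		/* polarity expected for valid entries */
};

/* Return the next payload, or 0 if the entry at the head is not valid yet. */
static uint32_t
phase_queue_read(struct phase_queue *q)
{
	uint32_t e = q->entries[q->index];

	if ((e >> 31) == q->toggle)	/* polarity matches: nothing new */
		return (0);

	q->index = (q->index + 1) & (QUEUE_ENTRIES - 1);
	if (q->index == 0)
		q->toggle ^= 1;		/* wrapped: expect the other polarity */

	return (e & 0x7fffffff);
}

int
main(void)
{
	struct phase_queue q = { .toggle = 0 };

	/* Producer publishes payload 42 with the bit set, valid for lap one. */
	q.entries[0] = (1u << 31) | 42;

	printf("first read: %u\n", phase_queue_read(&q));	/* 42 */
	printf("second read: %u\n", phase_queue_read(&q));	/* 0, empty */
	return (0);
}

A return of 0 means "queue empty", which is why the driver gives the IPI the out-of-band logical number MAX_XIVE_IRQS rather than 0 and translates it in xive_dispatch().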
INTR_TRIGGER_CONFORM; + *pol = INTR_POLARITY_CONFORM; + } +} + +/* Private functions. */ +/* + * Setup the current CPU. Called by the BSP at driver attachment, and by each + * AP at wakeup (via xive_smp_cpu_startup()). + */ +static void +xive_setup_cpu(void) +{ + struct xive_softc *sc; + struct xive_cpu *cpup; + uint32_t val; + + cpup = DPCPU_PTR(xive_cpu_data); + + sc = device_get_softc(root_pic); + + val = bus_read_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2); + if (val & TM_QW2W2_VP) + bus_read_8(sc->sc_mem, XIVE_TM_SPC_PULL_POOL_CTX); + + bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD0, 0xff); + bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2, + TM_QW2W2_VP | cpup->cam); + + xive_unmask(root_pic, cpup->ipi_data.girq, &cpup->ipi_data); + xive_write_1(sc, XIVE_TM_CPPR, 0xff); +} + +/* Populate an IRQ structure, mapping the EOI and trigger pages. */ +static void +xive_init_irq(struct xive_irq *irqd, u_int irq) +{ + uint64_t eoi_phys, trig_phys; + uint32_t esb_shift; + + opal_call(OPAL_XIVE_GET_IRQ_INFO, irq, + vtophys(&irqd->flags), vtophys(&eoi_phys), + vtophys(&trig_phys), vtophys(&esb_shift), + vtophys(&irqd->chip)); + + irqd->girq = irq; + irqd->esb_size = 1 << esb_shift; + irqd->eoi_page = (vm_offset_t)pmap_mapdev(eoi_phys, irqd->esb_size); + + if (eoi_phys == trig_phys) + irqd->trig_page = irqd->eoi_page; + else if (trig_phys != 0) + irqd->trig_page = (vm_offset_t)pmap_mapdev(trig_phys, + irqd->esb_size); + else + irqd->trig_page = 0; + + opal_call(OPAL_XIVE_GET_IRQ_CONFIG, irq, vtophys(&irqd->vp), + vtophys(&irqd->prio), vtophys(&irqd->lirq)); +} + +/* Allocate an IRQ struct before populating it. */ +static struct xive_irq * +xive_configure_irq(u_int irq) +{ + struct xive_irq *irqd; + + irqd = malloc(sizeof(struct xive_irq), M_XIVE, M_WAITOK); + + xive_init_irq(irqd, irq); + + return (irqd); +} + +/* + * Part of the OPAL API. OPAL_XIVE_ALLOCATE_VP_BLOCK might require more pages, + * provisioned through this call. + */ +static int +xive_provision_page(struct xive_softc *sc) +{ + void *prov_page; + int error; + + do { + prov_page = contigmalloc(sc->sc_prov_page_size, M_XIVE, 0, + 0, BUS_SPACE_MAXADDR, + sc->sc_prov_page_size, sc->sc_prov_page_size); + + error = opal_call(OPAL_XIVE_DONATE_PAGE, -1, + vtophys(prov_page)); + } while (error == OPAL_XIVE_PROVISIONING); + + return (0); +} + +/* The XIVE_TM_CPPR register must be set by each thread */ +static void +xive_smp_cpu_startup(void) +{ + + xive_setup_cpu(); +} diff --git a/sys/powerpc/pseries/xics.c b/sys/powerpc/pseries/xics.c index 4dbfcfbd30cb..fc9a82dd2b4d 100644 --- a/sys/powerpc/pseries/xics.c +++ b/sys/powerpc/pseries/xics.c @@ -61,9 +61,6 @@ __FBSDID("$FreeBSD$"); #define XICP_IPI 2 #define MAX_XICP_IRQS (1<<24) /* 24-bit XIRR field */ -#define XIVE_XICS_MODE_EMU 0 -#define XIVE_XICS_MODE_EXP 1 - static int xicp_probe(device_t); static int xicp_attach(device_t); static int xics_probe(device_t); @@ -78,7 +75,8 @@ static void xicp_mask(device_t, u_int, void *priv); static void xicp_unmask(device_t, u_int, void *priv); #ifdef POWERNV -void xicp_smp_cpu_startup(void); +extern void (*powernv_smp_ap_extra_init)(void); +static void xicp_smp_cpu_startup(void); #endif static device_method_t xicp_methods[] = { @@ -238,7 +236,7 @@ xicp_attach(device_t dev) * compatibility mode. 
*/ sc->xics_emu = true; - opal_call(OPAL_XIVE_RESET, XIVE_XICS_MODE_EMU); + opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EMU); #endif } else { sc->cpu_range[0] = 0; @@ -280,6 +278,11 @@ xicp_attach(device_t dev) 1 /* Number of IPIs */, FALSE); root_pic = dev; +#ifdef POWERNV + if (sc->xics_emu) + powernv_smp_ap_extra_init = xicp_smp_cpu_startup; +#endif + return (0); } @@ -556,7 +559,7 @@ xicp_unmask(device_t dev, u_int irq, void *priv) #ifdef POWERNV /* This is only used on POWER9 systems with the XIVE's XICS emulation. */ -void +static void xicp_smp_cpu_startup(void) { struct xicp_softc *sc; diff --git a/sys/sparc64/conf/GENERIC b/sys/sparc64/conf/GENERIC index 92d76773d17e..cb768e39a55c 100644 --- a/sys/sparc64/conf/GENERIC +++ b/sys/sparc64/conf/GENERIC @@ -181,6 +181,8 @@ device uart # Multi-uart driver #device ppi # Parallel port interface device #device vpo # Requires scbus and da +device iflib + # PCI Ethernet NICs. #device de # DEC/Intel DC21x4x (``Tulip'') device em # Intel PRO/1000 adapter Gigabit Ethernet Card diff --git a/sys/sys/elf_common.h b/sys/sys/elf_common.h index 618a5e2e1acb..2d61e953401a 100644 --- a/sys/sys/elf_common.h +++ b/sys/sys/elf_common.h @@ -762,6 +762,9 @@ typedef struct { #define NT_FREEBSD_ARCH_TAG 3 #define NT_FREEBSD_FEATURE_CTL 4 +/* NT_FREEBSD_FEATURE_CTL desc[0] bits */ +#define NT_FREEBSD_FCTL_ASLR_DISABLE 0x00000001 + /* Values for n_type. Used in core files. */ #define NT_PRSTATUS 1 /* Process status. */ #define NT_FPREGSET 2 /* Floating point registers. */ diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 827a01e3ba79..75217ed65c37 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -443,6 +443,7 @@ struct mbuf { #define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference */ +#define EXT_RXRING 8 /* data in NIC receive ring */ #define EXT_VENDOR1 224 /* for vendor-internal use */ #define EXT_VENDOR2 225 /* for vendor-internal use */ diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index 58bbd715d061..d16a92a3b68c 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -560,7 +560,7 @@ struct sysctl_args { char namelen_l_[PADL_(u_int)]; u_int namelen; char namelen_r_[PADR_(u_int)]; char old_l_[PADL_(void *)]; void * old; char old_r_[PADR_(void *)]; char oldlenp_l_[PADL_(size_t *)]; size_t * oldlenp; char oldlenp_r_[PADR_(size_t *)]; - char new_l_[PADL_(void *)]; void * new; char new_r_[PADR_(void *)]; + char new_l_[PADL_(const void *)]; const void * new; char new_r_[PADR_(const void *)]; char newlen_l_[PADL_(size_t)]; size_t newlen; char newlen_r_[PADR_(size_t)]; }; struct mlock_args { diff --git a/sys/teken/teken.c b/sys/teken/teken.c index bc3af434e2d9..f4e4c8a57a2d 100644 --- a/sys/teken/teken.c +++ b/sys/teken/teken.c @@ -58,6 +58,7 @@ #define TS_CONS25 0x0040 /* cons25 emulation. */ #define TS_INSTRING 0x0080 /* Inside string. */ #define TS_CURSORKEYS 0x0100 /* Cursor keys mode. */ +#define TS_CONS25KEYS 0x0400 /* Fuller cons25 emul (fix function keys). */ /* Character that blanks a cell. */ #define BLANK ' ' @@ -411,7 +412,7 @@ void teken_set_cons25(teken_t *t) { - t->t_stateflags |= TS_CONS25; + t->t_stateflags |= TS_CONS25 | TS_CONS25KEYS; } /* @@ -722,6 +723,9 @@ teken_get_sequence(const teken_t *t, unsigned int k) { /* Cons25 mode. */ + if ((t->t_stateflags & (TS_CONS25 | TS_CONS25KEYS)) == + (TS_CONS25 | TS_CONS25KEYS)) + return (NULL); /* Don't override good kbd(4) strings. 
*/ if (t->t_stateflags & TS_CONS25 && k < sizeof special_strings_cons25 / sizeof(char *)) return (special_strings_cons25[k]); diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 63196404e53d..c2571f1c4153 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -623,7 +623,7 @@ static int hash_alloc(struct uma_hash *hash) { int oldsize; - int alloc; + size_t alloc; oldsize = hash->uh_hashsize; @@ -2844,7 +2844,7 @@ zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags, int max) return (NULL); bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket, - max, domain, flags); + MIN(max, bucket->ub_entries), domain, flags); /* * Initialize the memory if necessary. diff --git a/sys/x86/x86/nexus.c b/sys/x86/x86/nexus.c index 40f8b01b1213..91877499ca6c 100644 --- a/sys/x86/x86/nexus.c +++ b/sys/x86/x86/nexus.c @@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include <sys/rman.h> #include <sys/interrupt.h> +#include <machine/md_var.h> #include <machine/vmparam.h> #include <vm/vm.h> #include <vm/pmap.h> @@ -269,11 +270,7 @@ nexus_init_resources(void) panic("nexus_init_resources port_rman"); mem_rman.rm_start = 0; -#ifndef PAE - mem_rman.rm_end = BUS_SPACE_MAXADDR; -#else - mem_rman.rm_end = ((1ULL << cpu_maxphyaddr) - 1); -#endif + mem_rman.rm_end = cpu_getmaxphyaddr(); mem_rman.rm_type = RMAN_ARRAY; mem_rman.rm_descr = "I/O memory addresses"; if (rman_init(&mem_rman) @@ -787,6 +784,7 @@ ram_attach(device_t dev) { struct bios_smap *smapbase, *smap, *smapend; struct resource *res; + rman_res_t length; vm_paddr_t *p; caddr_t kmdp; uint32_t smapsize; @@ -807,16 +805,12 @@ ram_attach(device_t dev) if (smap->type != SMAP_TYPE_MEMORY || smap->length == 0) continue; -#ifdef __i386__ - /* - * Resources use long's to track resources, so - * we can't include memory regions above 4GB. - */ - if (smap->base > ~0ul) + if (smap->base > mem_rman.rm_end) continue; -#endif + length = smap->base + smap->length > mem_rman.rm_end ? + mem_rman.rm_end - smap->base : smap->length; error = bus_set_resource(dev, SYS_RES_MEMORY, rid, - smap->base, smap->length); + smap->base, length); if (error) panic( "ram_attach: resource %d failed set with %d", @@ -841,16 +835,12 @@ ram_attach(device_t dev) * segment is 0. */ for (rid = 0, p = dump_avail; p[1] != 0; rid++, p += 2) { -#ifdef PAE - /* - * Resources use long's to track resources, so we can't - * include memory regions above 4GB. - */ - if (p[0] > ~0ul) + if (p[0] > mem_rman.rm_end) break; -#endif + length = (p[1] > mem_rman.rm_end ? mem_rman.rm_end : p[1]) - + p[0]; error = bus_set_resource(dev, SYS_RES_MEMORY, rid, p[0], - p[1] - p[0]); + length); if (error) panic("ram_attach: resource %d failed set with %d", rid, error); diff --git a/sys/x86/xen/hvm.c b/sys/x86/xen/hvm.c index 6983a20ecf82..a0a0e4b75153 100644 --- a/sys/x86/xen/hvm.c +++ b/sys/x86/xen/hvm.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include <x86/apicreg.h> #include <xen/xen-os.h> +#include <xen/error.h> #include <xen/features.h> #include <xen/gnttab.h> #include <xen/hypervisor.h> @@ -88,6 +89,12 @@ int xen_vector_callback_enabled; */ uint32_t hvm_start_flags; +/** + * Signal whether the vector injected for the event channel upcall requires to + * be EOI'ed on the local APIC. 
+ */ +bool xen_evtchn_needs_ack; + /*------------------------------- Per-CPU Data -------------------------------*/ DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); DPCPU_DEFINE(struct vcpu_info *, vcpu_info); @@ -223,6 +230,19 @@ xen_hvm_init_shared_info_page(void) panic("HYPERVISOR_memory_op failed"); } +static int +set_percpu_callback(unsigned int vcpu) +{ + struct xen_hvm_evtchn_upcall_vector vec; + int error; + + vec.vcpu = vcpu; + vec.vector = IDT_EVTCHN; + error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec); + + return (error != 0 ? xen_translate_error(error) : 0); +} + /* * Tell the hypervisor how to contact us for event channel callbacks. */ @@ -240,12 +260,20 @@ xen_hvm_set_callback(device_t dev) if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { int error; - xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); + error = set_percpu_callback(0); + if (error == 0) { + xen_evtchn_needs_ack = true; + /* Trick toolstack to think we are enlightened */ + xhp.value = 1; + } else + xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); if (error == 0) { xen_vector_callback_enabled = 1; return; - } + } else if (xen_evtchn_needs_ack) + panic("Unable to setup fake HVM param: %d", error); + printf("Xen HVM callback vector registration failed (%d). " "Falling back to emulated device interrupt\n", error); } @@ -360,6 +388,7 @@ xen_hvm_init(enum xen_hvm_init_type init_type) } xen_vector_callback_enabled = 0; + xen_evtchn_needs_ack = false; xen_hvm_set_callback(NULL); /* @@ -427,6 +456,20 @@ xen_hvm_cpu_init(void) PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ? regs[1] : PCPU_GET(acpi_id)); + if (xen_evtchn_needs_ack && !IS_BSP()) { + /* + * Setup the per-vpcu event channel upcall vector. This is only + * required when using the new HVMOP_set_evtchn_upcall_vector + * hypercall, which allows using a different vector for each + * vCPU. Note that FreeBSD uses the same vector for all vCPUs + * because it's not dynamically allocated. + */ + rc = set_percpu_callback(PCPU_GET(vcpu_id)); + if (rc != 0) + panic("Event channel upcall vector setup failed: %d", + rc); + } + /* * Set the vCPU info. 
* diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c index d366a61dc9ec..f230794d3a50 100644 --- a/sys/x86/xen/xen_intr.c +++ b/sys/x86/xen/xen_intr.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include <machine/xen/xen-os.h> #include <xen/xen-os.h> +#include <xen/hvm.h> #include <xen/hypervisor.h> #include <xen/xen_intr.h> #include <xen/evtchn/evtchnvar.h> @@ -620,6 +621,10 @@ xen_intr_handle_upcall(struct trapframe *trap_frame) l1 &= ~(1UL << l1i); } } + + if (xen_evtchn_needs_ack) + lapic_eoi(); + critical_exit(); } diff --git a/sys/xen/hvm.h b/sys/xen/hvm.h index bc7518d26575..e34a552dc714 100644 --- a/sys/xen/hvm.h +++ b/sys/xen/hvm.h @@ -104,5 +104,6 @@ void xen_hvm_suspend(void); void xen_hvm_resume(bool suspend_cancelled); extern uint32_t hvm_start_flags; +extern bool xen_evtchn_needs_ack; #endif /* __XEN_HVM_H__ */ diff --git a/usr.bin/elfdump/elfdump.c b/usr.bin/elfdump/elfdump.c index aadf1e84088d..2bdf98830088 100644 --- a/usr.bin/elfdump/elfdump.c +++ b/usr.bin/elfdump/elfdump.c @@ -317,10 +317,19 @@ static const char *p_flags[] = { "PF_X|PF_W|PF_R" }; +#define NT_ELEM(x) [x] = #x, +static const char *nt_types[] = { + "", + NT_ELEM(NT_FREEBSD_ABI_TAG) + NT_ELEM(NT_FREEBSD_NOINIT_TAG) + NT_ELEM(NT_FREEBSD_ARCH_TAG) + NT_ELEM(NT_FREEBSD_FEATURE_CTL) +}; + /* http://www.sco.com/developers/gabi/latest/ch4.sheader.html#sh_type */ static const char * sh_types(uint64_t machine, uint64_t sht) { - static char unknown_buf[64]; + static char unknown_buf[64]; if (sht < 0x60000000) { switch (sht) { @@ -1061,19 +1070,26 @@ elf_print_note(Elf32_Ehdr *e, void *sh) u_int32_t namesz; u_int32_t descsz; u_int32_t desc; + u_int32_t type; char *n, *s; + const char *nt_type; offset = elf_get_off(e, sh, SH_OFFSET); size = elf_get_size(e, sh, SH_SIZE); name = elf_get_word(e, sh, SH_NAME); n = (char *)e + offset; fprintf(out, "\nnote (%s):\n", shstrtab + name); - while (n < ((char *)e + offset + size)) { + while (n < ((char *)e + offset + size)) { namesz = elf_get_word(e, n, N_NAMESZ); descsz = elf_get_word(e, n, N_DESCSZ); - s = n + sizeof(Elf_Note); - desc = elf_get_word(e, n + sizeof(Elf_Note) + namesz, 0); - fprintf(out, "\t%s %d\n", s, desc); + type = elf_get_word(e, n, N_TYPE); + if (type < nitems(nt_types) && nt_types[type] != NULL) + nt_type = nt_types[type]; + else + nt_type = "Unknown type"; + s = n + sizeof(Elf_Note); + desc = elf_get_word(e, n + sizeof(Elf_Note) + namesz, 0); + fprintf(out, "\t%s %d (%s)\n", s, desc, nt_type); n += sizeof(Elf_Note) + namesz + descsz; } } diff --git a/usr.bin/shar/shar.1 b/usr.bin/shar/shar.1 index 432ae6fb86bc..c3153821250c 100644 --- a/usr.bin/shar/shar.1 +++ b/usr.bin/shar/shar.1 @@ -28,7 +28,7 @@ .\" @(#)shar.1 8.1 (Berkeley) 6/6/93 .\" $FreeBSD$ .\" -.Dd June 6, 1993 +.Dd January 31, 2019 .Dt SHAR 1 .Os .Sh NAME @@ -103,5 +103,5 @@ Archives produced using this implementation of .Nm may be easily examined with the command: .Bd -literal -offset indent -egrep -v '^[X#]' shar.file +egrep -av '^[X#]' shar.file .Ed diff --git a/usr.bin/xinstall/tests/install_test.sh b/usr.bin/xinstall/tests/install_test.sh index 4332f9b8268c..92044f34c20b 100755 --- a/usr.bin/xinstall/tests/install_test.sh +++ b/usr.bin/xinstall/tests/install_test.sh @@ -377,6 +377,29 @@ mkdir_simple_body() { atf_check install -d dir1/dir2/dir3 } +atf_test_case symbolic_link_relative_absolute_common +symbolic_link_relative_absolute_common_head() { + atf_set "descr" "Verify -l rs with absolute paths having common components" +} +symbolic_link_relative_absolute_common_body() { + 
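The elfdump change further above builds its note-type name table with designated array initializers, so the note value indexes the table directly and the macro stringifies the constant's name; a bounds check plus a NULL check then covers sparse or out-of-range values. A small self-contained illustration of that idiom with made-up constants.

#include <stdio.h>
#include <stddef.h>

#define	EXAMPLE_NOTE_ABI	1
#define	EXAMPLE_NOTE_ARCH	3	/* note the intentional gap at 2 */

#define	NAME_ELEM(x)	[x] = #x,
static const char *note_names[] = {
	"",				/* index 0 is unused */
	NAME_ELEM(EXAMPLE_NOTE_ABI)
	NAME_ELEM(EXAMPLE_NOTE_ARCH)
};

static const char *
note_name(unsigned type)
{
	/* Sparse entries are NULL; out-of-range values fall through too. */
	if (type < sizeof(note_names) / sizeof(note_names[0]) &&
	    note_names[type] != NULL)
		return (note_names[type]);
	return ("Unknown type");
}

int
main(void)
{
	printf("%d -> %s\n", 1, note_name(1));	/* EXAMPLE_NOTE_ABI */
	printf("%d -> %s\n", 2, note_name(2));	/* Unknown type (gap) */
	printf("%d -> %s\n", 9, note_name(9));	/* Unknown type (range) */
	return (0);
}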
filename=foo.so + src_path=lib + src_path_prefixed=$PWD/$src_path + dest_path=$PWD/libexec/ + src_file=$src_path_prefixed/$filename + dest_file=$dest_path/$filename + + atf_check mkdir $src_path_prefixed $dest_path + atf_check touch $src_file + atf_check install -l sr $src_file $dest_path + + dest_path_relative=$(readlink $dest_file) + src_path_relative="../lib/$filename" + if [ "$src_path_relative" != "$dest_path_relative" ]; then + atf_fail "unexpected symlink contents ('$src_path_relative' != '$dest_path_relative')" + fi +} + atf_init_test_cases() { atf_add_test_case copy_to_nonexistent atf_add_test_case copy_to_nonexistent_safe @@ -415,5 +438,6 @@ atf_init_test_cases() { atf_add_test_case symbolic_link_relative_absolute_source_and_dest1 atf_add_test_case symbolic_link_relative_absolute_source_and_dest1_double_slash atf_add_test_case symbolic_link_relative_absolute_source_and_dest2 + atf_add_test_case symbolic_link_relative_absolute_common atf_add_test_case mkdir_simple } diff --git a/usr.bin/xinstall/xinstall.c b/usr.bin/xinstall/xinstall.c index 880766b34623..d9aca00d8efc 100644 --- a/usr.bin/xinstall/xinstall.c +++ b/usr.bin/xinstall/xinstall.c @@ -673,7 +673,7 @@ makelink(const char *from_name, const char *to_name, } if (dolink & LN_RELATIVE) { - char *to_name_copy, *cp, *d, *s; + char *to_name_copy, *cp, *d, *ld, *ls, *s; if (*from_name != '/') { /* this is already a relative link */ @@ -709,8 +709,19 @@ makelink(const char *from_name, const char *to_name, free(to_name_copy); /* Trim common path components. */ - for (s = src, d = dst; *s == *d; s++, d++) + ls = ld = NULL; + for (s = src, d = dst; *s == *d; ls = s, ld = d, s++, d++) continue; + /* + * If we didn't end after a directory separator, then we've + * falsely matched the last component. For example, if one + * invoked install -lrs /lib/foo.so /libexec/ then the source + * would terminate just after the separator while the + * destination would terminate in the middle of 'libexec', + * leading to a full directory getting falsely eaten. + */ + if ((ls != NULL && *ls != '/') || (ld != NULL && *ld != '/')) + s--, d--; while (*s != '/') s--, d--; diff --git a/usr.sbin/bluetooth/sdpd/Makefile b/usr.sbin/bluetooth/sdpd/Makefile index c47441f02cd0..df443fca4f4e 100644 --- a/usr.sbin/bluetooth/sdpd/Makefile +++ b/usr.sbin/bluetooth/sdpd/Makefile @@ -3,7 +3,8 @@ PROG= sdpd MAN= sdpd.8 -SRCS= bgd.c dun.c ftrn.c gn.c irmc.c irmc_command.c lan.c log.c \ +SRCS= audio_sink.c audio_source.c \ + bgd.c dun.c ftrn.c gn.c irmc.c irmc_command.c lan.c log.c \ main.c nap.c opush.c panu.c profile.c provider.c sar.c scr.c \ sd.c server.c sp.c srr.c ssar.c ssr.c sur.c uuid.c diff --git a/usr.sbin/bluetooth/sdpd/audio_sink.c b/usr.sbin/bluetooth/sdpd/audio_sink.c new file mode 100644 index 000000000000..0ffdfa88146b --- /dev/null +++ b/usr.sbin/bluetooth/sdpd/audio_sink.c @@ -0,0 +1,188 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Hans Petter Selasky <hselasky@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
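The xinstall.c hunk above fixes the common-prefix trimming behind install -l rs: a match may only be trusted up to the last shared directory separator, otherwise /lib and /libexec would be treated as sharing a "lib" component. A small self-contained sketch of trimming at '/' boundaries only; it reproduces the ../lib/foo.so result the new test expects but makes no attempt to mirror install's full path handling.

#include <stdio.h>
#include <string.h>

/*
 * Advance both paths past their common leading components, stopping just
 * after the last '/' they share so partially matching names are not eaten.
 */
static void
trim_common(const char **srcp, const char **dstp)
{
	const char *s = *srcp, *d = *dstp;
	const char *slash_s = s, *slash_d = d;

	while (*s == *d && *s != '\0') {
		if (*s == '/') {
			slash_s = s + 1;
			slash_d = d + 1;
		}
		s++;
		d++;
	}
	*srcp = slash_s;
	*dstp = slash_d;
}

int
main(void)
{
	const char *src = "/lib/foo.so";	/* link target */
	const char *dst = "/libexec/foo.so";	/* where the link is created */
	char rel[256] = "";

	trim_common(&src, &dst);

	/* One "../" per directory left in the destination path. */
	for (const char *p = strchr(dst, '/'); p != NULL; p = strchr(p + 1, '/'))
		strlcat(rel, "../", sizeof(rel));
	strlcat(rel, src, sizeof(rel));

	printf("%s\n", rel);	/* ../lib/foo.so */
	return (0);
}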
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/queue.h> +#define L2CAP_SOCKET_CHECKED +#include <bluetooth.h> +#include <sdp.h> +#include <string.h> +#include "profile.h" +#include "provider.h" + +static int32_t +audio_sink_profile_create_service_class_id_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const uint16_t service_classes[] = { + SDP_SERVICE_CLASS_AUDIO_SINK, + }; + + return (common_profile_create_service_class_id_list( + buf, eob, + (uint8_t const *)service_classes, + sizeof(service_classes))); +} + +static int32_t +audio_sink_profile_create_protocol_descriptor_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + provider_p provider = (provider_p) data; + sdp_audio_sink_profile_p audio_sink = (sdp_audio_sink_profile_p) provider->data; + + if (buf + 18 > eob) + return (-1); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(16, buf); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(6, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_UUID_PROTOCOL_L2CAP, buf); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_sink->psm, buf); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(6, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_UUID_PROTOCOL_AVDTP, buf); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_sink->protover, buf); + + return (18); +} + +static int32_t +audio_sink_profile_create_browse_group_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + + if (buf + 5 > eob) + return (-1); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(3, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_SERVICE_CLASS_PUBLIC_BROWSE_GROUP, buf); + + return (5); +} + +static int32_t +audio_sink_profile_create_bluetooth_profile_descriptor_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const uint16_t profile_descriptor_list[] = { + SDP_SERVICE_CLASS_ADVANCED_AUDIO_DISTRIBUTION, + 0x0100 + }; + + return (common_profile_create_bluetooth_profile_descriptor_list( + buf, eob, + (uint8_t const *)profile_descriptor_list, + sizeof(profile_descriptor_list))); +} + +static int32_t +audio_sink_profile_create_service_name( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const char service_name[] = "Audio SNK"; + + return (common_profile_create_string8( + buf, eob, + (uint8_t const *)service_name, strlen(service_name))); +} + +static int32_t +audio_sink_create_supported_features( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + provider_p provider = (provider_p) data; + sdp_audio_sink_profile_p audio_sink = (sdp_audio_sink_profile_p) provider->data; + + if (buf + 3 > 
eob) + return (-1); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_sink->features, buf); + + return (3); +} + +static int32_t +audio_sink_profile_valid(uint8_t const *data, uint32_t datalen) +{ + + if (datalen < sizeof(struct sdp_audio_sink_profile)) + return (0); + return (1); +} + +static const attr_t audio_sink_profile_attrs[] = { + {SDP_ATTR_SERVICE_RECORD_HANDLE, + common_profile_create_service_record_handle}, + {SDP_ATTR_SERVICE_CLASS_ID_LIST, + audio_sink_profile_create_service_class_id_list}, + {SDP_ATTR_PROTOCOL_DESCRIPTOR_LIST, + audio_sink_profile_create_protocol_descriptor_list}, + {SDP_ATTR_BROWSE_GROUP_LIST, + audio_sink_profile_create_browse_group_list}, + {SDP_ATTR_LANGUAGE_BASE_ATTRIBUTE_ID_LIST, + common_profile_create_language_base_attribute_id_list}, + {SDP_ATTR_BLUETOOTH_PROFILE_DESCRIPTOR_LIST, + audio_sink_profile_create_bluetooth_profile_descriptor_list}, + {SDP_ATTR_PRIMARY_LANGUAGE_BASE_ID + SDP_ATTR_SERVICE_NAME_OFFSET, + audio_sink_profile_create_service_name}, + {SDP_ATTR_PRIMARY_LANGUAGE_BASE_ID + SDP_ATTR_PROVIDER_NAME_OFFSET, + common_profile_create_service_provider_name}, + {SDP_ATTR_SUPPORTED_FEATURES, + audio_sink_create_supported_features}, + {} /* end entry */ +}; + +profile_t audio_sink_profile_descriptor = { + SDP_SERVICE_CLASS_AUDIO_SINK, + sizeof(sdp_audio_sink_profile_t), + audio_sink_profile_valid, + (attr_t const *const)&audio_sink_profile_attrs +}; diff --git a/usr.sbin/bluetooth/sdpd/audio_source.c b/usr.sbin/bluetooth/sdpd/audio_source.c new file mode 100644 index 000000000000..1d58a323f8cb --- /dev/null +++ b/usr.sbin/bluetooth/sdpd/audio_source.c @@ -0,0 +1,188 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Hans Petter Selasky <hselasky@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include <sys/queue.h> +#define L2CAP_SOCKET_CHECKED +#include <bluetooth.h> +#include <sdp.h> +#include <string.h> +#include "profile.h" +#include "provider.h" + +static int32_t +audio_source_profile_create_service_class_id_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const uint16_t service_classes[] = { + SDP_SERVICE_CLASS_AUDIO_SOURCE, + }; + + return (common_profile_create_service_class_id_list( + buf, eob, + (uint8_t const *)service_classes, + sizeof(service_classes))); +} + +static int32_t +audio_source_profile_create_protocol_descriptor_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + provider_p provider = (provider_p) data; + sdp_audio_source_profile_p audio_source = (sdp_audio_source_profile_p) provider->data; + + if (buf + 18 > eob) + return (-1); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(16, buf); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(6, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_UUID_PROTOCOL_L2CAP, buf); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_source->psm, buf); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(6, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_UUID_PROTOCOL_AVDTP, buf); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_source->protover, buf); + + return (18); +} + +static int32_t +audio_source_profile_create_browse_group_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + + if (buf + 5 > eob) + return (-1); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(3, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_SERVICE_CLASS_PUBLIC_BROWSE_GROUP, buf); + + return (5); +} + +static int32_t +audio_source_profile_create_bluetooth_profile_descriptor_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const uint16_t profile_descriptor_list[] = { + SDP_SERVICE_CLASS_ADVANCED_AUDIO_DISTRIBUTION, + 0x0100 + }; + + return (common_profile_create_bluetooth_profile_descriptor_list( + buf, eob, + (uint8_t const *)profile_descriptor_list, + sizeof(profile_descriptor_list))); +} + +static int32_t +audio_source_profile_create_service_name( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const char service_name[] = "Audio SRC"; + + return (common_profile_create_string8( + buf, eob, + (uint8_t const *)service_name, strlen(service_name))); +} + +static int32_t +audio_source_create_supported_features( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + provider_p provider = (provider_p) data; + sdp_audio_source_profile_p audio_source = (sdp_audio_source_profile_p) provider->data; + + if (buf + 3 > eob) + return (-1); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_source->features, buf); + + return (3); +} + +static int32_t +audio_source_profile_valid(uint8_t const *data, uint32_t datalen) +{ + + if (datalen < sizeof(struct sdp_audio_source_profile)) + return (0); + return (1); +} + +static const attr_t audio_source_profile_attrs[] = { + {SDP_ATTR_SERVICE_RECORD_HANDLE, + common_profile_create_service_record_handle}, + {SDP_ATTR_SERVICE_CLASS_ID_LIST, + audio_source_profile_create_service_class_id_list}, + {SDP_ATTR_PROTOCOL_DESCRIPTOR_LIST, + audio_source_profile_create_protocol_descriptor_list}, + {SDP_ATTR_BROWSE_GROUP_LIST, + audio_source_profile_create_browse_group_list}, + {SDP_ATTR_LANGUAGE_BASE_ATTRIBUTE_ID_LIST, + 
common_profile_create_language_base_attribute_id_list}, + {SDP_ATTR_BLUETOOTH_PROFILE_DESCRIPTOR_LIST, + audio_source_profile_create_bluetooth_profile_descriptor_list}, + {SDP_ATTR_PRIMARY_LANGUAGE_BASE_ID + SDP_ATTR_SERVICE_NAME_OFFSET, + audio_source_profile_create_service_name}, + {SDP_ATTR_PRIMARY_LANGUAGE_BASE_ID + SDP_ATTR_PROVIDER_NAME_OFFSET, + common_profile_create_service_provider_name}, + {SDP_ATTR_SUPPORTED_FEATURES, + audio_source_create_supported_features}, + {} /* end entry */ +}; + +profile_t audio_source_profile_descriptor = { + SDP_SERVICE_CLASS_AUDIO_SOURCE, + sizeof(sdp_audio_source_profile_t), + audio_source_profile_valid, + (attr_t const *const)&audio_source_profile_attrs +}; diff --git a/usr.sbin/bluetooth/sdpd/profile.c b/usr.sbin/bluetooth/sdpd/profile.c index d2be21e1e34c..cef9a03ceea6 100644 --- a/usr.sbin/bluetooth/sdpd/profile.c +++ b/usr.sbin/bluetooth/sdpd/profile.c @@ -48,6 +48,8 @@ profile_p profile_get_descriptor(uint16_t uuid) { + extern profile_t audio_sink_profile_descriptor; + extern profile_t audio_source_profile_descriptor; extern profile_t dun_profile_descriptor; extern profile_t ftrn_profile_descriptor; extern profile_t irmc_profile_descriptor; @@ -60,6 +62,8 @@ profile_get_descriptor(uint16_t uuid) extern profile_t panu_profile_descriptor; static const profile_p profiles[] = { + &audio_sink_profile_descriptor, + &audio_source_profile_descriptor, &dun_profile_descriptor, &ftrn_profile_descriptor, &irmc_profile_descriptor, diff --git a/usr.sbin/freebsd-update/freebsd-update.sh b/usr.sbin/freebsd-update/freebsd-update.sh index 2ed1f43fa0b1..8349fccb4301 100644 --- a/usr.sbin/freebsd-update/freebsd-update.sh +++ b/usr.sbin/freebsd-update/freebsd-update.sh @@ -2943,6 +2943,17 @@ Kernel updates have been installed. Please reboot and run cap_mkdb ${BASEDIR}/etc/login.conf fi + # Rebuild man page databases, if necessary. + for D in /usr/share/man /usr/share/openssl/man; do + if [ ! -d ${BASEDIR}/$D ]; then + continue + fi + if [ -z "$(find ${BASEDIR}/$D -type f -newer ${BASEDIR}/$D/mandoc.db)" ]; then + continue; + fi + makewhatis ${BASEDIR}/$D + done + # We've finished installing the world and deleting old files # which are not shared libraries. 
touch $1/worlddone diff --git a/usr.sbin/ndiscvt/inf.c b/usr.sbin/ndiscvt/inf.c index eb1015083dbb..442e7f61b1fd 100644 --- a/usr.sbin/ndiscvt/inf.c +++ b/usr.sbin/ndiscvt/inf.c @@ -62,9 +62,9 @@ static struct assign (struct assign *); static struct section *find_section (const char *); -static void dump_deviceids_pci (void); -static void dump_deviceids_pcmcia (void); -static void dump_deviceids_usb (void); +static int dump_deviceids_pci (void); +static int dump_deviceids_pcmcia (void); +static int dump_deviceids_usb (void); static void dump_pci_id (const char *); static void dump_pcmcia_id (const char *); static void dump_usb_id (const char *); @@ -85,9 +85,11 @@ inf_parse (FILE *fp, FILE *outfp) yyin = fp; yyparse(); - dump_deviceids_pci(); - dump_deviceids_pcmcia(); - dump_deviceids_usb(); + if (dump_deviceids_pci() == 0 && + dump_deviceids_pcmcia() == 0 && + dump_deviceids_usb() == 0) + return (-1); + fprintf(outfp, "#ifdef NDIS_REGVALS\n"); dump_regvals(); fprintf(outfp, "#endif /* NDIS_REGVALS */\n"); @@ -280,7 +282,7 @@ dump_usb_id(const char *s) fprintf(ofp, "\t\\\n\t{ %s, %s, ", vidstr, pidstr); } -static void +static int dump_deviceids_pci() { struct assign *manf, *dev; @@ -370,10 +372,10 @@ done: fprintf(ofp, "\n\n"); - return; + return (found); } -static void +static int dump_deviceids_pcmcia() { struct assign *manf, *dev; @@ -463,10 +465,10 @@ done: fprintf(ofp, "\n\n"); - return; + return (found); } -static void +static int dump_deviceids_usb() { struct assign *manf, *dev; @@ -556,7 +558,7 @@ done: fprintf(ofp, "\n\n"); - return; + return (found); } static void diff --git a/usr.sbin/ndiscvt/ndiscvt.c b/usr.sbin/ndiscvt/ndiscvt.c index 276a6ef7bfdd..1b7660700786 100644 --- a/usr.sbin/ndiscvt/ndiscvt.c +++ b/usr.sbin/ndiscvt/ndiscvt.c @@ -372,7 +372,8 @@ main(int argc, char *argv[]) err(1, "opening .INF file '%s' failed", inffile); - inf_parse(fp, outfp); + if (inf_parse(fp, outfp) != 0) + errx(1, "creating .INF file - no entries created, are you using the correct files?"); fclose(fp); } diff --git a/usr.sbin/rpc.statd/file.c b/usr.sbin/rpc.statd/file.c index c2207c73aebf..ed5d00cc808c 100644 --- a/usr.sbin/rpc.statd/file.c +++ b/usr.sbin/rpc.statd/file.c @@ -248,9 +248,12 @@ void init_file(const char *filename) /* Purpose: Perform SM_NOTIFY procedure at specified host Returns: TRUE if success, FALSE if failed. + Notes: Only report failure if verbose is non-zero. Caller will + only set verbose to non-zero for the first attempt to + contact the host. */ -static int notify_one_host(char *hostname) +static int notify_one_host(char *hostname, int verbose) { struct timeval timeout = { 20, 0 }; /* 20 secs timeout */ CLIENT *cli; @@ -277,7 +280,8 @@ static int notify_one_host(char *hostname) (xdrproc_t)xdr_void, &dummy, timeout) != RPC_SUCCESS) { - syslog(LOG_ERR, "Failed to contact rpc.statd at host %s", hostname); + if (verbose) + syslog(LOG_ERR, "Failed to contact rpc.statd at host %s", hostname); clnt_destroy(cli); return (FALSE); } @@ -346,7 +350,7 @@ void notify_hosts(void) { if (hp->notifyReqd) { - if (notify_one_host(hp->hostname)) + if (notify_one_host(hp->hostname, attempts == 0)) { hp->notifyReqd = FALSE; sync_file(); |
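
Illustrative sketch, not part of the changeset above: the rpc.statd hunk adds a verbose argument to notify_one_host() so that only the first SM_NOTIFY attempt per host reports a failure to syslog, keeping subsequent retries quiet. The standalone C program below mirrors that pattern under stated assumptions; the names try_notify, max_attempts and the example host are invented for this sketch and do not exist in rpc.statd.

#include <stdbool.h>
#include <syslog.h>

/*
 * Hypothetical stand-in for notify_one_host(): it pretends the RPC call
 * failed and, like the patched function, logs only when 'verbose' is
 * non-zero.
 */
static bool
try_notify(const char *host, int verbose)
{
	bool ok = false;	/* simulate an unreachable host */

	if (!ok && verbose)
		syslog(LOG_ERR, "Failed to contact rpc.statd at host %s",
		    host);
	return (ok);
}

int
main(void)
{
	const int max_attempts = 3;	/* invented retry budget */
	int attempt;

	openlog("notify-sketch", LOG_PID, LOG_DAEMON);
	for (attempt = 0; attempt < max_attempts; attempt++) {
		/* Be verbose only on the first attempt, as in the patch. */
		if (try_notify("client.example.org", attempt == 0))
			break;
	}
	closelog();
	return (0);
}

The design point is that a host which stays unreachable across many retries produces a single log line rather than one per attempt.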