Diffstat (limited to 'sys')
114 files changed, 3172 insertions, 697 deletions
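Editor's note: much of the Linuxulator churn in this diff wires the Linux inotify syscalls (inotify_init1, inotify_add_watch, inotify_rm_watch) up to FreeBSD's new native inotify implementation; the comment and _Static_asserts added in sys/compat/linux/linux_file.c note that both sides share libinotify's event values, so no event translation is needed on the read(2) path. As a rough illustration only (not part of the patch), a minimal Linux-style consumer of the kind that shim is meant to support could look like the sketch below; the watched path, event mask, and buffer handling are arbitrary example choices, not anything mandated by the change.

/*
 * Illustrative sketch only, not part of the patch: a Linux-style inotify
 * consumer.  Under the Linuxulator, inotify_init1() reaches
 * linux_inotify_init1(), inotify_add_watch() reaches
 * linux_inotify_add_watch() (which calls kern_inotify_add_watch() with
 * AT_FDCWD), and inotify_rm_watch() reaches linux_inotify_rm_watch().
 */
#include <sys/inotify.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char buf[sizeof(struct inotify_event) + NAME_MAX + 1]
	    __attribute__((aligned(__alignof__(struct inotify_event))));
	const struct inotify_event *ev;
	ssize_t n;
	int fd, wd;

	fd = inotify_init1(IN_CLOEXEC);
	if (fd == -1)
		return (1);
	/* "/tmp" and the mask are arbitrary examples. */
	wd = inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE);
	if (wd == -1)
		return (1);
	n = read(fd, buf, sizeof(buf));	/* events use the shared ABI values */
	if (n > 0) {
		ev = (const struct inotify_event *)buf;
		printf("wd %d mask %#x name %s\n", ev->wd, ev->mask,
		    ev->len > 0 ? ev->name : "");
	}
	(void)inotify_rm_watch(fd, wd);
	close(fd);
	return (0);
}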
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 2ab8c3b17e22..0044f27729f6 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1302,7 +1302,7 @@ static bool pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static bool pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); static bool pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, - vm_offset_t va); + vm_offset_t va, vm_page_t m); static int pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp); static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, @@ -1334,7 +1334,7 @@ static pdp_entry_t *pmap_pti_pdpe(vm_offset_t va); static pd_entry_t *pmap_pti_pde(vm_offset_t va); static void pmap_pti_wire_pte(void *pte); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, - struct spglist *free, struct rwlock **lockp); + bool remove_pt, struct spglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); @@ -5999,7 +5999,7 @@ pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, SLIST_INIT(&free); sva = trunc_2mpage(va); - pmap_remove_pde(pmap, pde, sva, &free, lockp); + pmap_remove_pde(pmap, pde, sva, true, &free, lockp); if ((oldpde & pmap_global_bit(pmap)) == 0) pmap_invalidate_pde_page(pmap, sva, oldpde); vm_page_free_pages_toq(&free, true); @@ -6153,7 +6153,8 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * pmap_remove_kernel_pde: Remove a kernel superpage mapping. */ static void -pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) +pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, + bool remove_pt) { pd_entry_t newpde; vm_paddr_t mptepa; @@ -6161,7 +6162,10 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mpte = pmap_remove_pt_page(pmap, va); + if (remove_pt) + mpte = pmap_remove_pt_page(pmap, va); + else + mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va)); if (mpte == NULL) panic("pmap_remove_kernel_pde: Missing pt page."); @@ -6193,7 +6197,7 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) * pmap_remove_pde: do the things to unmap a superpage in a process */ static int -pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, +pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; @@ -6234,7 +6238,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, } } if (pmap == kernel_pmap) { - pmap_remove_kernel_pde(pmap, pdq, sva); + pmap_remove_kernel_pde(pmap, pdq, sva, remove_pt); } else { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { @@ -6476,7 +6480,8 @@ pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete) */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; - pmap_remove_pde(pmap, pde, sva, &free, &lock); + pmap_remove_pde(pmap, pde, sva, true, &free, + &lock); continue; } else if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) { @@ -7552,13 +7557,36 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, /* * The reference to the PD page that was acquired by * pmap_alloc_pde() ensures that it won't be 
freed. - * However, if the PDE resulted from a promotion, then + * However, if the PDE resulted from a promotion, and + * the mapping is not from kernel_pmap, then * a reserved PT page could be freed. */ - (void)pmap_remove_pde(pmap, pde, va, &free, lockp); + (void)pmap_remove_pde(pmap, pde, va, + pmap != kernel_pmap, &free, lockp); if ((oldpde & PG_G) == 0) pmap_invalidate_pde_page(pmap, va, oldpde); } else { + if (va >= VM_MAXUSER_ADDRESS) { + /* + * Try to save the ptp in the trie + * before any changes to mappings are + * made. Abort on failure. + */ + mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); + if (pmap_insert_pt_page(pmap, mt, false, false)) { + if (pdpg != NULL) + pdpg->ref_count--; + CTR1(KTR_PMAP, + "pmap_enter_pde: cannot ins kern ptp va %#lx", + va); + return (KERN_RESOURCE_SHORTAGE); + } + /* + * Both pmap_remove_pde() and + * pmap_remove_ptes() will zero-fill + * the kernel page table page. + */ + } pmap_delayed_invl_start(); if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free, lockp)) @@ -7572,14 +7600,6 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, } else { KASSERT(SLIST_EMPTY(&free), ("pmap_enter_pde: freed kernel page table page")); - - /* - * Both pmap_remove_pde() and pmap_remove_ptes() will - * leave the kernel page table page zero filled. - */ - mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - if (pmap_insert_pt_page(pmap, mt, false, false)) - panic("pmap_enter_pde: trie insert failed"); } } @@ -9547,7 +9567,7 @@ pmap_unmapdev(void *p, vm_size_t size) * Tries to demote a 1GB page mapping. */ static bool -pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) +pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va, vm_page_t m) { pdp_entry_t newpdpe, oldpdpe; pd_entry_t *firstpde, newpde, *pde; @@ -9564,12 +9584,19 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) oldpdpe = *pdpe; KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V")); - pdpg = pmap_alloc_pt_page(pmap, va >> PDPSHIFT, - VM_ALLOC_WIRED | VM_ALLOC_INTERRUPT); - if (pdpg == NULL) { - CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx" - " in pmap %p", va, pmap); - return (false); + if (m == NULL) { + pdpg = pmap_alloc_pt_page(pmap, va >> PDPSHIFT, + VM_ALLOC_WIRED); + if (pdpg == NULL) { + CTR2(KTR_PMAP, + "pmap_demote_pdpe: failure for va %#lx in pmap %p", + va, pmap); + return (false); + } + } else { + pdpg = m; + pdpg->pindex = va >> PDPSHIFT; + pmap_pt_page_count_adj(pmap, 1); } pdpgpa = VM_PAGE_TO_PHYS(pdpg); firstpde = (pd_entry_t *)PHYS_TO_DMAP(pdpgpa); @@ -9779,7 +9806,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, tmpva += NBPDP; continue; } - if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva)) + if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva, NULL)) return (ENOMEM); } pde = pmap_pdpe_to_pde(pdpe, tmpva); @@ -9937,17 +9964,20 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, } /* - * Demotes any mapping within the direct map region that covers more than the - * specified range of physical addresses. This range's size must be a power - * of two and its starting address must be a multiple of its size. Since the - * demotion does not change any attributes of the mapping, a TLB invalidation - * is not mandatory. The caller may, however, request a TLB invalidation. + * Demotes any mapping within the direct map region that covers more + * than the specified range of physical addresses. 
This range's size + * must be a power of two and its starting address must be a multiple + * of its size, which means that any pdp from the mapping is fully + * covered by the range if len > NBPDP. Since the demotion does not + * change any attributes of the mapping, a TLB invalidation is not + * mandatory. The caller may, however, request a TLB invalidation. */ void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) { pdp_entry_t *pdpe; pd_entry_t *pde; + vm_page_t m; vm_offset_t va; bool changed; @@ -9956,17 +9986,28 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) KASSERT(powerof2(len), ("pmap_demote_DMAP: len is not a power of 2")); KASSERT((base & (len - 1)) == 0, ("pmap_demote_DMAP: base is not a multiple of len")); + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "pmap_demote_DMAP"); + if (len < NBPDP && base < dmaplimit) { va = PHYS_TO_DMAP(base); changed = false; + + /* + * Assume that it is fine to sleep there. + * The only existing caller of pmap_demote_DMAP() is the + * x86_mr_split_dmap() function. + */ + m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_WAITOK); + PMAP_LOCK(kernel_pmap); pdpe = pmap_pdpe(kernel_pmap, va); if ((*pdpe & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDPE"); if ((*pdpe & PG_PS) != 0) { - if (!pmap_demote_pdpe(kernel_pmap, pdpe, va)) + if (!pmap_demote_pdpe(kernel_pmap, pdpe, va, m)) panic("pmap_demote_DMAP: PDPE failed"); changed = true; + m = NULL; } if (len < NBPDR) { pde = pmap_pdpe_to_pde(pdpe, va); @@ -9981,6 +10022,10 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) if (changed && invalidate) pmap_invalidate_page(kernel_pmap, va); PMAP_UNLOCK(kernel_pmap); + if (m != NULL) { + vm_page_unwire_noq(m); + vm_page_free(m); + } } } diff --git a/sys/amd64/linux/linux_proto.h b/sys/amd64/linux/linux_proto.h index 15e1dfc1a444..f1d9c96a78d7 100644 --- a/sys/amd64/linux/linux_proto.h +++ b/sys/amd64/linux/linux_proto.h @@ -914,10 +914,13 @@ struct linux_inotify_init_args { syscallarg_t dummy; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; diff --git a/sys/amd64/linux/linux_sysent.c b/sys/amd64/linux/linux_sysent.c index 8413d2723551..62b50cf68a32 100644 --- a/sys/amd64/linux/linux_sysent.c +++ b/sys/amd64/linux/linux_sysent.c @@ -268,8 +268,8 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 251 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 252 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 253 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */ - { .sy_narg = 
0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 256 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 257 = linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 258 = linux_mkdirat */ diff --git a/sys/amd64/linux/linux_systrace_args.c b/sys/amd64/linux/linux_systrace_args.c index 20322f7a8660..1dc4de019080 100644 --- a/sys/amd64/linux/linux_systrace_args.c +++ b/sys/amd64/linux/linux_systrace_args.c @@ -1918,12 +1918,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ case 254: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 255: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -5860,9 +5867,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 254: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 255: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 256: @@ -8353,8 +8383,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 253: /* linux_inotify_add_watch */ case 254: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 255: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 256: /* linux_openat */ diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master index fd08c9b0279d..5e1394751ef6 100644 --- a/sys/amd64/linux/syscalls.master +++ b/sys/amd64/linux/syscalls.master @@ -1476,10 +1476,17 @@ int linux_inotify_init(void); } 254 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 255 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } 256 AUE_NULL STD { int linux_migrate_pages(void); diff --git a/sys/amd64/linux32/linux32_proto.h b/sys/amd64/linux32/linux32_proto.h index ab0edd99df42..57a303271f1c 100644 --- a/sys/amd64/linux32/linux32_proto.h +++ b/sys/amd64/linux32/linux32_proto.h @@ -983,10 +983,13 @@ struct linux_inotify_init_args { syscallarg_t 
dummy; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; @@ -1184,7 +1187,7 @@ struct linux_pipe2_args { char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_init1_args { - syscallarg_t dummy; + char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_preadv_args { char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)]; diff --git a/sys/amd64/linux32/linux32_sysent.c b/sys/amd64/linux32/linux32_sysent.c index add9844254ce..1bc8841badf3 100644 --- a/sys/amd64/linux32/linux32_sysent.c +++ b/sys/amd64/linux32/linux32_sysent.c @@ -307,8 +307,8 @@ struct sysent linux32_sysent[] = { { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 289 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 294 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 295 = linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 296 = linux_mkdirat */ @@ -347,7 +347,7 @@ struct sysent linux32_sysent[] = { { .sy_narg = AS(linux_epoll_create1_args), .sy_call = (sy_call_t *)linux_epoll_create1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 329 = linux_epoll_create1 */ { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 330 = linux_dup3 */ { .sy_narg = AS(linux_pipe2_args), .sy_call = (sy_call_t *)linux_pipe2, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pipe2 */ - { .sy_narg 
= 0, .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ + { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ { .sy_narg = AS(linux_preadv_args), .sy_call = (sy_call_t *)linux_preadv, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_preadv */ { .sy_narg = AS(linux_pwritev_args), .sy_call = (sy_call_t *)linux_pwritev, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_pwritev */ { .sy_narg = AS(linux_rt_tgsigqueueinfo_args), .sy_call = (sy_call_t *)linux_rt_tgsigqueueinfo, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 335 = linux_rt_tgsigqueueinfo */ diff --git a/sys/amd64/linux32/linux32_systrace_args.c b/sys/amd64/linux32/linux32_systrace_args.c index 7793124e6935..cbd1641c2a34 100644 --- a/sys/amd64/linux32/linux32_systrace_args.c +++ b/sys/amd64/linux32/linux32_systrace_args.c @@ -2036,12 +2036,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ case 292: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 293: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -2379,7 +2386,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_init1 */ case 332: { - *n_args = 0; + struct linux_inotify_init1_args *p = params; + iarg[a++] = p->flags; /* l_int */ + *n_args = 1; break; } /* linux_preadv */ @@ -6536,9 +6545,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 292: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 293: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 294: @@ -7116,6 +7148,13 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + switch (ndx) { + case 0: + p = "l_int"; + break; + default: + break; + }; break; /* linux_preadv */ case 333: @@ -9809,8 +9848,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 291: /* linux_inotify_add_watch */ case 292: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 293: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 294: /* linux_openat */ @@ -9982,6 +10027,9 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_preadv */ case 333: if (ndx == 0 || ndx == 1) diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master index 92d5f09c423f..7bd522a598e8 100644 --- a/sys/amd64/linux32/syscalls.master +++ b/sys/amd64/linux32/syscalls.master @@ -1589,10 +1589,17 @@ int 
linux_inotify_init(void); } 292 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 293 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } ; Linux 2.6.16: 294 AUE_NULL STD { @@ -1860,7 +1867,9 @@ ); } 332 AUE_NULL STD { - int linux_inotify_init1(void); + int linux_inotify_init1( + l_int flags + ); } ; Linux 2.6.30: 333 AUE_NULL STD { diff --git a/sys/arm/allwinner/aw_gpio.c b/sys/arm/allwinner/aw_gpio.c index 18b47bab12d9..2061e38a155f 100644 --- a/sys/arm/allwinner/aw_gpio.c +++ b/sys/arm/allwinner/aw_gpio.c @@ -1154,10 +1154,6 @@ aw_gpio_attach(device_t dev) aw_gpio_register_isrcs(sc); intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev))); - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) - goto fail; - /* * Register as a pinctrl device */ @@ -1166,6 +1162,10 @@ aw_gpio_attach(device_t dev) fdt_pinctrl_register(dev, "allwinner,pins"); fdt_pinctrl_configure_tree(dev); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) + goto fail; + config_intrhook_oneshot(aw_gpio_enable_bank_supply, sc); return (0); diff --git a/sys/arm/allwinner/aw_rtc.c b/sys/arm/allwinner/aw_rtc.c index 9938601f17ce..4af57ab879e8 100644 --- a/sys/arm/allwinner/aw_rtc.c +++ b/sys/arm/allwinner/aw_rtc.c @@ -134,6 +134,7 @@ static struct ofw_compat_data compat_data[] = { { "allwinner,sun7i-a20-rtc", (uintptr_t) &a20_conf }, { "allwinner,sun6i-a31-rtc", (uintptr_t) &a31_conf }, { "allwinner,sun8i-h3-rtc", (uintptr_t) &h3_conf }, + { "allwinner,sun20i-d1-rtc", (uintptr_t) &h3_conf }, { "allwinner,sun50i-h5-rtc", (uintptr_t) &h3_conf }, { "allwinner,sun50i-h6-rtc", (uintptr_t) &h3_conf }, { NULL, 0 } @@ -147,11 +148,13 @@ struct aw_rtc_softc { static struct clk_fixed_def aw_rtc_osc32k = { .clkdef.id = 0, + .clkdef.name = "osc32k", .freq = 32768, }; static struct clk_fixed_def aw_rtc_iosc = { .clkdef.id = 2, + .clkdef.name = "iosc", }; static void aw_rtc_install_clocks(struct aw_rtc_softc *sc, device_t dev); @@ -250,23 +253,33 @@ aw_rtc_install_clocks(struct aw_rtc_softc *sc, device_t dev) { int nclocks; node = ofw_bus_get_node(dev); - nclocks = ofw_bus_string_list_to_array(node, "clock-output-names", &clknames); - /* No clocks to export */ - if (nclocks <= 0) - return; - if (nclocks != 3) { - device_printf(dev, "Having only %d clocks instead of 3, aborting\n", nclocks); + /* Nothing to do. */ + if (!OF_hasprop(node, "clocks")) return; + + /* + * If the device tree gives us specific output names for the clocks, + * use them. 
+ */ + nclocks = ofw_bus_string_list_to_array(node, "clock-output-names", &clknames); + if (nclocks > 0) { + if (nclocks != 3) { + device_printf(dev, + "Found %d clocks names instead of 3, aborting\n", + nclocks); + return; + } + + aw_rtc_osc32k.clkdef.name = clknames[0]; + aw_rtc_iosc.clkdef.name = clknames[2]; } clkdom = clkdom_create(dev); - aw_rtc_osc32k.clkdef.name = clknames[0]; if (clknode_fixed_register(clkdom, &aw_rtc_osc32k) != 0) device_printf(dev, "Cannot register osc32k clock\n"); - aw_rtc_iosc.clkdef.name = clknames[2]; aw_rtc_iosc.freq = sc->conf->iosc_freq; if (clknode_fixed_register(clkdom, &aw_rtc_iosc) != 0) device_printf(dev, "Cannot register iosc clock\n"); diff --git a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c index e4fc57b79ba5..48d1d2af5abc 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c @@ -837,12 +837,12 @@ bcm_gpio_attach(device_t dev) } sc->sc_gpio_npins = i; bcm_gpio_sysctl_init(sc); - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) - goto fail; fdt_pinctrl_register(dev, "brcm,pins"); fdt_pinctrl_configure_tree(dev); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) + goto fail; return (0); diff --git a/sys/arm/mv/mvebu_gpio.c b/sys/arm/mv/mvebu_gpio.c index 681cf20f7f9f..7acdfff539dc 100644 --- a/sys/arm/mv/mvebu_gpio.c +++ b/sys/arm/mv/mvebu_gpio.c @@ -810,7 +810,6 @@ mvebu_gpio_attach(device_t dev) return (ENXIO); } - bus_attach_children(dev); return (0); } diff --git a/sys/arm/nvidia/as3722_gpio.c b/sys/arm/nvidia/as3722_gpio.c index 073d057884c9..f7b3d4d43bab 100644 --- a/sys/arm/nvidia/as3722_gpio.c +++ b/sys/arm/nvidia/as3722_gpio.c @@ -544,7 +544,7 @@ as3722_gpio_attach(struct as3722_softc *sc, phandle_t node) sc->gpio_pins = malloc(sizeof(struct as3722_gpio_pin *) * sc->gpio_npins, M_AS3722_GPIO, M_WAITOK | M_ZERO); - sc->gpio_busdev = gpiobus_attach_bus(sc->dev); + sc->gpio_busdev = gpiobus_add_bus(sc->dev); if (sc->gpio_busdev == NULL) return (ENXIO); for (i = 0; i < sc->gpio_npins; i++) { diff --git a/sys/arm/nvidia/tegra_gpio.c b/sys/arm/nvidia/tegra_gpio.c index 16e1ef94d6a9..e37fd69a121e 100644 --- a/sys/arm/nvidia/tegra_gpio.c +++ b/sys/arm/nvidia/tegra_gpio.c @@ -824,7 +824,6 @@ tegra_gpio_attach(device_t dev) return (ENXIO); } - bus_attach_children(dev); return (0); } diff --git a/sys/arm64/apple/apple_pinctrl.c b/sys/arm64/apple/apple_pinctrl.c index ec2dd5907024..ebaaccea1d99 100644 --- a/sys/arm64/apple/apple_pinctrl.c +++ b/sys/arm64/apple/apple_pinctrl.c @@ -161,22 +161,22 @@ apple_pinctrl_attach(device_t dev) goto error; } + fdt_pinctrl_register(dev, "pinmux"); + fdt_pinctrl_configure_tree(dev); + + if (OF_hasprop(node, "interrupt-controller")) { + sc->sc_irqs = mallocarray(sc->sc_ngpios, + sizeof(*sc->sc_irqs), M_DEVBUF, M_ZERO | M_WAITOK); + intr_pic_register(dev, + OF_xref_from_node(ofw_bus_get_node(dev))); + } + sc->sc_busdev = gpiobus_attach_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "failed to attach gpiobus\n"); goto error; } - fdt_pinctrl_register(dev, "pinmux"); - fdt_pinctrl_configure_tree(dev); - - if (!OF_hasprop(node, "interrupt-controller")) - return (0); - - sc->sc_irqs = mallocarray(sc->sc_ngpios, - sizeof(*sc->sc_irqs), M_DEVBUF, M_ZERO | M_WAITOK); - intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev))); - return (0); error: mtx_destroy(&sc->sc_mtx); diff --git a/sys/arm64/linux/linux_proto.h b/sys/arm64/linux/linux_proto.h index ae3d8569df58..82f57f77ffae 
100644 --- a/sys/arm64/linux/linux_proto.h +++ b/sys/arm64/linux/linux_proto.h @@ -141,10 +141,13 @@ struct linux_inotify_init1_args { char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_ioctl_args { char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)]; diff --git a/sys/arm64/linux/linux_sysent.c b/sys/arm64/linux/linux_sysent.c index 722ada465730..e54a76cfd55e 100644 --- a/sys/arm64/linux/linux_sysent.c +++ b/sys/arm64/linux/linux_sysent.c @@ -41,8 +41,8 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 24 = linux_dup3 */ { .sy_narg = AS(linux_fcntl_args), .sy_call = (sy_call_t *)linux_fcntl, .sy_auevent = AUE_FCNTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 25 = linux_fcntl */ { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 26 = linux_inotify_init1 */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 27 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 28 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 27 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 28 = linux_inotify_rm_watch */ { .sy_narg = AS(linux_ioctl_args), .sy_call = (sy_call_t *)linux_ioctl, .sy_auevent = AUE_IOCTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 29 = linux_ioctl */ { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 30 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 31 = linux_ioprio_get */ diff --git a/sys/arm64/linux/linux_systrace_args.c b/sys/arm64/linux/linux_systrace_args.c index 54e4dd82355d..1b946a9406a5 100644 --- a/sys/arm64/linux/linux_systrace_args.c +++ b/sys/arm64/linux/linux_systrace_args.c @@ -210,12 +210,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ case 27: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 28: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + 
uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_ioctl */ @@ -2780,9 +2787,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 27: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 28: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_ioctl */ case 29: @@ -6455,8 +6485,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 27: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 28: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_ioctl */ case 29: if (ndx == 0 || ndx == 1) diff --git a/sys/arm64/linux/syscalls.master b/sys/arm64/linux/syscalls.master index 79c04c398e00..2babdcaf03bf 100644 --- a/sys/arm64/linux/syscalls.master +++ b/sys/arm64/linux/syscalls.master @@ -170,10 +170,17 @@ ); } 27 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 28 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } 29 AUE_IOCTL STD { int linux_ioctl( diff --git a/sys/arm64/nvidia/tegra210/max77620_gpio.c b/sys/arm64/nvidia/tegra210/max77620_gpio.c index 8dcf98099dac..5d91e23324c7 100644 --- a/sys/arm64/nvidia/tegra210/max77620_gpio.c +++ b/sys/arm64/nvidia/tegra210/max77620_gpio.c @@ -672,7 +672,7 @@ max77620_gpio_attach(struct max77620_softc *sc, phandle_t node) sx_init(&sc->gpio_lock, "MAX77620 GPIO lock"); - sc->gpio_busdev = gpiobus_attach_bus(sc->dev); + sc->gpio_busdev = gpiobus_add_bus(sc->dev); if (sc->gpio_busdev == NULL) return (ENXIO); diff --git a/sys/arm64/rockchip/rk_gpio.c b/sys/arm64/rockchip/rk_gpio.c index a86392f16624..847bc7394dd0 100644 --- a/sys/arm64/rockchip/rk_gpio.c +++ b/sys/arm64/rockchip/rk_gpio.c @@ -362,12 +362,6 @@ rk_gpio_attach(device_t dev) return (ENXIO); } - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) { - rk_gpio_detach(dev); - return (ENXIO); - } - /* Set the cached value to unknown */ for (i = 0; i < RK_GPIO_MAX_PINS; i++) sc->pin_cached[i].is_gpio = 2; @@ -377,6 +371,12 @@ rk_gpio_attach(device_t dev) sc->swporta_ddr = rk_gpio_read_4(sc, RK_GPIO_SWPORTA_DDR); RK_GPIO_UNLOCK(sc); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) { + rk_gpio_detach(dev); + return (ENXIO); + } + return (0); } diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h index 0f110d5f9ddd..9381396f247c 100644 --- a/sys/bsm/audit_kevents.h +++ b/sys/bsm/audit_kevents.h @@ -663,6 +663,7 @@ #define AUE_FSPACECTL 43269 /* FreeBSD-specific. */ #define AUE_TIMERFD 43270 /* FreeBSD/Linux. */ #define AUE_SETCRED 43271 /* FreeBSD-specific. */ +#define AUE_INOTIFY 43272 /* FreeBSD/Linux. 
*/ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the diff --git a/sys/cam/mmc/mmc_da.c b/sys/cam/mmc/mmc_da.c index 1c455e1951d7..9246f95a080e 100644 --- a/sys/cam/mmc/mmc_da.c +++ b/sys/cam/mmc/mmc_da.c @@ -1198,27 +1198,6 @@ sdda_get_host_caps(struct cam_periph *periph, union ccb *ccb) return (cts->host_caps); } -static uint32_t -sdda_get_max_data(struct cam_periph *periph, union ccb *ccb) -{ - struct ccb_trans_settings_mmc *cts; - - cts = &ccb->cts.proto_specific.mmc; - memset(cts, 0, sizeof(struct ccb_trans_settings_mmc)); - - ccb->ccb_h.func_code = XPT_GET_TRAN_SETTINGS; - ccb->ccb_h.flags = CAM_DIR_NONE; - ccb->ccb_h.retry_count = 0; - ccb->ccb_h.timeout = 100; - ccb->ccb_h.cbfcnp = NULL; - xpt_action(ccb); - - if (ccb->ccb_h.status != CAM_REQ_CMP) - panic("Cannot get host max data"); - KASSERT(cts->host_max_data != 0, ("host_max_data == 0?!")); - return (cts->host_max_data); -} - static void sdda_start_init(void *context, union ccb *start_ccb) { diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h index eaa086188b5f..8d2748098c00 100644 --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -511,4 +511,6 @@ #define FREEBSD32_SYS_fchroot 590 #define FREEBSD32_SYS_freebsd32_setcred 591 #define FREEBSD32_SYS_exterrctl 592 -#define FREEBSD32_SYS_MAXSYSCALL 593 +#define FREEBSD32_SYS_inotify_add_watch_at 593 +#define FREEBSD32_SYS_inotify_rm_watch 594 +#define FREEBSD32_SYS_MAXSYSCALL 595 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c index 989f32a5c6f0..bda373268cc5 100644 --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -598,4 +598,6 @@ const char *freebsd32_syscallnames[] = { "fchroot", /* 590 = fchroot */ "freebsd32_setcred", /* 591 = freebsd32_setcred */ "exterrctl", /* 592 = exterrctl */ + "inotify_add_watch_at", /* 593 = inotify_add_watch_at */ + "inotify_rm_watch", /* 594 = inotify_rm_watch */ }; diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index 476fe2ac3f80..3718a1b0c8ee 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -660,4 +660,6 @@ struct sysent freebsd32_sysent[] = { { .sy_narg = AS(fchroot_args), .sy_call = (sy_call_t *)sys_fchroot, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 590 = fchroot */ { .sy_narg = AS(freebsd32_setcred_args), .sy_call = (sy_call_t *)freebsd32_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 591 = freebsd32_setcred */ { .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ + { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */ + { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ }; diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c index cf08938cd5de..37564a737a62 100644 --- a/sys/compat/freebsd32/freebsd32_systrace_args.c +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3395,6 +3395,24 @@ 
systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 3; break; } + /* inotify_add_watch_at */ + case 593: { + struct inotify_add_watch_at_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->dfd; /* int */ + uarg[a++] = (intptr_t)p->path; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 4; + break; + } + /* inotify_rm_watch */ + case 594: { + struct inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->wd; /* int */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -9172,6 +9190,38 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* inotify_add_watch_at */ + case 593: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "userland const char *"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* inotify_rm_watch */ + case 594: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11070,6 +11120,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* inotify_add_watch_at */ + case 593: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* inotify_rm_watch */ + case 594: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/compat/linux/linux_dummy.c b/sys/compat/linux/linux_dummy.c index 35d6debe0da9..19cd55849f65 100644 --- a/sys/compat/linux/linux_dummy.c +++ b/sys/compat/linux/linux_dummy.c @@ -74,9 +74,6 @@ DUMMY(kexec_load); DUMMY(add_key); DUMMY(request_key); DUMMY(keyctl); -/* Linux 2.6.13: */ -DUMMY(inotify_add_watch); -DUMMY(inotify_rm_watch); /* Linux 2.6.16: */ DUMMY(migrate_pages); DUMMY(unshare); @@ -87,7 +84,6 @@ DUMMY(vmsplice); DUMMY(move_pages); /* Linux 2.6.27: */ DUMMY(signalfd4); -DUMMY(inotify_init1); /* Linux 2.6.31: */ DUMMY(perf_event_open); /* Linux 2.6.36: */ diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c index 246bc26d85d4..86834a7ecea8 100644 --- a/sys/compat/linux/linux_file.c +++ b/sys/compat/linux/linux_file.c @@ -32,11 +32,13 @@ #include <sys/fcntl.h> #include <sys/file.h> #include <sys/filedesc.h> +#include <sys/inotify.h> #include <sys/lock.h> #include <sys/mman.h> #include <sys/selinfo.h> #include <sys/pipe.h> #include <sys/proc.h> +#include <sys/specialfd.h> #include <sys/stat.h> #include <sys/sx.h> #include <sys/syscallsubr.h> @@ -1877,3 +1879,122 @@ linux_writev(struct thread *td, struct linux_writev_args *args) freeuio(auio); return (linux_enobufs2eagain(td, args->fd, error)); } + +static int +linux_inotify_init_flags(int l_flags) +{ + int bsd_flags; + + if ((l_flags & ~(LINUX_IN_CLOEXEC | LINUX_IN_NONBLOCK)) != 0) + linux_msg(NULL, "inotify_init1 unsupported flags 0x%x", + l_flags); + + bsd_flags = 0; + if ((l_flags & LINUX_IN_CLOEXEC) != 0) + bsd_flags |= O_CLOEXEC; + if ((l_flags & LINUX_IN_NONBLOCK) != 0) + bsd_flags |= O_NONBLOCK; + return (bsd_flags); +} + +static int +inotify_init_common(struct thread *td, int flags) +{ + struct specialfd_inotify si; + + si.flags = linux_inotify_init_flags(flags); + return (kern_specialfd(td, SPECIALFD_INOTIFY, &si)); +} + +#if defined(__i386__) || defined(__amd64__) +int +linux_inotify_init(struct thread *td, struct linux_inotify_init_args *args) +{ + return (inotify_init_common(td, 0)); +} +#endif + +int +linux_inotify_init1(struct thread *td, struct linux_inotify_init1_args 
*args) +{ + return (inotify_init_common(td, args->flags)); +} + +/* + * The native implementation uses the same values for inotify events as + * libinotify, which gives us binary compatibility with Linux. This simplifies + * the shim implementation a lot, as otherwise we would have to handle read(2) + * calls on inotify descriptors and translate events to Linux's ABI. + */ +_Static_assert(LINUX_IN_ACCESS == IN_ACCESS, + "IN_ACCESS mismatch"); +_Static_assert(LINUX_IN_MODIFY == IN_MODIFY, + "IN_MODIFY mismatch"); +_Static_assert(LINUX_IN_ATTRIB == IN_ATTRIB, + "IN_ATTRIB mismatch"); +_Static_assert(LINUX_IN_CLOSE_WRITE == IN_CLOSE_WRITE, + "IN_CLOSE_WRITE mismatch"); +_Static_assert(LINUX_IN_CLOSE_NOWRITE == IN_CLOSE_NOWRITE, + "IN_CLOSE_NOWRITE mismatch"); +_Static_assert(LINUX_IN_OPEN == IN_OPEN, + "IN_OPEN mismatch"); +_Static_assert(LINUX_IN_MOVED_FROM == IN_MOVED_FROM, + "IN_MOVED_FROM mismatch"); +_Static_assert(LINUX_IN_MOVED_TO == IN_MOVED_TO, + "IN_MOVED_TO mismatch"); +_Static_assert(LINUX_IN_CREATE == IN_CREATE, + "IN_CREATE mismatch"); +_Static_assert(LINUX_IN_DELETE == IN_DELETE, + "IN_DELETE mismatch"); +_Static_assert(LINUX_IN_DELETE_SELF == IN_DELETE_SELF, + "IN_DELETE_SELF mismatch"); +_Static_assert(LINUX_IN_MOVE_SELF == IN_MOVE_SELF, + "IN_MOVE_SELF mismatch"); + +_Static_assert(LINUX_IN_UNMOUNT == IN_UNMOUNT, + "IN_UNMOUNT mismatch"); +_Static_assert(LINUX_IN_Q_OVERFLOW == IN_Q_OVERFLOW, + "IN_Q_OVERFLOW mismatch"); +_Static_assert(LINUX_IN_IGNORED == IN_IGNORED, + "IN_IGNORED mismatch"); + +_Static_assert(LINUX_IN_ISDIR == IN_ISDIR, + "IN_ISDIR mismatch"); +_Static_assert(LINUX_IN_ONLYDIR == IN_ONLYDIR, + "IN_ONLYDIR mismatch"); +_Static_assert(LINUX_IN_DONT_FOLLOW == IN_DONT_FOLLOW, + "IN_DONT_FOLLOW mismatch"); +_Static_assert(LINUX_IN_MASK_CREATE == IN_MASK_CREATE, + "IN_MASK_CREATE mismatch"); +_Static_assert(LINUX_IN_MASK_ADD == IN_MASK_ADD, + "IN_MASK_ADD mismatch"); +_Static_assert(LINUX_IN_ONESHOT == IN_ONESHOT, + "IN_ONESHOT mismatch"); +_Static_assert(LINUX_IN_EXCL_UNLINK == IN_EXCL_UNLINK, + "IN_EXCL_UNLINK mismatch"); + +static int +linux_inotify_watch_flags(int l_flags) +{ + if ((l_flags & ~(LINUX_IN_ALL_EVENTS | LINUX_IN_ALL_FLAGS)) != 0) { + linux_msg(NULL, "inotify_add_watch unsupported flags 0x%x", + l_flags); + } + + return (l_flags); +} + +int +linux_inotify_add_watch(struct thread *td, + struct linux_inotify_add_watch_args *args) +{ + return (kern_inotify_add_watch(args->fd, AT_FDCWD, args->pathname, + linux_inotify_watch_flags(args->mask), td)); +} + +int +linux_inotify_rm_watch(struct thread *td, + struct linux_inotify_rm_watch_args *args) +{ + return (kern_inotify_rm_watch(args->fd, args->wd, td)); +} diff --git a/sys/compat/linux/linux_file.h b/sys/compat/linux/linux_file.h index 2e56942b0f40..7448dc597230 100644 --- a/sys/compat/linux/linux_file.h +++ b/sys/compat/linux/linux_file.h @@ -189,6 +189,38 @@ #define LINUX_HUGETLB_FLAG_ENCODE_2GB (31 << LINUX_HUGETLB_FLAG_ENCODE_SHIFT) #define LINUX_HUGETLB_FLAG_ENCODE_16GB (34U << LINUX_HUGETLB_FLAG_ENCODE_SHIFT) +/* inotify flags */ +#define LINUX_IN_ACCESS 0x00000001 +#define LINUX_IN_MODIFY 0x00000002 +#define LINUX_IN_ATTRIB 0x00000004 +#define LINUX_IN_CLOSE_WRITE 0x00000008 +#define LINUX_IN_CLOSE_NOWRITE 0x00000010 +#define LINUX_IN_OPEN 0x00000020 +#define LINUX_IN_MOVED_FROM 0x00000040 +#define LINUX_IN_MOVED_TO 0x00000080 +#define LINUX_IN_CREATE 0x00000100 +#define LINUX_IN_DELETE 0x00000200 +#define LINUX_IN_DELETE_SELF 0x00000400 +#define LINUX_IN_MOVE_SELF 0x00000800 + +#define 
LINUX_IN_UNMOUNT 0x00002000 +#define LINUX_IN_Q_OVERFLOW 0x00004000 +#define LINUX_IN_IGNORED 0x00008000 + +#define LINUX_IN_ONLYDIR 0x01000000 +#define LINUX_IN_DONT_FOLLOW 0x02000000 +#define LINUX_IN_EXCL_UNLINK 0x04000000 +#define LINUX_IN_MASK_CREATE 0x10000000 +#define LINUX_IN_MASK_ADD 0x20000000 +#define LINUX_IN_ISDIR 0x40000000 +#define LINUX_IN_ONESHOT 0x80000000 + +#define LINUX_IN_ALL_EVENTS 0x00000fff +#define LINUX_IN_ALL_FLAGS 0xf700e000 + +#define LINUX_IN_NONBLOCK 0x00000800 +#define LINUX_IN_CLOEXEC 0x00080000 + #if defined(_KERNEL) struct l_file_handle { l_uint handle_bytes; diff --git a/sys/conf/files b/sys/conf/files index f6d473b1431b..dd6f9a3021d4 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3992,6 +3992,7 @@ kern/vfs_export.c standard kern/vfs_extattr.c standard kern/vfs_hash.c standard kern/vfs_init.c standard +kern/vfs_inotify.c standard kern/vfs_lookup.c standard kern/vfs_mount.c standard kern/vfs_mountroot.c standard diff --git a/sys/contrib/dev/iwlwifi/iwl-debug.h b/sys/contrib/dev/iwlwifi/iwl-debug.h index 43288a5a8d74..7b3b402766b4 100644 --- a/sys/contrib/dev/iwlwifi/iwl-debug.h +++ b/sys/contrib/dev/iwlwifi/iwl-debug.h @@ -47,7 +47,7 @@ enum iwl_dl { IWL_DL_DROP = 0x00000010, IWL_DL_EEPROM = 0x00000020, IWL_DL_FW = 0x00000040, - /* = 0x00000080, */ + IWL_DL_DEV_RADIO = 0x00000080, IWL_DL_HC = 0x00000100, IWL_DL_HT = 0x00000200, IWL_DL_INFO = 0x00000400, @@ -195,6 +195,8 @@ void __iwl_dbg(struct device *, u32, bool, const char *, const char *fmt, ...); IWL_DPRINTF(_subsys, IWL_DL_WEP, _fmt, ##__VA_ARGS__) #define IWL_DEBUG_WOWLAN(_subsys, _fmt, ...) \ IWL_DPRINTF(_subsys, IWL_DL_WOWLAN, _fmt, ##__VA_ARGS__) +#define IWL_DEBUG_DEV_RADIO(_dev, _fmt, ...) \ + IWL_DPRINTF_DEV((_dev), IWL_DL_DEV_RADIO, _fmt, ##__VA_ARGS__) #define IWL_DEBUG_PCI_RW(_subsys, _fmt, ...) \ IWL_DPRINTF(_subsys, IWL_DL_PCI_RW, _fmt, ##__VA_ARGS__) diff --git a/sys/dev/gpio/acpi_gpiobus.c b/sys/dev/gpio/acpi_gpiobus.c index 2987af634866..f9468e0deda0 100644 --- a/sys/dev/gpio/acpi_gpiobus.c +++ b/sys/dev/gpio/acpi_gpiobus.c @@ -36,6 +36,7 @@ #include <dev/gpio/gpiobusvar.h> #include <dev/gpio/acpi_gpiobusvar.h> +#include <dev/gpio/gpiobus_internal.h> #include "gpiobus_if.h" diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c index 2e2618805e7b..ab7f13177969 100644 --- a/sys/dev/gpio/gpiobus.c +++ b/sys/dev/gpio/gpiobus.c @@ -39,6 +39,7 @@ #include <sys/sbuf.h> #include <dev/gpio/gpiobusvar.h> +#include <dev/gpio/gpiobus_internal.h> #include "gpiobus_if.h" @@ -213,20 +214,40 @@ gpio_pin_is_active(gpio_pin_t pin, bool *active) return (0); } +/* + * Note that this function should only + * be used in cases where a pre-existing + * gpiobus_pin structure exists. In most + * cases, the gpio_pin_get_by_* functions + * suffice. 
+ */ +int +gpio_pin_acquire(gpio_pin_t gpio) +{ + device_t busdev; + + KASSERT(gpio != NULL, ("GPIO pin is NULL.")); + KASSERT(gpio->dev != NULL, ("GPIO pin device is NULL.")); + + busdev = GPIO_GET_BUS(gpio->dev); + if (busdev == NULL) + return (ENXIO); + + return (gpiobus_acquire_pin(busdev, gpio->pin)); +} + void gpio_pin_release(gpio_pin_t gpio) { device_t busdev; - if (gpio == NULL) - return; - + KASSERT(gpio != NULL, ("GPIO pin is NULL.")); KASSERT(gpio->dev != NULL, ("GPIO pin device is NULL.")); busdev = GPIO_GET_BUS(gpio->dev); - if (busdev != NULL) - gpiobus_release_pin(busdev, gpio->pin); + KASSERT(busdev != NULL, ("gpiobus dev is NULL.")); + gpiobus_release_pin(busdev, gpio->pin); free(gpio, M_DEVBUF); } @@ -293,7 +314,7 @@ gpiobus_print_pins(struct gpiobus_ivar *devi, struct sbuf *sb) } device_t -gpiobus_attach_bus(device_t dev) +gpiobus_add_bus(device_t dev) { device_t busdev; @@ -307,8 +328,24 @@ gpiobus_attach_bus(device_t dev) #ifdef FDT ofw_gpiobus_register_provider(dev); #endif - bus_attach_children(dev); + return (busdev); +} + +/* + * Attach a gpiobus child. + * Note that the controller is expected + * to be fully initialized at this point. + */ +device_t +gpiobus_attach_bus(device_t dev) +{ + device_t busdev; + busdev = gpiobus_add_bus(dev); + if (busdev == NULL) + return (NULL); + + bus_attach_children(dev); return (busdev); } @@ -385,14 +422,13 @@ gpiobus_acquire_pin(device_t bus, uint32_t pin) sc = device_get_softc(bus); /* Consistency check. */ if (pin >= sc->sc_npins) { - device_printf(bus, - "invalid pin %d, max: %d\n", pin, sc->sc_npins - 1); - return (-1); + panic("%s: invalid pin %d, max: %d", + device_get_nameunit(bus), pin, sc->sc_npins - 1); } /* Mark pin as mapped and give warning if it's already mapped. */ if (sc->sc_pins[pin].mapped) { device_printf(bus, "warning: pin %d is already mapped\n", pin); - return (-1); + return (EBUSY); } sc->sc_pins[pin].mapped = 1; @@ -400,7 +436,7 @@ gpiobus_acquire_pin(device_t bus, uint32_t pin) } /* Release mapped pin */ -int +void gpiobus_release_pin(device_t bus, uint32_t pin) { struct gpiobus_softc *sc; @@ -408,19 +444,15 @@ gpiobus_release_pin(device_t bus, uint32_t pin) sc = device_get_softc(bus); /* Consistency check. */ if (pin >= sc->sc_npins) { - device_printf(bus, - "invalid pin %d, max=%d\n", - pin, sc->sc_npins - 1); - return (-1); + panic("%s: invalid pin %d, max: %d", + device_get_nameunit(bus), pin, sc->sc_npins - 1); } - if (!sc->sc_pins[pin].mapped) { - device_printf(bus, "pin %d is not mapped\n", pin); - return (-1); - } - sc->sc_pins[pin].mapped = 0; + if (!sc->sc_pins[pin].mapped) + panic("%s: pin %d is not mapped", device_get_nameunit(bus), + pin); - return (0); + sc->sc_pins[pin].mapped = 0; } static int @@ -435,8 +467,7 @@ gpiobus_acquire_child_pins(device_t dev, device_t child) device_printf(child, "cannot acquire pin %d\n", devi->pins[i]); while (--i >= 0) { - (void)gpiobus_release_pin(dev, - devi->pins[i]); + gpiobus_release_pin(dev, devi->pins[i]); } gpiobus_free_ivars(devi); return (EBUSY); diff --git a/sys/dev/gpio/gpiobus_internal.h b/sys/dev/gpio/gpiobus_internal.h new file mode 100644 index 000000000000..de3f57663132 --- /dev/null +++ b/sys/dev/gpio/gpiobus_internal.h @@ -0,0 +1,47 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2009 Oleksandr Tymoshenko <gonzo@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef __GPIOBUS_INTERNAL_H__ +#define __GPIOBUS_INTERNAL_H__ + +/* + * Functions shared between gpiobus and other bus classes that derive from it; + * these should not be called directly by other drivers. + */ +int gpiobus_attach(device_t); +int gpiobus_detach(device_t); +int gpiobus_init_softc(device_t); +int gpiobus_alloc_ivars(struct gpiobus_ivar *); +void gpiobus_free_ivars(struct gpiobus_ivar *); +int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *); +int gpiobus_acquire_pin(device_t, uint32_t); +void gpiobus_release_pin(device_t, uint32_t); + +extern driver_t gpiobus_driver; +#endif diff --git a/sys/dev/gpio/gpiobusvar.h b/sys/dev/gpio/gpiobusvar.h index 74783e112f89..7f504236a774 100644 --- a/sys/dev/gpio/gpiobusvar.h +++ b/sys/dev/gpio/gpiobusvar.h @@ -156,6 +156,8 @@ int gpio_pin_get_by_bus_pinnum(device_t _bus, uint32_t _pinnum, gpio_pin_t *_gp) /* Acquire a pin by child and index (used by direct children of gpiobus). */ int gpio_pin_get_by_child_index(device_t _child, uint32_t _idx, gpio_pin_t *_gp); +/* Acquire a pin from an existing gpio_pin_t. */ +int gpio_pin_acquire(gpio_pin_t gpio); /* Release a pin acquired via any gpio_pin_get_xxx() function. */ void gpio_pin_release(gpio_pin_t gpio); @@ -167,22 +169,9 @@ int gpio_pin_setflags(gpio_pin_t pin, uint32_t flags); struct resource *gpio_alloc_intr_resource(device_t consumer_dev, int *rid, u_int alloc_flags, gpio_pin_t pin, uint32_t intr_mode); -/* - * Functions shared between gpiobus and other bus classes that derive from it; - * these should not be called directly by other drivers. 
- */ int gpio_check_flags(uint32_t, uint32_t); +device_t gpiobus_add_bus(device_t); device_t gpiobus_attach_bus(device_t); int gpiobus_detach_bus(device_t); -int gpiobus_attach(device_t); -int gpiobus_detach(device_t); -int gpiobus_init_softc(device_t); -int gpiobus_alloc_ivars(struct gpiobus_ivar *); -void gpiobus_free_ivars(struct gpiobus_ivar *); -int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *); -int gpiobus_acquire_pin(device_t, uint32_t); -int gpiobus_release_pin(device_t, uint32_t); - -extern driver_t gpiobus_driver; #endif /* __GPIOBUS_H__ */ diff --git a/sys/dev/gpio/gpiopps.c b/sys/dev/gpio/gpiopps.c index bb8afa5e062c..82620a50a798 100644 --- a/sys/dev/gpio/gpiopps.c +++ b/sys/dev/gpio/gpiopps.c @@ -160,7 +160,7 @@ gpiopps_detach(device_t dev) if (sc->ires != NULL) bus_release_resource(dev, SYS_RES_IRQ, sc->irid, sc->ires); if (sc->gpin != NULL) - gpiobus_release_pin(GPIO_GET_BUS(sc->gpin->dev), sc->gpin->pin); + gpio_pin_release(sc->gpin); return (0); } diff --git a/sys/dev/gpio/ofw_gpiobus.c b/sys/dev/gpio/ofw_gpiobus.c index 32dc5b55e698..fc5fb03d6824 100644 --- a/sys/dev/gpio/ofw_gpiobus.c +++ b/sys/dev/gpio/ofw_gpiobus.c @@ -36,6 +36,7 @@ #include <sys/module.h> #include <dev/gpio/gpiobusvar.h> +#include <dev/gpio/gpiobus_internal.h> #include <dev/ofw/ofw_bus.h> #include "gpiobus_if.h" diff --git a/sys/dev/gpio/pl061.c b/sys/dev/gpio/pl061.c index cc39790322b6..87d4310a6396 100644 --- a/sys/dev/gpio/pl061.c +++ b/sys/dev/gpio/pl061.c @@ -487,14 +487,21 @@ pl061_attach(device_t dev) } } + mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "pl061", MTX_SPIN); + + if (sc->sc_xref != 0 && !intr_pic_register(dev, sc->sc_xref)) { + device_printf(dev, "couldn't register PIC\n"); + PL061_LOCK_DESTROY(sc); + goto free_isrc; + } + sc->sc_busdev = gpiobus_attach_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "couldn't attach gpio bus\n"); + PL061_LOCK_DESTROY(sc); goto free_isrc; } - mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "pl061", MTX_SPIN); - return (0); free_isrc: @@ -503,6 +510,7 @@ free_isrc: * for (irq = 0; irq < PL061_NUM_GPIO; irq++) * intr_isrc_deregister(PIC_INTR_ISRC(sc, irq)); */ + bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_irq_hdlr); bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irq_rid, sc->sc_irq_res); free_pic: diff --git a/sys/dev/gpio/pl061.h b/sys/dev/gpio/pl061.h index 809a1168493d..d9fe23e502b1 100644 --- a/sys/dev/gpio/pl061.h +++ b/sys/dev/gpio/pl061.h @@ -46,6 +46,7 @@ struct pl061_softc { struct resource *sc_mem_res; struct resource *sc_irq_res; void *sc_irq_hdlr; + intptr_t sc_xref; int sc_mem_rid; int sc_irq_rid; struct pl061_pin_irqsrc sc_isrcs[PL061_NUM_GPIO]; diff --git a/sys/dev/gpio/pl061_acpi.c b/sys/dev/gpio/pl061_acpi.c index f5885025083e..8e9921261e4e 100644 --- a/sys/dev/gpio/pl061_acpi.c +++ b/sys/dev/gpio/pl061_acpi.c @@ -67,19 +67,12 @@ pl061_acpi_probe(device_t dev) static int pl061_acpi_attach(device_t dev) { - int error; + struct pl061_softc *sc; - error = pl061_attach(dev); - if (error != 0) - return (error); + sc = device_get_softc(dev); + sc->sc_xref = ACPI_GPIO_XREF; - if (!intr_pic_register(dev, ACPI_GPIO_XREF)) { - device_printf(dev, "couldn't register PIC\n"); - pl061_detach(dev); - error = ENXIO; - } - - return (error); + return (pl061_attach(dev)); } static device_method_t pl061_acpi_methods[] = { diff --git a/sys/dev/gpio/pl061_fdt.c b/sys/dev/gpio/pl061_fdt.c index aa22298b43c6..681b3ccdfdeb 100644 --- a/sys/dev/gpio/pl061_fdt.c +++ b/sys/dev/gpio/pl061_fdt.c @@ -61,19 +61,12 @@ 
pl061_fdt_probe(device_t dev) static int pl061_fdt_attach(device_t dev) { - int error; + struct pl061_softc *sc; - error = pl061_attach(dev); - if (error != 0) - return (error); + sc = device_get_softc(dev); + sc->sc_xref = OF_xref_from_node(ofw_bus_get_node(dev)); - if (!intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev)))) { - device_printf(dev, "couldn't register PIC\n"); - pl061_detach(dev); - error = ENXIO; - } - - return (error); + return (pl061_attach(dev)); } static device_method_t pl061_fdt_methods[] = { diff --git a/sys/dev/gpio/qoriq_gpio.c b/sys/dev/gpio/qoriq_gpio.c index 25dfccede29f..8b44cd256c79 100644 --- a/sys/dev/gpio/qoriq_gpio.c +++ b/sys/dev/gpio/qoriq_gpio.c @@ -369,11 +369,6 @@ qoriq_gpio_attach(device_t dev) for (i = 0; i <= MAXPIN; i++) sc->sc_pins[i].gp_caps = DEFAULT_CAPS; - sc->busdev = gpiobus_attach_bus(dev); - if (sc->busdev == NULL) { - qoriq_gpio_detach(dev); - return (ENOMEM); - } /* * Enable the GPIO Input Buffer for all GPIOs. * This is safe on devices without a GPIBE register, because those @@ -384,6 +379,12 @@ qoriq_gpio_attach(device_t dev) OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); + sc->busdev = gpiobus_attach_bus(dev); + if (sc->busdev == NULL) { + qoriq_gpio_detach(dev); + return (ENOMEM); + } + return (0); } diff --git a/sys/dev/iicbus/gpio/tca64xx.c b/sys/dev/iicbus/gpio/tca64xx.c index 3b3bca9936f1..cd011ae9be75 100644 --- a/sys/dev/iicbus/gpio/tca64xx.c +++ b/sys/dev/iicbus/gpio/tca64xx.c @@ -261,14 +261,13 @@ tca64xx_attach(device_t dev) sc->addr = iicbus_get_addr(dev); mtx_init(&sc->mtx, "tca64xx gpio", "gpio", MTX_DEF); + OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { device_printf(dev, "Could not create busdev child\n"); return (ENXIO); } - OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); - #ifdef DEBUG switch (sc->chip) { case TCA6416_TYPE: diff --git a/sys/dev/mem/memutil.c b/sys/dev/mem/memutil.c index cf9714d6ec8f..20ce337df0ab 100644 --- a/sys/dev/mem/memutil.c +++ b/sys/dev/mem/memutil.c @@ -26,15 +26,14 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <sys/param.h> +#include <sys/systm.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/memrange.h> -#include <sys/rwlock.h> -#include <sys/systm.h> +#include <sys/sx.h> -static struct rwlock mr_lock; +static struct sx mr_lock; /* * Implementation-neutral, kernel-callable functions for manipulating @@ -46,7 +45,7 @@ mem_range_init(void) if (mem_range_softc.mr_op == NULL) return; - rw_init(&mr_lock, "memrange"); + sx_init(&mr_lock, "memrange"); mem_range_softc.mr_op->init(&mem_range_softc); } @@ -56,7 +55,7 @@ mem_range_destroy(void) if (mem_range_softc.mr_op == NULL) return; - rw_destroy(&mr_lock); + sx_destroy(&mr_lock); } int @@ -67,12 +66,12 @@ mem_range_attr_get(struct mem_range_desc *mrd, int *arg) if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); nd = *arg; - rw_rlock(&mr_lock); + sx_slock(&mr_lock); if (nd == 0) *arg = mem_range_softc.mr_ndesc; else bcopy(mem_range_softc.mr_desc, mrd, nd * sizeof(*mrd)); - rw_runlock(&mr_lock); + sx_sunlock(&mr_lock); return (0); } @@ -83,8 +82,8 @@ mem_range_attr_set(struct mem_range_desc *mrd, int *arg) if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); - rw_wlock(&mr_lock); + sx_xlock(&mr_lock); ret = mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg); - rw_wunlock(&mr_lock); + sx_xunlock(&mr_lock); return (ret); } diff --git a/sys/dev/regulator/regulator_fixed.c b/sys/dev/regulator/regulator_fixed.c index 0a76da7140a0..55cdb5e4aeae 100644 --- a/sys/dev/regulator/regulator_fixed.c +++ b/sys/dev/regulator/regulator_fixed.c @@ -100,12 +100,8 @@ static struct gpio_entry * regnode_get_gpio_entry(struct gpiobus_pin *gpio_pin) { struct gpio_entry *entry, *tmp; - device_t busdev; int rv; - busdev = GPIO_GET_BUS(gpio_pin->dev); - if (busdev == NULL) - return (NULL); entry = malloc(sizeof(struct gpio_entry), M_FIXEDREGULATOR, M_WAITOK | M_ZERO); @@ -122,8 +118,8 @@ regnode_get_gpio_entry(struct gpiobus_pin *gpio_pin) } /* Reserve pin. */ - /* XXX Can we call gpiobus_acquire_pin() with gpio_list_mtx held? */ - rv = gpiobus_acquire_pin(busdev, gpio_pin->pin); + /* XXX Can we call gpio_pin_acquire() with gpio_list_mtx held? 
*/ + rv = gpio_pin_acquire(gpio_pin); if (rv != 0) { mtx_unlock(&gpio_list_mtx); free(entry, M_FIXEDREGULATOR); diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index c0ec5a94b8d3..ae28617537fd 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -89,6 +89,8 @@ #include <sys/buf.h> #include <sys/sysctl.h> #include <sys/vmmeter.h> +#define EXTERR_CATEGORY EXTERR_CAT_FUSE +#include <sys/exterrvar.h> #include <vm/vm.h> #include <vm/vm_extern.h> @@ -439,7 +441,8 @@ fuse_vnop_access(struct vop_access_args *ap) if (vnode_isvroot(vp)) { return 0; } - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (!(data->dataflags & FSESS_INITED)) { if (vnode_isvroot(vp)) { @@ -448,7 +451,8 @@ fuse_vnop_access(struct vop_access_args *ap) return 0; } } - return EBADF; + return (EXTERROR(EBADF, "Access denied until FUSE session " + "is initialized")); } if (vnode_islnk(vp)) { return 0; @@ -489,7 +493,8 @@ fuse_vnop_advlock(struct vop_advlock_args *ap) dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } switch(ap->a_op) { @@ -506,7 +511,7 @@ fuse_vnop_advlock(struct vop_advlock_args *ap) op = FUSE_SETLK; break; default: - return EINVAL; + return (EXTERROR(EINVAL, "Unsupported lock flags")); } if (!(dataflags & FSESS_POSIX_LOCKS)) @@ -534,14 +539,14 @@ fuse_vnop_advlock(struct vop_advlock_args *ap) size = vattr.va_size; if (size > OFF_MAX || (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) { - err = EOVERFLOW; + err = EXTERROR(EOVERFLOW, "Offset is too large"); goto out; } start = size + fl->l_start; break; default: - return (EINVAL); + return (EXTERROR(EINVAL, "Unsupported offset type")); } err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid); @@ -603,15 +608,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap) int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); switch (vp->v_type) { case VFIFO: return (ESPIPE); case VLNK: case VREG: - if (vfs_isrdonly(mp)) - return (EROFS); break; default: return (ENODEV); @@ -621,7 +625,8 @@ fuse_vnop_allocate(struct vop_allocate_args *ap) return (EROFS); if (fsess_not_impl(mp, FUSE_FALLOCATE)) - return (EINVAL); + return (EXTERROR(EINVAL, "This server does not implement " + "FUSE_FALLOCATE")); io.uio_offset = *offset; io.uio_resid = *len; @@ -651,13 +656,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap) if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_FALLOCATE); - err = EINVAL; + err = EXTERROR(EINVAL, "This server does not implement " + "FUSE_ALLOCATE"); } else if (err == EOPNOTSUPP) { /* * The file system server does not support FUSE_FALLOCATE with * the supplied mode for this particular file. 
*/ - err = EINVAL; + err = EXTERROR(EINVAL, "This file can't be pre-allocated"); } else if (!err) { *offset += *len; *len = 0; @@ -703,7 +709,8 @@ fuse_vnop_bmap(struct vop_bmap_args *ap) int maxrun; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } mp = vnode_mount(vp); @@ -870,19 +877,21 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) pid_t pid; int err; - err = ENOSYS; if (mp == NULL || mp != vnode_mount(outvp)) - goto fallback; + return (EXTERROR(ENOSYS, "Mount points do not match")); if (incred->cr_uid != outcred->cr_uid) - goto fallback; + return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not " + "support different credentials for infd and outfd")); if (incred->cr_groups[0] != outcred->cr_groups[0]) - goto fallback; + return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not " + "support different credentials for infd and outfd")); /* Caller busied mp, mnt_data can be safely accessed. */ if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE)) - goto fallback; + return (EXTERROR(ENOSYS, "This daemon does not " + "implement COPY_FILE_RANGE")); if (ap->a_fsizetd == NULL) td = curthread; @@ -892,7 +901,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) vn_lock_pair(invp, false, LK_SHARED, outvp, false, LK_EXCLUSIVE); if (invp->v_data == NULL || outvp->v_data == NULL) { - err = EBADF; + err = EXTERROR(EBADF, "vnode got reclaimed"); goto unlock; } @@ -956,7 +965,6 @@ unlock: if (err == ENOSYS) fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE); -fallback: /* * No need to call vn_rlimit_fsizex_res before return, since the uio is @@ -1024,7 +1032,8 @@ fuse_vnop_create(struct vop_create_args *ap) int flags; if (fuse_isdeadfs(dvp)) - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); /* FUSE expects sockets to be created with FUSE_MKNOD */ if (vap->va_type == VSOCK) @@ -1040,7 +1049,7 @@ fuse_vnop_create(struct vop_create_args *ap) bzero(&fdi, sizeof(fdi)); if (vap->va_type != VREG) - return (EINVAL); + return (EXTERROR(EINVAL, "Only regular files can be created")); if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) { /* Fallback to FUSE_MKNOD/FUSE_OPEN */ @@ -1221,8 +1230,8 @@ fuse_vnop_getattr(struct vop_getattr_args *ap) if (!(dataflags & FSESS_INITED)) { if (!vnode_isvroot(vp)) { fdata_set_dead(fuse_get_mpdata(vnode_mount(vp))); - err = ENOTCONN; - return err; + return (EXTERROR(ENOTCONN, "FUSE daemon is not " + "initialized")); } else { goto fake; } @@ -1351,10 +1360,11 @@ fuse_vnop_link(struct vop_link_args *ap) int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (vnode_mount(tdvp) != vnode_mount(vp)) { - return EXDEV; + return (EXDEV); } /* @@ -1364,7 +1374,7 @@ fuse_vnop_link(struct vop_link_args *ap) * validating that nlink does not overflow. 
*/ if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX) - return EMLINK; + return (EMLINK); fli.oldnodeid = VTOI(vp); fdisp_init(&fdi, 0); @@ -1376,12 +1386,13 @@ fuse_vnop_link(struct vop_link_args *ap) feo = fdi.answ; if (fli.oldnodeid != feo->nodeid) { + static const char exterr[] = "Server assigned wrong inode " + "for a hard link."; struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); - fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, - "Assigned wrong inode for a hard link."); + fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, exterr); fuse_vnode_clear_attr_cache(vp); fuse_vnode_clear_attr_cache(tdvp); - err = EIO; + err = EXTERROR(EIO, exterr); goto out; } @@ -1458,7 +1469,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) if (fuse_isdeadfs(dvp)) { *vpp = NULL; - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (!vnode_isdir(dvp)) return ENOTDIR; @@ -1478,7 +1490,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) * Since the file system doesn't support ".." lookups, * we have no way to find this entry. */ - return ESTALE; + return (EXTERROR(ESTALE, "This server does not support " + "'..' lookups")); } nid = VTOFUD(dvp)->parent_nid; if (nid == 0) @@ -1601,11 +1614,11 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) vref(dvp); *vpp = dvp; } else { + static const char exterr[] = "Server assigned " + "same inode to both parent and child."; fuse_warn(fuse_get_mpdata(mp), - FSESS_WARN_ILLEGAL_INODE, - "Assigned same inode to both parent and " - "child."); - err = EIO; + FSESS_WARN_ILLEGAL_INODE, exterr); + err = EXTERROR(EIO, exterr); } } else { @@ -1693,7 +1706,8 @@ fuse_vnop_mkdir(struct vop_mkdir_args *ap) struct fuse_mkdir_in fmdi; if (fuse_isdeadfs(dvp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); fmdi.umask = curthread->td_proc->p_pd->pd_cmask; @@ -1720,7 +1734,8 @@ fuse_vnop_mknod(struct vop_mknod_args *ap) struct vattr *vap = ap->a_vap; if (fuse_isdeadfs(dvp)) - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); return fuse_internal_mknod(dvp, vpp, cnp, vap); } @@ -1744,11 +1759,13 @@ fuse_vnop_open(struct vop_open_args *ap) pid_t pid = td->td_proc->p_pid; if (fuse_isdeadfs(vp)) - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO) - return (EOPNOTSUPP); + return (EXTERROR(EOPNOTSUPP, "Unsupported vnode type", + vp->v_type)); if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0) - return EINVAL; + return (EXTERROR(EINVAL, "Illegal mode", a_mode)); if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) { fuse_vnode_open(vp, 0, td); @@ -1830,7 +1847,8 @@ fuse_vnop_pathconf(struct vop_pathconf_args *ap) return (0); } else if (fsess_not_impl(mp, FUSE_LSEEK)) { /* FUSE_LSEEK is not implemented */ - return (EINVAL); + return (EXTERROR(EINVAL, "This server does not " + "implement FUSE_LSEEK")); } else { return (err); } @@ -1864,7 +1882,8 @@ fuse_vnop_read(struct vop_read_args *ap) MPASS(vp->v_type == VREG || vp->v_type == VDIR); if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (VTOFUD(vp)->flag & FN_DIRECTIO) { @@ -1941,10 +1960,11 @@ fuse_vnop_readdir(struct vop_readdir_args *ap) if (ap->a_eofflag) *ap->a_eofflag = 0; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if 
(uio_resid(uio) < sizeof(struct dirent)) - return EINVAL; + return (EXTERROR(EINVAL, "Buffer is too small")); tresid = uio->uio_resid; err = fuse_filehandle_get_dir(vp, &fufh, cred, pid); @@ -2014,7 +2034,8 @@ fuse_vnop_readlink(struct vop_readlink_args *ap) int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (!vnode_islnk(vp)) { return EINVAL; @@ -2025,10 +2046,11 @@ fuse_vnop_readlink(struct vop_readlink_args *ap) goto out; } if (strnlen(fdi.answ, fdi.iosize) + 1 < fdi.iosize) { + static const char exterr[] = "Server returned an embedded NUL " + "from FUSE_READLINK."; struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); - fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, - "Returned an embedded NUL from FUSE_READLINK."); - err = EIO; + fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, exterr); + err = EXTERROR(EIO, exterr); goto out; } if (((char *)fdi.answ)[0] == '/' && @@ -2112,10 +2134,11 @@ fuse_vnop_remove(struct vop_remove_args *ap) int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (vnode_isdir(vp)) { - return EPERM; + return (EXTERROR(EPERM, "vnode is a directory")); } err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK); @@ -2148,12 +2171,13 @@ fuse_vnop_rename(struct vop_rename_args *ap) int err = 0; if (fuse_isdeadfs(fdvp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (fvp->v_mount != tdvp->v_mount || (tvp && fvp->v_mount != tvp->v_mount)) { SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename"); - err = EXDEV; + err = EXTERROR(EXDEV, "Cross-device rename"); goto out; } cache_purge(fvp); @@ -2224,10 +2248,12 @@ fuse_vnop_rmdir(struct vop_rmdir_args *ap) int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (VTOFUD(vp) == VTOFUD(dvp)) { - return EINVAL; + return (EXTERROR(EINVAL, "Directory to be removed " + "contains itself")); } err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR); @@ -2264,7 +2290,8 @@ fuse_vnop_setattr(struct vop_setattr_args *ap) checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (vap->va_uid != (uid_t)VNOVAL) { @@ -2429,7 +2456,8 @@ fuse_vnop_symlink(struct vop_symlink_args *ap) size_t len; if (fuse_isdeadfs(dvp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } /* * Unlike the other creator type calls, here we have to create a message @@ -2475,7 +2503,8 @@ fuse_vnop_write(struct vop_write_args *ap) MPASS(vp->v_type == VREG || vp->v_type == VDIR); if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (VTOFUD(vp)->flag & FN_DIRECTIO) { @@ -2628,10 +2657,12 @@ fuse_vnop_getextattr(struct vop_getextattr_args *ap) int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_GETXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "extended attributes")); err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); if (err) @@ -2669,7 +2700,8 @@ fuse_vnop_getextattr(struct vop_getextattr_args *ap) if (err != 0) { if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_GETXATTR); - err = 
EOPNOTSUPP; + err = (EXTERROR(EOPNOTSUPP, "This server does not " + "implement extended attributes")); } goto out; } @@ -2715,10 +2747,12 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap) int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_SETXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "setting extended attributes")); if (vfs_isrdonly(mp)) return EROFS; @@ -2730,9 +2764,11 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap) * return EOPNOTSUPP. */ if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) - return (EOPNOTSUPP); + return (EXTERROR(EOPNOTSUPP, "This server does not " + "implement removing extended attributess")); else - return (EINVAL); + return (EXTERROR(EINVAL, "DELETEEXTATTR should be used " + "to remove extattrs")); } err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, @@ -2778,7 +2814,8 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap) if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_SETXATTR); - err = EOPNOTSUPP; + err = EXTERROR(EOPNOTSUPP, "This server does not implement " + "setting extended attributes"); } if (err == ERESTART) { /* Can't restart after calling uiomove */ @@ -2889,10 +2926,12 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap) int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_LISTXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "extended attributes")); err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); if (err) @@ -2920,7 +2959,8 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap) if (err != 0) { if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_LISTXATTR); - err = EOPNOTSUPP; + err = EXTERROR(EOPNOTSUPP, "This server does not " + "implement extended attributes"); } goto out; } @@ -3020,7 +3060,8 @@ fuse_vnop_deallocate(struct vop_deallocate_args *ap) bool closefufh = false; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (vfs_isrdonly(mp)) return (EROFS); @@ -3126,10 +3167,12 @@ fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap) int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "removing extended attributes")); if (vfs_isrdonly(mp)) return EROFS; @@ -3158,7 +3201,8 @@ fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap) err = fdisp_wait_answ(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_REMOVEXATTR); - err = EOPNOTSUPP; + err = EXTERROR(EOPNOTSUPP, "This server does not implement " + "removing extended attributes"); } fdisp_destroy(&fdi); @@ -3212,7 +3256,8 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap) /* NFS requires lookups for "." and ".." 
*/ SDT_PROBE2(fusefs, , vnops, trace, 1, "VOP_VPTOFH without FUSE_EXPORT_SUPPORT"); - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server is " + "missing FUSE_EXPORT_SUPPORT")); } if ((mp->mnt_flag & MNT_EXPORTED) && fsess_is_impl(mp, FUSE_OPENDIR)) @@ -3230,7 +3275,8 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap) */ SDT_PROBE2(fusefs, , vnops, trace, 1, "VOP_VPTOFH with FUSE_OPENDIR"); - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server implements " + "FUSE_OPENDIR so is not compatible with getfh")); } err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread); @@ -3244,6 +3290,7 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap) if (fvdat->generation <= UINT32_MAX) fhp->gen = fvdat->generation; else - return EOVERFLOW; + return (EXTERROR(EOVERFLOW, "inode generation " + "number overflow")); return (0); } diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c index e799a5ce05f6..8ab6d35a2685 100644 --- a/sys/fs/msdosfs/msdosfs_lookup.c +++ b/sys/fs/msdosfs/msdosfs_lookup.c @@ -845,7 +845,6 @@ doscheckpath(struct denode *source, struct denode *target, daddr_t *wait_scn) *wait_scn = 0; pmp = target->de_pmp; - lockmgr_assert(&pmp->pm_checkpath_lock, KA_XLOCKED); KASSERT(pmp == source->de_pmp, ("doscheckpath: source and target on different filesystems")); diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c index adcffe45df82..4431d36c8a8e 100644 --- a/sys/fs/msdosfs/msdosfs_vfsops.c +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -575,7 +575,6 @@ mountmsdosfs(struct vnode *odevvp, struct mount *mp) pmp->pm_bo = bo; lockinit(&pmp->pm_fatlock, 0, msdosfs_lock_msg, 0, 0); - lockinit(&pmp->pm_checkpath_lock, 0, "msdoscp", 0, 0); TASK_INIT(&pmp->pm_rw2ro_task, 0, msdosfs_remount_ro, pmp); @@ -871,7 +870,6 @@ error_exit: } if (pmp != NULL) { lockdestroy(&pmp->pm_fatlock); - lockdestroy(&pmp->pm_checkpath_lock); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; @@ -971,7 +969,6 @@ msdosfs_unmount(struct mount *mp, int mntflags) dev_rel(pmp->pm_dev); free(pmp->pm_inusemap, M_MSDOSFSFAT); lockdestroy(&pmp->pm_fatlock); - lockdestroy(&pmp->pm_checkpath_lock); free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; return (error); diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 6417b7dac16b..120b97ba72d5 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -945,7 +945,7 @@ msdosfs_rename(struct vop_rename_args *ap) struct denode *fdip, *fip, *tdip, *tip, *nip; u_char toname[12], oldname[11]; u_long to_diroffset; - bool checkpath_locked, doingdirectory, newparent; + bool doingdirectory, newparent; int error; u_long cn, pcl, blkoff; daddr_t bn, wait_scn, scn; @@ -986,8 +986,6 @@ msdosfs_rename(struct vop_rename_args *ap) if (tvp != NULL && tvp != tdvp) VOP_UNLOCK(tvp); - checkpath_locked = false; - relock: doingdirectory = newparent = false; @@ -1108,12 +1106,8 @@ relock: if (doingdirectory && newparent) { if (error != 0) /* write access check above */ goto unlock; - lockmgr(&pmp->pm_checkpath_lock, LK_EXCLUSIVE, NULL); - checkpath_locked = true; error = doscheckpath(fip, tdip, &wait_scn); if (wait_scn != 0) { - lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); @@ -1276,8 +1270,6 @@ relock: cache_purge(fvp); unlock: - if (checkpath_locked) - lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL); vput(fdvp); vput(fvp); if (tvp != NULL) { @@ -1289,7 +1281,6 @@ 
unlock: vput(tdvp); return (error); releout: - MPASS(!checkpath_locked); vrele(tdvp); if (tvp != NULL) vrele(tvp); diff --git a/sys/fs/msdosfs/msdosfsmount.h b/sys/fs/msdosfs/msdosfsmount.h index fcaac544a74d..04e6b75bea2a 100644 --- a/sys/fs/msdosfs/msdosfsmount.h +++ b/sys/fs/msdosfs/msdosfsmount.h @@ -118,7 +118,6 @@ struct msdosfsmount { void *pm_u2d; /* Unicode->DOS iconv handle */ void *pm_d2u; /* DOS->Local iconv handle */ struct lock pm_fatlock; /* lockmgr protecting allocations */ - struct lock pm_checkpath_lock; /* protects doscheckpath result */ struct task pm_rw2ro_task; /* context for emergency remount ro */ }; diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c index 0356877eaf05..7dcc83880bb9 100644 --- a/sys/fs/nullfs/null_subr.c +++ b/sys/fs/nullfs/null_subr.c @@ -245,6 +245,10 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp) vp->v_object = lowervp->v_object; vn_irflag_set(vp, VIRF_PGREAD); } + if ((vn_irflag_read(lowervp) & VIRF_INOTIFY) != 0) + vn_irflag_set(vp, VIRF_INOTIFY); + if ((vn_irflag_read(lowervp) & VIRF_INOTIFY_PARENT) != 0) + vn_irflag_set(vp, VIRF_INOTIFY_PARENT); if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp) vp->v_vflag |= VV_ROOT; diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c index 8608216e10e5..74c1a8f3acb6 100644 --- a/sys/fs/nullfs/null_vnops.c +++ b/sys/fs/nullfs/null_vnops.c @@ -190,6 +190,26 @@ SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, &null_bug_bypass, 0, ""); /* + * Synchronize inotify flags with the lower vnode: + * - If the upper vnode has the flag set and the lower does not, then the lower + * vnode is unwatched and the upper vnode does not need to go through + * VOP_INOTIFY. + * - If the lower vnode is watched, then the upper vnode should go through + * VOP_INOTIFY, so copy the flag up. + */ +static void +null_copy_inotify(struct vnode *vp, struct vnode *lvp, short flag) +{ + if ((vn_irflag_read(vp) & flag) != 0) { + if (__predict_false((vn_irflag_read(lvp) & flag) == 0)) + vn_irflag_unset(vp, flag); + } else if ((vn_irflag_read(lvp) & flag) != 0) { + if (__predict_false((vn_irflag_read(vp) & flag) == 0)) + vn_irflag_set(vp, flag); + } +} + +/* * This is the 10-Apr-92 bypass routine. * This version has been optimized for speed, throwing away some * safety checks. It should still always work, but it's not as @@ -305,7 +325,10 @@ null_bypass(struct vop_generic_args *ap) lvp = *(vps_p[i]); /* - * Get rid of the transient hold on lvp. + * Get rid of the transient hold on lvp. Copy inotify + * flags up in case something is watching the lower + * layer. + * * If lowervp was unlocked during VOP * operation, nullfs upper vnode could have * been reclaimed, which changes its v_vnlock @@ -314,6 +337,10 @@ null_bypass(struct vop_generic_args *ap) * upper (reclaimed) vnode. 
*/ if (lvp != NULLVP) { + null_copy_inotify(old_vps[i], lvp, + VIRF_INOTIFY); + null_copy_inotify(old_vps[i], lvp, + VIRF_INOTIFY_PARENT); if (VOP_ISLOCKED(lvp) == LK_EXCLUSIVE && old_vps[i]->v_vnlock != lvp->v_vnlock) { VOP_UNLOCK(lvp); diff --git a/sys/i386/linux/linux_proto.h b/sys/i386/linux/linux_proto.h index aa2dfbb68745..49f002a633d2 100644 --- a/sys/i386/linux/linux_proto.h +++ b/sys/i386/linux/linux_proto.h @@ -981,10 +981,13 @@ struct linux_inotify_init_args { syscallarg_t dummy; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; @@ -1178,7 +1181,7 @@ struct linux_pipe2_args { char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_init1_args { - syscallarg_t dummy; + char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_preadv_args { char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)]; diff --git a/sys/i386/linux/linux_sysent.c b/sys/i386/linux/linux_sysent.c index 7be646f34144..b8893008944b 100644 --- a/sys/i386/linux/linux_sysent.c +++ b/sys/i386/linux/linux_sysent.c @@ -306,8 +306,8 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 289 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 294 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 295 = linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 296 = linux_mkdirat */ @@ -346,7 +346,7 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_epoll_create1_args), .sy_call = 
(sy_call_t *)linux_epoll_create1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 329 = linux_epoll_create1 */ { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 330 = linux_dup3 */ { .sy_narg = AS(linux_pipe2_args), .sy_call = (sy_call_t *)linux_pipe2, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pipe2 */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ + { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ { .sy_narg = AS(linux_preadv_args), .sy_call = (sy_call_t *)linux_preadv, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_preadv */ { .sy_narg = AS(linux_pwritev_args), .sy_call = (sy_call_t *)linux_pwritev, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_pwritev */ { .sy_narg = AS(linux_rt_tgsigqueueinfo_args), .sy_call = (sy_call_t *)linux_rt_tgsigqueueinfo, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 335 = linux_rt_tgsigqueueinfo */ diff --git a/sys/i386/linux/linux_systrace_args.c b/sys/i386/linux/linux_systrace_args.c index f3e3c32a2bbf..563d1a795ae1 100644 --- a/sys/i386/linux/linux_systrace_args.c +++ b/sys/i386/linux/linux_systrace_args.c @@ -2071,12 +2071,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ case 292: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 293: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -2410,7 +2417,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_init1 */ case 332: { - *n_args = 0; + struct linux_inotify_init1_args *p = params; + iarg[a++] = p->flags; /* l_int */ + *n_args = 1; break; } /* linux_preadv */ @@ -6604,9 +6613,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 292: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 293: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 294: @@ -7172,6 +7204,13 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + switch (ndx) { + case 0: + p = "l_int"; + break; + default: + break; + }; break; /* linux_preadv */ case 333: @@ -9889,8 +9928,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 291: /* linux_inotify_add_watch */ case 292: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 293: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 294: /* linux_openat */ @@ -10062,6 +10107,9 @@ 
systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_preadv */ case 333: if (ndx == 0 || ndx == 1) diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master index 958336be0f08..2113ea51ac5d 100644 --- a/sys/i386/linux/syscalls.master +++ b/sys/i386/linux/syscalls.master @@ -1605,10 +1605,17 @@ int linux_inotify_init(void); } 292 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 293 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } ; Linux 2.6.16: 294 AUE_NULL STD { @@ -1872,7 +1879,9 @@ ); } 332 AUE_NULL STD { - int linux_inotify_init1(void); + int linux_inotify_init1( + l_int flags + ); } ; Linux 2.6.30: 333 AUE_NULL STD { diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index a48a513aa3b5..34e71a0665ed 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -659,4 +659,6 @@ struct sysent sysent[] = { { .sy_narg = AS(fchroot_args), .sy_call = (sy_call_t *)sys_fchroot, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 590 = fchroot */ { .sy_narg = AS(setcred_args), .sy_call = (sy_call_t *)sys_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 591 = setcred */ { .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ + { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */ + { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ }; diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index c8b01afeab4f..dcd38c6e6fbe 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -1637,6 +1637,12 @@ uifree(struct uidinfo *uip) if (uip->ui_pipecnt != 0) printf("freeing uidinfo: uid = %d, pipecnt = %ld\n", uip->ui_uid, uip->ui_pipecnt); + if (uip->ui_inotifycnt != 0) + printf("freeing uidinfo: uid = %d, inotifycnt = %ld\n", + uip->ui_uid, uip->ui_inotifycnt); + if (uip->ui_inotifywatchcnt != 0) + printf("freeing uidinfo: uid = %d, inotifywatchcnt = %ld\n", + uip->ui_uid, uip->ui_inotifywatchcnt); free(uip, M_UIDINFO); } @@ -1742,6 +1748,21 @@ chgpipecnt(struct uidinfo *uip, int diff, rlim_t max) return (chglimit(uip, &uip->ui_pipecnt, diff, max, "pipecnt")); } +int +chginotifycnt(struct uidinfo *uip, int diff, rlim_t max) +{ + + return (chglimit(uip, &uip->ui_inotifycnt, diff, max, "inotifycnt")); +} + +int +chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t max) +{ + + return (chglimit(uip, &uip->ui_inotifywatchcnt, diff, max, + "inotifywatchcnt")); +} + static int sysctl_kern_proc_rlimit_usage(SYSCTL_HANDLER_ARGS) { diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c index 17b53208157a..35b258e68701 100644 --- a/sys/kern/kern_sendfile.c +++ b/sys/kern/kern_sendfile.c @@ -27,12 +27,12 @@ * SUCH DAMAGE. 
*/ -#include <sys/cdefs.h> #include "opt_kern_tls.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/capsicum.h> +#include <sys/inotify.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/ktls.h> @@ -1246,6 +1246,8 @@ out: */ if (error == 0) { td->td_retval[0] = 0; + if (sbytes > 0 && vp != NULL) + INOTIFY(vp, IN_ACCESS); } if (sent != NULL) { (*sent) = sbytes; diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 4565abc4b540..a61ebfc5c7c8 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -4139,7 +4139,7 @@ coredump(struct thread *td) struct flock lf; struct vattr vattr; size_t fullpathsize; - int error, error1, locked; + int error, error1, jid, locked, ppid, sig; char *name; /* name of corefile */ void *rl_cookie; off_t limit; @@ -4168,6 +4168,10 @@ coredump(struct thread *td) PROC_UNLOCK(p); return (EFBIG); } + + ppid = p->p_oppid; + sig = p->p_sig; + jid = p->p_ucred->cr_prison->pr_id; PROC_UNLOCK(p); error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, @@ -4253,6 +4257,9 @@ coredump(struct thread *td) } devctl_safe_quote_sb(sb, name); sbuf_putc(sb, '"'); + + sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d", + jid, p->p_pid, ppid, sig); if (sbuf_finish(sb) == 0) devctl_notify("kernel", "signal", "coredump", sbuf_data(sb)); out2: diff --git a/sys/kern/subr_capability.c b/sys/kern/subr_capability.c index 7cc6fb593697..5ad5b0af1681 100644 --- a/sys/kern/subr_capability.c +++ b/sys/kern/subr_capability.c @@ -74,6 +74,10 @@ const cap_rights_t cap_getsockopt_rights = CAP_RIGHTS_INITIALIZER(CAP_GETSOCKOPT); const cap_rights_t cap_getsockname_rights = CAP_RIGHTS_INITIALIZER(CAP_GETSOCKNAME); +const cap_rights_t cap_inotify_add_rights = + CAP_RIGHTS_INITIALIZER(CAP_INOTIFY_ADD); +const cap_rights_t cap_inotify_rm_rights = + CAP_RIGHTS_INITIALIZER(CAP_INOTIFY_RM); const cap_rights_t cap_ioctl_rights = CAP_RIGHTS_INITIALIZER(CAP_IOCTL); const cap_rights_t cap_listen_rights = CAP_RIGHTS_INITIALIZER(CAP_LISTEN); const cap_rights_t cap_linkat_source_rights = diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index d31ff3b939cc..94e44d888181 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -37,16 +37,17 @@ #include "opt_capsicum.h" #include "opt_ktrace.h" -#define EXTERR_CATEGORY EXTERR_CAT_FILEDESC +#define EXTERR_CATEGORY EXTERR_CAT_GENIO #include <sys/param.h> #include <sys/systm.h> #include <sys/sysproto.h> #include <sys/capsicum.h> +#include <sys/exterrvar.h> #include <sys/filedesc.h> #include <sys/filio.h> #include <sys/fcntl.h> #include <sys/file.h> -#include <sys/exterrvar.h> +#include <sys/inotify.h> #include <sys/lock.h> #include <sys/proc.h> #include <sys/signalvar.h> @@ -195,7 +196,7 @@ sys_read(struct thread *td, struct read_args *uap) int error; if (uap->nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; @@ -233,7 +234,7 @@ kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset) int error; if (nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = buf; aiov.iov_len = nbyte; auio.uio_iov = &aiov; @@ -329,7 +330,7 @@ kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset) error = ESPIPE; else if (offset < 0 && (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) - error = EINVAL; + error = EXTERROR(EINVAL, "neg offset"); else error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET); fdrop(fp, td); @@ -396,7 
+397,7 @@ sys_write(struct thread *td, struct write_args *uap) int error; if (uap->nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = (void *)(uintptr_t)uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; @@ -435,7 +436,7 @@ kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte, int error; if (nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = (void *)(uintptr_t)buf; aiov.iov_len = nbyte; auio.uio_iov = &aiov; @@ -531,7 +532,7 @@ kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset) error = ESPIPE; else if (offset < 0 && (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) - error = EINVAL; + error = EXTERROR(EINVAL, "neg offset"); else error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET); fdrop(fp, td); @@ -602,14 +603,14 @@ kern_ftruncate(struct thread *td, int fd, off_t length) AUDIT_ARG_FD(fd); if (length < 0) - return (EINVAL); + return (EXTERROR(EINVAL, "negative length")); error = fget(td, fd, &cap_ftruncate_rights, &fp); if (error) return (error); AUDIT_ARG_FILE(td->td_proc, fp); if (!(fp->f_flag & FWRITE)) { fdrop(fp, td); - return (EINVAL); + return (EXTERROR(EINVAL, "non-writable")); } error = fo_truncate(fp, length, td->td_ucred, td); fdrop(fp, td); @@ -840,8 +841,10 @@ kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) int error; AUDIT_ARG_FD(fd); - if (offset < 0 || len <= 0) - return (EINVAL); + if (offset < 0) + return (EXTERROR(EINVAL, "negative offset")); + if (len <= 0) + return (EXTERROR(EINVAL, "negative length")); /* Check for wrap. */ if (offset > OFF_MAX - len) return (EFBIG); @@ -898,16 +901,21 @@ kern_fspacectl(struct thread *td, int fd, int cmd, AUDIT_ARG_FFLAGS(flags); if (rqsr == NULL) - return (EINVAL); + return (EXTERROR(EINVAL, "no range")); rmsr = *rqsr; if (rmsrp != NULL) *rmsrp = rmsr; - if (cmd != SPACECTL_DEALLOC || - rqsr->r_offset < 0 || rqsr->r_len <= 0 || - rqsr->r_offset > OFF_MAX - rqsr->r_len || - (flags & ~SPACECTL_F_SUPPORTED) != 0) - return (EINVAL); + if (cmd != SPACECTL_DEALLOC) + return (EXTERROR(EINVAL, "cmd", cmd)); + if (rqsr->r_offset < 0) + return (EXTERROR(EINVAL, "neg offset")); + if (rqsr->r_len <= 0) + return (EXTERROR(EINVAL, "neg len")); + if (rqsr->r_offset > OFF_MAX - rqsr->r_len) + return (EXTERROR(EINVAL, "offset too large")); + if ((flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EXTERROR(EINVAL, "reserved flags", flags)); error = fget_write(td, fd, &cap_pwrite_rights, &fp); if (error != 0) @@ -939,7 +947,6 @@ int kern_specialfd(struct thread *td, int type, void *arg) { struct file *fp; - struct specialfd_eventfd *ae; int error, fd, fflags; fflags = 0; @@ -948,14 +955,24 @@ kern_specialfd(struct thread *td, int type, void *arg) return (error); switch (type) { - case SPECIALFD_EVENTFD: + case SPECIALFD_EVENTFD: { + struct specialfd_eventfd *ae; + ae = arg; if ((ae->flags & EFD_CLOEXEC) != 0) fflags |= O_CLOEXEC; error = eventfd_create_file(td, fp, ae->initval, ae->flags); break; + } + case SPECIALFD_INOTIFY: { + struct specialfd_inotify *si; + + si = arg; + error = inotify_create_file(td, fp, si->flags, &fflags); + break; + } default: - error = EINVAL; + error = EXTERROR(EINVAL, "invalid type", type); break; } @@ -970,13 +987,14 @@ kern_specialfd(struct thread *td, int type, void *arg) int sys___specialfd(struct thread *td, struct __specialfd_args *args) { - struct specialfd_eventfd ae; int error; switch (args->type) { - case SPECIALFD_EVENTFD: + 
case SPECIALFD_EVENTFD: { + struct specialfd_eventfd ae; + if (args->len != sizeof(struct specialfd_eventfd)) { - error = EINVAL; + error = EXTERROR(EINVAL, "eventfd params ABI"); break; } error = copyin(args->req, &ae, sizeof(ae)); @@ -984,13 +1002,27 @@ sys___specialfd(struct thread *td, struct __specialfd_args *args) break; if ((ae.flags & ~(EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE)) != 0) { - error = EINVAL; + error = EXTERROR(EINVAL, "reserved flag"); break; } error = kern_specialfd(td, args->type, &ae); break; + } + case SPECIALFD_INOTIFY: { + struct specialfd_inotify si; + + if (args->len != sizeof(si)) { + error = EINVAL; + break; + } + error = copyin(args->req, &si, sizeof(si)); + if (error != 0) + break; + error = kern_specialfd(td, args->type, &si); + break; + } default: - error = EINVAL; + error = EXTERROR(EINVAL, "unknown type", args->type); break; } return (error); @@ -1166,7 +1198,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, int error, lf, ndu; if (nd < 0) - return (EINVAL); + return (EXTERROR(EINVAL, "negative ndescs")); fdp = td->td_proc->p_fd; ndu = nd; lf = fdp->fd_nfiles; @@ -1259,7 +1291,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, rtv = *tvp; if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || rtv.tv_usec >= 1000000) { - error = EINVAL; + error = EXTERROR(EINVAL, "invalid timeval"); goto done; } if (!timevalisset(&rtv)) @@ -1491,7 +1523,7 @@ sys_poll(struct thread *td, struct poll_args *uap) if (uap->timeout != INFTIM) { if (uap->timeout < 0) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timeout")); ts.tv_sec = uap->timeout / 1000; ts.tv_nsec = (uap->timeout % 1000) * 1000000; tsp = &ts; @@ -1516,7 +1548,7 @@ kern_poll_kfds(struct thread *td, struct pollfd *kfds, u_int nfds, precision = 0; if (tsp != NULL) { if (!timespecvalid_interval(tsp)) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timespec")); if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) sbt = 0; else { @@ -1619,7 +1651,7 @@ kern_poll(struct thread *td, struct pollfd *ufds, u_int nfds, int error; if (kern_poll_maxfds(nfds)) - return (EINVAL); + return (EXTERROR(EINVAL, "too large nfds")); if (nfds > nitems(stackfds)) kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); else @@ -1796,7 +1828,7 @@ selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td) rtv = *tvp; if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || rtv.tv_usec >= 1000000) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timeval")); if (!timevalisset(&rtv)) asbt = 0; else if (rtv.tv_sec <= INT32_MAX) { @@ -2173,7 +2205,7 @@ kern_kcmp(struct thread *td, pid_t pid1, pid_t pid2, int type, (uintptr_t)p2->p_vmspace); break; default: - error = EINVAL; + error = EXTERROR(EINVAL, "unknown op"); break; } diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index fa36cc824078..90a4f3a7dad8 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -598,4 +598,6 @@ const char *syscallnames[] = { "fchroot", /* 590 = fchroot */ "setcred", /* 591 = setcred */ "exterrctl", /* 592 = exterrctl */ + "inotify_add_watch_at", /* 593 = inotify_add_watch_at */ + "inotify_rm_watch", /* 594 = inotify_rm_watch */ }; diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 08b557a7a540..2ab17e036d5c 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -3356,4 +3356,19 @@ _In_reads_bytes_(4) void *ptr ); } +593 AUE_INOTIFY STD|CAPENABLED { + int inotify_add_watch_at( + int fd, + int dfd, + _In_z_ const char *path, + uint32_t mask + ); + } +594 
AUE_INOTIFY STD|CAPENABLED { + int inotify_rm_watch( + int fd, + int wd + ); + } + ; vim: syntax=off diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index 15789d3eb5fa..90b21616a558 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -3482,6 +3482,24 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 3; break; } + /* inotify_add_watch_at */ + case 593: { + struct inotify_add_watch_at_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->dfd; /* int */ + uarg[a++] = (intptr_t)p->path; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 4; + break; + } + /* inotify_rm_watch */ + case 594: { + struct inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->wd; /* int */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -9317,6 +9335,38 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* inotify_add_watch_at */ + case 593: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "userland const char *"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* inotify_rm_watch */ + case 594: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11305,6 +11355,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* inotify_add_watch_at */ + case 593: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* inotify_rm_watch */ + case 594: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 883beaf6d1da..3d455b3874cc 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -41,6 +41,7 @@ #include <sys/counter.h> #include <sys/filedesc.h> #include <sys/fnv_hash.h> +#include <sys/inotify.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/lock.h> @@ -2629,6 +2630,14 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, atomic_store_ptr(&dvp->v_cache_dd, ncp); } else if (vp != NULL) { /* + * Take the slow path in INOTIFY(). This flag will be lazily + * cleared by cache_vop_inotify() once all directories referring + * to vp are unwatched. + */ + if (__predict_false((vn_irflag_read(dvp) & VIRF_INOTIFY) != 0)) + vn_irflag_set_cond(vp, VIRF_INOTIFY_PARENT); + + /* * For this case, the cache entry maps both the * directory name in it and the name ".." for the * directory's parent. @@ -4008,6 +4017,56 @@ out: return (error); } +void +cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie) +{ + struct mtx *vlp; + struct namecache *ncp; + int isdir; + bool logged, self; + + isdir = vp->v_type == VDIR ? IN_ISDIR : 0; + self = (vn_irflag_read(vp) & VIRF_INOTIFY) != 0 && + (vp->v_type != VDIR || (event & ~_IN_DIR_EVENTS) != 0); + + if (self) { + int selfevent; + + if (event == _IN_ATTRIB_LINKCOUNT) + selfevent = IN_ATTRIB; + else + selfevent = event; + inotify_log(vp, NULL, 0, selfevent | isdir, cookie); + } + if ((event & IN_ALL_EVENTS) == 0) + return; + + logged = false; + vlp = VP2VNODELOCK(vp); + mtx_lock(vlp); + TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) { + if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) + continue; + if ((vn_irflag_read(ncp->nc_dvp) & VIRF_INOTIFY) != 0) { + /* + * XXX-MJ if the vnode has two links in the same + * dir, we'll log the same event twice. 
+ */ + inotify_log(ncp->nc_dvp, ncp->nc_name, ncp->nc_nlen, + event | isdir, cookie); + logged = true; + } + } + if (!logged && (vn_irflag_read(vp) & VIRF_INOTIFY_PARENT) != 0) { + /* + * We didn't find a watched directory that contains this vnode, + * so stop calling VOP_INOTIFY for operations on the vnode. + */ + vn_irflag_unset(vp, VIRF_INOTIFY_PARENT); + } + mtx_unlock(vlp); +} + #ifdef DDB static void db_print_vpath(struct vnode *vp) diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index be49c0887609..2a01ec1e307e 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -39,6 +39,7 @@ #include <sys/conf.h> #include <sys/event.h> #include <sys/filio.h> +#include <sys/inotify.h> #include <sys/kernel.h> #include <sys/limits.h> #include <sys/lock.h> @@ -119,6 +120,8 @@ struct vop_vector default_vnodeops = { .vop_getwritemount = vop_stdgetwritemount, .vop_inactive = VOP_NULL, .vop_need_inactive = vop_stdneed_inactive, + .vop_inotify = vop_stdinotify, + .vop_inotify_add_watch = vop_stdinotify_add_watch, .vop_ioctl = vop_stdioctl, .vop_kqfilter = vop_stdkqfilter, .vop_islocked = vop_stdislocked, @@ -1306,6 +1309,20 @@ vop_stdneed_inactive(struct vop_need_inactive_args *ap) } int +vop_stdinotify(struct vop_inotify_args *ap) +{ + vn_inotify(ap->a_vp, ap->a_dvp, ap->a_cnp, ap->a_event, ap->a_cookie); + return (0); +} + +int +vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *ap) +{ + return (vn_inotify_add_watch(ap->a_vp, ap->a_sc, ap->a_mask, + ap->a_wdp, ap->a_td)); +} + +int vop_stdioctl(struct vop_ioctl_args *ap) { struct vnode *vp; diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c new file mode 100644 index 000000000000..9562350c897f --- /dev/null +++ b/sys/kern/vfs_inotify.c @@ -0,0 +1,1008 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. 
+ */ + +#include "opt_ktrace.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/caprights.h> +#include <sys/counter.h> +#include <sys/dirent.h> +#define EXTERR_CATEGORY EXTERR_CAT_INOTIFY +#include <sys/exterrvar.h> +#include <sys/fcntl.h> +#include <sys/file.h> +#include <sys/filio.h> +#include <sys/inotify.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/ktrace.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/namei.h> +#include <sys/poll.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/resourcevar.h> +#include <sys/selinfo.h> +#include <sys/stat.h> +#include <sys/syscallsubr.h> +#include <sys/sysctl.h> +#include <sys/sysent.h> +#include <sys/syslimits.h> +#include <sys/sysproto.h> +#include <sys/tree.h> +#include <sys/user.h> +#include <sys/vnode.h> + +uint32_t inotify_rename_cookie; + +static SYSCTL_NODE(_vfs, OID_AUTO, inotify, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "inotify configuration"); + +static int inotify_max_queued_events = 16384; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_queued_events, CTLFLAG_RWTUN, + &inotify_max_queued_events, 0, + "Maximum number of events to queue on an inotify descriptor"); + +static int inotify_max_user_instances = 256; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_instances, CTLFLAG_RWTUN, + &inotify_max_user_instances, 0, + "Maximum number of inotify descriptors per user"); + +static int inotify_max_user_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_watches, CTLFLAG_RWTUN, + &inotify_max_user_watches, 0, + "Maximum number of inotify watches per user"); + +static int inotify_max_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_watches, CTLFLAG_RWTUN, + &inotify_max_watches, 0, + "Maximum number of inotify watches system-wide"); + +static int inotify_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, watches, CTLFLAG_RD, + &inotify_watches, 0, + "Total number of inotify watches currently in use"); + +static int inotify_coalesce = 1; +SYSCTL_INT(_vfs_inotify, OID_AUTO, coalesce, CTLFLAG_RWTUN, + &inotify_coalesce, 0, + "Coalesce inotify events when possible"); + +static COUNTER_U64_DEFINE_EARLY(inotify_event_drops); +SYSCTL_COUNTER_U64(_vfs_inotify, OID_AUTO, event_drops, CTLFLAG_RD, + &inotify_event_drops, + "Number of inotify events dropped due to limits or allocation failures"); + +static fo_rdwr_t inotify_read; +static fo_ioctl_t inotify_ioctl; +static fo_poll_t inotify_poll; +static fo_kqfilter_t inotify_kqfilter; +static fo_stat_t inotify_stat; +static fo_close_t inotify_close; +static fo_fill_kinfo_t inotify_fill_kinfo; + +static const struct fileops inotifyfdops = { + .fo_read = inotify_read, + .fo_write = invfo_rdwr, + .fo_truncate = invfo_truncate, + .fo_ioctl = inotify_ioctl, + .fo_poll = inotify_poll, + .fo_kqfilter = inotify_kqfilter, + .fo_stat = inotify_stat, + .fo_close = inotify_close, + .fo_chmod = invfo_chmod, + .fo_chown = invfo_chown, + .fo_sendfile = invfo_sendfile, + .fo_fill_kinfo = inotify_fill_kinfo, + .fo_cmp = file_kcmp_generic, + .fo_flags = DFLAG_PASSABLE, +}; + +static void filt_inotifydetach(struct knote *kn); +static int filt_inotifyevent(struct knote *kn, long hint); + +static const struct filterops inotify_rfiltops = { + .f_isfd = 1, + .f_detach = filt_inotifydetach, + .f_event = filt_inotifyevent, +}; + +static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures"); + +struct inotify_record { + STAILQ_ENTRY(inotify_record) link; + struct inotify_event ev; +}; + +static uint64_t inotify_ino = 1; + +/* + * On LP64 systems this occupies 64 bytes, so we 
don't get internal + * fragmentation by allocating watches with malloc(9). If the size changes, + * consider using a UMA zone to improve memory efficiency. + */ +struct inotify_watch { + struct inotify_softc *sc; /* back-pointer */ + int wd; /* unique ID */ + uint32_t mask; /* event mask */ + struct vnode *vp; /* vnode being watched, refed */ + RB_ENTRY(inotify_watch) ilink; /* inotify linkage */ + TAILQ_ENTRY(inotify_watch) vlink; /* vnode linkage */ +}; + +static void +inotify_init(void *arg __unused) +{ + /* Don't let a user hold too many vnodes. */ + inotify_max_user_watches = desiredvnodes / 3; + /* Don't let the system hold too many vnodes. */ + inotify_max_watches = desiredvnodes / 2; +} +SYSINIT(inotify, SI_SUB_VFS, SI_ORDER_ANY, inotify_init, NULL); + +static int +inotify_watch_cmp(const struct inotify_watch *a, + const struct inotify_watch *b) +{ + if (a->wd < b->wd) + return (-1); + else if (a->wd > b->wd) + return (1); + else + return (0); +} +RB_HEAD(inotify_watch_tree, inotify_watch); +RB_GENERATE_STATIC(inotify_watch_tree, inotify_watch, ilink, inotify_watch_cmp); + +struct inotify_softc { + struct mtx lock; /* serialize all softc writes */ + STAILQ_HEAD(, inotify_record) pending; /* events waiting to be read */ + struct inotify_record overflow; /* preallocated record */ + int nextwatch; /* next watch ID to try */ + int npending; /* number of pending events */ + size_t nbpending; /* bytes available to read */ + uint64_t ino; /* unique identifier */ + struct inotify_watch_tree watches; /* active watches */ + struct selinfo sel; /* select/poll/kevent info */ + struct ucred *cred; /* credential ref */ +}; + +static struct inotify_record * +inotify_dequeue(struct inotify_softc *sc) +{ + struct inotify_record *rec; + + mtx_assert(&sc->lock, MA_OWNED); + KASSERT(!STAILQ_EMPTY(&sc->pending), + ("%s: queue for %p is empty", __func__, sc)); + + rec = STAILQ_FIRST(&sc->pending); + STAILQ_REMOVE_HEAD(&sc->pending, link); + sc->npending--; + sc->nbpending -= sizeof(rec->ev) + rec->ev.len; + return (rec); +} + +static void +inotify_enqueue(struct inotify_softc *sc, struct inotify_record *rec, bool head) +{ + mtx_assert(&sc->lock, MA_OWNED); + + if (head) + STAILQ_INSERT_HEAD(&sc->pending, rec, link); + else + STAILQ_INSERT_TAIL(&sc->pending, rec, link); + sc->npending++; + sc->nbpending += sizeof(rec->ev) + rec->ev.len; +} + +static int +inotify_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags, + struct thread *td) +{ + struct inotify_softc *sc; + struct inotify_record *rec; + int error; + bool first; + + sc = fp->f_data; + error = 0; + + mtx_lock(&sc->lock); + while (STAILQ_EMPTY(&sc->pending)) { + if ((flags & IO_NDELAY) != 0 || (fp->f_flag & FNONBLOCK) != 0) { + mtx_unlock(&sc->lock); + return (EWOULDBLOCK); + } + error = msleep(&sc->pending, &sc->lock, PCATCH, "inotify", 0); + if (error != 0) { + mtx_unlock(&sc->lock); + return (error); + } + } + for (first = true; !STAILQ_EMPTY(&sc->pending); first = false) { + size_t len; + + rec = inotify_dequeue(sc); + len = sizeof(rec->ev) + rec->ev.len; + if (uio->uio_resid < (ssize_t)len) { + inotify_enqueue(sc, rec, true); + if (first) { + error = EXTERROR(EINVAL, + "read buffer is too small"); + } + break; + } + mtx_unlock(&sc->lock); + error = uiomove(&rec->ev, len, uio); +#ifdef KTRACE + if (error == 0 && KTRPOINT(td, KTR_STRUCT)) + ktrstruct("inotify", &rec->ev, len); +#endif + mtx_lock(&sc->lock); + if (error != 0) { + inotify_enqueue(sc, rec, true); + mtx_unlock(&sc->lock); + return (error); + } + if (rec == 
&sc->overflow) { + /* + * Signal to inotify_queue_record() that the overflow + * record can be reused. + */ + memset(rec, 0, sizeof(*rec)); + } else { + free(rec, M_INOTIFY); + } + } + mtx_unlock(&sc->lock); + return (error); +} + +static int +inotify_ioctl(struct file *fp, u_long com, void *data, struct ucred *cred, + struct thread *td) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + switch (com) { + case FIONREAD: + *(int *)data = (int)sc->nbpending; + return (0); + case FIONBIO: + case FIOASYNC: + return (0); + default: + return (ENOTTY); + } + + return (0); +} + +static int +inotify_poll(struct file *fp, int events, struct ucred *cred, struct thread *td) +{ + struct inotify_softc *sc; + int revents; + + sc = fp->f_data; + revents = 0; + + mtx_lock(&sc->lock); + if ((events & (POLLIN | POLLRDNORM)) != 0 && sc->npending > 0) + revents |= events & (POLLIN | POLLRDNORM); + else + selrecord(td, &sc->sel); + mtx_unlock(&sc->lock); + return (revents); +} + +static void +filt_inotifydetach(struct knote *kn) +{ + struct inotify_softc *sc; + + sc = kn->kn_hook; + knlist_remove(&sc->sel.si_note, kn, 0); +} + +static int +filt_inotifyevent(struct knote *kn, long hint) +{ + struct inotify_softc *sc; + + sc = kn->kn_hook; + mtx_assert(&sc->lock, MA_OWNED); + kn->kn_data = sc->nbpending; + return (kn->kn_data > 0); +} + +static int +inotify_kqfilter(struct file *fp, struct knote *kn) +{ + struct inotify_softc *sc; + + if (kn->kn_filter != EVFILT_READ) + return (EINVAL); + sc = fp->f_data; + kn->kn_fop = &inotify_rfiltops; + kn->kn_hook = sc; + knlist_add(&sc->sel.si_note, kn, 0); + return (0); +} + +static int +inotify_stat(struct file *fp, struct stat *sb, struct ucred *cred) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + memset(sb, 0, sizeof(*sb)); + sb->st_mode = S_IFREG | S_IRUSR; + sb->st_blksize = sizeof(struct inotify_event) + _IN_NAMESIZE(NAME_MAX); + mtx_lock(&sc->lock); + sb->st_size = sc->nbpending; + sb->st_blocks = sc->npending; + sb->st_uid = sc->cred->cr_ruid; + sb->st_gid = sc->cred->cr_rgid; + sb->st_ino = sc->ino; + mtx_unlock(&sc->lock); + return (0); +} + +static void +inotify_unlink_watch_locked(struct inotify_softc *sc, struct inotify_watch *watch) +{ + struct vnode *vp; + + vp = watch->vp; + mtx_assert(&vp->v_pollinfo->vpi_lock, MA_OWNED); + + atomic_subtract_int(&inotify_watches, 1); + (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0); + + TAILQ_REMOVE(&vp->v_pollinfo->vpi_inotify, watch, vlink); + if (TAILQ_EMPTY(&vp->v_pollinfo->vpi_inotify)) + vn_irflag_unset_locked(vp, VIRF_INOTIFY); +} + +/* + * Assumes that the watch has already been removed from its softc. 
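
For context, a minimal userspace consumer of the read path above might look like the following. This is only a sketch: it assumes the Linux-compatible <sys/inotify.h> record layout used by this file and omits error handling.

    #include <sys/inotify.h>
    #include <limits.h>
    #include <stdio.h>
    #include <unistd.h>

    static void
    drain_events(int ifd)
    {
            /* Large enough for one event carrying a NAME_MAX-sized name. */
            char buf[sizeof(struct inotify_event) + NAME_MAX + 1]
                __attribute__((aligned(__alignof__(struct inotify_event))));
            ssize_t n, off;

            /* A single read() may return several variable-length records. */
            n = read(ifd, buf, sizeof(buf));
            off = 0;
            while (off < n) {
                    struct inotify_event *ev =
                        (struct inotify_event *)(buf + off);

                    printf("wd %d mask %#x name \"%s\"\n", ev->wd, ev->mask,
                        ev->len > 0 ? ev->name : "");
                    off += sizeof(*ev) + ev->len;
            }
    }
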
+ */ +static void +inotify_remove_watch(struct inotify_watch *watch) +{ + struct inotify_softc *sc; + struct vnode *vp; + + sc = watch->sc; + + vp = watch->vp; + mtx_lock(&vp->v_pollinfo->vpi_lock); + inotify_unlink_watch_locked(sc, watch); + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + vrele(vp); + free(watch, M_INOTIFY); +} + +static int +inotify_close(struct file *fp, struct thread *td) +{ + struct inotify_softc *sc; + struct inotify_record *rec; + struct inotify_watch *watch; + + sc = fp->f_data; + + mtx_lock(&sc->lock); + (void)chginotifycnt(sc->cred->cr_ruidinfo, -1, 0); + while ((watch = RB_MIN(inotify_watch_tree, &sc->watches)) != NULL) { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + mtx_unlock(&sc->lock); + inotify_remove_watch(watch); + mtx_lock(&sc->lock); + } + while (!STAILQ_EMPTY(&sc->pending)) { + rec = inotify_dequeue(sc); + if (rec != &sc->overflow) + free(rec, M_INOTIFY); + } + mtx_unlock(&sc->lock); + seldrain(&sc->sel); + knlist_destroy(&sc->sel.si_note); + mtx_destroy(&sc->lock); + crfree(sc->cred); + free(sc, M_INOTIFY); + return (0); +} + +static int +inotify_fill_kinfo(struct file *fp, struct kinfo_file *kif, + struct filedesc *fdp) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + kif->kf_type = KF_TYPE_INOTIFY; + kif->kf_un.kf_inotify.kf_inotify_npending = sc->npending; + kif->kf_un.kf_inotify.kf_inotify_nbpending = sc->nbpending; + return (0); +} + +int +inotify_create_file(struct thread *td, struct file *fp, int flags, int *fflagsp) +{ + struct inotify_softc *sc; + int fflags; + + if ((flags & ~(IN_NONBLOCK | IN_CLOEXEC)) != 0) + return (EINVAL); + + if (!chginotifycnt(td->td_ucred->cr_ruidinfo, 1, + inotify_max_user_instances)) + return (EMFILE); + + sc = malloc(sizeof(*sc), M_INOTIFY, M_WAITOK | M_ZERO); + sc->nextwatch = 1; /* Required for compatibility. */ + STAILQ_INIT(&sc->pending); + RB_INIT(&sc->watches); + mtx_init(&sc->lock, "inotify", NULL, MTX_DEF); + knlist_init_mtx(&sc->sel.si_note, &sc->lock); + sc->cred = crhold(td->td_ucred); + sc->ino = atomic_fetchadd_64(&inotify_ino, 1); + + fflags = FREAD; + if ((flags & IN_NONBLOCK) != 0) + fflags |= FNONBLOCK; + if ((flags & IN_CLOEXEC) != 0) + *fflagsp |= O_CLOEXEC; + finit(fp, fflags, DTYPE_INOTIFY, sc, &inotifyfdops); + + return (0); +} + +static struct inotify_record * +inotify_alloc_record(uint32_t wd, const char *name, size_t namelen, int event, + uint32_t cookie, int waitok) +{ + struct inotify_event *evp; + struct inotify_record *rec; + + rec = malloc(sizeof(*rec) + _IN_NAMESIZE(namelen), M_INOTIFY, + waitok | M_ZERO); + if (rec == NULL) + return (NULL); + evp = &rec->ev; + evp->wd = wd; + evp->mask = event; + evp->cookie = cookie; + evp->len = _IN_NAMESIZE(namelen); + if (name != NULL) + memcpy(evp->name, name, namelen); + return (rec); +} + +static bool +inotify_can_coalesce(struct inotify_softc *sc, struct inotify_event *evp) +{ + struct inotify_record *prev; + + mtx_assert(&sc->lock, MA_OWNED); + + prev = STAILQ_LAST(&sc->pending, inotify_record, link); + return (prev != NULL && prev->ev.mask == evp->mask && + prev->ev.wd == evp->wd && prev->ev.cookie == evp->cookie && + prev->ev.len == evp->len && + (evp->len == 0 || strcmp(prev->ev.name, evp->name) == 0)); +} + +static void +inotify_overflow_event(struct inotify_event *evp) +{ + evp->mask = IN_Q_OVERFLOW; + evp->wd = -1; + evp->cookie = 0; + evp->len = 0; +} + +/* + * Put an event record on the queue for an inotify desscriptor. Return false if + * the record was not enqueued for some reason, true otherwise. 
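
When the preallocated overflow record below does get queued, the consumer sees a single event with IN_Q_OVERFLOW set. A sketch of the userspace side (resync_watches() is a hypothetical application callback):

    if ((ev->mask & IN_Q_OVERFLOW) != 0) {
            /*
             * The overflow record carries wd == -1, cookie == 0 and
             * len == 0; events were lost, so any state derived from
             * earlier notifications must be rebuilt.
             */
            resync_watches();       /* hypothetical application hook */
    }
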
+ */ +static bool +inotify_queue_record(struct inotify_softc *sc, struct inotify_record *rec) +{ + struct inotify_event *evp; + + mtx_assert(&sc->lock, MA_OWNED); + + evp = &rec->ev; + if (__predict_false(rec == &sc->overflow)) { + /* + * Is the overflow record already in the queue? If so, there's + * not much else we can do: we're here because a kernel memory + * shortage prevented new record allocations. + */ + counter_u64_add(inotify_event_drops, 1); + if (evp->mask == IN_Q_OVERFLOW) + return (false); + inotify_overflow_event(evp); + } else { + /* Try to coalesce duplicate events. */ + if (inotify_coalesce && inotify_can_coalesce(sc, evp)) + return (false); + + /* + * Would this one overflow the queue? If so, convert it to an + * overflow event and try again to coalesce. + */ + if (sc->npending >= inotify_max_queued_events) { + counter_u64_add(inotify_event_drops, 1); + inotify_overflow_event(evp); + if (inotify_can_coalesce(sc, evp)) + return (false); + } + } + inotify_enqueue(sc, rec, false); + selwakeup(&sc->sel); + KNOTE_LOCKED(&sc->sel.si_note, 0); + wakeup(&sc->pending); + return (true); +} + +static int +inotify_log_one(struct inotify_watch *watch, const char *name, size_t namelen, + int event, uint32_t cookie) +{ + struct inotify_watch key; + struct inotify_softc *sc; + struct inotify_record *rec; + int relecount; + bool allocfail; + + relecount = 0; + + sc = watch->sc; + rec = inotify_alloc_record(watch->wd, name, namelen, event, cookie, + M_NOWAIT); + if (rec == NULL) { + rec = &sc->overflow; + allocfail = true; + } else { + allocfail = false; + } + + mtx_lock(&sc->lock); + if (!inotify_queue_record(sc, rec) && rec != &sc->overflow) + free(rec, M_INOTIFY); + if ((watch->mask & IN_ONESHOT) != 0 || + (event & (IN_DELETE_SELF | IN_UNMOUNT)) != 0) { + if (!allocfail) { + rec = inotify_alloc_record(watch->wd, NULL, 0, + IN_IGNORED, 0, M_NOWAIT); + if (rec == NULL) + rec = &sc->overflow; + if (!inotify_queue_record(sc, rec) && + rec != &sc->overflow) + free(rec, M_INOTIFY); + } + + /* + * Remove the watch, taking care to handle races with + * inotify_close(). + */ + key.wd = watch->wd; + if (RB_FIND(inotify_watch_tree, &sc->watches, &key) != NULL) { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + inotify_unlink_watch_locked(sc, watch); + free(watch, M_INOTIFY); + + /* Defer vrele() to until locks are dropped. */ + relecount++; + } + } + mtx_unlock(&sc->lock); + return (relecount); +} + +void +inotify_log(struct vnode *vp, const char *name, size_t namelen, int event, + uint32_t cookie) +{ + struct inotify_watch *watch, *tmp; + int relecount; + + KASSERT((event & ~(IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT)) == 0, + ("inotify_log: invalid event %#x", event)); + + relecount = 0; + mtx_lock(&vp->v_pollinfo->vpi_lock); + TAILQ_FOREACH_SAFE(watch, &vp->v_pollinfo->vpi_inotify, vlink, tmp) { + KASSERT(watch->vp == vp, + ("inotify_log: watch %p vp != vp", watch)); + if ((watch->mask & event) != 0 || event == IN_UNMOUNT) { + relecount += inotify_log_one(watch, name, namelen, event, + cookie); + } + } + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + for (int i = 0; i < relecount; i++) + vrele(vp); +} + +/* + * An inotify event occurred on a watched vnode. + */ +void +vn_inotify(struct vnode *vp, struct vnode *dvp, struct componentname *cnp, + int event, uint32_t cookie) +{ + int isdir; + + VNPASS(vp->v_holdcnt > 0, vp); + + isdir = vp->v_type == VDIR ? IN_ISDIR : 0; + + if (dvp != NULL) { + VNPASS(dvp->v_holdcnt > 0, dvp); + + /* + * Should we log an event for the vnode itself? 
+ */ + if ((vn_irflag_read(vp) & VIRF_INOTIFY) != 0) { + int selfevent; + + switch (event) { + case _IN_MOVE_DELETE: + case IN_DELETE: + /* + * IN_DELETE_SELF is only generated when the + * last hard link of a file is removed. + */ + selfevent = IN_DELETE_SELF; + if (vp->v_type != VDIR) { + struct vattr va; + int error; + + error = VOP_GETATTR(vp, &va, cnp->cn_cred); + if (error == 0 && va.va_nlink != 0) + selfevent = 0; + } + break; + case IN_MOVED_FROM: + cookie = 0; + selfevent = IN_MOVE_SELF; + break; + case _IN_ATTRIB_LINKCOUNT: + selfevent = IN_ATTRIB; + break; + default: + selfevent = event; + break; + } + + if ((selfevent & ~_IN_DIR_EVENTS) != 0) { + inotify_log(vp, NULL, 0, selfevent | isdir, + cookie); + } + } + + /* + * Something is watching the directory through which this vnode + * was referenced, so we may need to log the event. + */ + if ((event & IN_ALL_EVENTS) != 0 && + (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0) { + inotify_log(dvp, cnp->cn_nameptr, + cnp->cn_namelen, event | isdir, cookie); + } + } else { + /* + * We don't know which watched directory might contain the + * vnode, so we have to fall back to searching the name cache. + */ + cache_vop_inotify(vp, event, cookie); + } +} + +int +vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask, + uint32_t *wdp, struct thread *td) +{ + struct inotify_watch *watch, *watch1; + uint32_t wd; + + /* + * If this is a directory, make sure all of its entries are present in + * the name cache so that we're able to look them up if an event occurs. + * The persistent reference on the directory prevents the outgoing name + * cache entries from being reclaimed. + */ + if (vp->v_type == VDIR) { + struct dirent *dp; + char *buf; + off_t off; + size_t buflen, len; + int eof, error; + + buflen = 128 * sizeof(struct dirent); + buf = malloc(buflen, M_TEMP, M_WAITOK); + + error = 0; + len = off = eof = 0; + for (;;) { + struct nameidata nd; + + error = vn_dir_next_dirent(vp, td, buf, buflen, &dp, + &len, &off, &eof); + if (error != 0) + break; + if (len == 0) + /* Finished reading. */ + break; + if (strcmp(dp->d_name, ".") == 0 || + strcmp(dp->d_name, "..") == 0) + continue; + + /* + * namei() consumes a reference on the starting + * directory if it's specified as a vnode. + */ + vrefact(vp); + NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, + dp->d_name, vp); + error = namei(&nd); + if (error != 0) + break; + vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT); + vrele(nd.ni_vp); + } + free(buf, M_TEMP); + if (error != 0) + return (error); + } + + /* + * The vnode referenced in kern_inotify_add_watch() might be different + * than this one if nullfs is in the picture. + */ + vrefact(vp); + watch = malloc(sizeof(*watch), M_INOTIFY, M_WAITOK | M_ZERO); + watch->sc = sc; + watch->vp = vp; + watch->mask = mask; + + /* + * Are we updating an existing watch? Search the vnode's list rather + * than that of the softc, as the former is likely to be shorter. + */ + v_addpollinfo(vp); + mtx_lock(&vp->v_pollinfo->vpi_lock); + TAILQ_FOREACH(watch1, &vp->v_pollinfo->vpi_inotify, vlink) { + if (watch1->sc == sc) + break; + } + mtx_lock(&sc->lock); + if (watch1 != NULL) { + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + /* + * We found an existing watch, update it based on our flags. 
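
The update semantics implemented below mirror Linux; in terms of the (assumed) libc wrapper inotify_add_watch(), they behave roughly as follows:

    int wd;

    wd = inotify_add_watch(ifd, "dir", IN_CREATE);
    /* Same path again: the mask is replaced by default... */
    wd = inotify_add_watch(ifd, "dir", IN_DELETE);
    /* ...IN_MASK_ADD ORs into the existing mask instead... */
    wd = inotify_add_watch(ifd, "dir", IN_ATTRIB | IN_MASK_ADD);
    /* ...and IN_MASK_CREATE refuses to modify an existing watch. */
    wd = inotify_add_watch(ifd, "dir", IN_OPEN | IN_MASK_CREATE); /* -1, EEXIST */
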
+ */ + if ((mask & IN_MASK_CREATE) != 0) { + mtx_unlock(&sc->lock); + vrele(vp); + free(watch, M_INOTIFY); + return (EEXIST); + } + if ((mask & IN_MASK_ADD) != 0) + watch1->mask |= mask; + else + watch1->mask = mask; + *wdp = watch1->wd; + mtx_unlock(&sc->lock); + vrele(vp); + free(watch, M_INOTIFY); + return (EJUSTRETURN); + } + + /* + * We're creating a new watch. Add it to the softc and vnode watch + * lists. + */ + do { + struct inotify_watch key; + + /* + * Search for the next available watch descriptor. This is + * implemented so as to avoid reusing watch descriptors for as + * long as possible. + */ + key.wd = wd = sc->nextwatch++; + watch1 = RB_FIND(inotify_watch_tree, &sc->watches, &key); + } while (watch1 != NULL || wd == 0); + watch->wd = wd; + RB_INSERT(inotify_watch_tree, &sc->watches, watch); + TAILQ_INSERT_TAIL(&vp->v_pollinfo->vpi_inotify, watch, vlink); + mtx_unlock(&sc->lock); + mtx_unlock(&vp->v_pollinfo->vpi_lock); + vn_irflag_set_cond(vp, VIRF_INOTIFY); + + *wdp = wd; + + return (0); +} + +void +vn_inotify_revoke(struct vnode *vp) +{ + if (vp->v_pollinfo == NULL) { + /* This is a nullfs vnode which shadows a watched vnode. */ + return; + } + inotify_log(vp, NULL, 0, IN_UNMOUNT, 0); +} + +static int +fget_inotify(struct thread *td, int fd, const cap_rights_t *needrightsp, + struct file **fpp) +{ + struct file *fp; + int error; + + error = fget(td, fd, needrightsp, &fp); + if (error != 0) + return (error); + if (fp->f_type != DTYPE_INOTIFY) { + fdrop(fp, td); + return (EINVAL); + } + *fpp = fp; + return (0); +} + +int +kern_inotify_add_watch(int fd, int dfd, const char *path, uint32_t mask, + struct thread *td) +{ + struct nameidata nd; + struct file *fp; + struct inotify_softc *sc; + struct vnode *vp; + uint32_t wd; + int count, error; + + fp = NULL; + vp = NULL; + + if ((mask & IN_ALL_EVENTS) == 0) + return (EXTERROR(EINVAL, "no events specified")); + if ((mask & (IN_MASK_ADD | IN_MASK_CREATE)) == + (IN_MASK_ADD | IN_MASK_CREATE)) + return (EXTERROR(EINVAL, + "IN_MASK_ADD and IN_MASK_CREATE are mutually exclusive")); + if ((mask & ~(IN_ALL_EVENTS | _IN_ALL_FLAGS | IN_UNMOUNT)) != 0) + return (EXTERROR(EINVAL, "unrecognized flag")); + + error = fget_inotify(td, fd, &cap_inotify_add_rights, &fp); + if (error != 0) + return (error); + sc = fp->f_data; + + NDINIT_AT(&nd, LOOKUP, + ((mask & IN_DONT_FOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF | + LOCKSHARED | AUDITVNODE1, UIO_USERSPACE, path, dfd); + error = namei(&nd); + if (error != 0) + goto out; + NDFREE_PNBUF(&nd); + vp = nd.ni_vp; + + error = VOP_ACCESS(vp, VREAD, td->td_ucred, td); + if (error != 0) + goto out; + + if ((mask & IN_ONLYDIR) != 0 && vp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } + + count = atomic_fetchadd_int(&inotify_watches, 1); + if (count > inotify_max_watches) { + atomic_subtract_int(&inotify_watches, 1); + error = ENOSPC; + goto out; + } + if (!chginotifywatchcnt(sc->cred->cr_ruidinfo, 1, + inotify_max_user_watches)) { + atomic_subtract_int(&inotify_watches, 1); + error = ENOSPC; + goto out; + } + error = VOP_INOTIFY_ADD_WATCH(vp, sc, mask, &wd, td); + if (error != 0) { + atomic_subtract_int(&inotify_watches, 1); + (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0); + if (error == EJUSTRETURN) { + /* We updated an existing watch, everything is ok. 
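
The ENOSPC paths above are governed by the vfs.inotify sysctls declared at the top of this file; a short sketch of querying one of them from userspace with sysctlbyname(3):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    static void
    show_watch_limit(void)
    {
            int maxw;
            size_t len = sizeof(maxw);

            if (sysctlbyname("vfs.inotify.max_user_watches", &maxw, &len,
                NULL, 0) == 0)
                    printf("per-user inotify watch limit: %d\n", maxw);
    }
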
*/ + error = 0; + } else { + goto out; + } + } + td->td_retval[0] = wd; + +out: + if (vp != NULL) + vput(vp); + fdrop(fp, td); + return (error); +} + +int +sys_inotify_add_watch_at(struct thread *td, + struct inotify_add_watch_at_args *uap) +{ + return (kern_inotify_add_watch(uap->fd, uap->dfd, uap->path, + uap->mask, td)); +} + +int +kern_inotify_rm_watch(int fd, uint32_t wd, struct thread *td) +{ + struct file *fp; + struct inotify_softc *sc; + struct inotify_record *rec; + struct inotify_watch key, *watch; + int error; + + error = fget_inotify(td, fd, &cap_inotify_rm_rights, &fp); + if (error != 0) + return (error); + sc = fp->f_data; + + rec = inotify_alloc_record(wd, NULL, 0, IN_IGNORED, 0, M_WAITOK); + + /* + * For compatibility with Linux, we do not remove pending events + * associated with the watch. Watch descriptors are implemented so as + * to avoid being reused for as long as possible, so one hopes that any + * pending events from the removed watch descriptor will be removed + * before the watch descriptor is recycled. + */ + key.wd = wd; + mtx_lock(&sc->lock); + watch = RB_FIND(inotify_watch_tree, &sc->watches, &key); + if (watch == NULL) { + free(rec, M_INOTIFY); + error = EINVAL; + } else { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + if (!inotify_queue_record(sc, rec)) { + free(rec, M_INOTIFY); + error = 0; + } + } + mtx_unlock(&sc->lock); + if (watch != NULL) + inotify_remove_watch(watch); + fdrop(fp, td); + return (error); +} + +int +sys_inotify_rm_watch(struct thread *td, struct inotify_rm_watch_args *uap) +{ + return (kern_inotify_rm_watch(uap->fd, uap->wd, td)); +} diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 86c7bdaa02c0..fb3e6a7a2534 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -75,14 +75,20 @@ static void NDVALIDATE_impl(struct nameidata *, int); #endif /* + * Reset ndp to its original state. + */ +#define NDRESET(ndp) do { \ + NDREINIT_DBG(ndp); \ + ndp->ni_resflags = 0; \ + ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \ +} while (0) +/* * Prepare namei() to restart. Reset components to its original state and set * ISRESTARTED flag which signals the underlying lookup code to change the root * from ABI root to actual root and prevents a further restarts. */ #define NDRESTART(ndp) do { \ - NDREINIT_DBG(ndp); \ - ndp->ni_resflags = 0; \ - ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \ + NDRESET(ndp); \ ndp->ni_cnd.cn_flags |= ISRESTARTED; \ } while (0) @@ -162,8 +168,8 @@ static struct vop_vector crossmp_vnodeops = { */ struct nameicap_tracker { - struct vnode *dp; TAILQ_ENTRY(nameicap_tracker) nm_link; + struct mount *mp; }; /* Zone for cap mode tracker elements used for dotdot capability checks. 
*/ @@ -192,49 +198,75 @@ SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN, "enables \"..\" components in path lookup in capability mode " "on non-local mount"); -static void +static int nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp) { struct nameicap_tracker *nt; + struct mount *mp; + int error; if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR) - return; + return (0); + mp = NULL; + error = VOP_GETWRITEMOUNT(dp, &mp); + if (error != 0) + return (error); nt = TAILQ_LAST(&ndp->ni_cap_tracker, nameicap_tracker_head); - if (nt != NULL && nt->dp == dp) - return; + if (nt != NULL && nt->mp == mp) { + vfs_rel(mp); + return (0); + } nt = malloc(sizeof(*nt), M_NAMEITRACKER, M_WAITOK); - vhold(dp); - nt->dp = dp; - TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); + nt->mp = mp; + error = lockmgr(&mp->mnt_renamelock, LK_SHARED | LK_NOWAIT, 0); + if (error != 0) { + MPASS(ndp->ni_nctrack_mnt == NULL); + ndp->ni_nctrack_mnt = mp; + free(nt, M_NAMEITRACKER); + error = ERESTART; + } else { + TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); + } + return (error); } static void -nameicap_cleanup_from(struct nameidata *ndp, struct nameicap_tracker *first) +nameicap_cleanup(struct nameidata *ndp, int error) { struct nameicap_tracker *nt, *nt1; + struct mount *mp; + + KASSERT((ndp->ni_nctrack_mnt == NULL && + TAILQ_EMPTY(&ndp->ni_cap_tracker)) || + (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, + ("tracker active and not strictrelative")); - nt = first; - TAILQ_FOREACH_FROM_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { + TAILQ_FOREACH_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { + mp = nt->mp; + lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); + vfs_rel(mp); TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link); - vdrop(nt->dp); free(nt, M_NAMEITRACKER); } -} -static void -nameicap_cleanup(struct nameidata *ndp) -{ - KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) || - (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative")); - nameicap_cleanup_from(ndp, NULL); + mp = ndp->ni_nctrack_mnt; + if (mp != NULL) { + if (error == ERESTART) { + lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0); + lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); + } + vfs_rel(mp); + ndp->ni_nctrack_mnt = NULL; + } } /* - * For dotdot lookups in capability mode, only allow the component - * lookup to succeed if the resulting directory was already traversed - * during the operation. This catches situations where already - * traversed directory is moved to different parent, and then we walk - * over it with dotdots. + * For dotdot lookups in capability mode, disallow walking over the + * directory no_rbeneath_dpp that was used as the starting point of + * the lookup. Since we take the mnt_renamelocks of all mounts we + * ever walked over during lookup, parallel renames are disabled. + * This prevents the situation where we circumvent walk over + * ni_rbeneath_dpp following dotdots. 
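
Roughly, the userspace-visible effect of this policy, assuming Capsicum and the default vfs.lookup_cap_dotdot=1 (error handling omitted):

    #include <sys/capsicum.h>
    #include <fcntl.h>

    static void
    dotdot_demo(void)
    {
            int dfd, fd;

            dfd = open("/tmp/sandbox", O_RDONLY | O_DIRECTORY);
            cap_enter();                    /* enter capability mode */
            /* ".." that stays at or beneath dfd is still permitted... */
            fd = openat(dfd, "subdir/../file", O_RDONLY);
            /* ...but walking above the starting directory fails. */
            fd = openat(dfd, "../escape", O_RDONLY);  /* -1, ENOTCAPABLE */
            (void)fd;
    }
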
* * Also allow to force failure of dotdot lookups for non-local * filesystems, where external agents might assist local lookups to @@ -243,7 +275,6 @@ nameicap_cleanup(struct nameidata *ndp) static int nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) { - struct nameicap_tracker *nt; struct mount *mp; if (dp == NULL || dp->v_type != VDIR || (ndp->ni_lcf & @@ -253,22 +284,16 @@ nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) NI_LCF_CAP_DOTDOT_KTR)) == NI_LCF_STRICTREL_KTR)) NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf); if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0) - return (ENOTCAPABLE); + goto violation; + if (dp == ndp->ni_rbeneath_dpp) + goto violation; mp = dp->v_mount; if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL && (mp->mnt_flag & MNT_LOCAL) == 0) - goto capfail; - TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head, - nm_link) { - if (dp == nt->dp) { - nt = TAILQ_NEXT(nt, nm_link); - if (nt != NULL) - nameicap_cleanup_from(ndp, nt); - return (0); - } - } + goto violation; + return (0); -capfail: +violation: if (__predict_false((ndp->ni_lcf & NI_LCF_STRICTREL_KTR) != 0)) NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf); return (ENOTCAPABLE); @@ -394,6 +419,8 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp) NI_LCF_CAP_DOTDOT; } } + if (error == 0 && (ndp->ni_lcf & NI_LCF_STRICTREL) != 0) + ndp->ni_rbeneath_dpp = *dpp; /* * If we are auditing the kernel pathname, save the user pathname. @@ -631,6 +658,7 @@ restart: error = namei_getpath(ndp); if (__predict_false(error != 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); return (error); @@ -661,12 +689,12 @@ restart: else if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir && (cnp->cn_flags & ISRESTARTED) == 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, ERESTART); NDRESTART(ndp); goto restart; } return (error); case CACHE_FPL_STATUS_PARTIAL: - TAILQ_INIT(&ndp->ni_cap_tracker); dp = ndp->ni_startdir; break; case CACHE_FPL_STATUS_DESTROYED: @@ -674,18 +702,21 @@ restart: error = namei_getpath(ndp); if (__predict_false(error != 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); return (error); } cnp->cn_nameptr = cnp->cn_pnbuf; /* FALLTHROUGH */ case CACHE_FPL_STATUS_ABORTED: - TAILQ_INIT(&ndp->ni_cap_tracker); MPASS(ndp->ni_lcf == 0); if (*cnp->cn_pnbuf == '\0') { if ((cnp->cn_flags & EMPTYPATH) != 0) { - return (namei_emptypath(ndp)); + error = namei_emptypath(ndp); + nameicap_cleanup(ndp, error); + return (error); } namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, ENOENT); SDT_PROBE4(vfs, namei, lookup, return, ENOENT, NULL, false, ndp); return (ENOENT); @@ -693,6 +724,7 @@ restart: error = namei_setup(ndp, &dp, &pwd); if (error != 0) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); return (error); } break; @@ -705,16 +737,23 @@ restart: ndp->ni_startdir = dp; error = vfs_lookup(ndp); if (error != 0) { - if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir && - error == ENOENT && - (cnp->cn_flags & ISRESTARTED) == 0)) { - nameicap_cleanup(ndp); - pwd_drop(pwd); - namei_cleanup_cnp(cnp); - NDRESTART(ndp); - goto restart; - } else + uint64_t was_restarted; + bool abi_restart; + + was_restarted = ndp->ni_cnd.cn_flags & + ISRESTARTED; + abi_restart = pwd->pwd_adir != pwd->pwd_rdir && + error == ENOENT && was_restarted == 0; + if (error != ERESTART && !abi_restart) goto out; + nameicap_cleanup(ndp, error); + pwd_drop(pwd); + namei_cleanup_cnp(cnp); + NDRESET(ndp); + if 
(abi_restart) + was_restarted = ISRESTARTED; + ndp->ni_cnd.cn_flags |= was_restarted; + goto restart; } /* @@ -723,7 +762,7 @@ restart: if ((cnp->cn_flags & ISSYMLINK) == 0) { SDT_PROBE4(vfs, namei, lookup, return, error, ndp->ni_vp, false, ndp); - nameicap_cleanup(ndp); + nameicap_cleanup(ndp, 0); pwd_drop(pwd); NDVALIDATE(ndp); return (0); @@ -756,10 +795,10 @@ restart: ndp->ni_vp = NULL; vrele(ndp->ni_dvp); out: - MPASS(error != 0); + MPASS(error != 0 && error != ERESTART); SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); namei_cleanup_cnp(cnp); - nameicap_cleanup(ndp); + nameicap_cleanup(ndp, error); pwd_drop(pwd); return (error); } @@ -1185,7 +1224,9 @@ dirloop: } } - nameicap_tracker_add(ndp, dp); + error = nameicap_tracker_add(ndp, dp); + if (error != 0) + goto bad; /* * Make sure degenerate names don't get here, their handling was @@ -1210,9 +1251,7 @@ dirloop: * the jail or chroot, don't let them out. * 5. If doing a capability lookup and lookup_cap_dotdot is * enabled, return ENOTCAPABLE if the lookup would escape - * from the initial file descriptor directory. Checks are - * done by ensuring that namei() already traversed the - * result of dotdot lookup. + * from the initial file descriptor directory. */ if (cnp->cn_flags & ISDOTDOT) { if (__predict_false((ndp->ni_lcf & (NI_LCF_STRICTREL_KTR | @@ -1238,7 +1277,7 @@ dirloop: NI_CAP_VIOLATION(ndp, cnp->cn_pnbuf); if ((ndp->ni_lcf & NI_LCF_STRICTREL) != 0) { error = ENOTCAPABLE; - goto capdotdot; + goto bad; } } if (isroot || ((dp->v_vflag & VV_ROOT) != 0 && @@ -1261,11 +1300,6 @@ dirloop: vn_lock(dp, enforce_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); - error = nameicap_check_dotdot(ndp, dp); - if (error != 0) { -capdotdot: - goto bad; - } } } @@ -1314,7 +1348,9 @@ unionlookup: vn_lock(dp, enforce_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); - nameicap_tracker_add(ndp, dp); + error = nameicap_tracker_add(ndp, dp); + if (error != 0) + goto bad; goto unionlookup; } @@ -1415,7 +1451,7 @@ nextname: goto dirloop; } if (cnp->cn_flags & ISDOTDOT) { - error = nameicap_check_dotdot(ndp, ndp->ni_vp); + error = nameicap_check_dotdot(ndp, ndp->ni_dvp); if (error != 0) goto bad2; } @@ -1485,8 +1521,11 @@ success: } success_right_lock: if (ndp->ni_vp != NULL) { - if ((cnp->cn_flags & ISDOTDOT) == 0) - nameicap_tracker_add(ndp, ndp->ni_vp); + if ((cnp->cn_flags & ISDOTDOT) == 0) { + error = nameicap_tracker_add(ndp, ndp->ni_vp); + if (error != 0) + goto bad2; + } if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS) return (vfs_lookup_failifexists(ndp)); } diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index cb18468d28bc..8e64a7fe966b 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -156,6 +156,7 @@ mount_init(void *mem, int size, int flags) mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF); lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0); + lockinit(&mp->mnt_renamelock, PVFS, "rename", 0, 0); mp->mnt_pcpu = uma_zalloc_pcpu(pcpu_zone_16, M_WAITOK | M_ZERO); mp->mnt_ref = 0; mp->mnt_vfs_ops = 1; @@ -170,6 +171,7 @@ mount_fini(void *mem, int size) mp = (struct mount *)mem; uma_zfree_pcpu(pcpu_zone_16, mp->mnt_pcpu); + lockdestroy(&mp->mnt_renamelock); lockdestroy(&mp->mnt_explock); mtx_destroy(&mp->mnt_listmtx); mtx_destroy(&mp->mnt_mtx); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index dc2fb59fb81c..918b256e6c59 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -38,7 +38,6 @@ * 
External virtual filesystem routines */ -#include <sys/cdefs.h> #include "opt_ddb.h" #include "opt_watchdog.h" @@ -57,6 +56,7 @@ #include <sys/extattr.h> #include <sys/file.h> #include <sys/fcntl.h> +#include <sys/inotify.h> #include <sys/jail.h> #include <sys/kdb.h> #include <sys/kernel.h> @@ -5246,7 +5246,8 @@ destroy_vpollinfo_free(struct vpollinfo *vi) static void destroy_vpollinfo(struct vpollinfo *vi) { - + KASSERT(TAILQ_EMPTY(&vi->vpi_inotify), + ("%s: pollinfo %p has lingering watches", __func__, vi)); knlist_clear(&vi->vpi_selinfo.si_note, 1); seldrain(&vi->vpi_selinfo); destroy_vpollinfo_free(vi); @@ -5260,12 +5261,13 @@ v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; - if (vp->v_pollinfo != NULL) + if (atomic_load_ptr(&vp->v_pollinfo) != NULL) return; vi = malloc(sizeof(*vi), M_VNODEPOLL, M_WAITOK | M_ZERO); mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_lock); + TAILQ_INIT(&vi->vpi_inotify); VI_LOCK(vp); if (vp->v_pollinfo != NULL) { VI_UNLOCK(vp); @@ -5851,6 +5853,8 @@ vop_rename_pre(void *ap) struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS + struct mount *tmp; + if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); @@ -5868,6 +5872,11 @@ vop_rename_pre(void *ap) if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); + + tmp = NULL; + VOP_GETWRITEMOUNT(a->a_tdvp, &tmp); + lockmgr_assert(&tmp->mnt_renamelock, KA_XLOCKED); + vfs_rel(tmp); #endif /* * It may be tempting to add vn_seqc_write_begin/end calls here and @@ -6057,6 +6066,28 @@ vop_need_inactive_debugpost(void *ap, int rc) #endif void +vop_allocate_post(void *ap, int rc) +{ + struct vop_allocate_args *a; + + a = ap; + if (rc == 0) + INOTIFY(a->a_vp, IN_MODIFY); +} + +void +vop_copy_file_range_post(void *ap, int rc) +{ + struct vop_copy_file_range_args *a; + + a = ap; + if (rc == 0) { + INOTIFY(a->a_invp, IN_ACCESS); + INOTIFY(a->a_outvp, IN_MODIFY); + } +} + +void vop_create_pre(void *ap) { struct vop_create_args *a; @@ -6076,8 +6107,20 @@ vop_create_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } +} + +void +vop_deallocate_post(void *ap, int rc) +{ + struct vop_deallocate_args *a; + + a = ap; + if (rc == 0) + INOTIFY(a->a_vp, IN_MODIFY); } void @@ -6122,8 +6165,10 @@ vop_deleteextattr_post(void *ap, int rc) a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6153,6 +6198,8 @@ vop_link_post(void *ap, int rc) if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_LINK); VFS_KNOTE_LOCKED(tdvp, NOTE_WRITE); + INOTIFY_NAME(vp, tdvp, a->a_cnp, _IN_ATTRIB_LINKCOUNT); + INOTIFY_NAME(vp, tdvp, a->a_cnp, IN_CREATE); } } @@ -6176,8 +6223,10 @@ vop_mkdir_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } #ifdef DEBUG_VFS_LOCKS @@ -6212,8 +6261,10 @@ vop_mknod_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } void @@ -6225,8 +6276,10 @@ vop_reclaim_post(void *ap, int rc) a = ap; vp = a->a_vp; 
ASSERT_VOP_IN_SEQC(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_REVOKE); + INOTIFY_REVOKE(vp); + } } void @@ -6257,6 +6310,8 @@ vop_remove_post(void *ap, int rc) if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(vp, NOTE_DELETE); + INOTIFY_NAME(vp, dvp, a->a_cnp, _IN_ATTRIB_LINKCOUNT); + INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE); } } @@ -6288,6 +6343,8 @@ vop_rename_post(void *ap, int rc) VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); + INOTIFY_MOVE(a->a_fvp, a->a_fdvp, a->a_fcnp, a->a_tvp, + a->a_tdvp, a->a_tcnp); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); @@ -6327,6 +6384,7 @@ vop_rmdir_post(void *ap, int rc) vp->v_vflag |= VV_UNLINKED; VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(vp, NOTE_DELETE); + INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE); } } @@ -6350,8 +6408,10 @@ vop_setattr_post(void *ap, int rc) a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6396,8 +6456,10 @@ vop_setextattr_post(void *ap, int rc) a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6420,8 +6482,10 @@ vop_symlink_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } void @@ -6429,8 +6493,10 @@ vop_open_post(void *ap, int rc) { struct vop_open_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_OPEN); + INOTIFY(a->a_vp, IN_OPEN); + } } void @@ -6442,6 +6508,8 @@ vop_close_post(void *ap, int rc) !VN_IS_DOOMED(a->a_vp))) { VFS_KNOTE_LOCKED(a->a_vp, (a->a_fflag & FWRITE) != 0 ? NOTE_CLOSE_WRITE : NOTE_CLOSE); + INOTIFY(a->a_vp, (a->a_fflag & FWRITE) != 0 ? 
+ IN_CLOSE_WRITE : IN_CLOSE_NOWRITE); } } @@ -6450,8 +6518,10 @@ vop_read_post(void *ap, int rc) { struct vop_read_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); + INOTIFY(a->a_vp, IN_ACCESS); + } } void @@ -6468,8 +6538,10 @@ vop_readdir_post(void *ap, int rc) { struct vop_readdir_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); + INOTIFY(a->a_vp, IN_ACCESS); + } } static struct knlist fs_knlist; diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index c236f241bf20..d880733cbfe7 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -3766,7 +3766,7 @@ int kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, const char *new, enum uio_seg pathseg) { - struct mount *mp = NULL; + struct mount *mp, *tmp; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; uint64_t tondflags; @@ -3774,6 +3774,7 @@ kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, short irflag; again: + tmp = mp = NULL; bwillwrite(); #ifdef MAC if (mac_vnode_check_rename_from_enabled()) { @@ -3809,6 +3810,7 @@ again: tvp = tond.ni_vp; error = vn_start_write(fvp, &mp, V_NOWAIT); if (error != 0) { +again1: NDFREE_PNBUF(&fromnd); NDFREE_PNBUF(&tond); if (tvp != NULL) @@ -3819,11 +3821,25 @@ again: vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); + if (tmp != NULL) { + lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE, NULL); + lockmgr(&tmp->mnt_renamelock, LK_RELEASE, NULL); + vfs_rel(tmp); + tmp = NULL; + } error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); if (error != 0) return (error); goto again; } + error = VOP_GETWRITEMOUNT(tdvp, &tmp); + if (error != 0 || tmp == NULL) + goto again1; + error = lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE | LK_NOWAIT, NULL); + if (error != 0) { + vn_finished_write(mp); + goto again1; + } irflag = vn_irflag_read(fvp); if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || (irflag & VIRF_NAMEDDIR) != 0) { @@ -3884,6 +3900,8 @@ out: vrele(fromnd.ni_dvp); vrele(fvp); } + lockmgr(&tmp->mnt_renamelock, LK_RELEASE, 0); + vfs_rel(tmp); vn_finished_write(mp); out1: if (error == ERESTART) diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 7487f93e4880..6451c9e07a60 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -52,6 +52,7 @@ #include <sys/fcntl.h> #include <sys/file.h> #include <sys/filio.h> +#include <sys/inotify.h> #include <sys/ktr.h> #include <sys/ktrace.h> #include <sys/limits.h> @@ -308,7 +309,8 @@ restart: NDREINIT(ndp); goto restart; } - if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0) + if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0 || + (vn_irflag_read(ndp->ni_dvp) & VIRF_INOTIFY) != 0) ndp->ni_cnd.cn_flags |= MAKEENTRY; #ifdef MAC error = mac_vnode_check_create(cred, ndp->ni_dvp, @@ -484,6 +486,7 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, if (vp->v_type != VFIFO && vp->v_type != VSOCK && VOP_ACCESS(vp, VREAD, cred, td) == 0) fp->f_flag |= FKQALLOWED; + INOTIFY(vp, IN_OPEN); return (0); } @@ -1746,6 +1749,8 @@ vn_truncate_locked(struct vnode *vp, off_t length, bool sync, vattr.va_vaflags |= VA_SYNC; error = VOP_SETATTR(vp, &vattr, cred); VOP_ADD_WRITECOUNT_CHECKED(vp, -1); + if (error == 0) + INOTIFY(vp, IN_MODIFY); } return (error); } diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index a2b6a7c8ff9f..38138a4af921 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -702,6 +702,7 @@ vop_vptocnp { %% allocate vp E E E +%! 
allocate post vop_allocate_post vop_allocate { IN struct vnode *vp; @@ -786,6 +787,7 @@ vop_fdatasync { %% copy_file_range invp U U U %% copy_file_range outvp U U U +%! copy_file_range post vop_copy_file_range_post vop_copy_file_range { IN struct vnode *invp; @@ -810,6 +812,7 @@ vop_vput_pair { %% deallocate vp L L L +%! deallocate post vop_deallocate_post vop_deallocate { IN struct vnode *vp; @@ -821,6 +824,27 @@ vop_deallocate { }; +%% inotify vp - - - + +vop_inotify { + IN struct vnode *vp; + IN struct vnode *dvp; + IN struct componentname *cnp; + IN int event; + IN uint32_t cookie; +}; + + +%% inotify_add_watch vp L L L + +vop_inotify_add_watch { + IN struct vnode *vp; + IN struct inotify_softc *sc; + IN uint32_t mask; + OUT uint32_t *wdp; + IN struct thread *td; +}; + # The VOPs below are spares at the end of the table to allow new VOPs to be # added in stable branches without breaking the KBI. New VOPs in HEAD should # be added above these spares. When merging a new VOP to a stable branch, diff --git a/sys/modules/iwlwifi/Makefile b/sys/modules/iwlwifi/Makefile index 6e0fea6efc3a..9774c3da61ee 100644 --- a/sys/modules/iwlwifi/Makefile +++ b/sys/modules/iwlwifi/Makefile @@ -4,6 +4,7 @@ DEVIWLWIFIDIR= ${SRCTOP}/sys/contrib/dev/iwlwifi WITH_CONFIG_PM= 0 WITH_DEBUGFS= 1 +WITH_CONFIG_ACPI= 1 KMOD= if_iwlwifi @@ -40,6 +41,12 @@ CFLAGS+= -DCONFIG_PM CFLAGS+= -DCONFIG_PM_SLEEP .endif +.if defined(WITH_CONFIG_ACPI) && ${WITH_CONFIG_ACPI} > 0 +SRCS+= fw/acpi.c +CFLAGS+= -DCONFIG_ACPI +CFLAGS+= -DLINUXKPI_WANT_LINUX_ACPI +.endif + SRCS+= iwl-devtrace.c # Other @@ -56,7 +63,6 @@ CFLAGS+= -DCONFIG_IWLMVM=1 # Helpful after fresh imports. #CFLAGS+= -ferror-limit=0 -#CFLAGS+= -DCONFIG_ACPI=1 #CFLAGS+= -DCONFIG_INET=1 # Need LKPI TSO implementation. #CFLAGS+= -DCONFIG_IPV6=1 CFLAGS+= -DCONFIG_IWLWIFI_DEBUG=1 diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h index 6eefedba8775..cf4f75bd0b6c 100644 --- a/sys/net/ethernet.h +++ b/sys/net/ethernet.h @@ -81,6 +81,23 @@ struct ether_addr { (addr)[3] | (addr)[4] | (addr)[5]) == 0x00) /* + * 802.1q VID constants from IEEE 802.1Q-2014, table 9-2. + */ + +/* Null VID: The tag contains only PCP (priority) and DEI information. */ +#define DOT1Q_VID_NULL 0x0 +/* The default PVID for a bridge port. NB: bridge(4) does not honor this. */ +#define DOT1Q_VID_DEF_PVID 0x1 +/* The default SR_PVID for SRP Stream related traffic. */ +#define DOT1Q_VID_DEF_SR_PVID 0x2 +/* A VID reserved for implementation use, not permitted on the wire. */ +#define DOT1Q_VID_RSVD_IMPL 0xfff +/* The lowest valid VID. */ +#define DOT1Q_VID_MIN 0x1 +/* The highest valid VID. */ +#define DOT1Q_VID_MAX 0xffe + +/* * This is the type of the VLAN ID inside the tag, not the tag itself. */ typedef uint16_t ether_vlanid_t; diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index bc421a8e156d..5b3ee740d75e 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -254,6 +254,8 @@ struct bridge_iflist { uint32_t bif_addrcnt; /* cur. 
# of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ struct epoch_context bif_epoch_ctx; + ether_vlanid_t bif_untagged; /* untagged vlan id */ + ifbvlan_set_t bif_vlan_set; /* allowed tagged vlans */ }; /* @@ -331,13 +333,12 @@ static void bridge_inject(struct ifnet *, struct mbuf *); static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static int bridge_enqueue(struct bridge_softc *, struct ifnet *, - struct mbuf *); + struct mbuf *, struct bridge_iflist *); static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int); static void bridge_forward(struct bridge_softc *, struct bridge_iflist *, struct mbuf *m); static bool bridge_member_ifaddrs(void); - static void bridge_timer(void *); static void bridge_broadcast(struct bridge_softc *, struct ifnet *, @@ -353,6 +354,9 @@ static void bridge_rtage(struct bridge_softc *); static void bridge_rtflush(struct bridge_softc *, int); static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *, ether_vlanid_t); +static bool bridge_vfilter_in(const struct bridge_iflist *, struct mbuf *); +static bool bridge_vfilter_out(const struct bridge_iflist *, + const struct mbuf *); static void bridge_rtable_init(struct bridge_softc *); static void bridge_rtable_fini(struct bridge_softc *); @@ -400,6 +404,9 @@ static int bridge_ioctl_sma(struct bridge_softc *, void *); static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *); +static int bridge_ioctl_sifuntagged(struct bridge_softc *, void *); +static int bridge_ioctl_sifvlanset(struct bridge_softc *, void *); +static int bridge_ioctl_gifvlanset(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); static int bridge_ioctl_delspan(struct bridge_softc *, void *); static int bridge_ioctl_gbparam(struct bridge_softc *, void *); @@ -618,6 +625,14 @@ static const struct bridge_control bridge_control_table[] = { { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, + { bridge_ioctl_sifuntagged, sizeof(struct ifbreq), + BC_F_COPYIN|BC_F_SUSER }, + + { bridge_ioctl_sifvlanset, sizeof(struct ifbif_vlan_req), + BC_F_COPYIN|BC_F_SUSER }, + + { bridge_ioctl_gifvlanset, sizeof(struct ifbif_vlan_req), + BC_F_COPYIN|BC_F_COPYOUT }, }; static const int bridge_control_table_size = nitems(bridge_control_table); @@ -832,6 +847,7 @@ bridge_clone_create(struct if_clone *ifc, char *name, size_t len, ifp->if_softc = sc; if_initname(ifp, bridge_name, ifd->unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_capabilities = ifp->if_capenable = IFCAP_VLAN_HWTAGGING; ifp->if_ioctl = bridge_ioctl; #ifdef ALTQ ifp->if_start = bridge_altq_start; @@ -954,6 +970,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct ifbaconf ifbaconf; struct ifbrparam ifbrparam; struct ifbropreq ifbropreq; + struct ifbif_vlan_req ifvlanreq; } args; struct ifdrv *ifd = (struct ifdrv *) data; const struct bridge_control *bc; @@ -1495,6 +1512,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) req->ifbr_addrcnt = bif->bif_addrcnt; req->ifbr_addrmax = bif->bif_addrmax; req->ifbr_addrexceeded = bif->bif_addrexceeded; + req->ifbr_untagged = bif->bif_untagged; /* Copy STP state options as flags */ if (bp->bp_operedge) @@ -1873,6 +1891,84 @@ bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg) } static int 
+bridge_ioctl_sifuntagged(struct bridge_softc *sc, void *arg) +{ + struct ifbreq *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->ifbr_ifsname); + if (bif == NULL) + return (ENOENT); + + if (req->ifbr_untagged > DOT1Q_VID_MAX) + return (EINVAL); + + if (req->ifbr_untagged != DOT1Q_VID_NULL) + bif->bif_flags |= IFBIF_VLANFILTER; + bif->bif_untagged = req->ifbr_untagged; + return (0); +} + +static int +bridge_ioctl_sifvlanset(struct bridge_softc *sc, void *arg) +{ + struct ifbif_vlan_req *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->bv_ifname); + if (bif == NULL) + return (ENOENT); + + /* Reject invalid VIDs. */ + if (BRVLAN_TEST(&req->bv_set, DOT1Q_VID_NULL) || + BRVLAN_TEST(&req->bv_set, DOT1Q_VID_RSVD_IMPL)) + return (EINVAL); + + switch (req->bv_op) { + /* Replace the existing vlan set with the new set */ + case BRDG_VLAN_OP_SET: + BIT_COPY(BRVLAN_SETSIZE, &req->bv_set, &bif->bif_vlan_set); + break; + + /* Modify the existing vlan set to add the given vlans */ + case BRDG_VLAN_OP_ADD: + BIT_OR(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + break; + + /* Modify the existing vlan set to remove the given vlans */ + case BRDG_VLAN_OP_DEL: + BIT_ANDNOT(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + break; + + /* Invalid or unknown operation */ + default: + return (EINVAL); + } + + /* + * The only reason to modify the VLAN access list is to use VLAN + * filtering on this interface, so enable it automatically. + */ + bif->bif_flags |= IFBIF_VLANFILTER; + + return (0); +} + +static int +bridge_ioctl_gifvlanset(struct bridge_softc *sc, void *arg) +{ + struct ifbif_vlan_req *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->bv_ifname); + if (bif == NULL) + return (ENOENT); + + BIT_COPY(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + return (0); +} + +static int bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; @@ -2150,12 +2246,25 @@ bridge_stop(struct ifnet *ifp, int disable) * */ static int -bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) +bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m, + struct bridge_iflist *bif) { int len, err = 0; short mflags; struct mbuf *m0; + /* + * Find the bridge member port this packet is being sent on, if the + * caller didn't already provide it. + */ + if (bif == NULL) + bif = bridge_lookup_member_if(sc, dst_ifp); + if (bif == NULL) { + /* Perhaps the interface was removed from the bridge */ + m_freem(m); + return (EINVAL); + } + /* We may be sending a fragment so traverse the mbuf */ for (; m; m = m0) { m0 = m->m_nextpkt; @@ -2164,6 +2273,18 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) mflags = m->m_flags; /* + * If VLAN filtering is enabled, and the native VLAN ID of the + * outgoing interface matches the VLAN ID of the frame, remove + * the VLAN header. + */ + if ((bif->bif_flags & IFBIF_VLANFILTER) && + bif->bif_untagged != DOT1Q_VID_NULL && + VLANTAGOF(m) == bif->bif_untagged) { + m->m_flags &= ~M_VLANTAG; + m->m_pkthdr.ether_vtag = 0; + } + + /* * If underlying interface can not do VLAN tag insertion itself * then attach a packet tag that holds it. 
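
For reference, the new BRDGSIFUNTAGGED command added above is driven from userspace the same way ifconfig(8) issues other bridge requests, wrapped in a struct ifdrv and passed via SIOCSDRVSPEC. A sketch only; interface names are illustrative and error handling is omitted:

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <sys/sockio.h>
    #include <net/if.h>
    #include <net/ethernet.h>
    #include <net/if_bridgevar.h>
    #include <string.h>

    static int
    set_untagged(int s, const char *bridge, const char *member,
        ether_vlanid_t vid)
    {
            struct ifbreq req;
            struct ifdrv ifd;

            memset(&req, 0, sizeof(req));
            strlcpy(req.ifbr_ifsname, member, sizeof(req.ifbr_ifsname));
            req.ifbr_untagged = vid;  /* DOT1Q_VID_NULL: no untagged VLAN */

            memset(&ifd, 0, sizeof(ifd));
            strlcpy(ifd.ifd_name, bridge, sizeof(ifd.ifd_name));
            ifd.ifd_cmd = BRDGSIFUNTAGGED;
            ifd.ifd_len = sizeof(req);
            ifd.ifd_data = &req;
            /* s is any datagram socket, e.g. socket(AF_LOCAL, SOCK_DGRAM, 0). */
            return (ioctl(s, SIOCSDRVSPEC, &ifd));
    }
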
*/ @@ -2234,7 +2355,7 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp) return; } - bridge_enqueue(sc, ifp, m); + bridge_enqueue(sc, ifp, m, NULL); } /* @@ -2329,7 +2450,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, } } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, bif); } if (used == 0) m_freem(m); @@ -2347,7 +2468,7 @@ sendunicast: return (0); } - bridge_enqueue(sc, dst_if, m); + bridge_enqueue(sc, dst_if, m, NULL); return (0); } @@ -2364,17 +2485,18 @@ bridge_transmit(struct ifnet *ifp, struct mbuf *m) struct ether_header *eh; struct ifnet *dst_if; int error = 0; + ether_vlanid_t vlan; sc = ifp->if_softc; ETHER_BPF_MTAP(ifp, m); eh = mtod(m, struct ether_header *); + vlan = VLANTAGOF(m); if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && - (dst_if = bridge_rtlookup(sc, eh->ether_dhost, DOT1Q_VID_NULL)) != - NULL) { - error = bridge_enqueue(sc, dst_if, m); + (dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan)) != NULL) { + error = bridge_enqueue(sc, dst_if, m, NULL); } else bridge_broadcast(sc, ifp, m, 0); @@ -2435,18 +2557,18 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, struct bridge_iflist *dbif; struct ifnet *src_if, *dst_if, *ifp; struct ether_header *eh; - uint16_t vlan; uint8_t *dst; int error; + ether_vlanid_t vlan; NET_EPOCH_ASSERT(); src_if = m->m_pkthdr.rcvif; ifp = sc->sc_ifp; + vlan = VLANTAGOF(m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); - vlan = VLANTAGOF(m); if ((sbif->bif_flags & IFBIF_STP) && sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) @@ -2555,6 +2677,10 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE) goto drop; + /* Do VLAN filtering. */ + if (!bridge_vfilter_out(dbif, m)) + goto drop; + if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; @@ -2566,7 +2692,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, return; } - bridge_enqueue(sc, dst_if, m); + bridge_enqueue(sc, dst_if, m, dbif); return; drop: @@ -2636,6 +2762,15 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) return (NULL); } + /* Do VLAN filtering. */ + if (!bridge_vfilter_in(bif, m)) { + if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1); + m_freem(m); + return (NULL); + } + /* bridge_vfilter_in() may add a tag */ + vlan = VLANTAGOF(m); + bridge_span(sc, m); if (m->m_flags & (M_BCAST|M_MCAST)) { @@ -2761,6 +2896,15 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) } \ if ((iface) != bifp) \ ETHER_BPF_MTAP(iface, m); \ + /* Pass tagged packets to if_vlan, if it's loaded */ \ + if (VLANTAGOF(m) != 0) { \ + if (bifp->if_vlantrunk == NULL) { \ + m_freem(m); \ + return (NULL); \ + } \ + (*vlan_input_p)(bifp, m); \ + return (NULL); \ + } \ return (m); \ } \ \ @@ -2817,6 +2961,30 @@ bridge_inject(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc; + if (ifp->if_type == IFT_L2VLAN) { + /* + * vlan(4) gives us the vlan ifnet, so we need to get the + * bridge softc to get a pointer to ether_input to send the + * packet to. 
+ */ + struct ifnet *bifp = NULL; + + if (vlan_trunkdev_p == NULL) { + m_freem(m); + return; + } + + bifp = vlan_trunkdev_p(ifp); + if (bifp == NULL) { + m_freem(m); + return; + } + + sc = if_getsoftc(bifp); + sc->sc_if_input(ifp, m); + return; + } + KASSERT((if_getcapenable(ifp) & IFCAP_NETMAP) != 0, ("%s: iface %s is not running in netmap mode", __func__, if_name(ifp))); @@ -2867,6 +3035,10 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)) continue; + /* Do VLAN filtering. */ + if (!bridge_vfilter_out(dbif, m)) + continue; + if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; @@ -2910,7 +3082,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, continue; } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, dbif); } if (used == 0) m_freem(m); @@ -2946,11 +3118,116 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m) continue; } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, bif); } } /* + * Incoming VLAN filtering. Given a frame and the member interface it was + * received on, decide whether the port configuration allows it. + */ +static bool +bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m) +{ + ether_vlanid_t vlan; + + vlan = VLANTAGOF(m); + /* Make sure the vlan id is reasonable. */ + if (vlan > DOT1Q_VID_MAX) + return (false); + + /* If VLAN filtering isn't enabled, pass everything. */ + if ((sbif->bif_flags & IFBIF_VLANFILTER) == 0) + return (true); + + if (vlan == DOT1Q_VID_NULL) { + /* + * The frame doesn't have a tag. If the interface does not + * have an untagged vlan configured, drop the frame. + */ + if (sbif->bif_untagged == DOT1Q_VID_NULL) + return (false); + + /* + * Otherwise, insert a new tag based on the interface's + * untagged vlan id. + */ + m->m_pkthdr.ether_vtag = sbif->bif_untagged; + m->m_flags |= M_VLANTAG; + } else { + /* + * The frame has a tag, so check it matches the interface's + * vlan access list. We explicitly do not accept tagged + * frames for the untagged vlan id here (unless it's also + * in the access list). + */ + if (!BRVLAN_TEST(&sbif->bif_vlan_set, vlan)) + return (false); + } + + /* Accept the frame. */ + return (true); +} + +/* + * Outgoing VLAN filtering. Given a frame, its vlan, and the member interface + * we intend to send it to, decide whether the port configuration allows it to + * be sent. + */ +static bool +bridge_vfilter_out(const struct bridge_iflist *dbif, const struct mbuf *m) +{ + struct ether_header *eh; + ether_vlanid_t vlan; + + NET_EPOCH_ASSERT(); + + /* If VLAN filtering isn't enabled, pass everything. */ + if ((dbif->bif_flags & IFBIF_VLANFILTER) == 0) + return (true); + + vlan = VLANTAGOF(m); + + /* + * Always allow untagged 802.1D STP frames, even if they would + * otherwise be dropped. This is required for STP to work on + * a filtering bridge. + * + * Tagged STP (Cisco PVST+) is a non-standard extension, so + * handle those frames via the normal filtering path. + */ + eh = mtod(m, struct ether_header *); + if (vlan == DOT1Q_VID_NULL && + memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) + return (true); + + /* + * If the frame wasn't assigned to a vlan at ingress, drop it. + * We can't forward these frames to filtering ports because we + * don't know what VLAN they're supposed to be in. 
+ */ + if (vlan == DOT1Q_VID_NULL) + return (false); + + /* + * If the frame's vlan matches the interfaces's untagged vlan, + * allow it. + */ + if (vlan == dbif->bif_untagged) + return (true); + + /* + * If the frame's vlan is on the interface's tagged access list, + * allow it. + */ + if (BRVLAN_TEST(&dbif->bif_vlan_set, vlan)) + return (true); + + /* The frame was not permitted, so drop it. */ + return (false); +} + +/* * bridge_rtupdate: * * Add a bridge routing entry. diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h index 90beb6c96d82..97b63e3d4416 100644 --- a/sys/net/if_bridgevar.h +++ b/sys/net/if_bridgevar.h @@ -78,6 +78,8 @@ #define _NET_IF_BRIDGEVAR_H_ #include <sys/types.h> +#include <sys/_bitset.h> +#include <sys/bitset.h> #include <sys/callout.h> #include <sys/queue.h> #include <sys/condvar.h> @@ -122,6 +124,9 @@ #define BRDGSPROTO 28 /* set protocol (ifbrparam) */ #define BRDGSTXHC 29 /* set tx hold count (ifbrparam) */ #define BRDGSIFAMAX 30 /* set max interface addrs (ifbreq) */ +#define BRDGSIFUNTAGGED 31 /* set if untagged vlan */ +#define BRDGSIFVLANSET 32 /* set if vlan set */ +#define BRDGGIFVLANSET 33 /* get if vlan set */ /* * Generic bridge control request. @@ -139,6 +144,7 @@ struct ifbreq { uint32_t ifbr_addrcnt; /* member if addr number */ uint32_t ifbr_addrmax; /* member if addr max */ uint32_t ifbr_addrexceeded; /* member if addr violations */ + ether_vlanid_t ifbr_untagged; /* member if untagged vlan */ uint8_t pad[32]; }; @@ -155,10 +161,11 @@ struct ifbreq { #define IFBIF_BSTP_ADMEDGE 0x0200 /* member stp admin edge enabled */ #define IFBIF_BSTP_ADMCOST 0x0400 /* member stp admin path cost */ #define IFBIF_PRIVATE 0x0800 /* if is a private segment */ +#define IFBIF_VLANFILTER 0x1000 /* if does vlan filtering */ #define IFBIFBITS "\020\001LEARNING\002DISCOVER\003STP\004SPAN" \ "\005STICKY\014PRIVATE\006EDGE\007AUTOEDGE\010PTP" \ - "\011AUTOPTP" + "\011AUTOPTP\015VLANFILTER" #define IFBIFMASK ~(IFBIF_BSTP_EDGE|IFBIF_BSTP_AUTOEDGE|IFBIF_BSTP_PTP| \ IFBIF_BSTP_AUTOPTP|IFBIF_BSTP_ADMEDGE| \ IFBIF_BSTP_ADMCOST) /* not saved */ @@ -304,6 +311,26 @@ struct ifbpstpconf { eaddr[5] = pv >> 0; \ } while (0) +/* + * Bridge VLAN access request. 
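
The access list is manipulated with the bitset macros defined just below; for example, a request allowing tagged VLANs 10 through 20 on a member port could be built like this (sketch only; the member name is illustrative and the request is delivered with SIOCSDRVSPEC like the other bridge commands):

    struct ifbif_vlan_req vreq;

    memset(&vreq, 0, sizeof(vreq));
    strlcpy(vreq.bv_ifname, "em0", sizeof(vreq.bv_ifname)); /* member port */
    vreq.bv_op = BRDG_VLAN_OP_SET;          /* replace the current set */
    for (int vid = 10; vid <= 20; vid++)
            BRVLAN_SET(&vreq.bv_set, vid);
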
+ */ +#define BRVLAN_SETSIZE 4096 +typedef __BITSET_DEFINE(ifbvlan_set, BRVLAN_SETSIZE) ifbvlan_set_t; + +#define BRVLAN_SET(set, bit) __BIT_SET(BRVLAN_SETSIZE, (bit), set) +#define BRVLAN_CLR(set, bit) __BIT_CLR(BRVLAN_SETSIZE, (bit), set) +#define BRVLAN_TEST(set, bit) __BIT_ISSET(BRVLAN_SETSIZE, (bit), set) + +#define BRDG_VLAN_OP_SET 1 /* replace current vlan set */ +#define BRDG_VLAN_OP_ADD 2 /* add vlans to current set */ +#define BRDG_VLAN_OP_DEL 3 /* remove vlans from current set */ + +struct ifbif_vlan_req { + char bv_ifname[IFNAMSIZ]; + uint8_t bv_op; + ifbvlan_set_t bv_set; +}; + #ifdef _KERNEL #define BRIDGE_INPUT(_ifp, _m) do { \ diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index e9e1c82cb688..22fcb7bf7c64 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -1673,6 +1673,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, */ if (p->if_type != IFT_ETHER && p->if_type != IFT_L2VLAN && + p->if_type != IFT_BRIDGE && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) diff --git a/sys/net/if_vlan_var.h b/sys/net/if_vlan_var.h index f0b09445d04b..695bb81f77b3 100644 --- a/sys/net/if_vlan_var.h +++ b/sys/net/if_vlan_var.h @@ -126,13 +126,6 @@ struct vlanreq { #define VLAN_PCP_MAX 7 -#define DOT1Q_VID_NULL 0x0 -#define DOT1Q_VID_DEF_PVID 0x1 -#define DOT1Q_VID_DEF_SR_PVID 0x2 -#define DOT1Q_VID_RSVD_IMPL 0xfff -#define DOT1Q_VID_MIN 1 /* minimum valid vlan id */ -#define DOT1Q_VID_MAX 4094 /* maximum valid vlan id */ - /* * 802.1q full tag. Proto and vid are stored in host byte order. */ diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 71cb1862aabf..1416f0c2cdbe 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -508,18 +508,6 @@ extern struct sx pf_end_lock; (c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \ !(a)->addr32[2] && !(a)->addr32[3] )) \ -#define PF_MATCHA(n, a, m, b, f) \ - pf_match_addr(n, a, m, b, f) - -#define PF_ACPY(a, b, f) \ - pf_addrcpy(a, b, f) - -#define PF_AINC(a, f) \ - pf_addr_inc(a, f) - -#define PF_POOLMASK(a, b, c, d, f) \ - pf_poolmask(a, b, c, d, f) - #else /* Just IPv6 */ @@ -544,18 +532,6 @@ extern struct sx pf_end_lock; !(a)->addr32[2] && \ !(a)->addr32[3] ) \ -#define PF_MATCHA(n, a, m, b, f) \ - pf_match_addr(n, a, m, b, f) - -#define PF_ACPY(a, b, f) \ - pf_addrcpy(a, b, f) - -#define PF_AINC(a, f) \ - pf_addr_inc(a, f) - -#define PF_POOLMASK(a, b, c, d, f) \ - pf_poolmask(a, b, c, d, f) - #else /* Just IPv4 */ @@ -570,29 +546,11 @@ extern struct sx pf_end_lock; #define PF_AZERO(a, c) \ (!(a)->addr32[0]) -#define PF_MATCHA(n, a, m, b, f) \ - pf_match_addr(n, a, m, b, f) - -#define PF_ACPY(a, b, f) \ - (a)->v4.s_addr = (b)->v4.s_addr - -#define PF_AINC(a, f) \ - do { \ - (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \ - } while (0) - -#define PF_POOLMASK(a, b, c, d, f) \ - do { \ - (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \ - (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \ - } while (0) - #endif /* PF_INET_ONLY */ #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ #ifdef _KERNEL -#ifdef INET6 static void inline pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af) { @@ -602,12 +560,13 @@ pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af) memcpy(&dst->v4, &src->v4, sizeof(dst->v4)); break; #endif /* INET */ +#ifdef INET6 case AF_INET6: memcpy(&dst->v6, &src->v6, sizeof(dst->v6)); break; +#endif /* INET6 */ } } -#endif /* INET6 */ #endif /* @@ -629,7 +588,7 @@ 
pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af) &(aw)->v.a.mask, (x), (af))) || \ ((aw)->type == PF_ADDR_ADDRMASK && \ !PF_AZERO(&(aw)->v.a.mask, (af)) && \ - !PF_MATCHA(0, &(aw)->v.a.addr, \ + !pf_match_addr(0, &(aw)->v.a.addr, \ &(aw)->v.a.mask, (x), (af))))) != \ (neg) \ ) @@ -2477,11 +2436,11 @@ int pf_test(sa_family_t, int, int, struct ifnet *, struct mbuf **, struct inpcb int pf_normalize_ip(u_short *, struct pf_pdesc *); #endif /* INET */ -#ifdef INET6 -int pf_normalize_ip6(int, u_short *, struct pf_pdesc *); void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, sa_family_t); void pf_addr_inc(struct pf_addr *, sa_family_t); +#ifdef INET6 +int pf_normalize_ip6(int, u_short *, struct pf_pdesc *); int pf_max_frag_size(struct mbuf *); int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *, struct ifnet *, bool); @@ -2674,11 +2633,10 @@ int pf_kanchor_copyout(const struct pf_kruleset *, const struct pf_krule *, char *, size_t); int pf_kanchor_nvcopyout(const struct pf_kruleset *, const struct pf_krule *, nvlist_t *); -void pf_kanchor_remove(struct pf_krule *); +void pf_remove_kanchor(struct pf_krule *); void pf_remove_if_empty_kruleset(struct pf_kruleset *); struct pf_kruleset *pf_find_kruleset(const char *); struct pf_kruleset *pf_get_leaf_kruleset(char *, char **); -struct pf_kanchor *pf_create_kanchor(struct pf_kanchor *, const char *); struct pf_kruleset *pf_find_or_create_kruleset(const char *); void pf_rs_initialize(void); diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index accc811a12ba..127b29320acb 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -682,7 +682,8 @@ pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk) 0); break; case AF_INET6: - PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + pf_addrcpy(pd->src, &nk->addr[pd->sidx], + pd->af); break; default: unhandled_af(pd->af); @@ -696,7 +697,8 @@ pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk) 0); break; case AF_INET6: - PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); + pf_addrcpy(pd->dst, &nk->addr[pd->didx], + pd->af); break; default: unhandled_af(pd->af); @@ -1084,9 +1086,9 @@ pf_insert_src_node(struct pf_ksrc_node *sns[PF_SN_MAX], (*sn)->af = af; (*sn)->rule = r_track; - PF_ACPY(&(*sn)->addr, src, af); + pf_addrcpy(&(*sn)->addr, src, af); if (raddr != NULL) - PF_ACPY(&(*sn)->raddr, raddr, af); + pf_addrcpy(&(*sn)->raddr, raddr, af); (*sn)->rkif = rkif; LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry); (*sn)->creation = time_uptime; @@ -1687,9 +1689,9 @@ pf_state_key_addr_setup(struct pf_pdesc *pd, copy: #endif /* INET6 */ if (saddr) - PF_ACPY(&key->addr[pd->sidx], saddr, pd->af); + pf_addrcpy(&key->addr[pd->sidx], saddr, pd->af); if (daddr) - PF_ACPY(&key->addr[pd->didx], daddr, pd->af); + pf_addrcpy(&key->addr[pd->didx], daddr, pd->af); return (0); } @@ -1734,13 +1736,17 @@ pf_state_key_setup(struct pf_pdesc *pd, u_int16_t sport, u_int16_t dport, bzero(&(*nk)->addr[0], sizeof((*nk)->addr[0])); bzero(&(*nk)->addr[1], sizeof((*nk)->addr[1])); if (pd->dir == PF_IN) { - PF_ACPY(&(*nk)->addr[pd->didx], &pd->nsaddr, pd->naf); - PF_ACPY(&(*nk)->addr[pd->sidx], &pd->ndaddr, pd->naf); + pf_addrcpy(&(*nk)->addr[pd->didx], &pd->nsaddr, + pd->naf); + pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->ndaddr, + pd->naf); (*nk)->port[pd->didx] = pd->nsport; (*nk)->port[pd->sidx] = pd->ndport; } else { - PF_ACPY(&(*nk)->addr[pd->sidx], &pd->nsaddr, pd->naf); - PF_ACPY(&(*nk)->addr[pd->didx], &pd->ndaddr, 
pd->naf); + pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->nsaddr, + pd->naf); + pf_addrcpy(&(*nk)->addr[pd->didx], &pd->ndaddr, + pd->naf); (*nk)->port[pd->sidx] = pd->nsport; (*nk)->port[pd->didx] = pd->ndport; } @@ -2053,11 +2059,11 @@ pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_por mapping = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO); if (mapping == NULL) return (NULL); - PF_ACPY(&mapping->endpoints[0].addr, src_addr, af); + pf_addrcpy(&mapping->endpoints[0].addr, src_addr, af); mapping->endpoints[0].port = src_port; mapping->endpoints[0].af = af; mapping->endpoints[0].mapping = mapping; - PF_ACPY(&mapping->endpoints[1].addr, nat_addr, af); + pf_addrcpy(&mapping->endpoints[1].addr, nat_addr, af); mapping->endpoints[1].port = nat_port; mapping->endpoints[1].af = af; mapping->endpoints[1].mapping = mapping; @@ -3295,9 +3301,9 @@ pf_change_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p, MPASS(pd->ip_sum); } - PF_ACPY(&ao, a, pd->af); + pf_addrcpy(&ao, a, pd->af); if (pd->af == pd->naf) - PF_ACPY(a, an, pd->af); + pf_addrcpy(a, an, pd->af); if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) *pd->pcksum = ~*pd->pcksum; @@ -3426,8 +3432,8 @@ pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) { struct pf_addr ao; - PF_ACPY(&ao, a, AF_INET6); - PF_ACPY(a, an, AF_INET6); + pf_addrcpy(&ao, a, AF_INET6); + pf_addrcpy(a, an, AF_INET6); *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( @@ -3450,9 +3456,9 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, { struct pf_addr oia, ooa; - PF_ACPY(&oia, ia, af); + pf_addrcpy(&oia, ia, af); if (oa) - PF_ACPY(&ooa, oa, af); + pf_addrcpy(&ooa, oa, af); /* Change inner protocol port, fix inner protocol checksum. */ if (ip != NULL) { @@ -3469,7 +3475,7 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, *ic = pf_cksum_fixup(*ic, opc, *pc, 0); } /* Change inner ip address, fix inner ip and icmp checksums. */ - PF_ACPY(ia, na, af); + pf_addrcpy(ia, na, af); switch (af) { #ifdef INET case AF_INET: { @@ -3503,7 +3509,7 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, } /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */ if (oa) { - PF_ACPY(oa, na, af); + pf_addrcpy(oa, na, af); switch (af) { #ifdef INET case AF_INET: @@ -4299,8 +4305,8 @@ pf_undo_nat(struct pf_krule *nr, struct pf_pdesc *pd, uint16_t bip_sum) { /* undo NAT changes, if they have taken place */ if (nr != NULL) { - PF_ACPY(pd->src, &pd->osrc, pd->af); - PF_ACPY(pd->dst, &pd->odst, pd->af); + pf_addrcpy(pd->src, &pd->osrc, pd->af); + pf_addrcpy(pd->dst, &pd->odst, pd->af); if (pd->sport) *pd->sport = pd->osport; if (pd->dport) @@ -4676,10 +4682,11 @@ pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r) } else { rv = pf_match_rule(ctx, &r->anchor->ruleset); /* - * Unless there was an error inside the anchor, - * retain its quick state. + * Unless errors occured, stop iff any rule matched + * within quick anchors. 
*/ - if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK) + if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK && + *ctx->am == r) rv = PF_TEST_QUICK; } @@ -4790,7 +4797,6 @@ pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth, return (quick); } -#ifdef INET6 void pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) @@ -4802,6 +4808,7 @@ pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); break; #endif /* INET */ +#ifdef INET6 case AF_INET6: naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); @@ -4812,6 +4819,7 @@ pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); break; +#endif /* INET6 */ } } @@ -4824,6 +4832,7 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af) addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); break; #endif /* INET */ +#ifdef INET6 case AF_INET6: if (addr->addr32[3] == 0xffffffff) { addr->addr32[3] = 0; @@ -4843,9 +4852,9 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af) addr->addr32[3] = htonl(ntohl(addr->addr32[3]) + 1); break; +#endif /* INET6 */ } } -#endif /* INET6 */ void pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a) @@ -5744,8 +5753,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, ctx.reason = *reason; SLIST_INIT(&ctx.rules); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); if (inp != NULL) { INP_LOCK_ASSERT(inp); @@ -6363,7 +6372,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) &nk->addr[pd->sidx], nk->port[pd->sidx]); pd->sport = &th->th_sport; pd->nsport = th->th_sport; - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || @@ -6372,7 +6381,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) &nk->addr[pd->didx], nk->port[pd->didx]); pd->dport = &th->th_dport; pd->ndport = th->th_dport; - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } rewrite++; break; @@ -6385,7 +6394,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) nk->port[pd->sidx]); pd->sport = &pd->hdr.udp.uh_sport; pd->nsport = pd->hdr.udp.uh_sport; - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || @@ -6396,7 +6405,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) nk->port[pd->didx]); pd->dport = &pd->hdr.udp.uh_dport; pd->ndport = pd->hdr.udp.uh_dport; - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } rewrite++; break; @@ -6409,7 +6418,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) nk->port[pd->sidx]); pd->sport = &pd->hdr.sctp.src_port; pd->nsport = pd->hdr.sctp.src_port; - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || nk->port[pd->didx] != pd->ndport) { @@ -6419,7 +6428,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) nk->port[pd->didx]); pd->dport = &pd->hdr.sctp.dest_port; pd->ndport = pd->hdr.sctp.dest_port; - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } break; } @@ -6428,13 +6437,13 @@ pf_translate_compat(struct 
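
With the INET6-only guards removed above, pf_poolmask() and pf_addr_inc() are now built on INET-only kernels too, which is what lets the PF_POOLMASK and PF_AINC macros be dropped from pfvar.h. The bitmask pool computes naddr = (raddr & rmask) | (~rmask & saddr) one 32-bit word at a time; the small standalone program below illustrates the AF_INET case with arbitrarily chosen addresses.

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>

/* Illustration of the PF_POOL_BITMASK arithmetic performed by pf_poolmask(). */
int
main(void)
{
	struct in_addr raddr, rmask, saddr, naddr;

	inet_pton(AF_INET, "198.51.100.0", &raddr);	/* pool address */
	inet_pton(AF_INET, "255.255.255.0", &rmask);	/* pool mask */
	inet_pton(AF_INET, "10.0.0.37", &saddr);	/* original source */

	/* Same per-word expression as pf_poolmask() uses for AF_INET. */
	naddr.s_addr = (raddr.s_addr & rmask.s_addr) |
	    (~rmask.s_addr & saddr.s_addr);
	printf("%s\n", inet_ntoa(naddr));	/* prints 198.51.100.37 */
	return (0);
}
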
pf_test_ctx *ctx) if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET)) { pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET)) { pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } if (ctx->virtual_type == htons(ICMP_ECHO) && @@ -6453,13 +6462,13 @@ pf_translate_compat(struct pf_test_ctx *ctx) if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) { pf_change_a6(pd->src, &pd->hdr.icmp6.icmp6_cksum, &nk->addr[pd->sidx], 0); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) { pf_change_a6(pd->dst, &pd->hdr.icmp6.icmp6_cksum, &nk->addr[pd->didx], 0); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } rewrite++; break; @@ -6473,7 +6482,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, @@ -6481,7 +6490,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } break; #endif /* INET */ @@ -6489,14 +6498,17 @@ pf_translate_compat(struct pf_test_ctx *ctx) case AF_INET6: if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) { - PF_ACPY(&pd->nsaddr, &nk->addr[pd->sidx], pd->af); - PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + pf_addrcpy(&pd->nsaddr, &nk->addr[pd->sidx], + pd->af); + pf_addrcpy(pd->src, &nk->addr[pd->sidx], pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) { - PF_ACPY(&pd->ndaddr, &nk->addr[pd->didx], pd->af); - PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); + pf_addrcpy(&pd->ndaddr, &nk->addr[pd->didx], + pd->af); + pf_addrcpy(pd->dst, &nk->addr[pd->didx], + pd->af); } break; #endif /* INET6 */ @@ -7015,8 +7027,8 @@ pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason) bzero(&key, sizeof(key)); key.af = pd->af; key.proto = pd->virtual_proto; - PF_ACPY(&key.addr[pd->sidx], pd->src, key.af); - PF_ACPY(&key.addr[pd->didx], pd->dst, key.af); + pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af); + pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af); key.port[pd->sidx] = pd->osport; key.port[pd->didx] = pd->odport; @@ -7207,8 +7219,8 @@ pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason) } if (afto) { - PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); - PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); + pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af); + pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af); pd->naf = nk->af; action = PF_AFRT; } @@ -7502,13 +7514,13 @@ again: key.af = j->pd.af; key.proto = IPPROTO_SCTP; if (j->pd.dir == PF_IN) { /* wire side, straight */ - PF_ACPY(&key.addr[0], j->pd.src, key.af); - PF_ACPY(&key.addr[1], j->pd.dst, key.af); + pf_addrcpy(&key.addr[0], j->pd.src, key.af); + pf_addrcpy(&key.addr[1], j->pd.dst, key.af); key.port[0] = j->pd.hdr.sctp.src_port; key.port[1] = j->pd.hdr.sctp.dest_port; } else { /* stack side, reverse */ - PF_ACPY(&key.addr[1], j->pd.src, key.af); - PF_ACPY(&key.addr[0], j->pd.dst, key.af); + pf_addrcpy(&key.addr[1], 
j->pd.src, key.af); + pf_addrcpy(&key.addr[0], j->pd.dst, key.af); key.port[1] = j->pd.hdr.sctp.src_port; key.port[0] = j->pd.hdr.sctp.dest_port; } @@ -7904,8 +7916,10 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, #endif /* INET6 */ } if (afto) { - PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); - PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); + pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], + nk->af); + pf_addrcpy(&pd->ndaddr, &nk->addr[didx], + nk->af); pd->naf = nk->af; return (PF_AFRT); } @@ -8037,8 +8051,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, key.af = pd2.af; key.proto = IPPROTO_TCP; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = th->th_sport; key.port[pd2.didx] = th->th_dport; @@ -8141,9 +8155,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, &nk->addr[didx], pd->af, nk->af)) return (PF_DROP); - PF_ACPY(&pd->nsaddr, &nk->addr[pd2.sidx], - nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->nsaddr, + &nk->addr[pd2.sidx], nk->af); + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); if (nk->af == AF_INET) { pd->proto = IPPROTO_ICMP; @@ -8232,8 +8246,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, key.af = pd2.af; key.proto = IPPROTO_UDP; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = uh->uh_sport; key.port[pd2.didx] = uh->uh_dport; @@ -8276,9 +8290,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, &nk->addr[didx], pd->af, nk->af)) return (PF_DROP); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); if (nk->af == AF_INET) { pd->proto = IPPROTO_ICMP; @@ -8364,8 +8378,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, key.af = pd2.af; key.proto = IPPROTO_SCTP; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = sh->src_port; key.port[pd2.didx] = sh->dest_port; @@ -8431,9 +8445,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, sh->src_port = nk->port[sidx]; sh->dest_port = nk->port[didx]; m_copyback(pd2.m, pd2.off, sizeof(*sh), (c_caddr_t)sh); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); if (nk->af == AF_INET) { pd->proto = IPPROTO_ICMP; @@ -8574,9 +8588,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, iih->icmp_id = nk->port[iidx]; m_copyback(pd2.m, pd2.off, ICMP_MINLEN, (c_caddr_t)iih); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); /* * IPv4 becomes IPv6 so we must copy @@ -8702,9 +8716,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, iih->icmp6_id = nk->port[iidx]; m_copyback(pd2.m, pd2.off, sizeof(struct icmp6_hdr), (c_caddr_t)iih); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + 
pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); pd->naf = nk->af; return (PF_AFRT); @@ -8746,8 +8760,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, key.af = pd2.af; key.proto = pd2.proto; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = 0; action = pf_find_state(&pd2, &key, state); @@ -9283,7 +9297,8 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, bzero(&dst, sizeof(dst)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(dst); - PF_ACPY((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr, AF_INET6); + pf_addrcpy((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr, + AF_INET6); if (pd->dir == PF_IN) { if (ip6->ip6_hlim <= IPV6_HLIMDEC) { @@ -10083,8 +10098,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, pd->src = (struct pf_addr *)&h->ip_src; pd->dst = (struct pf_addr *)&h->ip_dst; - PF_ACPY(&pd->osrc, pd->src, af); - PF_ACPY(&pd->odst, pd->dst, af); + pf_addrcpy(&pd->osrc, pd->src, af); + pf_addrcpy(&pd->odst, pd->dst, af); pd->ip_sum = &h->ip_sum; pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; pd->ttl = h->ip_ttl; @@ -10121,8 +10136,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, h = mtod(pd->m, struct ip6_hdr *); pd->src = (struct pf_addr *)&h->ip6_src; pd->dst = (struct pf_addr *)&h->ip6_dst; - PF_ACPY(&pd->osrc, pd->src, af); - PF_ACPY(&pd->odst, pd->dst, af); + pf_addrcpy(&pd->osrc, pd->src, af); + pf_addrcpy(&pd->odst, pd->dst, af); pd->ip_sum = NULL; pd->tos = IPV6_DSCP(h); pd->ttl = h->ip6_hlim; diff --git a/sys/netpfil/pf/pf_if.c b/sys/netpfil/pf/pf_if.c index 389b74d09d37..e2200c15c704 100644 --- a/sys/netpfil/pf/pf_if.c +++ b/sys/netpfil/pf/pf_if.c @@ -522,7 +522,7 @@ pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) case 0: return (0); case 1: - return (PF_MATCHA(0, &dyn->pfid_addr4, + return (pf_match_addr(0, &dyn->pfid_addr4, &dyn->pfid_mask4, a, AF_INET)); default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); @@ -535,7 +535,7 @@ pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) case 0: return (0); case 1: - return (PF_MATCHA(0, &dyn->pfid_addr6, + return (pf_match_addr(0, &dyn->pfid_addr6, &dyn->pfid_mask6, a, AF_INET6)); default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index 05a7e1311ad8..357b2be194a5 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -615,7 +615,7 @@ pf_free_rule(struct pf_krule *rule) pfi_kkif_unref(rule->kif); if (rule->rcv_kif) pfi_kkif_unref(rule->rcv_kif); - pf_kanchor_remove(rule); + pf_remove_kanchor(rule); pf_empty_kpool(&rule->rdr.list); pf_empty_kpool(&rule->nat.list); pf_empty_kpool(&rule->route.list); @@ -2350,15 +2350,17 @@ relock_DIOCKILLSTATES: if (psk->psk_proto && psk->psk_proto != sk->proto) continue; - if (! PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, + if (! pf_match_addr(psk->psk_src.neg, + &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, srcaddr, sk->af)) continue; - if (! PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, + if (! pf_match_addr(psk->psk_dst.neg, + &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, dstaddr, sk->af)) continue; - if (! PF_MATCHA(psk->psk_rt_addr.neg, + if (! 
pf_match_addr(psk->psk_rt_addr.neg, &psk->psk_rt_addr.addr.v.a.addr, &psk->psk_rt_addr.addr.v.a.mask, &s->act.rt_addr, sk->af)) @@ -2398,10 +2400,10 @@ relock_DIOCKILLSTATES: match_key.af = s->key[idx]->af; match_key.proto = s->key[idx]->proto; - PF_ACPY(&match_key.addr[0], + pf_addrcpy(&match_key.addr[0], &s->key[idx]->addr[1], match_key.af); match_key.port[0] = s->key[idx]->port[1]; - PF_ACPY(&match_key.addr[1], + pf_addrcpy(&match_key.addr[1], &s->key[idx]->addr[0], match_key.af); match_key.port[1] = s->key[idx]->port[0]; } @@ -2738,7 +2740,7 @@ pf_ioctl_get_rulesets(struct pfioc_ruleset *pr) return (ENOENT); } pr->nr = 0; - if (ruleset->anchor == NULL) { + if (ruleset == &pf_main_ruleset) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) if (anchor->parent == NULL) @@ -2770,7 +2772,7 @@ pf_ioctl_get_ruleset(struct pfioc_ruleset *pr) } pr->name[0] = 0; - if (ruleset->anchor == NULL) { + if (ruleset == &pf_main_ruleset) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) if (anchor->parent == NULL && nr++ == pr->nr) { @@ -4152,9 +4154,9 @@ DIOCGETSTATESV2_full: bzero(&key, sizeof(key)); key.af = pnl->af; key.proto = pnl->proto; - PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af); + pf_addrcpy(&key.addr[sidx], &pnl->saddr, pnl->af); key.port[sidx] = pnl->sport; - PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af); + pf_addrcpy(&key.addr[didx], &pnl->daddr, pnl->af); key.port[didx] = pnl->dport; state = pf_find_state_all(&key, direction, &m); @@ -4166,9 +4168,11 @@ DIOCGETSTATESV2_full: error = E2BIG; /* more than one state */ } else { sk = state->key[sidx]; - PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); + pf_addrcpy(&pnl->rsaddr, + &sk->addr[sidx], sk->af); pnl->rsport = sk->port[sidx]; - PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af); + pf_addrcpy(&pnl->rdaddr, + &sk->addr[didx], sk->af); pnl->rdport = sk->port[didx]; PF_STATE_UNLOCK(state); } @@ -4606,7 +4610,7 @@ DIOCGETSTATESV2_full: } pool->cur = TAILQ_FIRST(&pool->list); - PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); + pf_addrcpy(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); PF_RULES_WUNLOCK(); break; @@ -6024,11 +6028,11 @@ pf_kill_srcnodes(struct pfioc_src_node_kill *psnk) PF_HASHROW_LOCK(sh); LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp) if (psnk == NULL || - (PF_MATCHA(psnk->psnk_src.neg, + (pf_match_addr(psnk->psnk_src.neg, &psnk->psnk_src.addr.v.a.addr, &psnk->psnk_src.addr.v.a.mask, &sn->addr, sn->af) && - PF_MATCHA(psnk->psnk_dst.neg, + pf_match_addr(psnk->psnk_dst.neg, &psnk->psnk_dst.addr.v.a.addr, &psnk->psnk_dst.addr.v.a.mask, &sn->raddr, sn->af))) { @@ -6132,10 +6136,10 @@ relock_DIOCCLRSTATES: match_key.af = s->key[idx]->af; match_key.proto = s->key[idx]->proto; - PF_ACPY(&match_key.addr[0], + pf_addrcpy(&match_key.addr[0], &s->key[idx]->addr[1], match_key.af); match_key.port[0] = s->key[idx]->port[1]; - PF_ACPY(&match_key.addr[1], + pf_addrcpy(&match_key.addr[1], &s->key[idx]->addr[0], match_key.af); match_key.port[1] = s->key[idx]->port[0]; } diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index 5e7865e4fac5..308d76c46e5b 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -319,12 +319,14 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, bzero(&udp_source, sizeof(udp_source)); udp_source.af = pd->af; - PF_ACPY(&udp_source.addr, &pd->nsaddr, pd->af); + pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af); udp_source.port = pd->nsport; if (udp_mapping) { *udp_mapping = 
pf_udp_mapping_find(&udp_source); if (*udp_mapping) { - PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af); + pf_addrcpy(naddr, + &(*udp_mapping)->endpoints[1].addr, + pd->af); *nport = (*udp_mapping)->endpoints[1].port; /* Try to find a src_node as per pf_map_addr(). */ if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR && @@ -369,12 +371,13 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, key.proto = pd->proto; do { - PF_ACPY(&key.addr[didx], &pd->ndaddr, key.af); - PF_ACPY(&key.addr[sidx], naddr, key.af); + pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af); + pf_addrcpy(&key.addr[sidx], naddr, key.af); key.port[didx] = pd->ndport; if (udp_mapping && *udp_mapping) - PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, pd->af); + pf_addrcpy(&(*udp_mapping)->endpoints[1].addr, naddr, + pd->af); /* * port search; start random, step; @@ -591,10 +594,10 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, switch (rpool->opts & PF_POOL_TYPEMASK) { case PF_POOL_NONE: - PF_ACPY(naddr, raddr, af); + pf_addrcpy(naddr, raddr, af); break; case PF_POOL_BITMASK: - PF_POOLMASK(naddr, raddr, rmask, saddr, af); + pf_poolmask(naddr, raddr, rmask, saddr, af); break; case PF_POOL_RANDOM: if (rpool->cur->addr.type == PF_ADDR_TABLE) { @@ -609,7 +612,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; if (cnt == 0) @@ -623,7 +626,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else if (init_addr != NULL && PF_AZERO(init_addr, af)) { switch (af) { #ifdef INET @@ -654,12 +657,12 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, break; #endif /* INET6 */ } - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); - PF_ACPY(init_addr, naddr, af); + pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); + pf_addrcpy(init_addr, naddr, af); } else { - PF_AINC(&rpool->counter, af); - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + pf_addr_inc(&rpool->counter, af); + pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); } break; case PF_POOL_SRCHASH: @@ -680,7 +683,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; if (cnt == 0) @@ -694,9 +697,9 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else { - PF_POOLMASK(naddr, raddr, rmask, + pf_poolmask(naddr, raddr, rmask, (struct pf_addr *)&hash, af); } break; @@ -743,14 +746,14 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; - PF_ACPY(&rpool->counter, raddr, af); + pf_addrcpy(&rpool->counter, raddr, af); } get_addr: - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); if (init_addr != 
NULL && PF_AZERO(init_addr, af)) - PF_ACPY(init_addr, naddr, af); - PF_AINC(&rpool->counter, af); + pf_addrcpy(init_addr, naddr, af); + pf_addr_inc(&rpool->counter, af); break; } } @@ -798,7 +801,7 @@ pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, goto done; } - PF_ACPY(naddr, &(*sn)->raddr, af); + pf_addrcpy(naddr, &(*sn)->raddr, af); if (nkif) *nkif = (*sn)->rkif; if (V_pf_status.debug >= PF_DEBUG_NOISY) { @@ -948,7 +951,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &rpool->cur->addr.p.dyn->pfid_addr4, &rpool->cur->addr.p.dyn->pfid_mask4, &pd->nsaddr, AF_INET); @@ -961,7 +964,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &rpool->cur->addr.p.dyn->pfid_addr6, &rpool->cur->addr.p.dyn->pfid_mask6, &pd->nsaddr, AF_INET6); @@ -969,7 +972,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, #endif /* INET6 */ } } else - PF_POOLMASK(naddr, + pf_poolmask(naddr, &rpool->cur->addr.v.a.addr, &rpool->cur->addr.v.a.mask, &pd->nsaddr, pd->af); @@ -983,7 +986,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &r->src.addr.p.dyn->pfid_addr4, &r->src.addr.p.dyn->pfid_mask4, &pd->ndaddr, AF_INET); @@ -995,7 +998,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &r->src.addr.p.dyn->pfid_addr6, &r->src.addr.p.dyn->pfid_mask6, &pd->ndaddr, AF_INET6); @@ -1003,7 +1006,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, #endif /* INET6 */ } } else - PF_POOLMASK(naddr, &r->src.addr.v.a.addr, + pf_poolmask(naddr, &r->src.addr.v.a.addr, &r->src.addr.v.a.mask, &pd->ndaddr, pd->af); break; } @@ -1018,7 +1021,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, if (reason != 0) goto notrans; if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) - PF_POOLMASK(naddr, naddr, &rpool->cur->addr.v.a.mask, + pf_poolmask(naddr, naddr, &rpool->cur->addr.v.a.mask, &pd->ndaddr, pd->af); /* Do not change SCTP ports. 
*/ @@ -1056,9 +1059,9 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, key.af = pd->af; key.proto = pd->proto; key.port[0] = pd->nsport; - PF_ACPY(&key.addr[0], &pd->nsaddr, key.af); + pf_addrcpy(&key.addr[0], &pd->nsaddr, key.af); key.port[1] = nport; - PF_ACPY(&key.addr[1], naddr, key.af); + pf_addrcpy(&key.addr[1], naddr, key.af); if (!pf_find_state_all_exists(&key, PF_OUT)) break; @@ -1220,8 +1223,8 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) } } - PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf); - PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf); + pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf); + pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf); if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: af-to %s done, prefixlen %d, ", diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index 381e966eacf1..d5d6dc70255e 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -1308,9 +1308,9 @@ pf_handle_natlook(struct nlmsghdr *hdr, struct nl_pstate *npt) key.af = attrs.af; key.proto = attrs.proto; - PF_ACPY(&key.addr[sidx], &attrs.src, attrs.af); + pf_addrcpy(&key.addr[sidx], &attrs.src, attrs.af); key.port[sidx] = attrs.sport; - PF_ACPY(&key.addr[didx], &attrs.dst, attrs.af); + pf_addrcpy(&key.addr[didx], &attrs.dst, attrs.af); key.port[didx] = attrs.dport; state = pf_find_state_all(&key, attrs.direction, &m); diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c index 865c5ecd72d9..2e5165a9900c 100644 --- a/sys/netpfil/pf/pf_ruleset.c +++ b/sys/netpfil/pf/pf_ruleset.c @@ -232,7 +232,7 @@ pf_get_leaf_kruleset(char *path, char **path_remainder) return (ruleset); } -struct pf_kanchor * +static struct pf_kanchor * pf_create_kanchor(struct pf_kanchor *parent, const char *aname) { struct pf_kanchor *anchor, *dup; @@ -259,8 +259,8 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname) if ((dup = RB_INSERT(pf_kanchor_global, &V_pf_anchors, anchor)) != NULL) { - printf("pf_find_or_create_ruleset: RB_INSERT1 " - "'%s' '%s' collides with '%s' '%s'\n", + printf("%s: RB_INSERT1 " + "'%s' '%s' collides with '%s' '%s'\n", __func__, anchor->path, anchor->name, dup->path, dup->name); rs_free(anchor); return (NULL); @@ -270,10 +270,10 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname) anchor->parent = parent; if ((dup = RB_INSERT(pf_kanchor_node, &parent->children, anchor)) != NULL) { - printf("pf_find_or_create_ruleset: " + printf("%s: " "RB_INSERT2 '%s' '%s' collides with " - "'%s' '%s'\n", anchor->path, anchor->name, - dup->path, dup->name); + "'%s' '%s'\n", __func__, anchor->path, + anchor->name, dup->path, dup->name); RB_REMOVE(pf_kanchor_global, &V_pf_anchors, anchor); rs_free(anchor); @@ -339,7 +339,7 @@ pf_remove_if_empty_kruleset(struct pf_kruleset *ruleset) int i; while (ruleset != NULL) { - if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || + if (ruleset == &pf_main_ruleset || !RB_EMPTY(&ruleset->anchor->children) || ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || ruleset->topen) @@ -407,7 +407,7 @@ pf_kanchor_setup(struct pf_krule *r, const struct pf_kruleset *s, } ruleset = pf_find_or_create_kruleset(path); rs_free(path); - if (ruleset == NULL || ruleset->anchor == NULL) { + if (ruleset == NULL || ruleset == &pf_main_ruleset) { DPFPRINTF("%s: ruleset\n", __func__); return (1); } @@ -432,7 +432,7 @@ pf_kanchor_copyout(const struct pf_kruleset *rs, const struct pf_krule *r, char a[MAXPATHLEN]; char *p; int i; - if (rs->anchor == NULL) + if (rs == &pf_main_ruleset) a[0] = 0; else strlcpy(a, rs->anchor->path, 
MAXPATHLEN); @@ -444,7 +444,7 @@ pf_kanchor_copyout(const struct pf_kruleset *rs, const struct pf_krule *r, anchor_call_len); } if (strncmp(a, r->anchor->path, strlen(a))) { - printf("pf_anchor_copyout: '%s' '%s'\n", a, + printf("%s: '%s' '%s'\n", __func__, a, r->anchor->path); return (1); } @@ -525,16 +525,13 @@ done: } void -pf_kanchor_remove(struct pf_krule *r) +pf_remove_kanchor(struct pf_krule *r) { if (r->anchor == NULL) return; - if (r->anchor->refcnt <= 0) { - printf("pf_anchor_remove: broken refcount\n"); - r->anchor = NULL; - return; - } - if (!--r->anchor->refcnt) + if (r->anchor->refcnt <= 0) + printf("%s: broken refcount\n", __func__); + else if (!--r->anchor->refcnt) pf_remove_if_empty_kruleset(&r->anchor->ruleset); r->anchor = NULL; } diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c index d5874df3df66..43e4366845a2 100644 --- a/sys/netpfil/pf/pf_table.c +++ b/sys/netpfil/pf/pf_table.c @@ -704,7 +704,7 @@ pfr_validate_addr(struct pfr_addr *ad) return (-1); if (ad->pfra_not && ad->pfra_not != 1) return (-1); - if (ad->pfra_fback) + if (ad->pfra_fback != PFR_FB_NONE) return (-1); return (0); } @@ -2340,16 +2340,16 @@ _next_block: if (use_counter && !PF_AZERO(counter, af)) { /* is supplied address within block? */ - if (!PF_MATCHA(0, &cur, &mask, counter, af)) { + if (!pf_match_addr(0, &cur, &mask, counter, af)) { /* no, go to next block in table */ idx++; use_counter = 0; goto _next_block; } - PF_ACPY(addr, counter, af); + pf_addrcpy(addr, counter, af); } else { /* use first address of block */ - PF_ACPY(addr, &cur, af); + pf_addrcpy(addr, &cur, af); } if (!KENTRY_NETWORK(ke)) { @@ -2358,7 +2358,7 @@ _next_block: idx++; goto _next_block; } - PF_ACPY(counter, addr, af); + pf_addrcpy(counter, addr, af); *pidx = idx; pfr_kstate_counter_add(&kt->pfrkt_match, 1); return (0); @@ -2382,7 +2382,7 @@ _next_block: /* lookup return the same block - perfect */ if (filter && filter(af, addr)) goto _next_entry; - PF_ACPY(counter, addr, af); + pf_addrcpy(counter, addr, af); *pidx = idx; pfr_kstate_counter_add(&kt->pfrkt_match, 1); return (0); @@ -2392,9 +2392,9 @@ _next_entry: /* we need to increase the counter past the nested block */ pfr_prepare_network(&umask, AF_INET, ke2->pfrke_net); pfr_sockaddr_to_pf_addr(&umask, &umask_addr); - PF_POOLMASK(addr, addr, &umask_addr, &pfr_ffaddr, af); - PF_AINC(addr, af); - if (!PF_MATCHA(0, &cur, &mask, addr, af)) { + pf_poolmask(addr, addr, &umask_addr, &pfr_ffaddr, af); + pf_addr_inc(addr, af); + if (!pf_match_addr(0, &cur, &mask, addr, af)) { /* ok, we reached the end of our main block */ /* go to next block in table */ idx++; diff --git a/sys/powerpc/mpc85xx/mpc85xx_gpio.c b/sys/powerpc/mpc85xx/mpc85xx_gpio.c index 0f333feb747f..cb96d768adef 100644 --- a/sys/powerpc/mpc85xx/mpc85xx_gpio.c +++ b/sys/powerpc/mpc85xx/mpc85xx_gpio.c @@ -226,14 +226,14 @@ mpc85xx_gpio_attach(device_t dev) return (ENOMEM); } + OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); + sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { mpc85xx_gpio_detach(dev); return (ENOMEM); } - OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); - return (0); } diff --git a/sys/riscv/allwinner/files.allwinner b/sys/riscv/allwinner/files.allwinner index 423a89c10c78..73fa9660e2d2 100644 --- a/sys/riscv/allwinner/files.allwinner +++ b/sys/riscv/allwinner/files.allwinner @@ -1,5 +1,6 @@ arm/allwinner/aw_gpio.c optional gpio aw_gpio fdt +arm/allwinner/aw_rtc.c optional aw_rtc fdt arm/allwinner/aw_syscon.c optional syscon 
arm/allwinner/aw_sid.c optional aw_sid nvmem arm/allwinner/aw_timer.c optional aw_timer fdt diff --git a/sys/riscv/conf/std.allwinner b/sys/riscv/conf/std.allwinner index 1bf6b027a4cb..2b1e0d4e09dc 100644 --- a/sys/riscv/conf/std.allwinner +++ b/sys/riscv/conf/std.allwinner @@ -7,6 +7,7 @@ options SOC_ALLWINNER_D1 device aw_ccu # Allwinner clock controller device aw_gpio # Allwinner GPIO controller +device aw_rtc # Allwinner Real-time Clock device aw_sid # Allwinner Secure ID EFUSE device aw_timer # Allwinner Timer device aw_usbphy # Allwinner USB PHY diff --git a/sys/sys/caprights.h b/sys/sys/caprights.h index 48c75afc62a0..6a5a17eda5ee 100644 --- a/sys/sys/caprights.h +++ b/sys/sys/caprights.h @@ -79,6 +79,8 @@ extern const cap_rights_t cap_futimes_rights; extern const cap_rights_t cap_getpeername_rights; extern const cap_rights_t cap_getsockopt_rights; extern const cap_rights_t cap_getsockname_rights; +extern const cap_rights_t cap_inotify_add_rights; +extern const cap_rights_t cap_inotify_rm_rights; extern const cap_rights_t cap_ioctl_rights; extern const cap_rights_t cap_linkat_source_rights; extern const cap_rights_t cap_linkat_target_rights; diff --git a/sys/sys/capsicum.h b/sys/sys/capsicum.h index d493535454e9..3847c4c73e75 100644 --- a/sys/sys/capsicum.h +++ b/sys/sys/capsicum.h @@ -279,11 +279,15 @@ #define CAP_KQUEUE (CAP_KQUEUE_EVENT | CAP_KQUEUE_CHANGE) +/* Allows operations on inotify descriptors. */ +#define CAP_INOTIFY_ADD CAPRIGHT(1, 0x0000000000200000ULL) +#define CAP_INOTIFY_RM CAPRIGHT(1, 0x0000000000400000ULL) + /* All used bits for index 1. */ -#define CAP_ALL1 CAPRIGHT(1, 0x00000000001FFFFFULL) +#define CAP_ALL1 CAPRIGHT(1, 0x00000000007FFFFFULL) /* Available bits for index 1. */ -#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000200000ULL) +#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000800000ULL) /* ... */ #define CAP_UNUSED1_57 CAPRIGHT(1, 0x0100000000000000ULL) diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h index d770c274d7b7..cab94ac511a5 100644 --- a/sys/sys/exterr_cat.h +++ b/sys/sys/exterr_cat.h @@ -16,6 +16,8 @@ #define EXTERR_KTRACE 3 /* To allow inclusion of this file into kern_ktrace.c */ #define EXTERR_CAT_FUSE 4 +#define EXTERR_CAT_INOTIFY 5 +#define EXTERR_CAT_GENIO 6 #endif diff --git a/sys/sys/file.h b/sys/sys/file.h index 284d523147b6..63313926c4f0 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -71,6 +71,7 @@ struct nameidata; #define DTYPE_PROCDESC 12 /* process descriptor */ #define DTYPE_EVENTFD 13 /* eventfd */ #define DTYPE_TIMERFD 14 /* timerfd */ +#define DTYPE_INOTIFY 15 /* inotify descriptor */ #ifdef _KERNEL diff --git a/sys/sys/inotify.h b/sys/sys/inotify.h new file mode 100644 index 000000000000..65dc5dba43f3 --- /dev/null +++ b/sys/sys/inotify.h @@ -0,0 +1,150 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. + */ + +#ifndef _INOTIFY_H_ +#define _INOTIFY_H_ + +#include <sys/_types.h> + +/* Flags for inotify_init1(). */ +#define IN_NONBLOCK 0x00000004 /* O_NONBLOCK */ +#define IN_CLOEXEC 0x00100000 /* O_CLOEXEC */ + +struct inotify_event { + int wd; + __uint32_t mask; + __uint32_t cookie; + __uint32_t len; + char name[0]; +}; + +/* Events, set in the mask field. 
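
The sys/sys/capsicum.h hunk above introduces CAP_INOTIFY_ADD and CAP_INOTIFY_RM so that watches can still be managed on an inotify descriptor after entering capability mode. A hedged sketch of limiting such a descriptor follows; which additional rights the descriptor needs for reading and polling (CAP_READ and CAP_EVENT here) is an assumption, not something stated in this diff.

#include <sys/capsicum.h>
#include <sys/inotify.h>
#include <err.h>
#include <errno.h>

/* Hypothetical helper: restrict an inotify descriptor for capability mode. */
static void
limit_inotify_fd(int fd)
{
	cap_rights_t rights;

	/* Keep read/poll plus the new watch-management rights (assumed set). */
	cap_rights_init(&rights, CAP_READ, CAP_EVENT,
	    CAP_INOTIFY_ADD, CAP_INOTIFY_RM);
	if (cap_rights_limit(fd, &rights) == -1 && errno != ENOSYS)
		err(1, "cap_rights_limit");
}
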
*/ +#define IN_ACCESS 0x00000001 +#define IN_MODIFY 0x00000002 +#define IN_ATTRIB 0x00000004 +#define IN_CLOSE_WRITE 0x00000008 +#define IN_CLOSE_NOWRITE 0x00000010 +#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE) +#define IN_OPEN 0x00000020 +#define IN_MOVED_FROM 0x00000040 +#define IN_MOVED_TO 0x00000080 +#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) +#define IN_CREATE 0x00000100 +#define IN_DELETE 0x00000200 +#define IN_DELETE_SELF 0x00000400 +#define IN_MOVE_SELF 0x00000800 +#define IN_ALL_EVENTS 0x00000fff + +/* Events report only for entries in a watched dir, not the dir itself. */ +#define _IN_DIR_EVENTS (IN_CLOSE_WRITE | IN_DELETE | IN_MODIFY | \ + IN_MOVED_FROM | IN_MOVED_TO) + +#ifdef _KERNEL +/* + * An unlink that's done as part of a rename only records IN_DELETE if the + * unlinked vnode itself is watched, and not when the containing directory is + * watched. + */ +#define _IN_MOVE_DELETE 0x40000000 +/* + * Inode link count changes only trigger IN_ATTRIB events if the inode itself is + * watched, and not when the containing directory is watched. + */ +#define _IN_ATTRIB_LINKCOUNT 0x80000000 +#endif + +/* Flags, set in the mask field. */ +#define IN_ONLYDIR 0x01000000 +#define IN_DONT_FOLLOW 0x02000000 +#define IN_EXCL_UNLINK 0x04000000 +#define IN_MASK_CREATE 0x10000000 +#define IN_MASK_ADD 0x20000000 +#define IN_ONESHOT 0x80000000 +#define _IN_ALL_FLAGS (IN_ONLYDIR | IN_DONT_FOLLOW | \ + IN_EXCL_UNLINK | IN_MASK_CREATE | \ + IN_MASK_ADD | IN_ONESHOT) + +/* Flags returned by the kernel. */ +#define IN_UNMOUNT 0x00002000 +#define IN_Q_OVERFLOW 0x00004000 +#define IN_IGNORED 0x00008000 +#define IN_ISDIR 0x40000000 +#define _IN_ALL_RETFLAGS (IN_Q_OVERFLOW | IN_UNMOUNT | IN_IGNORED | \ + IN_ISDIR) + +#define _IN_ALIGN _Alignof(struct inotify_event) +#define _IN_NAMESIZE(namelen) \ + ((namelen) == 0 ? 0 : __align_up((namelen) + 1, _IN_ALIGN)) + +#ifdef _KERNEL +struct componentname; +struct file; +struct inotify_softc; +struct thread; +struct vnode; + +int inotify_create_file(struct thread *, struct file *, int, int *); +void inotify_log(struct vnode *, const char *, size_t, int, __uint32_t); + +int kern_inotify_rm_watch(int, uint32_t, struct thread *); +int kern_inotify_add_watch(int, int, const char *, uint32_t, + struct thread *); + +void vn_inotify(struct vnode *, struct vnode *, struct componentname *, int, + uint32_t); +int vn_inotify_add_watch(struct vnode *, struct inotify_softc *, + __uint32_t, __uint32_t *, struct thread *); +void vn_inotify_revoke(struct vnode *); + +/* Log an inotify event. */ +#define INOTIFY(vp, ev) do { \ + if (__predict_false((vn_irflag_read(vp) & (VIRF_INOTIFY | \ + VIRF_INOTIFY_PARENT)) != 0)) \ + VOP_INOTIFY((vp), NULL, NULL, (ev), 0); \ +} while (0) + +/* Log an inotify event using a specific name for the vnode. 
*/ +#define INOTIFY_NAME(vp, dvp, cnp, ev) do { \ + if (__predict_false((vn_irflag_read(vp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0)) \ + VOP_INOTIFY((vp), (dvp), (cnp), (ev), 0); \ +} while (0) + +extern __uint32_t inotify_rename_cookie; + +#define INOTIFY_MOVE(vp, fdvp, fcnp, tvp, tdvp, tcnp) do { \ + if (__predict_false((vn_irflag_read(fdvp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(tdvp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(vp) & VIRF_INOTIFY) != 0)) { \ + __uint32_t cookie; \ + \ + cookie = atomic_fetchadd_32(&inotify_rename_cookie, 1); \ + VOP_INOTIFY((vp), (fdvp), (fcnp), IN_MOVED_FROM, cookie); \ + VOP_INOTIFY((vp), (tdvp), (tcnp), IN_MOVED_TO, cookie); \ + } \ + if ((tvp) != NULL) \ + INOTIFY_NAME((tvp), (tdvp), (tcnp), _IN_MOVE_DELETE); \ +} while (0) + +#define INOTIFY_REVOKE(vp) do { \ + if (__predict_false((vn_irflag_read(vp) & VIRF_INOTIFY) != 0)) \ + vn_inotify_revoke((vp)); \ +} while (0) + +#else +#include <sys/cdefs.h> + +__BEGIN_DECLS +int inotify_init(void); +int inotify_init1(int flags); +int inotify_add_watch(int fd, const char *pathname, __uint32_t mask); +int inotify_add_watch_at(int fd, int dfd, const char *pathname, + __uint32_t mask); +int inotify_rm_watch(int fd, int wd); +__END_DECLS +#endif /* !_KERNEL */ + +#endif /* !_INOTIFY_H_ */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index a6f858e02395..f6480b173a5c 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -267,6 +267,7 @@ struct mount { int mnt_lazyvnodelistsize; /* (l) # of lazy vnodes */ int mnt_upper_pending; /* (i) # of pending ops on mnt_uppers */ struct lock mnt_explock; /* vfs_export walkers lock */ + struct lock mnt_renamelock; /* renames and O_RESOLVE_BENEATH */ TAILQ_HEAD(, mount_upper_node) mnt_uppers; /* (i) upper mounts over us */ TAILQ_HEAD(, mount_upper_node) mnt_notify; /* (i) upper mounts for notification */ STAILQ_ENTRY(mount) mnt_taskqueue_link; /* (d) our place in deferred unmount list */ diff --git a/sys/sys/namei.h b/sys/sys/namei.h index 5c245235ace5..6008d83f729d 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -108,7 +108,12 @@ struct nameidata { * through the VOP interface. */ struct componentname ni_cnd; + + /* Serving RBENEATH. */ struct nameicap_tracker_head ni_cap_tracker; + struct vnode *ni_rbeneath_dpp; + struct mount *ni_nctrack_mnt; + /* * Private helper data for UFS, must be at the end. See * NDINIT_PREFILL(). @@ -235,6 +240,10 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, panic("namei data not inited"); \ if (((arg)->ni_debugflags & NAMEI_DBG_HADSTARTDIR) != 0) \ panic("NDREINIT on namei data with NAMEI_DBG_HADSTARTDIR"); \ + if ((arg)->ni_nctrack_mnt != NULL) \ + panic("NDREINIT on namei data with leaked ni_nctrack_mnt"); \ + if (!TAILQ_EMPTY(&(arg)->ni_cap_tracker)) \ + panic("NDREINIT on namei data with leaked ni_cap_tracker"); \ (arg)->ni_debugflags = NAMEI_DBG_INITED; \ } #else @@ -259,6 +268,9 @@ do { \ _ndp->ni_resflags = 0; \ filecaps_init(&_ndp->ni_filecaps); \ _ndp->ni_rightsneeded = _rightsp; \ + _ndp->ni_rbeneath_dpp = NULL; \ + _ndp->ni_nctrack_mnt = NULL; \ + TAILQ_INIT(&_ndp->ni_cap_tracker); \ } while (0) #define NDREINIT(ndp) do { \ diff --git a/sys/sys/param.h b/sys/sys/param.h index 57eb8ebcf12c..af116d6e3f7a 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. 
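
The declarations at the end of the new sys/sys/inotify.h mirror the Linux inotify interface and are backed by the inotify_add_watch_at and inotify_rm_watch system calls added further down in this diff. A minimal consumer might look like the sketch below; it assumes that events are drained with read(2) and that the reported len field already includes the name padding, as in the Linux API, and the watched path and event mask are arbitrary.

#include <sys/inotify.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	_Alignas(struct inotify_event) char buf[8192];
	ssize_t n;
	int fd, wd;

	if ((fd = inotify_init1(IN_CLOEXEC)) == -1)
		err(1, "inotify_init1");
	/* Watch a directory for entries being created and removed. */
	if ((wd = inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE)) == -1)
		err(1, "inotify_add_watch");

	while ((n = read(fd, buf, sizeof(buf))) > 0) {
		for (char *p = buf; p < buf + n; ) {
			struct inotify_event *ev = (struct inotify_event *)p;

			printf("wd %d mask %#x name \"%s\"\n", ev->wd,
			    (unsigned int)ev->mask,
			    ev->len > 0 ? ev->name : "");
			/* Assumes ev->len covers the padded name, as on Linux. */
			p += sizeof(*ev) + ev->len;
		}
	}
	(void)inotify_rm_watch(fd, wd);
	return (0);
}
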
*/ #undef __FreeBSD_version -#define __FreeBSD_version 1500050 +#define __FreeBSD_version 1500051 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h index b15dace8cfa0..61411890c85b 100644 --- a/sys/sys/resourcevar.h +++ b/sys/sys/resourcevar.h @@ -122,6 +122,8 @@ struct uidinfo { long ui_kqcnt; /* (b) number of kqueues */ long ui_umtxcnt; /* (b) number of shared umtxs */ long ui_pipecnt; /* (b) consumption of pipe buffers */ + long ui_inotifycnt; /* (b) number of inotify descriptors */ + long ui_inotifywatchcnt; /* (b) number of inotify watches */ uid_t ui_uid; /* (a) uid */ u_int ui_ref; /* (b) reference count */ #ifdef RACCT @@ -144,6 +146,8 @@ int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, int chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval); int chgumtxcnt(struct uidinfo *uip, int diff, rlim_t maxval); int chgpipecnt(struct uidinfo *uip, int diff, rlim_t max); +int chginotifycnt(struct uidinfo *uip, int diff, rlim_t maxval); +int chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t maxval); int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which, struct rlimit *limp); struct plimit diff --git a/sys/sys/specialfd.h b/sys/sys/specialfd.h index dc4d88ce689f..0b79c841d149 100644 --- a/sys/sys/specialfd.h +++ b/sys/sys/specialfd.h @@ -30,6 +30,7 @@ enum specialfd_type { SPECIALFD_EVENTFD = 1, + SPECIALFD_INOTIFY = 2, }; struct specialfd_eventfd { @@ -37,4 +38,8 @@ struct specialfd_eventfd { int flags; }; +struct specialfd_inotify { + int flags; +}; + #endif /* !_SYS_SPECIALFD_H_ */ diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index 68406a2dfc29..eec923d0b82e 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -529,4 +529,6 @@ #define SYS_fchroot 590 #define SYS_setcred 591 #define SYS_exterrctl 592 -#define SYS_MAXSYSCALL 593 +#define SYS_inotify_add_watch_at 593 +#define SYS_inotify_rm_watch 594 +#define SYS_MAXSYSCALL 595 diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk index 9a90a63f35a3..547242a73277 100644 --- a/sys/sys/syscall.mk +++ b/sys/sys/syscall.mk @@ -434,4 +434,6 @@ MIASM = \ getrlimitusage.o \ fchroot.o \ setcred.o \ - exterrctl.o + exterrctl.o \ + inotify_add_watch_at.o \ + inotify_rm_watch.o diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index 94da81c84d25..94b5a0a7a95e 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -1891,6 +1891,16 @@ struct exterrctl_args { char flags_l_[PADL_(u_int)]; u_int flags; char flags_r_[PADR_(u_int)]; char ptr_l_[PADL_(void *)]; void * ptr; char ptr_r_[PADR_(void *)]; }; +struct inotify_add_watch_at_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char dfd_l_[PADL_(int)]; int dfd; char dfd_r_[PADR_(int)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; +}; +struct inotify_rm_watch_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char wd_l_[PADL_(int)]; int wd; char wd_r_[PADR_(int)]; +}; int sys_exit(struct thread *, struct exit_args *); int sys_fork(struct thread *, struct fork_args *); int sys_read(struct thread *, struct read_args *); @@ -2293,6 +2303,8 @@ int sys_getrlimitusage(struct thread *, struct getrlimitusage_args *); int sys_fchroot(struct thread *, struct fchroot_args *); int sys_setcred(struct thread *, struct setcred_args *); int sys_exterrctl(struct thread *, struct exterrctl_args *); +int 
sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args *); +int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *); #ifdef COMPAT_43 @@ -3275,6 +3287,8 @@ int freebsd13_swapoff(struct thread *, struct freebsd13_swapoff_args *); #define SYS_AUE_fchroot AUE_NULL #define SYS_AUE_setcred AUE_SETCRED #define SYS_AUE_exterrctl AUE_NULL +#define SYS_AUE_inotify_add_watch_at AUE_INOTIFY +#define SYS_AUE_inotify_rm_watch AUE_INOTIFY #undef PAD_ #undef PADL_ diff --git a/sys/sys/user.h b/sys/sys/user.h index f94a91ca1238..103236b6ed1b 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -265,6 +265,7 @@ struct user { #define KF_TYPE_DEV 12 #define KF_TYPE_EVENTFD 13 #define KF_TYPE_TIMERFD 14 +#define KF_TYPE_INOTIFY 15 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -456,6 +457,10 @@ struct kinfo_file { int32_t kf_kqueue_count; int32_t kf_kqueue_state; } kf_kqueue; + struct { + uint64_t kf_inotify_npending; + uint64_t kf_inotify_nbpending; + } kf_inotify; } kf_un; }; uint16_t kf_status; /* Status flags. */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index bed20f607339..3ed469bdce6d 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -86,11 +86,13 @@ enum vgetstate { * it from v_data. If non-null, this area is freed in getnewvnode(). */ -struct namecache; struct cache_fpl; +struct inotify_watch; +struct namecache; struct vpollinfo { struct mtx vpi_lock; /* lock to protect below */ + TAILQ_HEAD(, inotify_watch) vpi_inotify; /* list of inotify watchers */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ @@ -248,6 +250,9 @@ _Static_assert(sizeof(struct vnode) <= 448, "vnode size crosses 448 bytes"); #define VIRF_CROSSMP 0x0010 /* Cross-mp vnode, no locking */ #define VIRF_NAMEDDIR 0x0020 /* Named attribute directory */ #define VIRF_NAMEDATTR 0x0040 /* Named attribute */ +#define VIRF_INOTIFY 0x0080 /* This vnode is being watched */ +#define VIRF_INOTIFY_PARENT 0x0100 /* A parent of this vnode may be being + watched */ #define VI_UNUSED0 0x0001 /* unused */ #define VI_MOUNT 0x0002 /* Mount in progress */ @@ -667,6 +672,7 @@ char *cache_symlink_alloc(size_t size, int flags); void cache_symlink_free(char *string, size_t size); int cache_symlink_resolve(struct cache_fpl *fpl, const char *string, size_t len); +void cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie); void cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp, struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp); void cache_vop_rmdir(struct vnode *dvp, struct vnode *vp); @@ -869,8 +875,10 @@ int vop_stdfsync(struct vop_fsync_args *); int vop_stdgetwritemount(struct vop_getwritemount_args *); int vop_stdgetpages(struct vop_getpages_args *); int vop_stdinactive(struct vop_inactive_args *); -int vop_stdioctl(struct vop_ioctl_args *); int vop_stdneed_inactive(struct vop_need_inactive_args *); +int vop_stdinotify(struct vop_inotify_args *); +int vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *); +int vop_stdioctl(struct vop_ioctl_args *); int vop_stdkqfilter(struct vop_kqfilter_args *); int vop_stdlock(struct vop_lock1_args *); int vop_stdunlock(struct vop_unlock_args *); @@ -910,9 +918,12 @@ int dead_read(struct vop_read_args *ap); int dead_write(struct vop_write_args *ap); /* These are called from within the actual VOPS. 
*/ +void vop_allocate_post(void *a, int rc); +void vop_copy_file_range_post(void *ap, int rc); void vop_close_post(void *a, int rc); void vop_create_pre(void *a); void vop_create_post(void *a, int rc); +void vop_deallocate_post(void *a, int rc); void vop_whiteout_pre(void *a); void vop_whiteout_post(void *a, int rc); void vop_deleteextattr_pre(void *a); @@ -1020,9 +1031,12 @@ void vop_rename_fail(struct vop_rename_args *ap); #define VOP_WRITE_POST(ap, ret) \ noffset = (ap)->a_uio->uio_offset; \ - if (noffset > ooffset && !VN_KNLIST_EMPTY((ap)->a_vp)) { \ - VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE \ - | (noffset > osize ? NOTE_EXTEND : 0)); \ + if (noffset > ooffset) { \ + if (VN_KNLIST_EMPTY((ap)->a_vp)) { \ + VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE | \ + (noffset > osize ? NOTE_EXTEND : 0)); \ + } \ + INOTIFY((ap)->a_vp, IN_MODIFY); \ } #define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__) diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk index d23c2af9bd9a..e829105197cc 100644 --- a/sys/tools/vnode_if.awk +++ b/sys/tools/vnode_if.awk @@ -193,6 +193,7 @@ if (cfile) { printc(common_head \ "#include <sys/param.h>\n" \ "#include <sys/event.h>\n" \ + "#include <sys/inotify.h>\n" \ "#include <sys/kernel.h>\n" \ "#include <sys/mount.h>\n" \ "#include <sys/sdt.h>\n" \ diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 891e490a7031..75f5fe716c31 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1012,7 +1012,6 @@ ffs_mountfs(struct vnode *odevvp, struct mount *mp, struct thread *td) else ump->um_check_blkno = NULL; mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF); - sx_init(&ump->um_checkpath_lock, "uchpth"); fs->fs_ronly = ronly; fs->fs_active = NULL; mp->mnt_data = ump; @@ -1182,7 +1181,6 @@ out: } if (ump != NULL) { mtx_destroy(UFS_MTX(ump)); - sx_destroy(&ump->um_checkpath_lock); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; @@ -1306,7 +1304,6 @@ ffs_unmount(struct mount *mp, int mntflags) vrele(ump->um_odevvp); dev_rel(ump->um_dev); mtx_destroy(UFS_MTX(ump)); - sx_destroy(&ump->um_checkpath_lock); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index eaf37c58756b..3f9c95e934fc 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -1412,7 +1412,6 @@ ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, vp = tvp = ITOV(target); mp = vp->v_mount; *wait_ino = 0; - sx_assert(&VFSTOUFS(mp)->um_checkpath_lock, SA_XLOCKED); if (target->i_number == source_ino) return (EEXIST); diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 9aea01e70951..74cb094bdfe4 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1273,9 +1273,9 @@ ufs_rename( struct mount *mp; ino_t ino; seqc_t fdvp_s, fvp_s, tdvp_s, tvp_s; - bool checkpath_locked, want_seqc_end; + bool want_seqc_end; - checkpath_locked = want_seqc_end = false; + want_seqc_end = false; endoff = 0; mp = tdvp->v_mount; @@ -1427,10 +1427,6 @@ relock: } vfs_ref(mp); MPASS(!want_seqc_end); - if (checkpath_locked) { - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; - } VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); vref(tdvp); @@ -1484,8 +1480,6 @@ relock: if (error) goto unlockout; - sx_xlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = true; error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, &ino); /* @@ -1493,8 +1487,6 @@ 
relock: * everything else and VGET before restarting. */ if (ino) { - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); VOP_UNLOCK(tdvp); @@ -1574,9 +1566,6 @@ relock: vn_seqc_write_end(fdvp); want_seqc_end = false; vfs_ref(mp); - MPASS(checkpath_locked); - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); vref(tdvp); @@ -1763,9 +1752,6 @@ unlockout: vn_seqc_write_end(fdvp); } - if (checkpath_locked) - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - vput(fdvp); vput(fvp); diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h index 5c7fa11dae6a..d33b01e4425e 100644 --- a/sys/ufs/ufs/ufsmount.h +++ b/sys/ufs/ufs/ufsmount.h @@ -97,8 +97,6 @@ struct ufsmount { uint64_t um_maxsymlinklen; /* (c) max size of short symlink */ struct mtx um_lock; /* (c) Protects ufsmount & fs */ - struct sx um_checkpath_lock; /* (c) Protects ufs_checkpath() - result */ struct mount_softdeps *um_softdep; /* (c) softdep mgmt structure */ struct vnode *um_quotas[MAXQUOTAS]; /* (q) pointer to quota files */ struct ucred *um_cred[MAXQUOTAS]; /* (q) quota file access cred */ diff --git a/sys/x86/linux/linux_dummy_x86.c b/sys/x86/linux/linux_dummy_x86.c index ae1d23e811e7..221f5dbf5ba3 100644 --- a/sys/x86/linux/linux_dummy_x86.c +++ b/sys/x86/linux/linux_dummy_x86.c @@ -46,7 +46,5 @@ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); DUMMY(sysfs); DUMMY(quotactl); -/* Linux 2.6.13: */ -DUMMY(inotify_init); /* Linux 2.6.22: */ DUMMY(signalfd);