diff options
Diffstat (limited to 'sys/kern')
50 files changed, 838 insertions, 760 deletions
| diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 1bc2491a1a12..779158b41221 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -92,7 +92,7 @@  #define	ELF_ABI_ID	__CONCAT(elf, __ELF_WORD_SIZE)  static int __elfN(check_header)(const Elf_Ehdr *hdr); -static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, +static const Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,      const char *interp, int32_t *osrel, uint32_t *fctl0);  static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,      u_long *entry); @@ -104,7 +104,7 @@ static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note,      int32_t *osrel);  static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel);  static bool __elfN(check_note)(struct image_params *imgp, -    Elf_Brandnote *checknote, int32_t *osrel, bool *has_fctl0, +    const Elf_Brandnote *checknote, int32_t *osrel, bool *has_fctl0,      uint32_t *fctl0);  static vm_prot_t __elfN(trans_prot)(Elf_Word);  static Elf_Word __elfN(untrans_prot)(vm_prot_t); @@ -227,11 +227,11 @@ SYSCTL_BOOL(ELF_NODE_OID, OID_AUTO, allow_wx,      CTLFLAG_RWTUN, &__elfN(allow_wx), 0,      "Allow pages to be mapped simultaneously writable and executable"); -static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; +static const Elf_Brandinfo *elf_brand_list[MAX_BRANDS];  #define	aligned(a, t)	(rounddown2((u_long)(a), sizeof(t)) == (u_long)(a)) -Elf_Brandnote __elfN(freebsd_brandnote) = { +const Elf_Brandnote __elfN(freebsd_brandnote) = {  	.hdr.n_namesz	= sizeof(FREEBSD_ABI_VENDOR),  	.hdr.n_descsz	= sizeof(int32_t),  	.hdr.n_type	= NT_FREEBSD_ABI_TAG, @@ -254,7 +254,7 @@ __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel)  static int GNU_KFREEBSD_ABI_DESC = 3; -Elf_Brandnote __elfN(kfreebsd_brandnote) = { +const Elf_Brandnote __elfN(kfreebsd_brandnote) = {  	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),  	.hdr.n_descsz	= 16,	/* XXX at least 16 */  	.hdr.n_type	= 1, @@ -286,7 
+286,7 @@ kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel)  }  int -__elfN(insert_brand_entry)(Elf_Brandinfo *entry) +__elfN(insert_brand_entry)(const Elf_Brandinfo *entry)  {  	int i; @@ -305,7 +305,7 @@ __elfN(insert_brand_entry)(Elf_Brandinfo *entry)  }  int -__elfN(remove_brand_entry)(Elf_Brandinfo *entry) +__elfN(remove_brand_entry)(const Elf_Brandinfo *entry)  {  	int i; @@ -321,7 +321,7 @@ __elfN(remove_brand_entry)(Elf_Brandinfo *entry)  }  bool -__elfN(brand_inuse)(Elf_Brandinfo *entry) +__elfN(brand_inuse)(const Elf_Brandinfo *entry)  {  	struct proc *p;  	bool rval = false; @@ -338,12 +338,12 @@ __elfN(brand_inuse)(Elf_Brandinfo *entry)  	return (rval);  } -static Elf_Brandinfo * +static const Elf_Brandinfo *  __elfN(get_brandinfo)(struct image_params *imgp, const char *interp,      int32_t *osrel, uint32_t *fctl0)  {  	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; -	Elf_Brandinfo *bi, *bi_m; +	const Elf_Brandinfo *bi, *bi_m;  	bool ret, has_fctl0;  	int i, interp_name_len; @@ -492,7 +492,7 @@ __elfN(phdr_in_zero_page)(const Elf_Ehdr *hdr)  static int  __elfN(check_header)(const Elf_Ehdr *hdr)  { -	Elf_Brandinfo *bi; +	const Elf_Brandinfo *bi;  	int i;  	if (!IS_ELF(*hdr) || @@ -1109,7 +1109,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)  	struct vmspace *vmspace;  	vm_map_t map;  	char *interp; -	Elf_Brandinfo *brand_info; +	const Elf_Brandinfo *brand_info;  	struct sysentvec *sv;  	u_long addr, baddr, entry, proghdr;  	u_long maxalign, maxsalign, mapsz, maxv, maxv1, anon_loc; @@ -1925,7 +1925,7 @@ __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs,  	Elf_Phdr *phdr;  	Elf_Shdr *shdr;  	struct phdr_closure phc; -	Elf_Brandinfo *bi; +	const Elf_Brandinfo *bi;  	ehdr = (Elf_Ehdr *)hdr;  	bi = td->td_proc->p_elf_brandinfo; @@ -2831,7 +2831,7 @@ __elfN(parse_notes)(const struct image_params *imgp, const Elf_Note *checknote,  		}  		if ((const char *)note_end - (const char *)note <  		    
sizeof(Elf_Note)) { -			uprintf("ELF note to short\n"); +			uprintf("ELF note too short\n");  			goto retf;  		}  		if (note->n_namesz != checknote->n_namesz || @@ -2839,9 +2839,9 @@ __elfN(parse_notes)(const struct image_params *imgp, const Elf_Note *checknote,  		    note->n_type != checknote->n_type)  			goto nextnote;  		note_name = (const char *)(note + 1); -		if (note_name + checknote->n_namesz >= -		    (const char *)note_end || strncmp(note_vendor, -		    note_name, checknote->n_namesz) != 0) +		if (note_name + roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) + +		    note->n_descsz >= (const char *)note_end || +		    strncmp(note_vendor, note_name, checknote->n_namesz) != 0)  			goto nextnote;  		if (cb(note, cb_arg, &res)) @@ -2861,7 +2861,7 @@ ret:  }  struct brandnote_cb_arg { -	Elf_Brandnote *brandnote; +	const Elf_Brandnote *brandnote;  	int32_t *osrel;  }; @@ -2883,7 +2883,7 @@ brandnote_cb(const Elf_Note *note, void *arg0, bool *res)  	return (true);  } -static Elf_Note fctl_note = { +static const Elf_Note fctl_note = {  	.n_namesz = sizeof(FREEBSD_ABI_VENDOR),  	.n_descsz = sizeof(uint32_t),  	.n_type = NT_FREEBSD_FEATURE_CTL, @@ -2918,7 +2918,7 @@ note_fctl_cb(const Elf_Note *note, void *arg0, bool *res)   * as for headers.   
*/  static bool -__elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote, +__elfN(check_note)(struct image_params *imgp, const Elf_Brandnote *brandnote,      int32_t *osrel, bool *has_fctl0, uint32_t *fctl0)  {  	const Elf_Phdr *phdr; diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 87ffdb8dbf07..6612ac685936 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -384,7 +384,7 @@ C_SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_FIFTH,  #if __SIZEOF_LONG__ == 4  static const char ilp32_warn[] = -    "WARNING: 32-bit kernels are deprecated and may be removed in FreeBSD 15.0.\n"; +    "WARNING: 32-bit kernels are deprecated and may be removed in FreeBSD 16.0.\n";  C_SYSINIT(ilp32warn, SI_SUB_COPYRIGHT, SI_ORDER_FIFTH,      print_caddr_t, ilp32_warn);  C_SYSINIT(ilp32warn2, SI_SUB_LAST, SI_ORDER_FIFTH, diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index e42e7dcf8b44..cd305de1ed44 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -665,4 +665,5 @@ struct sysent sysent[] = {  	{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 596 = setgroups */  	{ .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 597 = jail_attach_jd */  	{ .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 598 = jail_remove_jd */ +	{ .sy_narg = AS(kexec_load_args), .sy_call = (sy_call_t *)sys_kexec_load, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 599 = kexec_load */  }; diff --git a/sys/kern/kern_boottrace.c b/sys/kern/kern_boottrace.c index 1fa87955a299..c83255bc74ee 100644 --- a/sys/kern/kern_boottrace.c +++ b/sys/kern/kern_boottrace.c @@ -579,7 +579,7 @@ sysctl_boottrace_reset(SYSCTL_HANDLER_ARGS)  
}  static void -boottrace_init(void) +boottrace_init(void *dummy __unused)  {  	if (!boottrace_enabled) diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c index b891ed84957a..2da51d84ff60 100644 --- a/sys/kern/kern_conf.c +++ b/sys/kern/kern_conf.c @@ -664,7 +664,7 @@ prep_cdevsw(struct cdevsw *devsw, int flags)  		if ((devsw->d_flags & D_GIANTOK) == 0) {  			printf(  			    "WARNING: Device \"%s\" is Giant locked and may be " -			    "deleted before FreeBSD 15.0.\n", +			    "deleted before FreeBSD 16.0.\n",  			    devsw->d_name == NULL ? "???" : devsw->d_name);  		}  		if (devsw->d_gianttrick == NULL) { @@ -1163,6 +1163,9 @@ destroy_devl(struct cdev *dev)  		devfs_destroy_cdevpriv(p);  		mtx_lock(&cdevpriv_mtx);  	} +	while (cdp->cdp_fdpriv_dtrc != 0) { +		msleep(&cdp->cdp_fdpriv_dtrc, &cdevpriv_mtx, 0, "cdfdpc", 0); +	}  	mtx_unlock(&cdevpriv_mtx);  	dev_lock(); diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 19118eb7f275..a71a601733e5 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -2486,7 +2486,7 @@ fdunshare(struct thread *td)  	if (refcount_load(&p->p_fd->fd_refcnt) == 1)  		return; -	tmp = fdcopy(p->p_fd); +	tmp = fdcopy(p->p_fd, p);  	fdescfree(td);  	p->p_fd = tmp;  } @@ -2515,14 +2515,17 @@ pdunshare(struct thread *td)   * this is to ease callers, not catch errors.   */  struct filedesc * -fdcopy(struct filedesc *fdp) +fdcopy(struct filedesc *fdp, struct proc *p1)  {  	struct filedesc *newfdp;  	struct filedescent *nfde, *ofde; +	struct file *fp;  	int i, lastfile; +	bool fork_pass;  	MPASS(fdp != NULL); +	fork_pass = false;  	newfdp = fdinit();  	FILEDESC_SLOCK(fdp);  	for (;;) { @@ -2533,10 +2536,35 @@ fdcopy(struct filedesc *fdp)  		fdgrowtable(newfdp, lastfile + 1);  		FILEDESC_SLOCK(fdp);  	} -	/* copy all passable descriptors (i.e. not kqueue) */ + +	/* +	 * Copy all passable descriptors (i.e. not kqueue), and +	 * prepare to handle copyable but not passable descriptors +	 * (kqueues). 
+	 * +	 * The pass to handle copying is performed after all passable +	 * files are installed into the new file descriptor's table, +	 * since kqueues need all referenced file descriptors already +	 * valid, including other kqueues. For the same reason the +	 * copying is done in two passes by itself, first installing +	 * not fully initialized ('empty') copyable files into the new +	 * fd table, and then giving the subsystems a second chance to +	 * really fill the copied file backing structure with the +	 * content. +	 */  	newfdp->fd_freefile = fdp->fd_freefile;  	FILEDESC_FOREACH_FDE(fdp, i, ofde) { -		if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0 || +		const struct fileops *ops; + +		ops = ofde->fde_file->f_ops; +		fp = NULL; +		if ((ops->fo_flags & DFLAG_FORK) != 0 && +		    (ofde->fde_flags & UF_FOCLOSE) == 0) { +			if (ops->fo_fork(newfdp, ofde->fde_file, &fp, p1, +			    curthread) != 0) +				continue; +			fork_pass = true; +		} else if ((ops->fo_flags & DFLAG_PASSABLE) == 0 ||  		    (ofde->fde_flags & UF_FOCLOSE) != 0 ||  		    !fhold(ofde->fde_file)) {  			if (newfdp->fd_freefile == fdp->fd_freefile) @@ -2545,11 +2573,30 @@ fdcopy(struct filedesc *fdp)  		}  		nfde = &newfdp->fd_ofiles[i];  		*nfde = *ofde; +		if (fp != NULL) +			nfde->fde_file = fp;  		filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true);  		fdused_init(newfdp, i);  	}  	MPASS(newfdp->fd_freefile != -1);  	FILEDESC_SUNLOCK(fdp); + +	/* +	 * Now handle copying kqueues, since all fds, including +	 * kqueues, are in place. 
+	 */ +	if (__predict_false(fork_pass)) { +		FILEDESC_FOREACH_FDE(newfdp, i, nfde) { +			const struct fileops *ops; + +			ops = nfde->fde_file->f_ops; +			if ((ops->fo_flags & DFLAG_FORK) == 0 || +			    nfde->fde_file == NULL) +				continue; +			ops->fo_fork(newfdp, NULL, &nfde->fde_file, p1, +			    curthread); +		} +	}  	return (newfdp);  } diff --git a/sys/kern/kern_devctl.c b/sys/kern/kern_devctl.c index 7a2818c29b1a..a37cb23efed8 100644 --- a/sys/kern/kern_devctl.c +++ b/sys/kern/kern_devctl.c @@ -130,6 +130,7 @@ static const struct filterops devctl_rfiltops = {  	.f_isfd = 1,  	.f_detach = filt_devctl_detach,  	.f_event = filt_devctl_read, +	.f_copy = knote_triv_copy,  };  static struct cdev *devctl_dev; @@ -140,7 +141,7 @@ static struct devctlbridge {  } devctl_notify_hook = { .send_f = NULL };  static void -devctl_init(void) +devctl_init(void *dummy __unused)  {  	int reserve;  	uma_zone_t z; diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 23d8dc9cf54a..1baa24d278bf 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -134,6 +134,7 @@ static fo_kqfilter_t	kqueue_kqfilter;  static fo_stat_t	kqueue_stat;  static fo_close_t	kqueue_close;  static fo_fill_kinfo_t	kqueue_fill_kinfo; +static fo_fork_t	kqueue_fork;  static const struct fileops kqueueops = {  	.fo_read = invfo_rdwr, @@ -148,7 +149,9 @@ static const struct fileops kqueueops = {  	.fo_chown = invfo_chown,  	.fo_sendfile = invfo_sendfile,  	.fo_cmp = file_kcmp_generic, +	.fo_fork = kqueue_fork,  	.fo_fill_kinfo = kqueue_fill_kinfo, +	.fo_flags = DFLAG_FORK,  };  static int 	knote_attach(struct knote *kn, struct kqueue *kq); @@ -156,7 +159,7 @@ static void 	knote_drop(struct knote *kn, struct thread *td);  static void 	knote_drop_detached(struct knote *kn, struct thread *td);  static void 	knote_enqueue(struct knote *kn);  static void 	knote_dequeue(struct knote *kn); -static void 	knote_init(void); +static void 	knote_init(void *);  static struct 	knote *knote_alloc(int 
mflag);  static void 	knote_free(struct knote *kn); @@ -176,6 +179,7 @@ static void	filt_timerdetach(struct knote *kn);  static void	filt_timerstart(struct knote *kn, sbintime_t to);  static void	filt_timertouch(struct knote *kn, struct kevent *kev,  		    u_long type); +static int	filt_timercopy(struct knote *kn, struct proc *p1);  static int	filt_timervalidate(struct knote *kn, sbintime_t *to);  static int	filt_timer(struct knote *kn, long hint);  static int	filt_userattach(struct knote *kn); @@ -187,11 +191,13 @@ static void	filt_usertouch(struct knote *kn, struct kevent *kev,  static const struct filterops file_filtops = {  	.f_isfd = 1,  	.f_attach = filt_fileattach, +	.f_copy = knote_triv_copy,  };  static const struct filterops kqread_filtops = {  	.f_isfd = 1,  	.f_detach = filt_kqdetach,  	.f_event = filt_kqueue, +	.f_copy = knote_triv_copy,  };  /* XXX - move to kern_proc.c?  */  static const struct filterops proc_filtops = { @@ -199,12 +205,14 @@ static const struct filterops proc_filtops = {  	.f_attach = filt_procattach,  	.f_detach = filt_procdetach,  	.f_event = filt_proc, +	.f_copy = knote_triv_copy,  };  static const struct filterops jail_filtops = {  	.f_isfd = 0,  	.f_attach = filt_jailattach,  	.f_detach = filt_jaildetach,  	.f_event = filt_jail, +	.f_copy = knote_triv_copy,  };  static const struct filterops timer_filtops = {  	.f_isfd = 0, @@ -212,12 +220,14 @@ static const struct filterops timer_filtops = {  	.f_detach = filt_timerdetach,  	.f_event = filt_timer,  	.f_touch = filt_timertouch, +	.f_copy =  filt_timercopy,  };  static const struct filterops user_filtops = {  	.f_attach = filt_userattach,  	.f_detach = filt_userdetach,  	.f_event = filt_user,  	.f_touch = filt_usertouch, +	.f_copy = knote_triv_copy,  };  static uma_zone_t	knote_zone; @@ -347,6 +357,7 @@ filt_nullattach(struct knote *kn)  static const struct filterops null_filtops = {  	.f_isfd = 0,  	.f_attach = filt_nullattach, +	.f_copy = knote_triv_copy,  };  /* XXX - make 
SYSINIT to add these, and move into respective modules. */ @@ -940,6 +951,30 @@ filt_timerattach(struct knote *kn)  	return (0);  } +static int +filt_timercopy(struct knote *kn, struct proc *p) +{ +	struct kq_timer_cb_data *kc_src, *kc; + +	if (atomic_fetchadd_int(&kq_ncallouts, 1) + 1 > kq_calloutmax) { +		atomic_subtract_int(&kq_ncallouts, 1); +		return (ENOMEM); +	} + +	kn->kn_status &= ~KN_DETACHED; +	kc_src = kn->kn_ptr.p_v; +	kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); +	kc->kn = kn; +	kc->p = p; +	kc->flags = kc_src->flags & ~KQ_TIMER_CB_ENQUEUED; +	kc->next = kc_src->next; +	kc->to = kc_src->to; +	kc->cpuid = PCPU_GET(cpuid); +	callout_init(&kc->c, 1); +	kqtimer_sched_callout(kc); +	return (0); +} +  static void  filt_timerstart(struct knote *kn, sbintime_t to)  { @@ -1151,7 +1186,7 @@ int  sys_kqueue(struct thread *td, struct kqueue_args *uap)  { -	return (kern_kqueue(td, 0, NULL)); +	return (kern_kqueue(td, 0, false, NULL));  }  int @@ -1159,55 +1194,76 @@ sys_kqueuex(struct thread *td, struct kqueuex_args *uap)  {  	int flags; -	if ((uap->flags & ~(KQUEUE_CLOEXEC)) != 0) +	if ((uap->flags & ~(KQUEUE_CLOEXEC | KQUEUE_CPONFORK)) != 0)  		return (EINVAL);  	flags = 0;  	if ((uap->flags & KQUEUE_CLOEXEC) != 0)  		flags |= O_CLOEXEC; -	return (kern_kqueue(td, flags, NULL)); +	return (kern_kqueue(td, flags, (uap->flags & KQUEUE_CPONFORK) != 0, +	    NULL));  }  static void -kqueue_init(struct kqueue *kq) +kqueue_init(struct kqueue *kq, bool cponfork)  {  	mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK);  	TAILQ_INIT(&kq->kq_head);  	knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);  	TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); +	if (cponfork) +		kq->kq_state |= KQ_CPONFORK;  } -int -kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps) +static int +kern_kqueue_alloc(struct thread *td, struct filedesc *fdp, int *fdip, +    struct file **fpp, int flags, struct filecaps *fcaps, bool cponfork, +    struct kqueue **kqp) 
 { -	struct filedesc *fdp; -	struct kqueue *kq; -	struct file *fp;  	struct ucred *cred; -	int fd, error; +	struct kqueue *kq; +	int error; -	fdp = td->td_proc->p_fd;  	cred = td->td_ucred;  	if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES)))  		return (ENOMEM); -	error = falloc_caps(td, &fp, &fd, flags, fcaps); +	error = fdip != NULL ? falloc_caps(td, fpp, fdip, flags, fcaps) : +	    _falloc_noinstall(td, fpp, 1);  	if (error != 0) {  		chgkqcnt(cred->cr_ruidinfo, -1, 0);  		return (error);  	}  	/* An extra reference on `fp' has been held for us by falloc(). */ -	kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); -	kqueue_init(kq); +	kq = malloc(sizeof(*kq), M_KQUEUE, M_WAITOK | M_ZERO); +	kqueue_init(kq, cponfork);  	kq->kq_fdp = fdp;  	kq->kq_cred = crhold(cred); -	FILEDESC_XLOCK(fdp); +	if (fdip != NULL) +		FILEDESC_XLOCK(fdp);  	TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); -	FILEDESC_XUNLOCK(fdp); +	if (fdip != NULL) +		FILEDESC_XUNLOCK(fdp); + +	finit(*fpp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); +	*kqp = kq; +	return (0); +} + +int +kern_kqueue(struct thread *td, int flags, bool cponfork, struct filecaps *fcaps) +{ +	struct kqueue *kq; +	struct file *fp; +	int fd, error; + +	error = kern_kqueue_alloc(td, td->td_proc->p_fd, &fd, &fp, flags, +	    fcaps, cponfork, &kq); +	if (error != 0) +		return (error); -	finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);  	fdrop(fp, td);  	td->td_retval[0] = fd; @@ -1488,7 +1544,7 @@ kern_kevent_anonymous(struct thread *td, int nevents,  	struct kqueue kq = {};  	int error; -	kqueue_init(&kq); +	kqueue_init(&kq, false);  	kq.kq_refcnt = 1;  	error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL);  	kqueue_drain(&kq, td); @@ -1576,7 +1632,7 @@ kqueue_fo_release(int filt)  	mtx_lock(&filterops_lock);  	KASSERT(sysfilt_ops[~filt].for_refcnt > 0, -	    ("filter object refcount not valid on release")); +	    ("filter object %d refcount not valid on release", filt));  	
sysfilt_ops[~filt].for_refcnt--;  	mtx_unlock(&filterops_lock);  } @@ -1855,17 +1911,8 @@ done:  }  static int -kqueue_acquire(struct file *fp, struct kqueue **kqp) +kqueue_acquire_ref(struct kqueue *kq)  { -	int error; -	struct kqueue *kq; - -	error = 0; - -	kq = fp->f_data; -	if (fp->f_type != DTYPE_KQUEUE || kq == NULL) -		return (EINVAL); -	*kqp = kq;  	KQ_LOCK(kq);  	if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {  		KQ_UNLOCK(kq); @@ -1873,8 +1920,22 @@ kqueue_acquire(struct file *fp, struct kqueue **kqp)  	}  	kq->kq_refcnt++;  	KQ_UNLOCK(kq); +	return (0); +} -	return error; +static int +kqueue_acquire(struct file *fp, struct kqueue **kqp) +{ +	struct kqueue *kq; +	int error; + +	kq = fp->f_data; +	if (fp->f_type != DTYPE_KQUEUE || kq == NULL) +		return (EINVAL); +	error = kqueue_acquire_ref(kq); +	if (error == 0) +		*kqp = kq; +	return (error);  }  static void @@ -2887,7 +2948,7 @@ knote_dequeue(struct knote *kn)  }  static void -knote_init(void) +knote_init(void *dummy __unused)  {  	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL, @@ -2937,6 +2998,152 @@ noacquire:  	return (error);  } +static int +kqueue_fork_alloc(struct filedesc *fdp, struct file *fp, struct file **fp1, +    struct thread *td) +{ +	struct kqueue *kq, *kq1; +	int error; + +	MPASS(fp->f_type == DTYPE_KQUEUE); +	kq = fp->f_data; +	if ((kq->kq_state & KQ_CPONFORK) == 0) +		return (EOPNOTSUPP); +	error = kqueue_acquire_ref(kq); +	if (error != 0) +		return (error); +	error = kern_kqueue_alloc(td, fdp, NULL, fp1, 0, NULL, true, &kq1); +	if (error == 0) { +		kq1->kq_forksrc = kq; +		(*fp1)->f_flag = fp->f_flag & (FREAD | FWRITE | FEXEC | +		    O_CLOEXEC | O_CLOFORK); +	} else { +		kqueue_release(kq, 0); +	} +	return (error); +} + +static void +kqueue_fork_copy_knote(struct kqueue *kq1, struct knote *kn, struct proc *p1, +    struct filedesc *fdp) +{ +	struct knote *kn1; +	const struct filterops *fop; +	int error; + +	fop = kn->kn_fop; +	if (fop->f_copy == NULL || 
(fop->f_isfd && +	    fdp->fd_files->fdt_ofiles[kn->kn_kevent.ident].fde_file == NULL)) +		return; +	error = kqueue_expand(kq1, fop, kn->kn_kevent.ident, M_WAITOK); +	if (error != 0) +		return; + +	kn1 = knote_alloc(M_WAITOK); +	*kn1 = *kn; +	kn1->kn_status |= KN_DETACHED; +	kn1->kn_status &= ~KN_QUEUED; +	kn1->kn_kq = kq1; +	error = fop->f_copy(kn1, p1); +	if (error != 0) { +		knote_free(kn1); +		return; +	} +	(void)kqueue_fo_find(kn->kn_kevent.filter); +	if (fop->f_isfd && !fhold(kn1->kn_fp)) { +		fop->f_detach(kn1); +		kqueue_fo_release(kn->kn_kevent.filter); +		knote_free(kn1); +		return; +	} +	if (kn->kn_knlist != NULL) +		knlist_add(kn->kn_knlist, kn1, 0); +	KQ_LOCK(kq1); +	knote_attach(kn1, kq1); +	kn1->kn_influx = 0; +	if ((kn->kn_status & KN_QUEUED) != 0) +		knote_enqueue(kn1); +	KQ_UNLOCK(kq1); +} + +static void +kqueue_fork_copy_list(struct klist *knlist, struct knote *marker, +    struct kqueue *kq, struct kqueue *kq1, struct proc *p1, +    struct filedesc *fdp) +{ +	struct knote *kn; + +	KQ_OWNED(kq); +	kn = SLIST_FIRST(knlist); +	while (kn != NULL) { +		if ((kn->kn_status & KN_DETACHED) != 0 || +		    (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0)) { +			kn = SLIST_NEXT(kn, kn_link); +			continue; +		} +		kn_enter_flux(kn); +		SLIST_INSERT_AFTER(kn, marker, kn_link); +		KQ_UNLOCK(kq); +		kqueue_fork_copy_knote(kq1, kn, p1, fdp); +		KQ_LOCK(kq); +		kn_leave_flux(kn); +		kn = SLIST_NEXT(marker, kn_link); +		/* XXXKIB switch kn_link to LIST? 
*/ +		SLIST_REMOVE(knlist, marker, knote, kn_link); +	} +} + +static int +kqueue_fork_copy(struct filedesc *fdp, struct file *fp, struct file *fp1, +    struct proc *p1, struct thread *td) +{ +	struct kqueue *kq, *kq1; +	struct knote *marker; +	int error, i; + +	error = 0; +	MPASS(fp == NULL); +	MPASS(fp1->f_type == DTYPE_KQUEUE); + +	kq1 = fp1->f_data; +	kq = kq1->kq_forksrc; +	marker = knote_alloc(M_WAITOK); +	marker->kn_status = KN_MARKER; + +	KQ_LOCK(kq); +	for (i = 0; i < kq->kq_knlistsize; i++) { +		kqueue_fork_copy_list(&kq->kq_knlist[i], marker, kq, kq1, +		    p1, fdp); +	} +	if (kq->kq_knhashmask != 0) { +		for (i = 0; i <= kq->kq_knhashmask; i++) { +			kqueue_fork_copy_list(&kq->kq_knhash[i], marker, kq, +			    kq1, p1, fdp); +		} +	} +	kqueue_release(kq, 1); +	kq1->kq_forksrc = NULL; +	KQ_UNLOCK(kq); + +	knote_free(marker); +	return (error); +} + +static int +kqueue_fork(struct filedesc *fdp, struct file *fp, struct file **fp1, +    struct proc *p1, struct thread *td) +{ +	if (*fp1 == NULL) +		return (kqueue_fork_alloc(fdp, fp, fp1, td)); +	return (kqueue_fork_copy(fdp, fp, *fp1, p1, td)); +} + +int +knote_triv_copy(struct knote *kn __unused, struct proc *p1 __unused) +{ +	return (0); +} +  struct knote_status_export_bit {  	int kn_status_bit;  	int knt_status_bit; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 0fc2d0e7f1bc..2bdd6faa025a 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -418,7 +418,7 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p,  #endif  	int error, i, orig_osrel;  	uint32_t orig_fctl0; -	Elf_Brandinfo *orig_brandinfo; +	const Elf_Brandinfo *orig_brandinfo;  	size_t freepath_size;  	static const char fexecv_proc_title[] = "(fexecv)"; @@ -1314,7 +1314,7 @@ exec_map_stack(struct image_params *imgp)  		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);  	} else {  		sharedpage_addr = sv->sv_shared_page_base; -		vm_map_fixed(map, obj, 0, +		error = vm_map_fixed(map, obj, 0,  		    
sharedpage_addr, sv->sv_shared_page_len,  		    VM_PROT_READ | VM_PROT_EXECUTE,  		    VM_PROT_READ | VM_PROT_EXECUTE, diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index ab8ed32ad189..c4b1c8201ff2 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -807,7 +807,7 @@ kern_abort2(struct thread *td, const char *why, int nargs, void **uargs)  	}  	if (nargs > 0) {  		sbuf_putc(sb, '('); -		for (i = 0;i < nargs; i++) +		for (i = 0; i < nargs; i++)  			sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]);  		sbuf_putc(sb, ')');  	} diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 7f6abae187b3..8b237b6dbd17 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -423,7 +423,7 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *  			pd = pdshare(p1->p_pd);  		else  			pd = pdcopy(p1->p_pd); -		fd = fdcopy(p1->p_fd); +		fd = fdcopy(p1->p_fd, p2);  		fdtol = NULL;  	} else {  		if (fr->fr_flags2 & FR2_SHARE_PATHS) diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 3697d95fe0e5..267b60ffb5bc 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -1088,6 +1088,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)  	else {  		if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |  		    JAIL_OWN_DESC))) { +			error = EINVAL;  			vfs_opterror(opts, "unexpected desc");  			goto done_errmsg;  		} @@ -2518,6 +2519,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)  	} else if (error == 0) {  		if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |  		    JAIL_OWN_DESC))) { +			error = EINVAL;  			vfs_opterror(opts, "unexpected desc");  			goto done;  		} @@ -2909,12 +2911,6 @@ prison_remove(struct prison *pr)  {  	sx_assert(&allprison_lock, SA_XLOCKED);  	mtx_assert(&pr->pr_mtx, MA_OWNED); -	if (!prison_isalive(pr)) { -		/* Silently ignore already-dying prisons. 
*/ -		mtx_unlock(&pr->pr_mtx); -		sx_xunlock(&allprison_lock); -		return; -	}  	prison_deref(pr, PD_KILL | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);  } @@ -3461,12 +3457,17 @@ prison_deref(struct prison *pr, int flags)  			/* Kill the prison and its descendents. */  			KASSERT(pr != &prison0,  			    ("prison_deref trying to kill prison0")); -			if (!(flags & PD_DEREF)) { -				prison_hold(pr); -				flags |= PD_DEREF; +			if (!prison_isalive(pr)) { +				/* Silently ignore already-dying prisons. */ +				flags &= ~PD_KILL; +			} else { +				if (!(flags & PD_DEREF)) { +					prison_hold(pr); +					flags |= PD_DEREF; +				} +				flags = prison_lock_xlock(pr, flags); +				prison_deref_kill(pr, &freeprison);  			} -			flags = prison_lock_xlock(pr, flags); -			prison_deref_kill(pr, &freeprison);  		}  		if (flags & PD_DEUREF) {  			/* Drop a user reference. */ diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c index 3f322b271400..a564393d3366 100644 --- a/sys/kern/kern_jaildesc.c +++ b/sys/kern/kern_jaildesc.c @@ -344,6 +344,7 @@ static const struct filterops jaildesc_kqops = {  	.f_isfd = 1,  	.f_detach = jaildesc_kqops_detach,  	.f_event = jaildesc_kqops_event, +	.f_copy = knote_triv_copy,  };  static int diff --git a/sys/kern/kern_jailmeta.c b/sys/kern/kern_jailmeta.c index 4e37eccad03a..91bb7155820d 100644 --- a/sys/kern/kern_jailmeta.c +++ b/sys/kern/kern_jailmeta.c @@ -599,22 +599,18 @@ SYSCTL_PROC(_security_jail, OID_AUTO, env,  /* Setup and tear down. 
*/ -static int +static void  jm_sysinit(void *arg __unused)  {  	meta.osd_slot = osd_jail_register(jm_osd_destructor, meta.methods);  	env.osd_slot = osd_jail_register(jm_osd_destructor, env.methods); - -	return (0);  } -static int +static void  jm_sysuninit(void *arg __unused)  {  	osd_jail_deregister(meta.osd_slot);  	osd_jail_deregister(env.osd_slot); - -	return (0);  }  SYSINIT(jailmeta, SI_SUB_DRIVERS, SI_ORDER_ANY, jm_sysinit, NULL); diff --git a/sys/kern/kern_kexec.c b/sys/kern/kern_kexec.c new file mode 100644 index 000000000000..2efea7dcf9a7 --- /dev/null +++ b/sys/kern/kern_kexec.c @@ -0,0 +1,350 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/eventhandler.h> +#include <sys/kernel.h> +#ifdef INTRNG +#include <sys/intr.h> +#endif +#include <sys/kexec.h> +#include <sys/malloc.h> +#include <sys/proc.h> +#include <sys/priv.h> +#include <sys/reboot.h> +#include <sys/rman.h> +#include <sys/rwlock.h> +#include <sys/smp.h> +#include <sys/syscallsubr.h> +#include <sys/sysproto.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_pagequeue.h> +#include <vm/vm_phys.h> +#include <vm/vm_radix.h> + +#include <machine/kexec.h> + +#ifndef	KEXEC_MD_PAGES +/* + * Number of MD pages for extra bookkeeping. + * This is a macro because it can be a constant (some architectures make it 0). + * It accepts an argument, which is an array of + * kexec_segment[KEXEC_SEGMENT_MAX]. + */ +#define	KEXEC_MD_PAGES(x)	0 +#endif + +/* + * Basic design: + * + * Given an array of "segment descriptors" stage an image to be loaded and + * jumped to at reboot, instead of rebooting via firmware. + * + * Constraints: + * - The segment descriptors' "mem" and "memsz" must each fit within a + *   vm_phys_seg segment, which can be obtained via the `vm.phys_segs` sysctl. + *   A single segment cannot span multiple vm_phys_seg segments, even if the + *   vm_phys_seg segments are adjacent. 
+ * + * Technical details: + * + * Take advantage of the VM subsystem and create a vm_object to hold the staged + * image.  When grabbing pages for the object, sort the pages so that if a page + * in the object is located in the physical range of any of the kexec segment + * targets then it gets placed at the pindex corresponding to that physical + * address.  This avoids the chance of corruption by writing over the page in + * the final copy, or the need for a copy buffer page. + */ + +static struct kexec_image staged_image; +static vm_offset_t stage_addr; +static vm_object_t kexec_obj; + +static eventhandler_tag kexec_reboot_handler; +static struct mtx kexec_mutex; + +static MALLOC_DEFINE(M_KEXEC, "kexec", "Kexec segments"); + + +static void +kexec_reboot(void *junk __unused, int howto) +{ +	if ((howto & RB_KEXEC) == 0 || kexec_obj == NULL) +		return; + +#ifdef SMP +	cpu_mp_stop(); +#endif /* SMP */ +	intr_disable(); +	printf("Starting kexec reboot\n"); + +	scheduler_stopped = true; +	kexec_reboot_md(&staged_image); +} + +MTX_SYSINIT(kexec_mutex, &kexec_mutex, "kexec", MTX_DEF); + +/* Sort the segment list once copied in */ +static int +seg_cmp(const void *seg1, const void *seg2) +{ +	const struct kexec_segment *s1, *s2; + +	s1 = seg1; +	s2 = seg2; + +	return ((uintptr_t)s1->mem - (uintptr_t)s2->mem); +} + +static bool +segment_fits(struct kexec_segment *seg) +{ +	vm_paddr_t v = (vm_paddr_t)(uintptr_t)seg->mem; + +	for (int i = 0; i < vm_phys_nsegs; i++) { +		if (v >= vm_phys_segs[i].start && +		    (v + seg->memsz - 1) <= vm_phys_segs[i].end) +			return (true); +	} + +	return (false); +} + +static vm_paddr_t +pa_for_pindex(struct kexec_segment_stage *segs, int count, vm_pindex_t pind) +{ +	for (int i = count; i > 0; --i) { +		if (pind >= segs[i - 1].pindex) +			return (ptoa(pind - segs[i-1].pindex) + segs[i - 1].target); +	} + +	panic("No segment for pindex %ju\n", (uintmax_t)pind); +} + +/* + * For now still tied to the system call, so assumes all memory is 
userspace. + */ +int +kern_kexec_load(struct thread *td, u_long entry, u_long nseg, +    struct kexec_segment *seg, u_long flags) +{ +	static int kexec_loading; +	struct kexec_segment segtmp[KEXEC_SEGMENT_MAX]; +	struct kexec_image *new_image_stage = 0; +	vm_object_t new_segments = NULL; +	uint8_t *buf; +	int err = 0; +	int i; +	const size_t segsize = nseg * sizeof(struct kexec_segment); +	vm_page_t *page_list = 0; +	vm_size_t image_count, md_pages, page_count, tmpsize; +	vm_offset_t segment_va = 0; +	/* +	 * - Do any sanity checking +	 * - Load the new segments to temporary +	 * - Remove the old segments +	 * - Install the new segments +	 */ + +	if (nseg > KEXEC_SEGMENT_MAX) +		return (EINVAL); + +	if (atomic_cmpset_acq_int(&kexec_loading, false, true) == 0) +		return (EBUSY); + +	/* Only do error checking if we're installing new segments. */ +	if (nseg > 0) { +		/* Create the new kexec object before destroying the old one. */ +		bzero(&segtmp, sizeof(segtmp)); +		err = copyin(seg, segtmp, segsize); +		if (err != 0) +			goto out; +		qsort(segtmp, nseg, sizeof(*segtmp), seg_cmp); +		new_image_stage = malloc(sizeof(*new_image_stage), M_TEMP, M_WAITOK | M_ZERO); +		/* +		 * Sanity checking: +		 * - All segments must not overlap the kernel, so must be fully enclosed +		 *   in a vm_phys_seg (each kexec segment must be in a single +		 *   vm_phys_seg segment, cannot cross even adjacent segments). 
+		 */ +		image_count = 0; +		for (i = 0; i < nseg; i++) { +			if (!segment_fits(&segtmp[i]) || +			    segtmp[i].bufsz > segtmp[i].memsz) { +				err = EINVAL; +				goto out; +			} +			new_image_stage->segments[i].pindex = image_count; +			new_image_stage->segments[i].target = (vm_offset_t)segtmp[i].mem; +			new_image_stage->segments[i].size = segtmp[i].memsz; +			image_count += atop(segtmp[i].memsz); +		} +		md_pages = KEXEC_MD_PAGES(segtmp); +		page_count = image_count + md_pages; +		new_segments = vm_object_allocate(OBJT_PHYS, page_count); +		page_list = malloc(page_count * sizeof(vm_page_t), M_TEMP, M_WAITOK); + +		/* +		 * - Grab all pages for all segments (use pindex to slice it) +		 * - Walk the list (once) +		 *   - At each pindex, check if the target PA that corresponds +		 *     to that index is in the object.  If so, swap the pages. +		 *   - At the end of this the list will be "best" sorted. +		 */ +		vm_page_grab_pages_unlocked(new_segments, 0, +		    VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_WIRED | VM_ALLOC_NOBUSY | VM_ALLOC_ZERO, +		    page_list, page_count); + +		/* Sort the pages to best match the PA */ +		VM_OBJECT_WLOCK(new_segments); +		for (i = 0; i < image_count; i++) { +			vm_page_t curpg, otherpg, tmp; +			vm_pindex_t otheridx; + +			curpg = page_list[i]; +			otherpg = PHYS_TO_VM_PAGE(pa_for_pindex(new_image_stage->segments, +			    nseg, curpg->pindex)); +			otheridx = otherpg->pindex; + +			if (otherpg->object == new_segments) { +				/* +				 * Swap 'curpg' and 'otherpg', since 'otherpg' +				 * is at the PA 'curpg' covers. 
+				 */ +				vm_radix_remove(&new_segments->rtree, otheridx); +				vm_radix_remove(&new_segments->rtree, i); +				otherpg->pindex = i; +				curpg->pindex = otheridx; +				vm_radix_insert(&new_segments->rtree, curpg); +				vm_radix_insert(&new_segments->rtree, otherpg); +				tmp = curpg; +				page_list[i] = otherpg; +				page_list[otheridx] = tmp; +			} +		} +		for (i = 0; i < nseg; i++) { +			new_image_stage->segments[i].first_page = +			    vm_radix_lookup(&new_segments->rtree, +			    new_image_stage->segments[i].pindex); +		} +		if (md_pages > 0) +			new_image_stage->first_md_page = +			    vm_radix_lookup(&new_segments->rtree, +			    page_count - md_pages); +		else +			new_image_stage->first_md_page = NULL; +		VM_OBJECT_WUNLOCK(new_segments); + +		/* Map the object to do the copies */ +		err = vm_map_find(kernel_map, new_segments, 0, &segment_va, +		    ptoa(page_count), 0, VMFS_ANY_SPACE, +		    VM_PROT_RW, VM_PROT_RW, MAP_PREFAULT); +		if (err != 0) +			goto out; +		buf = (void *)segment_va; +		new_image_stage->map_addr = segment_va; +		new_image_stage->map_size = ptoa(new_segments->size); +		new_image_stage->entry = entry; +		new_image_stage->map_obj = new_segments; +		for (i = 0; i < nseg; i++) { +			err = copyin(segtmp[i].buf, buf, segtmp[i].bufsz); +			if (err != 0) { +				goto out; +			} +			new_image_stage->segments[i].map_buf = buf; +			buf += segtmp[i].bufsz; +			tmpsize = segtmp[i].memsz - segtmp[i].bufsz; +			if (tmpsize > 0) +				memset(buf, 0, tmpsize); +			buf += tmpsize; +		} +		/* What's left are the MD pages, so zero them all out. 
*/ +		if (md_pages > 0) +			bzero(buf, ptoa(md_pages)); + +	cpu_flush_dcache((void *)segment_va, ptoa(page_count)); +	if ((err = kexec_load_md(new_image_stage)) != 0) +		goto out; +	} +	if (kexec_obj != NULL) { +		vm_object_unwire(kexec_obj, 0, kexec_obj->size, 0); +		KASSERT(stage_addr != 0, ("Mapped kexec_obj without address")); +		vm_map_remove(kernel_map, stage_addr, stage_addr + kexec_obj->size); +	} +	kexec_obj = new_segments; +	bzero(&staged_image, sizeof(staged_image)); +	if (nseg > 0) +		memcpy(&staged_image, new_image_stage, sizeof(*new_image_stage)); + +	printf("trampoline at %#jx\n", (uintmax_t)staged_image.entry); +	if (nseg > 0) { +		if (kexec_reboot_handler == NULL) +			kexec_reboot_handler = +			    EVENTHANDLER_REGISTER(shutdown_final, kexec_reboot, NULL, +			    SHUTDOWN_PRI_DEFAULT - 150); +	} else { +		if (kexec_reboot_handler != NULL) +			EVENTHANDLER_DEREGISTER(shutdown_final, kexec_reboot_handler); +	} +out: +	/* Clean up the mess if we've gotten far. */ +	if (err != 0 && new_segments != NULL) { +		vm_object_unwire(new_segments, 0, new_segments->size, 0); +		if (segment_va != 0) +			vm_map_remove(kernel_map, segment_va, segment_va + kexec_obj->size); +		else +			vm_object_deallocate(new_segments); +	} +	atomic_store_rel_int(&kexec_loading, false); +	if (new_image_stage != NULL) +		free(new_image_stage, M_TEMP); +	if (page_list != 0) +		free(page_list, M_TEMP); + +	return (err); +} + +int +sys_kexec_load(struct thread *td, struct kexec_load_args *uap) +{ +	int error; + +	// FIXME: Do we need a better privilege check than PRIV_REBOOT here?
+	error = priv_check(td, PRIV_REBOOT); +	if (error != 0) +		return (error); +	return (kern_kexec_load(td, uap->entry, uap->nseg, uap->segments, uap->flags)); +} diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c index d566bc01bc5e..e2f63cbc0c5a 100644 --- a/sys/kern/kern_linker.c +++ b/sys/kern/kern_linker.c @@ -435,7 +435,7 @@ linker_file_register_modules(linker_file_t lf)  }  static void -linker_init_kernel_modules(void) +linker_init_kernel_modules(void *dummy __unused)  {  	sx_xlock(&kld_sx); diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index 653ce1ee556b..fcbfbe64f854 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -303,7 +303,7 @@ sysctl_vm_malloc_zone_sizes(SYSCTL_HANDLER_ARGS)   */  #if MALLOC_DEBUG_MAXZONES > 1  static void -tunable_set_numzones(void) +tunable_set_numzones(void *dummy __unused)  {  	TUNABLE_INT_FETCH("debug.malloc.numzones", @@ -1302,7 +1302,7 @@ mallocinit(void *dummy)  #endif  			    align, UMA_ZONE_MALLOC);  		} -		for (;i <= size; i+= KMEM_ZBASE) +		for (; i <= size; i+= KMEM_ZBASE)  			kmemsize[i >> KMEM_ZSHIFT] = indx;  	}  } diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c index 7351e9cb6313..2aab151aba08 100644 --- a/sys/kern/kern_racct.c +++ b/sys/kern/kern_racct.c @@ -1312,7 +1312,7 @@ static struct kproc_desc racctd_kp = {  };  static void -racctd_init(void) +racctd_init(void *dummy __unused)  {  	if (!racct_enable)  		return; @@ -1322,7 +1322,7 @@ racctd_init(void)  SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL);  static void -racct_init(void) +racct_init(void *dummy __unused)  {  	if (!racct_enable)  		return; diff --git a/sys/kern/kern_rangelock.c b/sys/kern/kern_rangelock.c index 3854ffbeec29..cd66bff62608 100644 --- a/sys/kern/kern_rangelock.c +++ b/sys/kern/kern_rangelock.c @@ -300,7 +300,7 @@ static void rangelock_free_free(struct rl_q_entry *free);  static void rangelock_noncheating_destroy(struct rangelock *lock);  static void 
-rangelock_sys_init(void) +rangelock_sys_init(void *dummy __unused)  {  	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),  	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry), diff --git a/sys/kern/kern_rctl.c b/sys/kern/kern_rctl.c index 4232c71f86fb..682ba86d23ff 100644 --- a/sys/kern/kern_rctl.c +++ b/sys/kern/kern_rctl.c @@ -209,7 +209,7 @@ static struct dict actionnames[] = {  	{ "throttle", RCTL_ACTION_THROTTLE },  	{ NULL, -1 }}; -static void rctl_init(void); +static void rctl_init(void *);  SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);  static uma_zone_t rctl_rule_zone; @@ -2175,7 +2175,7 @@ rctl_racct_release(struct racct *racct)  }  static void -rctl_init(void) +rctl_init(void *dummy __unused)  {  	if (!racct_enable) diff --git a/sys/kern/kern_sharedpage.c b/sys/kern/kern_sharedpage.c index 5b8398caaca9..f48d0e3d616b 100644 --- a/sys/kern/kern_sharedpage.c +++ b/sys/kern/kern_sharedpage.c @@ -130,8 +130,7 @@ shared_page_init(void *dummy __unused)  	shared_page_mapping = (char *)addr;  } -SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init, -    NULL); +SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, shared_page_init, NULL);  /*   * Push the timehands update to the shared page. 
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 8efc0886988b..a55f3c761449 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -113,7 +113,7 @@ static int	filt_sigattach(struct knote *kn);  static void	filt_sigdetach(struct knote *kn);  static int	filt_signal(struct knote *kn, long hint);  static struct thread *sigtd(struct proc *p, int sig, bool fast_sigblock); -static void	sigqueue_start(void); +static void	sigqueue_start(void *);  static void	sigfastblock_setpend(struct thread *td, bool resched);  static void	sig_handle_first_stop(struct thread *td, struct proc *p,      int sig); @@ -124,6 +124,7 @@ const struct filterops sig_filtops = {  	.f_attach = filt_sigattach,  	.f_detach = filt_sigdetach,  	.f_event = filt_signal, +	.f_copy = knote_triv_copy,  };  static int	kern_forcesigexit = 1; @@ -344,7 +345,7 @@ ast_sigsuspend(struct thread *td, int tda __unused)  }  static void -sigqueue_start(void) +sigqueue_start(void *dummy __unused)  {  	ksiginfo_zone = uma_zcreate("ksiginfo", sizeof(ksiginfo_t),  		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index 2a6f0989f6aa..5b7485c25cd7 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -90,7 +90,7 @@ static int	user_clock_nanosleep(struct thread *td, clockid_t clock_id,  		    int flags, const struct timespec *ua_rqtp,  		    struct timespec *ua_rmtp); -static void	itimer_start(void); +static void	itimer_start(void *);  static int	itimer_init(void *, int, int);  static void	itimer_fini(void *, int);  static void	itimer_enter(struct itimer *); @@ -1170,7 +1170,7 @@ eventratecheck(struct timeval *lasttime, int *cureps, int maxeps)  }  static void -itimer_start(void) +itimer_start(void *dummy __unused)  {  	static const struct kclock rt_clock = {  		.timer_create  = realtimer_create, diff --git a/sys/kern/md4c.c b/sys/kern/md4c.c deleted file mode 100644 index e173e17e3387..000000000000 --- a/sys/kern/md4c.c +++ /dev/null @@ -1,298 
+0,0 @@ -/* MD4C.C - RSA Data Security, Inc., MD4 message-digest algorithm - */ - -/*- -   SPDX-License-Identifier: RSA-MD - -   Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved. - -   License to copy and use this software is granted provided that it -   is identified as the "RSA Data Security, Inc. MD4 Message-Digest -   Algorithm" in all material mentioning or referencing this software -   or this function. - -   License is also granted to make and use derivative works provided -   that such works are identified as "derived from the RSA Data -   Security, Inc. MD4 Message-Digest Algorithm" in all material -   mentioning or referencing the derived work. - -   RSA Data Security, Inc. makes no representations concerning either -   the merchantability of this software or the suitability of this -   software for any particular purpose. It is provided "as is" -   without express or implied warranty of any kind. - -   These notices must be retained in any copies of any part of this -   documentation and/or software. - */ - -#include <sys/param.h> -#ifdef _KERNEL -#include <sys/systm.h> -#else -#include <string.h> -#endif -#include <sys/md4.h> - -typedef unsigned char *POINTER; -typedef uint16_t UINT2; -typedef uint32_t UINT4; - -#define PROTO_LIST(list) list - -/* Constants for MD4Transform routine. 
- */ -#define S11 3 -#define S12 7 -#define S13 11 -#define S14 19 -#define S21 3 -#define S22 5 -#define S23 9 -#define S24 13 -#define S31 3 -#define S32 9 -#define S33 11 -#define S34 15 - -static void MD4Transform PROTO_LIST ((UINT4 [4], const unsigned char [64])); -static void Encode PROTO_LIST -  ((unsigned char *, UINT4 *, unsigned int)); -static void Decode PROTO_LIST -  ((UINT4 *, const unsigned char *, unsigned int)); - -static unsigned char PADDING[64] = { -  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* F, G and H are basic MD4 functions. - */ -#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) -#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) -#define H(x, y, z) ((x) ^ (y) ^ (z)) - -/* ROTATE_LEFT rotates x left n bits. - */ -#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) - -/* FF, GG and HH are transformations for rounds 1, 2 and 3 */ -/* Rotation is separate from addition to prevent recomputation */ -#define FF(a, b, c, d, x, s) { \ -    (a) += F ((b), (c), (d)) + (x); \ -    (a) = ROTATE_LEFT ((a), (s)); \ -  } -#define GG(a, b, c, d, x, s) { \ -    (a) += G ((b), (c), (d)) + (x) + (UINT4)0x5a827999; \ -    (a) = ROTATE_LEFT ((a), (s)); \ -  } -#define HH(a, b, c, d, x, s) { \ -    (a) += H ((b), (c), (d)) + (x) + (UINT4)0x6ed9eba1; \ -    (a) = ROTATE_LEFT ((a), (s)); \ -  } - -/* MD4 initialization. Begins an MD4 operation, writing a new context. - */ -void -MD4Init(MD4_CTX *context) -{ -  context->count[0] = context->count[1] = 0; - -  /* Load magic initialization constants. -   */ -  context->state[0] = 0x67452301; -  context->state[1] = 0xefcdab89; -  context->state[2] = 0x98badcfe; -  context->state[3] = 0x10325476; -} - -/* MD4 block update operation. 
Continues an MD4 message-digest -     operation, processing another message block, and updating the -     context. - */ -void -MD4Update(MD4_CTX *context, const unsigned char *input, -    unsigned int inputLen) -{ -  unsigned int i, index, partLen; - -  /* Compute number of bytes mod 64 */ -  index = (unsigned int)((context->count[0] >> 3) & 0x3F); -  /* Update number of bits */ -  if ((context->count[0] += ((UINT4)inputLen << 3)) -      < ((UINT4)inputLen << 3)) -    context->count[1]++; -  context->count[1] += ((UINT4)inputLen >> 29); - -  partLen = 64 - index; -  /* Transform as many times as possible. -   */ -  if (inputLen >= partLen) { -    bcopy(input, &context->buffer[index], partLen); -    MD4Transform (context->state, context->buffer); - -    for (i = partLen; i + 63 < inputLen; i += 64) -      MD4Transform (context->state, &input[i]); - -    index = 0; -  } -  else -    i = 0; - -  /* Buffer remaining input */ -  bcopy(&input[i], &context->buffer[index], inputLen-i); -} - -/* MD4 padding. */ -void -MD4Pad(MD4_CTX *context) -{ -  unsigned char bits[8]; -  unsigned int index, padLen; - -  /* Save number of bits */ -  Encode (bits, context->count, 8); - -  /* Pad out to 56 mod 64. -   */ -  index = (unsigned int)((context->count[0] >> 3) & 0x3f); -  padLen = (index < 56) ? (56 - index) : (120 - index); -  MD4Update (context, PADDING, padLen); - -  /* Append length (before padding) */ -  MD4Update (context, bits, 8); -} - -/* MD4 finalization. Ends an MD4 message-digest operation, writing the -     the message digest and zeroizing the context. - */ -void -MD4Final(unsigned char digest[static 16], MD4_CTX *context) -{ -  /* Do padding */ -  MD4Pad (context); - -  /* Store state in digest */ -  Encode (digest, context->state, 16); - -  /* Zeroize sensitive information. -   */ -  bzero(context, sizeof (*context)); -} - -/* MD4 basic transformation. Transforms state based on block. 
- */ -static void -MD4Transform(UINT4 state[4], const unsigned char block[64]) -{ -  UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16]; - -  Decode (x, block, 64); - -  /* Round 1 */ -  FF (a, b, c, d, x[ 0], S11); /* 1 */ -  FF (d, a, b, c, x[ 1], S12); /* 2 */ -  FF (c, d, a, b, x[ 2], S13); /* 3 */ -  FF (b, c, d, a, x[ 3], S14); /* 4 */ -  FF (a, b, c, d, x[ 4], S11); /* 5 */ -  FF (d, a, b, c, x[ 5], S12); /* 6 */ -  FF (c, d, a, b, x[ 6], S13); /* 7 */ -  FF (b, c, d, a, x[ 7], S14); /* 8 */ -  FF (a, b, c, d, x[ 8], S11); /* 9 */ -  FF (d, a, b, c, x[ 9], S12); /* 10 */ -  FF (c, d, a, b, x[10], S13); /* 11 */ -  FF (b, c, d, a, x[11], S14); /* 12 */ -  FF (a, b, c, d, x[12], S11); /* 13 */ -  FF (d, a, b, c, x[13], S12); /* 14 */ -  FF (c, d, a, b, x[14], S13); /* 15 */ -  FF (b, c, d, a, x[15], S14); /* 16 */ - -  /* Round 2 */ -  GG (a, b, c, d, x[ 0], S21); /* 17 */ -  GG (d, a, b, c, x[ 4], S22); /* 18 */ -  GG (c, d, a, b, x[ 8], S23); /* 19 */ -  GG (b, c, d, a, x[12], S24); /* 20 */ -  GG (a, b, c, d, x[ 1], S21); /* 21 */ -  GG (d, a, b, c, x[ 5], S22); /* 22 */ -  GG (c, d, a, b, x[ 9], S23); /* 23 */ -  GG (b, c, d, a, x[13], S24); /* 24 */ -  GG (a, b, c, d, x[ 2], S21); /* 25 */ -  GG (d, a, b, c, x[ 6], S22); /* 26 */ -  GG (c, d, a, b, x[10], S23); /* 27 */ -  GG (b, c, d, a, x[14], S24); /* 28 */ -  GG (a, b, c, d, x[ 3], S21); /* 29 */ -  GG (d, a, b, c, x[ 7], S22); /* 30 */ -  GG (c, d, a, b, x[11], S23); /* 31 */ -  GG (b, c, d, a, x[15], S24); /* 32 */ - -  /* Round 3 */ -  HH (a, b, c, d, x[ 0], S31); /* 33 */ -  HH (d, a, b, c, x[ 8], S32); /* 34 */ -  HH (c, d, a, b, x[ 4], S33); /* 35 */ -  HH (b, c, d, a, x[12], S34); /* 36 */ -  HH (a, b, c, d, x[ 2], S31); /* 37 */ -  HH (d, a, b, c, x[10], S32); /* 38 */ -  HH (c, d, a, b, x[ 6], S33); /* 39 */ -  HH (b, c, d, a, x[14], S34); /* 40 */ -  HH (a, b, c, d, x[ 1], S31); /* 41 */ -  HH (d, a, b, c, x[ 9], S32); /* 42 */ -  HH (c, d, a, b, x[ 5], S33); /* 43 */ -  HH 
(b, c, d, a, x[13], S34); /* 44 */ -  HH (a, b, c, d, x[ 3], S31); /* 45 */ -  HH (d, a, b, c, x[11], S32); /* 46 */ -  HH (c, d, a, b, x[ 7], S33); /* 47 */ -  HH (b, c, d, a, x[15], S34); /* 48 */ - -  state[0] += a; -  state[1] += b; -  state[2] += c; -  state[3] += d; - -  /* Zeroize sensitive information. -   */ -  bzero((POINTER)x, sizeof (x)); -} - -/* Encodes input (UINT4) into output (unsigned char). Assumes len is -     a multiple of 4. - */ -static void -Encode(unsigned char *output, UINT4 *input, unsigned int len) -{ -  unsigned int i, j; - -  for (i = 0, j = 0; j < len; i++, j += 4) { -    output[j] = (unsigned char)(input[i] & 0xff); -    output[j+1] = (unsigned char)((input[i] >> 8) & 0xff); -    output[j+2] = (unsigned char)((input[i] >> 16) & 0xff); -    output[j+3] = (unsigned char)((input[i] >> 24) & 0xff); -  } -} - -/* Decodes input (unsigned char) into output (UINT4). Assumes len is -     a multiple of 4. - */ -static void -Decode(UINT4 *output, const unsigned char *input, unsigned int len) -{ -  unsigned int i, j; - -  for (i = 0, j = 0; j < len; i++, j += 4) -    output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) | -      (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24); -} - -#ifdef WEAK_REFS -/* When building libmd, provide weak references. Note: this is not -   activated in the context of compiling these sources for internal -   use in libcrypt. - */ -#undef MD4Init -__weak_reference(_libmd_MD4Init, MD4Init); -#undef MD4Update -__weak_reference(_libmd_MD4Update, MD4Update); -#undef MD4Pad -__weak_reference(_libmd_MD4Pad, MD4Pad); -#undef MD4Final -__weak_reference(_libmd_MD4Final, MD4Final); -#endif diff --git a/sys/kern/md5c.c b/sys/kern/md5c.c deleted file mode 100644 index 0922d0f8cc61..000000000000 --- a/sys/kern/md5c.c +++ /dev/null @@ -1,341 +0,0 @@ -/*- - * SPDX-License-Identifier: RSA-MD - * - * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm - * - * Copyright (C) 1991-2, RSA Data Security, Inc. 
Created 1991. All - * rights reserved. - * - * License to copy and use this software is granted provided that it - * is identified as the "RSA Data Security, Inc. MD5 Message-Digest - * Algorithm" in all material mentioning or referencing this software - * or this function. - * - * License is also granted to make and use derivative works provided - * that such works are identified as "derived from the RSA Data - * Security, Inc. MD5 Message-Digest Algorithm" in all material - * mentioning or referencing the derived work. - * - * RSA Data Security, Inc. makes no representations concerning either - * the merchantability of this software or the suitability of this - * software for any particular purpose. It is provided "as is" - * without express or implied warranty of any kind. - * - * These notices must be retained in any copies of any part of this - * documentation and/or software. - * - * This code is the same as the code published by RSA Inc.  It has been - * edited for clarity and style only. - */ - -#include <sys/types.h> - -#ifdef _KERNEL -#include <sys/systm.h> -#else -#include <string.h> -#endif - -#include <machine/endian.h> -#include <sys/endian.h> -#include <sys/md5.h> - -static void MD5Transform(uint32_t [4], const unsigned char [64]); - -#if (BYTE_ORDER == LITTLE_ENDIAN) -#define Encode memcpy -#define Decode memcpy -#else  - -/* - * Encodes input (uint32_t) into output (unsigned char). Assumes len is - * a multiple of 4. - */ - -static void -Encode (unsigned char *output, uint32_t *input, unsigned int len) -{ -	unsigned int i; -	uint32_t ip; - -	for (i = 0; i < len / 4; i++) { -		ip = input[i]; -		*output++ = ip; -		*output++ = ip >> 8; -		*output++ = ip >> 16; -		*output++ = ip >> 24; -	} -} - -/* - * Decodes input (unsigned char) into output (uint32_t). Assumes len is - * a multiple of 4. 
- */ - -static void -Decode (uint32_t *output, const unsigned char *input, unsigned int len) -{ -	unsigned int i; - -	for (i = 0; i < len; i += 4) {  -		*output++ = input[i] | (input[i+1] << 8) | (input[i+2] << 16) | -		    (input[i+3] << 24); -	} -} -#endif - -static unsigned char PADDING[64] = { -  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* F, G, H and I are basic MD5 functions. */ -#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) -#define G(x, y, z) (((x) & (z)) | ((y) & (~z))) -#define H(x, y, z) ((x) ^ (y) ^ (z)) -#define I(x, y, z) ((y) ^ ((x) | (~z))) - -/* ROTATE_LEFT rotates x left n bits. */ -#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) - -/* - * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4. - * Rotation is separate from addition to prevent recomputation. - */ -#define FF(a, b, c, d, x, s, ac) { \ -	(a) += F ((b), (c), (d)) + (x) + (uint32_t)(ac); \ -	(a) = ROTATE_LEFT ((a), (s)); \ -	(a) += (b); \ -	} -#define GG(a, b, c, d, x, s, ac) { \ -	(a) += G ((b), (c), (d)) + (x) + (uint32_t)(ac); \ -	(a) = ROTATE_LEFT ((a), (s)); \ -	(a) += (b); \ -	} -#define HH(a, b, c, d, x, s, ac) { \ -	(a) += H ((b), (c), (d)) + (x) + (uint32_t)(ac); \ -	(a) = ROTATE_LEFT ((a), (s)); \ -	(a) += (b); \ -	} -#define II(a, b, c, d, x, s, ac) { \ -	(a) += I ((b), (c), (d)) + (x) + (uint32_t)(ac); \ -	(a) = ROTATE_LEFT ((a), (s)); \ -	(a) += (b); \ -	} - -/* MD5 initialization. Begins an MD5 operation, writing a new context. */ - -void -MD5Init(MD5_CTX *context) -{ - -	context->count[0] = context->count[1] = 0; - -	/* Load magic initialization constants.  */ -	context->state[0] = 0x67452301; -	context->state[1] = 0xefcdab89; -	context->state[2] = 0x98badcfe; -	context->state[3] = 0x10325476; -} - -/*  - * MD5 block update operation. 
Continues an MD5 message-digest - * operation, processing another message block, and updating the - * context. - */ - -void -MD5Update(MD5_CTX *context, const void *in, unsigned int inputLen) -{ -	unsigned int i, index, partLen; -	const unsigned char *input = in; - -	/* Compute number of bytes mod 64 */ -	index = (unsigned int)((context->count[0] >> 3) & 0x3F); - -	/* Update number of bits */ -	if ((context->count[0] += ((uint32_t)inputLen << 3)) -	    < ((uint32_t)inputLen << 3)) -		context->count[1]++; -	context->count[1] += ((uint32_t)inputLen >> 29); - -	partLen = 64 - index; - -	/* Transform as many times as possible. */ -	if (inputLen >= partLen) { -		memcpy((void *)&context->buffer[index], (const void *)input, -		    partLen); -		MD5Transform (context->state, context->buffer); - -		for (i = partLen; i + 63 < inputLen; i += 64) -			MD5Transform (context->state, &input[i]); - -		index = 0; -	} -	else -		i = 0; - -	/* Buffer remaining input */ -	memcpy ((void *)&context->buffer[index], (const void *)&input[i], -	    inputLen-i); -} - -/* - * MD5 padding. Adds padding followed by original length. - */ - -static void -MD5Pad(MD5_CTX *context) -{ -	unsigned char bits[8]; -	unsigned int index, padLen; - -	/* Save number of bits */ -	Encode (bits, context->count, 8); - -	/* Pad out to 56 mod 64. */ -	index = (unsigned int)((context->count[0] >> 3) & 0x3f); -	padLen = (index < 56) ? (56 - index) : (120 - index); -	MD5Update (context, PADDING, padLen); - -	/* Append length (before padding) */ -	MD5Update (context, bits, 8); -} - -/* - * MD5 finalization. Ends an MD5 message-digest operation, writing the - * the message digest and zeroizing the context. - */ - -void -MD5Final(unsigned char digest[static MD5_DIGEST_LENGTH], MD5_CTX *context) -{ -	/* Do padding. */ -	MD5Pad (context); - -	/* Store state in digest */ -	Encode (digest, context->state, MD5_DIGEST_LENGTH); - -	/* Zeroize sensitive information. 
*/ -	explicit_bzero (context, sizeof (*context)); -} - -/* MD5 basic transformation. Transforms state based on block. */ - -static void -MD5Transform(uint32_t state[4], const unsigned char block[64]) -{ -	uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16]; - -	Decode (x, block, 64); - -	/* Round 1 */ -#define S11 7 -#define S12 12 -#define S13 17 -#define S14 22 -	FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ -	FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ -	FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ -	FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ -	FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ -	FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ -	FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ -	FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ -	FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ -	FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ -	FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ -	FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ -	FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ -	FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ -	FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ -	FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ - -	/* Round 2 */ -#define S21 5 -#define S22 9 -#define S23 14 -#define S24 20 -	GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ -	GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ -	GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ -	GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ -	GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ -	GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */ -	GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ -	GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ -	GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ -	GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ -	GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ -	GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ -	GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ -	GG (d, a, b, c, x[ 2], S22, 
0xfcefa3f8); /* 30 */ -	GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ -	GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ - -	/* Round 3 */ -#define S31 4 -#define S32 11 -#define S33 16 -#define S34 23 -	HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ -	HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ -	HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ -	HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ -	HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ -	HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ -	HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ -	HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ -	HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ -	HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ -	HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ -	HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */ -	HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ -	HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ -	HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ -	HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ - -	/* Round 4 */ -#define S41 6 -#define S42 10 -#define S43 15 -#define S44 21 -	II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ -	II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ -	II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ -	II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ -	II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ -	II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ -	II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ -	II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ -	II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ -	II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ -	II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ -	II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ -	II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ -	II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ -	II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ -	II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ - -	state[0] += a; -	state[1] += b; -	state[2] 
+= c; -	state[3] += d; - -	/* Zeroize sensitive information. */ -	memset ((void *)x, 0, sizeof (x)); -} - -#ifdef WEAK_REFS -/* When building libmd, provide weak references. Note: this is not -   activated in the context of compiling these sources for internal -   use in libcrypt. - */ -#undef MD5Init -__weak_reference(_libmd_MD5Init, MD5Init); -#undef MD5Update -__weak_reference(_libmd_MD5Update, MD5Update); -#undef MD5Final -__weak_reference(_libmd_MD5Final, MD5Final); -#endif diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c index bf5bda7e058d..b84f69cfd03e 100644 --- a/sys/kern/subr_bus.c +++ b/sys/kern/subr_bus.c @@ -4633,7 +4633,7 @@ bus_release_resources(device_t dev, const struct resource_spec *rs,   * parent of @p dev.   */  struct resource * -bus_alloc_resource(device_t dev, int type, int *rid, rman_res_t start, +(bus_alloc_resource)(device_t dev, int type, int *rid, rman_res_t start,      rman_res_t end, rman_res_t count, u_int flags)  {  	struct resource *res; diff --git a/sys/kern/subr_devstat.c b/sys/kern/subr_devstat.c index 07a9cc0f57be..c4d0223d484f 100644 --- a/sys/kern/subr_devstat.c +++ b/sys/kern/subr_devstat.c @@ -415,7 +415,7 @@ sysctl_devstat(SYSCTL_HANDLER_ARGS)  	if (error != 0)  		return (error); -	for (;nds != NULL;) { +	while (nds != NULL) {  		error = SYSCTL_OUT(req, nds, sizeof(struct devstat));  		if (error != 0)  			return (error); diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c index 56264a96c9fa..909dd10a6e69 100644 --- a/sys/kern/subr_kdb.c +++ b/sys/kern/subr_kdb.c @@ -330,7 +330,7 @@ kdb_reboot(void)  #define	KEY_CRTLP	16	/* ^P */  #define	KEY_CRTLR	18	/* ^R */ -/* States of th KDB "alternate break sequence" detecting state machine. */ +/* States of the KDB "alternate break sequence" detecting state machine. 
*/  enum {  	KDB_ALT_BREAK_SEEN_NONE,  	KDB_ALT_BREAK_SEEN_CR, diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c index 5380902e602f..aac35a56130e 100644 --- a/sys/kern/subr_log.c +++ b/sys/kern/subr_log.c @@ -79,6 +79,7 @@ static const struct filterops log_read_filterops = {  	.f_attach =	NULL,  	.f_detach =	logkqdetach,  	.f_event =	logkqread, +	.f_copy = knote_triv_copy,  };  static struct logsoftc { diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c index 5c14e15830f4..c9a387a5e87b 100644 --- a/sys/kern/subr_pcpu.c +++ b/sys/kern/subr_pcpu.c @@ -140,7 +140,7 @@ uma_zone_t pcpu_zone_32;  uma_zone_t pcpu_zone_64;  static void -pcpu_zones_startup(void) +pcpu_zones_startup(void *dummy __unused)  {  	pcpu_zone_4 = uma_zcreate("pcpu-4", 4, diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c index db0ceb17b9f0..e2070ae3f865 100644 --- a/sys/kern/subr_prf.c +++ b/sys/kern/subr_prf.c @@ -766,7 +766,7 @@ reswitch:	switch (ch = (u_char)*fmt++) {  				PCHAR(hex2ascii(*up & 0x0f));  				up++;  				if (width) -					for (q=p;*q;q++) +					for (q = p; *q; q++)  						PCHAR(*q);  			}  			break; diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index 1f9577fddf9c..9f5106316018 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -242,7 +242,7 @@ generic_stop_cpus(cpuset_t map, u_int type)  	KASSERT(  	    type == IPI_STOP || type == IPI_STOP_HARD  #if X86 -	    || type == IPI_SUSPEND +	    || type == IPI_SUSPEND || type == IPI_OFF  #endif  	    , ("%s: invalid stop type", __func__)); @@ -260,7 +260,7 @@ generic_stop_cpus(cpuset_t map, u_int type)  	 * will be lost, violating FreeBSD's assumption of reliable  	 * IPI delivery.  	 
*/ -	if (type == IPI_SUSPEND) +	if (type == IPI_SUSPEND || type == IPI_OFF)  		mtx_lock_spin(&smp_ipi_mtx);  #endif @@ -280,7 +280,7 @@ generic_stop_cpus(cpuset_t map, u_int type)  #endif  #if X86 -	if (type == IPI_SUSPEND) +	if (type == IPI_SUSPEND || type == IPI_OFF)  		cpus = &suspended_cpus;  	else  #endif @@ -298,7 +298,7 @@ generic_stop_cpus(cpuset_t map, u_int type)  	}  #if X86 -	if (type == IPI_SUSPEND) +	if (type == IPI_SUSPEND || type == IPI_OFF)  		mtx_unlock_spin(&smp_ipi_mtx);  #endif @@ -327,6 +327,13 @@ suspend_cpus(cpuset_t map)  	return (generic_stop_cpus(map, IPI_SUSPEND));  } + +int +offline_cpus(cpuset_t map) +{ + +	return (generic_stop_cpus(map, IPI_OFF)); +}  #endif  /* diff --git a/sys/kern/sys_eventfd.c b/sys/kern/sys_eventfd.c index c2a0f67cae85..04ed107c933d 100644 --- a/sys/kern/sys_eventfd.c +++ b/sys/kern/sys_eventfd.c @@ -85,13 +85,16 @@ static int	filt_eventfdwrite(struct knote *kn, long hint);  static const struct filterops eventfd_rfiltops = {  	.f_isfd = 1,  	.f_detach = filt_eventfddetach, -	.f_event = filt_eventfdread +	.f_event = filt_eventfdread, +	.f_copy = knote_triv_copy,  }; +  static const struct filterops eventfd_wfiltops = {  	.f_isfd = 1,  	.f_detach = filt_eventfddetach, -	.f_event = filt_eventfdwrite +	.f_event = filt_eventfdwrite, +	.f_copy = knote_triv_copy,  };  struct eventfd { diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 57ebe8dc85f0..6531cea31423 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -181,20 +181,23 @@ static int	filt_pipedump(struct proc *p, struct knote *kn,  static const struct filterops pipe_nfiltops = {  	.f_isfd = 1,  	.f_detach = filt_pipedetach_notsup, -	.f_event = filt_pipenotsup +	.f_event = filt_pipenotsup,  	/* no userdump */ +	.f_copy = knote_triv_copy,  };  static const struct filterops pipe_rfiltops = {  	.f_isfd = 1,  	.f_detach = filt_pipedetach,  	.f_event = filt_piperead,  	.f_userdump = filt_pipedump, +	.f_copy = knote_triv_copy,  };  static const 
struct filterops pipe_wfiltops = {  	.f_isfd = 1,  	.f_detach = filt_pipedetach,  	.f_event = filt_pipewrite,  	.f_userdump = filt_pipedump, +	.f_copy = knote_triv_copy,  };  /* diff --git a/sys/kern/sys_procdesc.c b/sys/kern/sys_procdesc.c index acaf1241cb2e..c5db21544b0f 100644 --- a/sys/kern/sys_procdesc.c +++ b/sys/kern/sys_procdesc.c @@ -486,6 +486,7 @@ static const struct filterops procdesc_kqops = {  	.f_isfd = 1,  	.f_detach = procdesc_kqops_detach,  	.f_event = procdesc_kqops_event, +	.f_copy = knote_triv_copy,  };  static int diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index c221106ae067..bc0725230cca 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -586,7 +586,7 @@ soaio_enqueue(struct task *task)  }  static void -soaio_init(void) +soaio_init(void *dummy __unused)  {  	soaio_lifetime = AIOD_LIFETIME_DEFAULT; diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index 4cef89cd5219..06a4adc3d8cb 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -604,4 +604,5 @@ const char *syscallnames[] = {  	"setgroups",			/* 596 = setgroups */  	"jail_attach_jd",			/* 597 = jail_attach_jd */  	"jail_remove_jd",			/* 598 = jail_remove_jd */ +	"kexec_load",			/* 599 = kexec_load */  }; diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 967af1f5313c..ea6d2b5aa1ef 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -3394,4 +3394,12 @@  		);  	} +599	AUE_NULL	STD { +		int kexec_load( +			uint64_t entry, +			u_long nseg, +			_In_reads_(nseg) _Contains_long_ptr_ struct kexec_segment *segments, +			u_long flags +		); +	}  ; vim: syntax=off diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index e28fef931ea8..5951cebbe74a 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -3514,6 +3514,16 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)  		*n_args = 1;  		break;  	} +	/* kexec_load */ +	case 599: { +		struct kexec_load_args *p = params; 
+		uarg[a++] = p->entry; /* uint64_t */ +		uarg[a++] = p->nseg; /* u_long */ +		uarg[a++] = (intptr_t)p->segments; /* struct kexec_segment * */ +		uarg[a++] = p->flags; /* u_long */ +		*n_args = 4; +		break; +	}  	default:  		*n_args = 0;  		break; @@ -9401,6 +9411,25 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)  			break;  		};  		break; +	/* kexec_load */ +	case 599: +		switch (ndx) { +		case 0: +			p = "uint64_t"; +			break; +		case 1: +			p = "u_long"; +			break; +		case 2: +			p = "userland struct kexec_segment *"; +			break; +		case 3: +			p = "u_long"; +			break; +		default: +			break; +		}; +		break;  	default:  		break;  	}; @@ -11409,6 +11438,11 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)  		if (ndx == 0 || ndx == 1)  			p = "int";  		break; +	/* kexec_load */ +	case 599: +		if (ndx == 0 || ndx == 1) +			p = "int"; +		break;  	default:  		break;  	}; diff --git a/sys/kern/tty.c b/sys/kern/tty.c index c8e2c561b7cf..067471eb949a 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -754,12 +754,14 @@ static const struct filterops tty_kqops_read = {  	.f_isfd = 1,  	.f_detach = tty_kqops_read_detach,  	.f_event = tty_kqops_read_event, +	.f_copy = knote_triv_copy,  };  static const struct filterops tty_kqops_write = {  	.f_isfd = 1,  	.f_detach = tty_kqops_write_detach,  	.f_event = tty_kqops_write_event, +	.f_copy = knote_triv_copy,  };  static int diff --git a/sys/kern/tty_pts.c b/sys/kern/tty_pts.c index 1291770a9ccb..2672935c2d89 100644 --- a/sys/kern/tty_pts.c +++ b/sys/kern/tty_pts.c @@ -491,11 +491,13 @@ static const struct filterops pts_kqops_read = {  	.f_isfd = 1,  	.f_detach = pts_kqops_read_detach,  	.f_event = pts_kqops_read_event, +	.f_copy = knote_triv_copy,  };  static const struct filterops pts_kqops_write = {  	.f_isfd = 1,  	.f_detach = pts_kqops_write_detach,  	.f_event = pts_kqops_write_event, +	.f_copy = knote_triv_copy,  };  static int diff --git a/sys/kern/uipc_mqueue.c 
b/sys/kern/uipc_mqueue.c index a8aec397b352..4c1bb1ff228e 100644 --- a/sys/kern/uipc_mqueue.c +++ b/sys/kern/uipc_mqueue.c @@ -281,11 +281,13 @@ static const struct filterops mq_rfiltops = {  	.f_isfd = 1,  	.f_detach = filt_mqdetach,  	.f_event = filt_mqread, +	.f_copy = knote_triv_copy,  };  static const struct filterops mq_wfiltops = {  	.f_isfd = 1,  	.f_detach = filt_mqdetach,  	.f_event = filt_mqwrite, +	.f_copy = knote_triv_copy,  };  /* diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index fe2d8d056062..eb9544628137 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -191,16 +191,19 @@ static const struct filterops soread_filtops = {  	.f_isfd = 1,  	.f_detach = filt_sordetach,  	.f_event = filt_soread, +	.f_copy = knote_triv_copy,  };  static const struct filterops sowrite_filtops = {  	.f_isfd = 1,  	.f_detach = filt_sowdetach,  	.f_event = filt_sowrite, +	.f_copy = knote_triv_copy,  };  static const struct filterops soempty_filtops = {  	.f_isfd = 1,  	.f_detach = filt_sowdetach,  	.f_event = filt_soempty, +	.f_copy = knote_triv_copy,  };  so_gen_t	so_gencnt;	/* generation count for sockets */ diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 340d84666459..807271488af2 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1069,6 +1069,21 @@ uipc_stream_sbspace(struct sockbuf *sb)  	return (min(space, mbspace));  } +/* + * UNIX version of generic sbwait() for writes.  We wait on peer's receive + * buffer, using our timeout. 
+ */ +static int +uipc_stream_sbwait(struct socket *so, sbintime_t timeo) +{ +	struct sockbuf *sb = &so->so_rcv; + +	SOCK_RECVBUF_LOCK_ASSERT(so); +	sb->sb_flags |= SB_WAIT; +	return (msleep_sbt(&sb->sb_acc, SOCK_RECVBUF_MTX(so), PSOCK | PCATCH, +	    "sbwait", timeo, 0, 0)); +} +  static int  uipc_sosend_stream_or_seqpacket(struct socket *so, struct sockaddr *addr,      struct uio *uio0, struct mbuf *m, struct mbuf *c, int flags, @@ -1203,7 +1218,8 @@ restart:  				error = EWOULDBLOCK;  				goto out4;  			} -			if ((error = sbwait(so2, SO_RCV)) != 0) { +			if ((error = uipc_stream_sbwait(so2, +			    so->so_snd.sb_timeo)) != 0) {  				SOCK_RECVBUF_UNLOCK(so2);  				goto out4;  			} else @@ -1543,15 +1559,19 @@ restart:  				mc_init_m(&cmc, control);  				SOCK_RECVBUF_LOCK(so); -				MPASS(!(sb->sb_state & SBS_CANTRCVMORE)); - -				if (__predict_false(cmc.mc_len + sb->sb_ccc + -				    sb->sb_ctl > sb->sb_hiwat)) { +				if (__predict_false( +				    (sb->sb_state & SBS_CANTRCVMORE) || +				    cmc.mc_len + sb->sb_ccc + sb->sb_ctl > +				    sb->sb_hiwat)) {  					/* -					 * Too bad, while unp_externalize() was -					 * failing, the other side had filled -					 * the buffer and we can't prepend data -					 * back. Losing data! +					 * While the lock was dropped and we +					 * were failing in unp_externalize(), +					 * the peer could has a) disconnected, +					 * b) filled the buffer so that we +					 * can't prepend data back. +					 * These are two edge conditions that +					 * we just can't handle, so lose the +					 * data and return the error.  					 
*/  					SOCK_RECVBUF_UNLOCK(so);  					SOCK_IO_RECV_UNLOCK(so); @@ -1835,11 +1855,13 @@ static const struct filterops uipc_write_filtops = {  	.f_isfd = 1,  	.f_detach = uipc_filt_sowdetach,  	.f_event = uipc_filt_sowrite, +	.f_copy = knote_triv_copy,  };  static const struct filterops uipc_empty_filtops = {  	.f_isfd = 1,  	.f_detach = uipc_filt_sowdetach,  	.f_event = uipc_filt_soempty, +	.f_copy = knote_triv_copy,  };  static int @@ -2397,7 +2419,7 @@ uipc_sendfile_wait(struct socket *so, off_t need, int *space)  		}  		if (!sockref)  			soref(so2); -		error = sbwait(so2, SO_RCV); +		error = uipc_stream_sbwait(so2, so->so_snd.sb_timeo);  		if (error == 0 &&  		    __predict_false(sb->sb_state & SBS_CANTRCVMORE))  			error = EPIPE; diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index e63fa4c01434..60916a9fbd32 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -345,12 +345,14 @@ static const struct filterops aio_filtops = {  	.f_attach = filt_aioattach,  	.f_detach = filt_aiodetach,  	.f_event = filt_aio, +	.f_copy = knote_triv_copy,  };  static const struct filterops lio_filtops = {  	.f_isfd = 0,  	.f_attach = filt_lioattach,  	.f_detach = filt_liodetach, -	.f_event = filt_lio +	.f_event = filt_lio, +	.f_copy = knote_triv_copy,  };  static eventhandler_tag exit_tag, exec_tag; diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 2e397b8e9e8f..b674313993c4 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -260,8 +260,10 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,  	 */  	while (lblkno < (origblkno + maxra)) {  		error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL); -		if (error) +		if (error) { +			error = 0;  			break; +		}  		if (blkno == -1)  			break; diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 05d1120030f3..4eca09aef145 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -458,6 +458,7 @@ vop_stdpathconf(struct vop_pathconf_args 
*ap)  		case _PC_HAS_NAMEDATTR:  		case _PC_HAS_HIDDENSYSTEM:  		case _PC_CLONE_BLKSIZE: +		case _PC_CASE_INSENSITIVE:  			*ap->a_retval = 0;  			return (0);  		default: diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c index b265a5ff3a62..e60d8426ee42 100644 --- a/sys/kern/vfs_inotify.c +++ b/sys/kern/vfs_inotify.c @@ -111,6 +111,7 @@ static const struct filterops inotify_rfiltops = {  	.f_isfd = 1,  	.f_detach = filt_inotifydetach,  	.f_event = filt_inotifyevent, +	.f_copy = knote_triv_copy,  };  static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures"); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 73e110c05bc1..58975f7ac932 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -6545,6 +6545,7 @@ const struct filterops fs_filtops = {  	.f_attach = filt_fsattach,  	.f_detach = filt_fsdetach,  	.f_event = filt_fsevent, +	.f_copy = knote_triv_copy,  };  static int @@ -6624,24 +6625,28 @@ static int	filt_vfsvnode(struct knote *kn, long hint);  static void	filt_vfsdetach(struct knote *kn);  static int	filt_vfsdump(struct proc *p, struct knote *kn,  		    struct kinfo_knote *kin); +static int	filt_vfscopy(struct knote *kn, struct proc *p1);  static const struct filterops vfsread_filtops = {  	.f_isfd = 1,  	.f_detach = filt_vfsdetach,  	.f_event = filt_vfsread,  	.f_userdump = filt_vfsdump, +	.f_copy = filt_vfscopy,  };  static const struct filterops vfswrite_filtops = {  	.f_isfd = 1,  	.f_detach = filt_vfsdetach,  	.f_event = filt_vfswrite,  	.f_userdump = filt_vfsdump, +	.f_copy = filt_vfscopy,  };  static const struct filterops vfsvnode_filtops = {  	.f_isfd = 1,  	.f_detach = filt_vfsdetach,  	.f_event = filt_vfsvnode,  	.f_userdump = filt_vfsdump, +	.f_copy = filt_vfscopy,  };  static void @@ -6825,6 +6830,16 @@ filt_vfsdump(struct proc *p, struct knote *kn, struct kinfo_knote *kin)  	return (0);  } +static int +filt_vfscopy(struct knote *kn, struct proc *p1) +{ +	struct vnode *vp; + +	vp = (struct vnode 
*)kn->kn_hook; +	vhold(vp); +	return (0); +} +  int  vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off)  { diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 9e1275359715..1a739d354f1f 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1119,7 +1119,7 @@ flags_to_rights(int flags, cap_rights_t *rightsp)  	if (flags & O_TRUNC)  		cap_rights_set_one(rightsp, CAP_FTRUNCATE); -	if (flags & (O_SYNC | O_FSYNC)) +	if (flags & (O_SYNC | O_FSYNC | O_DSYNC))  		cap_rights_set_one(rightsp, CAP_FSYNC);  	if (flags & (O_EXLOCK | O_SHLOCK)) | 
