/*- * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
 *	from: @(#)kern_clock.c	7.16 (Berkeley) 5/9/91
 *	$Id: kern_clock.c,v 1.11 1993/12/19 00:51:20 wollman Exp $
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "kernel.h"
#include "proc.h"
#include "signalvar.h"
#include "resourcevar.h"

#include "machine/cpu.h"

#include "resource.h"
#include "vm/vm.h"

#ifdef GPROF
#include "gprof.h"
#endif

/* Defined later in this file; hardclock() calls it when phz == 0. */
static void gatherstats(clockframe *);

/* From callout.h */
/*
 * calltodo is the list head of pending timeouts: the code in this file
 * only ever follows its c_next link.  callfree heads the free list that
 * timeout() allocates from and untimeout() returns entries to.
 * callout and ncallout are not referenced by the code visible in this
 * file; presumably the callout table base and its size -- TODO confirm.
 */
struct callout *callfree, *callout, calltodo;
int ncallout;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other. The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second. Both functions
 * can be performed by a single clock (ie hz == phz), however the
 * statistics will be much more prone to errors. Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state. These clocks would allow a non-
 * approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 *
 * Only one carry from tv_usec into tv_sec is performed, so the result
 * is normalized only when the sum stays below 2000000 usec.
 * The macro expands to a bare brace block, not do { } while (0): a use
 * placed directly before `else' must not be followed by a semicolon.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
*/ void hardclock(frame) clockframe frame; { register struct callout *p1; register struct proc *p = curproc; register struct pstats *pstats = 0; register struct rusage *ru; register struct vmspace *vm; register int s; int needsoft = 0; extern int tickdelta; extern long timedelta; /* * Update real-time timeout queue. * At front of queue are some number of events which are ``due''. * The time to these is <= 0 and if negative represents the * number of ticks which have passed since it was supposed to happen. * The rest of the q elements (times > 0) are events yet to happen, * where the time for each is given as a delta from the previous. * Decrementing just the first of these serves to decrement the time * to all events. */ p1 = calltodo.c_next; while (p1) { if (--p1->c_time > 0) break; needsoft = 1; if (p1->c_time == 0) break; p1 = p1->c_next; } /* * Curproc (now in p) is null if no process is running. * We assume that curproc is set in user mode! */ if (p) pstats = p->p_stats; /* * Charge the time out based on the mode the cpu is in. * Here again we fudge for the lack of proper interval timers * assuming that the current state has been around at least * one tick. */ if (CLKF_USERMODE(&frame)) { if (pstats->p_prof.pr_scale) needsoft = 1; /* * CPU was in user state. Increment * user time counter, and process process-virtual time * interval timer. */ BUMPTIME(&p->p_utime, tick); if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) psignal(p, SIGVTALRM); } else { /* * CPU was in system state. 
*/ if (p) BUMPTIME(&p->p_stime, tick); } /* bump the resource usage of integral space use */ if (p && pstats && (ru = &pstats->p_ru) && (vm = p->p_vmspace)) { ru->ru_ixrss += vm->vm_tsize * NBPG / 1024; ru->ru_idrss += vm->vm_dsize * NBPG / 1024; ru->ru_isrss += vm->vm_ssize * NBPG / 1024; if ((vm->vm_pmap.pm_stats.resident_count * NBPG / 1024) > ru->ru_maxrss) { ru->ru_maxrss = vm->vm_pmap.pm_stats.resident_count * NBPG / 1024; } } /* * If the cpu is currently scheduled to a process, then * charge it with resource utilization for a tick, updating * statistics which run in (user+system) virtual time, * such as the cpu time limit and profiling timers. * This assumes that the current process has been running * the entire last tick. */ if (p) { if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) > p->p_rlimit[RLIMIT_CPU].rlim_cur) { psignal(p, SIGXCPU); if (p->p_rlimit[RLIMIT_CPU].rlim_cur < p->p_rlimit[RLIMIT_CPU].rlim_max) p->p_rlimit[RLIMIT_CPU].rlim_cur += 5; } if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) psignal(p, SIGPROF); /* * We adjust the priority of the current process. * The priority of a process gets worse as it accumulates * CPU time. The cpu usage estimator (p_cpu) is increased here * and the formula for computing priorities (in kern_synch.c) * will compute a different value each time the p_cpu increases * by 4. The cpu usage estimator ramps up quite quickly when * the process is running (linearly), and decays away * exponentially, * at a rate which is proportionally slower * when the system is busy. The basic principal is that the * system will 90% forget that a process used a lot of CPU * time in 5*loadav seconds. This causes the system to favor * processes which haven't run much recently, and to * round-robin among other processes. 
*/ p->p_cpticks++; if (++p->p_cpu == 0) p->p_cpu--; if ((p->p_cpu&3) == 0) { setpri(p); if (p->p_pri >= PUSER) p->p_pri = p->p_usrpri; } } /* * If the alternate clock has not made itself known then * we must gather the statistics. */ if (phz == 0) gatherstats(&frame); /* * Increment the time-of-day, and schedule * processing of the callouts at a very low cpu priority, * so we don't keep the relatively high clock interrupt * priority any longer than necessary. */ if (timedelta == 0) BUMPTIME(&time, tick) else { register delta; if (timedelta < 0) { delta = tick - tickdelta; timedelta += tickdelta; } else { delta = tick + tickdelta; timedelta -= tickdelta; } BUMPTIME(&time, delta); } #ifdef DCFCLK /* * This is lousy, but until I can get the $&^%&^(!!! signal onto one * of the interrupt's I'll have to poll it. No, it will not work if * you attempt -DHZ=1000, things break. * But keep the NDCFCLK low, to avoid waste of cycles... * phk@data.fls.dk */ dcfclk_worker(); #endif if (needsoft) { #if 0 /* * XXX - hardclock runs at splhigh, so the splsoftclock is useless and * softclock runs at splhigh as well if we do this. It is not much of * an optimization, since the "software interrupt" is done with a call * from doreti, and the overhead of checking there is sometimes less * than checking here. Moreover, the whole %$$%$^ frame is passed by * value here. */ if (CLKF_BASEPRI(&frame)) { /* * Save the overhead of a software interrupt; * it will happen as soon as we return, so do it now. */ (void) splsoftclock(); softclock(frame); } else #endif setsoftclock(); } } int dk_ndrive = DK_NDRIVE; /* * Gather statistics on resource utilization. * * We make a gross assumption: that the system has been in the * state it is in (user state, kernel state, interrupt state, * or idle state) for the entire last time interval, and * update statistics accordingly. */ void gatherstats(framep) clockframe *framep; { register int cpstate, s; /* * Determine what state the cpu is in. 
*/ if (CLKF_USERMODE(framep)) { /* * CPU was in user state. */ if (curproc->p_nice > NZERO) cpstate = CP_NICE; else cpstate = CP_USER; } else { /* * CPU was in system state. If profiling kernel * increment a counter. If no process is running * then this is a system tick if we were running * at a non-zero IPL (in a driver). If a process is running, * then we charge it with system time even if we were * at a non-zero IPL, since the system often runs * this way during processing of system calls. * This is approximate, but the lack of true interval * timers makes doing anything else difficult. */ cpstate = CP_SYS; if (curproc == NULL && CLKF_BASEPRI(framep)) cpstate = CP_IDLE; #ifdef GPROF s = (u_long) CLKF_PC(framep) - (u_long) s_lowpc; if (profiling < 2 && s < s_textsize) kcount[s / (HISTFRACTION * sizeof (*kcount))]++; #endif } /* * We maintain statistics shown by user-level statistics * programs: the amount of time in each cpu state, and * the amount of time each of DK_NDRIVE ``drives'' is busy. */ cp_time[cpstate]++; for (s = 0; s < DK_NDRIVE; s++) if (dk_busy&(1<c_time > 0) { splx(s); break; } arg = p1->c_arg; func = p1->c_func; a = p1->c_time; calltodo.c_next = p1->c_next; p1->c_next = callfree; callfree = p1; splx(s); (*func)(arg, a); } /* * If no process to work with, we're finished. */ if (curproc == 0) return; /* * If trapped user-mode and profiling, give it * a profiling tick. */ if (CLKF_USERMODE(&frame)) { register struct proc *p = curproc; if (p->p_stats->p_prof.pr_scale) profile_tick(p, &frame); /* * Check to see if process has accumulated * more than 10 minutes of user time. If so * reduce priority to give others a chance. */ if (p->p_ucred->cr_uid && p->p_nice == NZERO && p->p_utime.tv_sec > 10 * 60) { p->p_nice = NZERO + 4; setpri(p); p->p_pri = p->p_usrpri; } } } /* * Arrange that (*func)(arg) is called in t/hz seconds. 
*/ void timeout(func, arg, t) timeout_func_t func; caddr_t arg; register int t; { register struct callout *p1, *p2, *pnew; register int s = splhigh(); if (t <= 0) t = 1; pnew = callfree; if (pnew == NULL) panic("timeout table overflow"); callfree = pnew->c_next; pnew->c_arg = arg; pnew->c_func = func; for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2) if (p2->c_time > 0) t -= p2->c_time; p1->c_next = pnew; pnew->c_next = p2; pnew->c_time = t; if (p2) p2->c_time -= t; splx(s); } /* * untimeout is called to remove a function timeout call * from the callout structure. */ void untimeout(func, arg) timeout_func_t func; caddr_t arg; { register struct callout *p1, *p2; register int s; s = splhigh(); for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) { if (p2->c_func == func && p2->c_arg == arg) { if (p2->c_next && p2->c_time > 0) p2->c_next->c_time += p2->c_time; p1->c_next = p2->c_next; p2->c_next = callfree; callfree = p2; break; } } splx(s); } /* * Compute number of hz until specified time. * Used to compute third argument to timeout() from an * absolute time. */ /* XXX clock_t */ u_long hzto(tv) struct timeval *tv; { register unsigned long ticks; register long sec; register long usec; int s; /* * If the number of usecs in the whole seconds part of the time * difference fits in a long, then the total number of usecs will * fit in an unsigned long. Compute the total and convert it to * ticks, rounding up and adding 1 to allow for the current tick * to expire. Rounding also depends on unsigned long arithmetic * to avoid overflow. * * Otherwise, if the number of ticks in the whole seconds part of * the time difference fits in a long, then convert the parts to * ticks separately and add, using similar rounding methods and * overflow avoidance. This method would work in the previous * case but it is slightly slower and assumes that hz is integral. * * Otherwise, round the time difference down to the maximum * representable value. 
* * Maximum value for any timeout in 10ms ticks is 248 days. */ s = splhigh(); sec = tv->tv_sec - time.tv_sec; usec = tv->tv_usec - time.tv_usec; splx(s); if (usec < 0) { sec--; usec += 1000000; } if (sec < 0) { #ifdef DIAGNOSTIC printf("hzto: negative time difference %ld sec %ld usec\n", sec, usec); #endif ticks = 1; } else if (sec <= LONG_MAX / 1000000) ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) / tick + 1; else if (sec <= LONG_MAX / hz) ticks = sec * hz + ((unsigned long)usec + (tick - 1)) / tick + 1; else ticks = LONG_MAX; #define CLOCK_T_MAX INT_MAX /* XXX should be ULONG_MAX */ if (ticks > CLOCK_T_MAX) ticks = CLOCK_T_MAX; return (ticks); }