diff options
-rw-r--r-- | lib/libc/gmon/mcount.c | 156 | ||||
-rw-r--r-- | sys/amd64/amd64/prof_machdep.c | 153 | ||||
-rw-r--r-- | sys/amd64/include/asmacros.h | 92 | ||||
-rw-r--r-- | sys/amd64/include/profile.h | 54 | ||||
-rw-r--r-- | sys/conf/files.i386 | 3 | ||||
-rw-r--r-- | sys/i386/conf/files.i386 | 3 | ||||
-rw-r--r-- | sys/i386/include/asmacros.h | 92 | ||||
-rw-r--r-- | sys/i386/include/profile.h | 54 | ||||
-rw-r--r-- | sys/i386/isa/prof_machdep.c | 153 | ||||
-rw-r--r-- | sys/kern/subr_prof.c | 95 | ||||
-rw-r--r-- | sys/libkern/mcount.c | 182 | ||||
-rw-r--r-- | sys/sys/gmon.h | 64 | ||||
-rw-r--r-- | usr.bin/Makefile | 3 | ||||
-rw-r--r-- | usr.bin/gprof4/Makefile | 14 | ||||
-rw-r--r-- | usr.sbin/config/config.8 | 5 | ||||
-rw-r--r-- | usr.sbin/config/mkmakefile.c | 8 | ||||
-rw-r--r-- | usr.sbin/kgmon/kgmon.8 | 11 | ||||
-rw-r--r-- | usr.sbin/kgmon/kgmon.c | 37 |
18 files changed, 876 insertions, 303 deletions
diff --git a/lib/libc/gmon/mcount.c b/lib/libc/gmon/mcount.c index 63fbf886a151..59d8de40f20f 100644 --- a/lib/libc/gmon/mcount.c +++ b/lib/libc/gmon/mcount.c @@ -38,7 +38,14 @@ static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93"; #include <sys/param.h> #include <sys/gmon.h> #ifdef KERNEL -#include <i386/include/cpufunc.h> +#include <sys/systm.h> +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> +void bintr __P((void)); +void btrap __P((void)); +void eintr __P((void)); +void user __P((void)); #endif /* @@ -57,39 +64,127 @@ static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93"; * perform this optimization. */ _MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */ - register u_long frompc, selfpc; + register fptrint_t frompc, selfpc; { +#ifdef GUPROF + u_int delta; +#endif + register fptrdiff_t frompci; register u_short *frompcindex; register struct tostruct *top, *prevtop; register struct gmonparam *p; register long toindex; #ifdef KERNEL - register int s; - u_long save_eflags; + register int s; /* XXX */ + u_long save_eflags; /* XXX */ #endif p = &_gmonparam; +#ifndef GUPROF /* XXX */ /* * check that we are profiling * and that we aren't recursively invoked. */ if (p->state != GMON_PROF_ON) return; +#endif #ifdef KERNEL MCOUNT_ENTER; #else p->state = GMON_PROF_BUSY; #endif + frompci = frompc - p->lowpc; + +#ifdef KERNEL + /* + * When we are called from an exception handler, frompci may be + * for a user address. Convert such frompci's to the index of + * user() to merge all user counts. + */ + if (frompci >= p->textsize) { + if (frompci + p->lowpc + >= (fptrint_t)(VM_MAXUSER_ADDRESS + UPAGES * NBPG)) + goto done; + frompci = (fptrint_t)user - p->lowpc; + if (frompci >= p->textsize) + goto done; + } +#endif /* KERNEL */ + +#ifdef GUPROF + if (p->state != GMON_PROF_HIRES) + goto skip_guprof_stuff; + /* + * Look at the clock and add the count of clock cycles since the + * clock was last looked at to a counter for frompc. 
This + * solidifies the count for the function containing frompc and + * effectively starts another clock for the current function. + * The count for the new clock will be solidified when another + * function call is made or the function returns. + * + * We use the usual sampling counters since they can be located + * efficiently. 4-byte counters are usually necessary. + * + * There are many complications for subtracting the profiling + * overheads from the counts for normal functions and adding + * them to the counts for mcount(), mexitcount() and cputime(). + * We attempt to handle fractional cycles, but the overheads + * are usually underestimated because they are calibrated for + * a simpler than usual setup. + */ + delta = cputime() - p->mcount_overhead; + p->cputime_overhead_resid += p->cputime_overhead_frac; + p->mcount_overhead_resid += p->mcount_overhead_frac; + if ((int)delta < 0) + *p->mcount_count += delta + p->mcount_overhead + - p->cputime_overhead; + else if (delta != 0) { + if (p->cputime_overhead_resid >= CALIB_SCALE) { + p->cputime_overhead_resid -= CALIB_SCALE; + ++*p->cputime_count; + --delta; + } + if (delta != 0) { + if (p->mcount_overhead_resid >= CALIB_SCALE) { + p->mcount_overhead_resid -= CALIB_SCALE; + ++*p->mcount_count; + --delta; + } + KCOUNT(p, frompci) += delta; + } + *p->mcount_count += p->mcount_overhead_sub; + } + *p->cputime_count += p->cputime_overhead; +skip_guprof_stuff: +#endif /* GUPROF */ + +#ifdef KERNEL + /* + * When we are called from an exception handler, frompc is faked + * to be for where the exception occurred. We've just solidified + * the count for there. Now convert frompci to the index of btrap() + * for trap handlers and bintr() for interrupt handlers to make + * exceptions appear in the call graph as calls from btrap() and + * bintr() instead of calls from all over. 
+ */ + if ((fptrint_t)selfpc >= (fptrint_t)btrap + && (fptrint_t)selfpc < (fptrint_t)eintr) { + if ((fptrint_t)selfpc >= (fptrint_t)bintr) + frompci = (fptrint_t)bintr - p->lowpc; + else + frompci = (fptrint_t)btrap - p->lowpc; + } +#endif /* KERNEL */ + /* - * check that frompcindex is a reasonable pc value. + * check that frompc is a reasonable pc value. * for example: signal catchers get called from the stack, * not from text space. too bad. */ - frompc -= p->lowpc; - if (frompc > p->textsize) + if (frompci >= p->textsize) goto done; - frompcindex = &p->froms[frompc / (p->hashfraction * sizeof(*p->froms))]; + frompcindex = &p->froms[frompci / (p->hashfraction * sizeof(*p->froms))]; toindex = *frompcindex; if (toindex == 0) { /* @@ -180,3 +275,48 @@ overflow: * which is included by <sys/gmon.h>. */ MCOUNT + +#ifdef GUPROF +void +mexitcount(selfpc) + fptrint_t selfpc; +{ + struct gmonparam *p; + fptrint_t selfpcdiff; + + p = &_gmonparam; + selfpcdiff = selfpc - (fptrint_t)p->lowpc; + if (selfpcdiff < p->textsize) { + u_int delta; + + /* + * Solidify the count for the current function. 
+ */ + delta = cputime() - p->mexitcount_overhead; + p->cputime_overhead_resid += p->cputime_overhead_frac; + p->mexitcount_overhead_resid += p->mexitcount_overhead_frac; + if ((int)delta < 0) + *p->mexitcount_count += delta + p->mexitcount_overhead + - p->cputime_overhead; + else if (delta != 0) { + if (p->cputime_overhead_resid >= CALIB_SCALE) { + p->cputime_overhead_resid -= CALIB_SCALE; + ++*p->cputime_count; + --delta; + } + if (delta != 0) { + if (p->mexitcount_overhead_resid + >= CALIB_SCALE) { + p->mexitcount_overhead_resid + -= CALIB_SCALE; + ++*p->mexitcount_count; + --delta; + } + KCOUNT(p, selfpcdiff) += delta; + } + *p->mexitcount_count += p->mexitcount_overhead_sub; + } + *p->cputime_count += p->cputime_overhead; + } +} +#endif /* GUPROF */ diff --git a/sys/amd64/amd64/prof_machdep.c b/sys/amd64/amd64/prof_machdep.c new file mode 100644 index 000000000000..2aa6787d69bc --- /dev/null +++ b/sys/amd64/amd64/prof_machdep.c @@ -0,0 +1,153 @@ +#include <sys/param.h> +#include <sys/systm.h> +#include <machine/clock.h> +#include <i386/isa/isa.h> +#include <i386/isa/timerreg.h> + +#ifdef GUPROF +extern u_int cputime __P((void)); +#endif + +#ifdef __GNUC__ +asm(" +GM_STATE = 0 +GMON_PROF_OFF = 3 + + .text + .align 4,0x90 + .globl __mcount +__mcount: + # + # Check that we are profiling. Do it early for speed. + # + cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE + je Lmcount_exit + # + # __mcount is the same as mcount except the caller hasn't changed + # the stack except to call here, so the caller's raddr is above + # our raddr. + # + movl 4(%esp),%edx + jmp Lgot_frompc + + .align 4,0x90 + .globl mcount +mcount: + cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE + je Lmcount_exit + # + # The caller's stack frame has already been built, so %ebp is + # the caller's frame pointer. The caller's raddr is in the + # caller's frame following the caller's caller's frame pointer. + # + movl 4(%ebp),%edx +Lgot_frompc: + # + # Our raddr is the caller's pc. 
+ # + movl (%esp),%eax + + pushf + pushl %eax + pushl %edx + cli + call _mcount + addl $8,%esp + popf +Lmcount_exit: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ + +#ifdef GUPROF +/* + * mexitcount saves the return register(s), loads selfpc and calls + * mexitcount(selfpc) to do the work. Someday it should be in a machine + * dependent file together with cputime(), __mcount and mcount. cputime() + * can't just be put in machdep.c because it has to be compiled without -pg. + */ +#ifdef __GNUC__ +asm(" + .text +# +# Dummy label to be seen when gprof -u hides mexitcount. +# + .align 4,0x90 + .globl __mexitcount +__mexitcount: + nop + +GMON_PROF_HIRES = 4 + + .align 4,0x90 + .globl mexitcount +mexitcount: + cmpl $GMON_PROF_HIRES,__gmonparam+GM_STATE + jne Lmexitcount_exit + pushl %edx + pushl %eax + movl 8(%esp),%eax + pushf + pushl %eax + cli + call _mexitcount + addl $4,%esp + popf + popl %eax + popl %edx +Lmexitcount_exit: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ + +/* + * Return the time elapsed since the last call. The units are machine- + * dependent. + */ +u_int +cputime() +{ + u_int count; + u_int delta; + u_char low; + static u_int prev_count; + + /* + * Read the current value of the 8254 timer counter 0. + */ + outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); + low = inb(TIMER_CNTR0); + count = low | (inb(TIMER_CNTR0) << 8); + + /* + * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. + * While profiling is enabled, this routine is called at least twice + * per timer reset (for mcounting and mexitcounting hardclock()), + * so at most one reset has occurred since the last call, and one + * has occurred iff the current count is larger than the previous + * count. This allows counter underflow to be detected faster + * than in microtime(). 
+ */ + delta = prev_count - count; + prev_count = count; + if ((int) delta <= 0) + return (delta + timer0_max_count); + return (delta); +} +#else /* not GUPROF */ +#ifdef __GNUC__ +asm(" + .text + .align 4,0x90 + .globl mexitcount +mexitcount: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ +#endif /* GUPROF */ diff --git a/sys/amd64/include/asmacros.h b/sys/amd64/include/asmacros.h index b2a6dc839f2e..8776ccfe763d 100644 --- a/sys/amd64/include/asmacros.h +++ b/sys/amd64/include/asmacros.h @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: asmacros.h,v 1.4 1994/08/19 11:20:11 jkh Exp $ + * $Id: asmacros.h,v 1.5 1994/09/08 12:25:18 bde Exp $ */ #ifndef _MACHINE_ASMACROS_H_ @@ -38,47 +38,83 @@ #ifdef KERNEL +/* XXX too much duplication in various asm*.h's and gprof.h's */ + #define ALIGN_DATA .align 2 /* 4 byte alignment, zero filled */ #define ALIGN_TEXT .align 2,0x90 /* 4-byte alignment, nop filled */ #define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte alignment (better for 486), nop filled */ -#define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name: -#define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name) - -/* These three are place holders for future changes to the profiling code */ -#define MCOUNT_LABEL(name) -#define MEXITCOUNT -#define FAKE_MCOUNT(caller) +#define GEN_ENTRY(name) ALIGN_TEXT; .globl _/**/name; _/**/name: +#define NON_GPROF_ENTRY(name) GEN_ENTRY(name) #ifdef GPROF /* - * ALTENTRY() must be before a corresponding ENTRY() so that it can jump - * over the mcounting. - */ -#define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f -#define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2: -/* - * The call to mcount supports the usual (bad) conventions. We allocate - * some data and pass a pointer to it although the FreeBSD doesn't use - * the data. 
We set up a frame before calling mcount because that is - * the standard convention although it makes work for both mcount and - * callers. + * __mcount is like mcount except that it doesn't require its caller to set + * up a frame pointer. It must be called before pushing anything onto the + * stack. gcc should eventually generate code to call __mcount in most + * cases. This would make -pg in combination with -fomit-frame-pointer + * useful. gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to + * allow profiling before setting up the frame pointer, but this is + * inadequate for good handling of special cases, e.g., -fpic works best + * with profiling after the prologue. + * + * Neither __mcount nor mcount requires %eax to point to 4 bytes of data, + * so don't waste space allocating the data or time setting it up. Changes + * to avoid the wastage in gcc-2.4.5-compiled code are available. + * + * mexitcount is a new profiling feature to allow accurate timing of all + * functions if an accurate clock is available. Changes to gcc-2.4.5 to + * support it are available. The changes currently don't allow not + * generating mexitcounts for non-kernel code. It is best to call + * mexitcount right at the end of a function like the MEXITCOUNT macro + * does, but the changes to gcc only implement calling it as the first + * thing in the epilogue to avoid problems with -fpic. + * + * mcount and __mexitcount may clobber the call-used registers and %ef. + * mexitcount may clobber %ecx and %ef. + * + * Cross-jumping makes accurate timing more difficult. It is handled in + * many cases by calling mexitcount before jumping. It is not handled + * for some conditional jumps (e.g., in bcopyx) or for some fault-handling + * jumps. It is handled for some fault-handling jumps by not sharing the + * exit routine. + * + * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to + * the main entry point. Note that alt entries are counted twice.
They + * have to be counted as ordinary entries for gprof to get the call times + * right for the ordinary entries. + * + * High local labels are used in macros to avoid clashes with local labels + * in functions. + * + * "ret" is used instead of "RET" because there are a lot of "ret"s. + * 0xc3 is the opcode for "ret" (#define ret ... ret fails because this + * file is preprocessed in traditional mode). "ret" clobbers eflags + * but this doesn't matter. */ -#define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \ - pushl %ebp; movl %esp,%ebp; \ - movl $1b,%eax; call mcount; popl %ebp -#else +#define ALTENTRY(name) GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f +#define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT +#define FAKE_MCOUNT(caller) pushl caller ; call __mcount ; popl %ecx +#define MCOUNT call __mcount +#define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT +#define MEXITCOUNT call mexitcount +#define ret MEXITCOUNT ; .byte 0xc3 +#else /* not GPROF */ /* * ALTENTRY() has to align because it is before a corresponding ENTRY(). * ENTRY() has to align to because there may be no ALTENTRY() before it. - * If there is a previous ALTENTRY() then the alignment code is empty. + * If there is a previous ALTENTRY() then the alignment code for ENTRY() + * is empty. */ -#define ALTENTRY(name) GEN_ENTRY(_/**/name) -#define ENTRY(name) GEN_ENTRY(_/**/name) +#define ALTENTRY(name) GEN_ENTRY(name) +#define ENTRY(name) GEN_ENTRY(name) +#define FAKE_MCOUNT(caller) #define MCOUNT +#define MCOUNT_LABEL(name) +#define MEXITCOUNT +#endif /* GPROF */ -#endif - +/* XXX NOP and FASTER_NOP are misleadingly named */ #ifdef DUMMY_NOPS /* this will break some older machines */ #define FASTER_NOP #define NOP diff --git a/sys/amd64/include/profile.h b/sys/amd64/include/profile.h index 9fe27ec5eda8..c55d629e0551 100644 --- a/sys/amd64/include/profile.h +++ b/sys/amd64/include/profile.h @@ -31,35 +31,59 @@ * SUCH DAMAGE. 
* * @(#)profile.h 8.1 (Berkeley) 6/11/93 - * $Id: profile.h,v 1.3 1994/08/21 04:55:29 paul Exp $ + * $Id: profile.h,v 1.4 1994/09/15 16:27:14 paul Exp $ */ -#ifndef _I386_MACHINE_PROFILE_H_ -#define _I386_MACHINE_PROFILE_H_ +#ifndef _MACHINE_PROFILE_H_ +#define _MACHINE_PROFILE_H_ +#if 0 #define _MCOUNT_DECL static inline void _mcount #define MCOUNT \ extern void mcount() asm("mcount"); void mcount() { \ - int selfpc, frompcindex; \ + fptrint_t selfpc, frompc; \ /* \ - * find the return address for mcount, \ + * Find the return address for mcount, \ * and the return address for mcount's caller. \ * \ - * selfpc = pc pushed by mcount call \ + * selfpc = pc pushed by call to mcount \ */ \ asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \ /* \ - * frompcindex = pc pushed by jsr into self. \ - * In GCC the caller's stack frame has already been built so we \ - * have to chase a6 to find caller's raddr. \ + * frompc = pc pushed by call to mcount's caller. \ + * The caller's stack frame has already been built, so %ebp is \ + * the caller's frame pointer. The caller's raddr is in the \ + * caller's frame following the caller's caller's frame pointer. \ */ \ - asm("movl (%%ebp),%0" : "=r" (frompcindex)); \ - frompcindex = ((int *)frompcindex)[1]; \ - _mcount(frompcindex, selfpc); \ + asm("movl (%%ebp),%0" : "=r" (frompc)); \ + frompc = ((fptrint_t *)frompc)[1]; \ + _mcount(frompc, selfpc); \ } +#else +#define _MCOUNT_DECL void mcount +#define MCOUNT +#endif -#define MCOUNT_ENTER save_eflags = read_eflags(); disable_intr() -#define MCOUNT_EXIT write_eflags(save_eflags) +#define MCOUNT_ENTER { save_eflags = read_eflags(); disable_intr(); } +#define MCOUNT_EXIT (write_eflags(save_eflags)) -#endif +#define CALIB_SCALE 1000 +#define KCOUNT(p,index) ((p)->kcount[(index) \ + / (HISTFRACTION * sizeof(*(p)->kcount))]) +#define PC_TO_I(p, pc) ((fptrint_t)(pc) - (fptrint_t)(p)->lowpc) + +/* An unsigned integral type that can hold function pointers. 
*/ +typedef u_int fptrint_t; + +/* + * An unsigned integral type that can hold non-negative difference between + * function pointers. + */ +typedef int fptrdiff_t; + +u_int cputime __P((void)); +void mcount __P((fptrint_t frompc, fptrint_t selfpc)); +void mexitcount __P((fptrint_t selfpc)); + +#endif /* !MACHINE_PROFILE_H */ diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index fda656d34011..1633ed87b9f8 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # -# $Id: files.i386,v 1.120 1995/12/26 12:50:01 bde Exp $ +# $Id: files.i386,v 1.121 1995/12/26 13:57:56 bde Exp $ # aic7xxx_asm optional ahc device-driver \ dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \ @@ -115,6 +115,7 @@ i386/isa/pcvt/pcvt_kbd.c optional vt device-driver i386/isa/pcvt/pcvt_out.c optional vt device-driver i386/isa/pcvt/pcvt_sup.c optional vt device-driver i386/isa/pcvt/pcvt_vtf.c optional vt device-driver +i386/isa/prof_machdep.c optional profiling-routine i386/isa/psm.c optional psm device-driver i386/isa/random_machdep.c standard i386/isa/rc.c optional rc device-driver diff --git a/sys/i386/conf/files.i386 b/sys/i386/conf/files.i386 index fda656d34011..1633ed87b9f8 100644 --- a/sys/i386/conf/files.i386 +++ b/sys/i386/conf/files.i386 @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. 
# -# $Id: files.i386,v 1.120 1995/12/26 12:50:01 bde Exp $ +# $Id: files.i386,v 1.121 1995/12/26 13:57:56 bde Exp $ # aic7xxx_asm optional ahc device-driver \ dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \ @@ -115,6 +115,7 @@ i386/isa/pcvt/pcvt_kbd.c optional vt device-driver i386/isa/pcvt/pcvt_out.c optional vt device-driver i386/isa/pcvt/pcvt_sup.c optional vt device-driver i386/isa/pcvt/pcvt_vtf.c optional vt device-driver +i386/isa/prof_machdep.c optional profiling-routine i386/isa/psm.c optional psm device-driver i386/isa/random_machdep.c standard i386/isa/rc.c optional rc device-driver diff --git a/sys/i386/include/asmacros.h b/sys/i386/include/asmacros.h index b2a6dc839f2e..8776ccfe763d 100644 --- a/sys/i386/include/asmacros.h +++ b/sys/i386/include/asmacros.h @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: asmacros.h,v 1.4 1994/08/19 11:20:11 jkh Exp $ + * $Id: asmacros.h,v 1.5 1994/09/08 12:25:18 bde Exp $ */ #ifndef _MACHINE_ASMACROS_H_ @@ -38,47 +38,83 @@ #ifdef KERNEL +/* XXX too much duplication in various asm*.h's and gprof.h's */ + #define ALIGN_DATA .align 2 /* 4 byte alignment, zero filled */ #define ALIGN_TEXT .align 2,0x90 /* 4-byte alignment, nop filled */ #define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte alignment (better for 486), nop filled */ -#define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name: -#define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name) - -/* These three are place holders for future changes to the profiling code */ -#define MCOUNT_LABEL(name) -#define MEXITCOUNT -#define FAKE_MCOUNT(caller) +#define GEN_ENTRY(name) ALIGN_TEXT; .globl _/**/name; _/**/name: +#define NON_GPROF_ENTRY(name) GEN_ENTRY(name) #ifdef GPROF /* - * ALTENTRY() must be before a corresponding ENTRY() so that it can jump - * over the mcounting. 
- */ -#define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f -#define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2: -/* - * The call to mcount supports the usual (bad) conventions. We allocate - * some data and pass a pointer to it although the FreeBSD doesn't use - * the data. We set up a frame before calling mcount because that is - * the standard convention although it makes work for both mcount and - * callers. + * __mcount is like mcount except that it doesn't require its caller to set + * up a frame pointer. It must be called before pushing anything onto the + * stack. gcc should eventually generate code to call __mcount in most + * cases. This would make -pg in combination with -fomit-frame-pointer + * useful. gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to + * allow profiling before setting up the frame pointer, but this is + * inadequate for good handling of special cases, e.g., -fpic works best + * with profiling after the prologue. + * + * Neither __mcount nor mcount requires %eax to point to 4 bytes of data, + * so don't waste space allocating the data or time setting it up. Changes + * to avoid the wastage in gcc-2.4.5-compiled code are available. + * + * mexitcount is a new profiling feature to allow accurate timing of all + * functions if an accurate clock is available. Changes to gcc-2.4.5 to + * support it are available. The changes currently don't allow not + * generating mexitcounts for non-kernel code. It is best to call + * mexitcount right at the end of a function like the MEXITCOUNT macro + * does, but the changes to gcc only implement calling it as the first + * thing in the epilogue to avoid problems with -fpic. + * + * mcount and __mexitcount may clobber the call-used registers and %ef. + * mexitcount may clobber %ecx and %ef. + * + * Cross-jumping makes accurate timing more difficult. It is handled in + * many cases by calling mexitcount before jumping.
It is not handled + * for some conditional jumps (e.g., in bcopyx) or for some fault-handling + * jumps. It is handled for some fault-handling jumps by not sharing the + * exit routine. + * + * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to + * the main entry point. Note that alt entries are counted twice. They + * have to be counted as ordinary entries for gprof to get the call times + * right for the ordinary entries. + * + * High local labels are used in macros to avoid clashes with local labels + * in functions. + * + * "ret" is used instead of "RET" because there are a lot of "ret"s. + * 0xc3 is the opcode for "ret" (#define ret ... ret fails because this + * file is preprocessed in traditional mode). "ret" clobbers eflags + * but this doesn't matter. */ -#define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \ - pushl %ebp; movl %esp,%ebp; \ - movl $1b,%eax; call mcount; popl %ebp -#else +#define ALTENTRY(name) GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f +#define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT +#define FAKE_MCOUNT(caller) pushl caller ; call __mcount ; popl %ecx +#define MCOUNT call __mcount +#define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT +#define MEXITCOUNT call mexitcount +#define ret MEXITCOUNT ; .byte 0xc3 +#else /* not GPROF */ /* * ALTENTRY() has to align because it is before a corresponding ENTRY(). * ENTRY() has to align to because there may be no ALTENTRY() before it. - * If there is a previous ALTENTRY() then the alignment code is empty. + * If there is a previous ALTENTRY() then the alignment code for ENTRY() + * is empty. 
*/ -#define ALTENTRY(name) GEN_ENTRY(_/**/name) -#define ENTRY(name) GEN_ENTRY(_/**/name) +#define ALTENTRY(name) GEN_ENTRY(name) +#define ENTRY(name) GEN_ENTRY(name) +#define FAKE_MCOUNT(caller) #define MCOUNT +#define MCOUNT_LABEL(name) +#define MEXITCOUNT +#endif /* GPROF */ -#endif - +/* XXX NOP and FASTER_NOP are misleadingly named */ #ifdef DUMMY_NOPS /* this will break some older machines */ #define FASTER_NOP #define NOP diff --git a/sys/i386/include/profile.h b/sys/i386/include/profile.h index 9fe27ec5eda8..c55d629e0551 100644 --- a/sys/i386/include/profile.h +++ b/sys/i386/include/profile.h @@ -31,35 +31,59 @@ * SUCH DAMAGE. * * @(#)profile.h 8.1 (Berkeley) 6/11/93 - * $Id: profile.h,v 1.3 1994/08/21 04:55:29 paul Exp $ + * $Id: profile.h,v 1.4 1994/09/15 16:27:14 paul Exp $ */ -#ifndef _I386_MACHINE_PROFILE_H_ -#define _I386_MACHINE_PROFILE_H_ +#ifndef _MACHINE_PROFILE_H_ +#define _MACHINE_PROFILE_H_ +#if 0 #define _MCOUNT_DECL static inline void _mcount #define MCOUNT \ extern void mcount() asm("mcount"); void mcount() { \ - int selfpc, frompcindex; \ + fptrint_t selfpc, frompc; \ /* \ - * find the return address for mcount, \ + * Find the return address for mcount, \ * and the return address for mcount's caller. \ * \ - * selfpc = pc pushed by mcount call \ + * selfpc = pc pushed by call to mcount \ */ \ asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \ /* \ - * frompcindex = pc pushed by jsr into self. \ - * In GCC the caller's stack frame has already been built so we \ - * have to chase a6 to find caller's raddr. \ + * frompc = pc pushed by call to mcount's caller. \ + * The caller's stack frame has already been built, so %ebp is \ + * the caller's frame pointer. The caller's raddr is in the \ + * caller's frame following the caller's caller's frame pointer. 
\ */ \ - asm("movl (%%ebp),%0" : "=r" (frompcindex)); \ - frompcindex = ((int *)frompcindex)[1]; \ - _mcount(frompcindex, selfpc); \ + asm("movl (%%ebp),%0" : "=r" (frompc)); \ + frompc = ((fptrint_t *)frompc)[1]; \ + _mcount(frompc, selfpc); \ } +#else +#define _MCOUNT_DECL void mcount +#define MCOUNT +#endif -#define MCOUNT_ENTER save_eflags = read_eflags(); disable_intr() -#define MCOUNT_EXIT write_eflags(save_eflags) +#define MCOUNT_ENTER { save_eflags = read_eflags(); disable_intr(); } +#define MCOUNT_EXIT (write_eflags(save_eflags)) -#endif +#define CALIB_SCALE 1000 +#define KCOUNT(p,index) ((p)->kcount[(index) \ + / (HISTFRACTION * sizeof(*(p)->kcount))]) +#define PC_TO_I(p, pc) ((fptrint_t)(pc) - (fptrint_t)(p)->lowpc) + +/* An unsigned integral type that can hold function pointers. */ +typedef u_int fptrint_t; + +/* + * An unsigned integral type that can hold non-negative difference between + * function pointers. + */ +typedef int fptrdiff_t; + +u_int cputime __P((void)); +void mcount __P((fptrint_t frompc, fptrint_t selfpc)); +void mexitcount __P((fptrint_t selfpc)); + +#endif /* !MACHINE_PROFILE_H */ diff --git a/sys/i386/isa/prof_machdep.c b/sys/i386/isa/prof_machdep.c new file mode 100644 index 000000000000..2aa6787d69bc --- /dev/null +++ b/sys/i386/isa/prof_machdep.c @@ -0,0 +1,153 @@ +#include <sys/param.h> +#include <sys/systm.h> +#include <machine/clock.h> +#include <i386/isa/isa.h> +#include <i386/isa/timerreg.h> + +#ifdef GUPROF +extern u_int cputime __P((void)); +#endif + +#ifdef __GNUC__ +asm(" +GM_STATE = 0 +GMON_PROF_OFF = 3 + + .text + .align 4,0x90 + .globl __mcount +__mcount: + # + # Check that we are profiling. Do it early for speed. + # + cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE + je Lmcount_exit + # + # __mcount is the same as mcount except the caller hasn't changed + # the stack except to call here, so the caller's raddr is above + # our raddr. 
+ # + movl 4(%esp),%edx + jmp Lgot_frompc + + .align 4,0x90 + .globl mcount +mcount: + cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE + je Lmcount_exit + # + # The caller's stack frame has already been built, so %ebp is + # the caller's frame pointer. The caller's raddr is in the + # caller's frame following the caller's caller's frame pointer. + # + movl 4(%ebp),%edx +Lgot_frompc: + # + # Our raddr is the caller's pc. + # + movl (%esp),%eax + + pushf + pushl %eax + pushl %edx + cli + call _mcount + addl $8,%esp + popf +Lmcount_exit: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ + +#ifdef GUPROF +/* + * mexitcount saves the return register(s), loads selfpc and calls + * mexitcount(selfpc) to do the work. Someday it should be in a machine + * dependent file together with cputime(), __mcount and mcount. cputime() + * can't just be put in machdep.c because it has to be compiled without -pg. + */ +#ifdef __GNUC__ +asm(" + .text +# +# Dummy label to be seen when gprof -u hides mexitcount. +# + .align 4,0x90 + .globl __mexitcount +__mexitcount: + nop + +GMON_PROF_HIRES = 4 + + .align 4,0x90 + .globl mexitcount +mexitcount: + cmpl $GMON_PROF_HIRES,__gmonparam+GM_STATE + jne Lmexitcount_exit + pushl %edx + pushl %eax + movl 8(%esp),%eax + pushf + pushl %eax + cli + call _mexitcount + addl $4,%esp + popf + popl %eax + popl %edx +Lmexitcount_exit: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ + +/* + * Return the time elapsed since the last call. The units are machine- + * dependent. + */ +u_int +cputime() +{ + u_int count; + u_int delta; + u_char low; + static u_int prev_count; + + /* + * Read the current value of the 8254 timer counter 0. + */ + outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); + low = inb(TIMER_CNTR0); + count = low | (inb(TIMER_CNTR0) << 8); + + /* + * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. 
+ * While profiling is enabled, this routine is called at least twice + * per timer reset (for mcounting and mexitcounting hardclock()), + * so at most one reset has occurred since the last call, and one + * has occurred iff the current count is larger than the previous + * count. This allows counter underflow to be detected faster + * than in microtime(). + */ + delta = prev_count - count; + prev_count = count; + if ((int) delta <= 0) + return (delta + timer0_max_count); + return (delta); +} +#else /* not GUPROF */ +#ifdef __GNUC__ +asm(" + .text + .align 4,0x90 + .globl mexitcount +mexitcount: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ +#endif /* GUPROF */ diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c index b9c1ae21e170..0727f9b7ce94 100644 --- a/sys/kern/subr_prof.c +++ b/sys/kern/subr_prof.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)subr_prof.c 8.3 (Berkeley) 9/23/93 - * $Id: subr_prof.c,v 1.14 1995/12/14 08:31:44 phk Exp $ + * $Id: subr_prof.c,v 1.15 1995/12/26 01:21:39 bde Exp $ */ #include <sys/param.h> @@ -62,6 +62,11 @@ kmstartup(dummy) { char *cp; struct gmonparam *p = &_gmonparam; +#ifdef GUPROF + fptrint_t kmstartup_addr; + int i; +#endif + /* * Round lowpc and highpc to multiples of the density we're using * so the rest of the scaling (here and in gprof) stays in ints. @@ -89,9 +94,74 @@ kmstartup(dummy) bzero(cp, p->kcountsize + p->tossize + p->fromssize); p->tos = (struct tostruct *)cp; cp += p->tossize; - p->kcount = (u_short *)cp; + p->kcount = (HISTCOUNTER *)cp; cp += p->kcountsize; p->froms = (u_short *)cp; + +#ifdef GUPROF + /* + * Initialize pointers to overhead counters. + */ + p->cputime_count = &KCOUNT(p, PC_TO_I(p, cputime)); + p->mcount_count = &KCOUNT(p, PC_TO_I(p, mcount)); + p->mexitcount_count = &KCOUNT(p, PC_TO_I(p, mexitcount)); + + /* + * Determine overheads. 
+ */ + disable_intr(); + p->state = GMON_PROF_HIRES; + + p->cputime_overhead = 0; + (void)cputime(); + for (i = 0; i < CALIB_SCALE; i++) + p->cputime_overhead += cputime(); + + (void)cputime(); + for (i = 0; i < CALIB_SCALE; i++) +#if defined(i386) && __GNUC__ >= 2 + /* + * Underestimate slightly by always calling __mcount, never + * mcount. + */ + asm("pushl %0; call __mcount; popl %%ecx" + : + : "i" (kmstartup) + : "ax", "bx", "cx", "dx", "memory"); +#else +#error +#endif + p->mcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup)); + + (void)cputime(); + for (i = 0; i < CALIB_SCALE; i++) +#if defined(i386) && __GNUC__ >= 2 + asm("call mexitcount; 1:" + : : : "ax", "bx", "cx", "dx", "memory"); + asm("movl $1b,%0" : "=rm" (kmstartup_addr)); +#else +#error +#endif + p->mexitcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup_addr)); + + p->state = GMON_PROF_OFF; + enable_intr(); + + p->mcount_overhead_sub = p->mcount_overhead - p->cputime_overhead; + p->mexitcount_overhead_sub = p->mexitcount_overhead + - p->cputime_overhead; + printf("Profiling overheads: %u+%u %u+%u\n", + p->cputime_overhead, p->mcount_overhead_sub, + p->cputime_overhead, p->mexitcount_overhead_sub); + p->cputime_overhead_frac = p->cputime_overhead % CALIB_SCALE; + p->cputime_overhead /= CALIB_SCALE; + p->mcount_overhead_frac = p->mcount_overhead_sub % CALIB_SCALE; + p->mcount_overhead_sub /= CALIB_SCALE; + p->mcount_overhead /= CALIB_SCALE; + p->mexitcount_overhead_frac = p->mexitcount_overhead_sub % CALIB_SCALE; + p->mexitcount_overhead_sub /= CALIB_SCALE; + p->mexitcount_overhead /= CALIB_SCALE; +#endif /* GUPROF */ } /* @@ -104,6 +174,7 @@ sysctl_kern_prof SYSCTL_HANDLER_ARGS u_int namelen = arg2; struct gmonparam *gp = &_gmonparam; int error; + int state; /* all sysctl names at this level are terminal */ if (namelen != 1) @@ -111,13 +182,27 @@ sysctl_kern_prof SYSCTL_HANDLER_ARGS switch (name[0]) { case GPROF_STATE: - error = sysctl_handle_int(oidp, &gp->state, 0, req); + state = gp->state; + error = 
sysctl_handle_int(oidp, &state, 0, req); if (error) return (error); - if (gp->state == GMON_PROF_OFF) + if (!req->newptr) + return (0); + if (state == GMON_PROF_OFF) { stopprofclock(&proc0); - else + gp->state = state; + } else if (state == GMON_PROF_ON) { + gp->profrate = profhz; + gp->state = state; startprofclock(&proc0); +#ifdef GUPROF + } else if (state == GMON_PROF_HIRES) { + gp->profrate = 1193182; /* XXX */ + stopprofclock(&proc0); + gp->state = state; +#endif + } else if (state != gp->state) + return (EINVAL); return (0); case GPROF_COUNT: return (sysctl_handle_opaque(oidp, diff --git a/sys/libkern/mcount.c b/sys/libkern/mcount.c index fc3625b1974d..ed0e68d5cea5 100644 --- a/sys/libkern/mcount.c +++ b/sys/libkern/mcount.c @@ -1,180 +1,4 @@ -/*- - * Copyright (c) 1983, 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $Id: mcount.c,v 1.3 1994/09/15 15:41:48 paul Exp $ - */ +#define GPROF4 1 /* XXX can't get at kernel options */ +#define GUPROF 1 /* XXX can't get at kernel options */ -#include <sys/param.h> -#include <sys/gmon.h> -#ifdef KERNEL -#include <i386/include/cpufunc.h> -#endif - -/* - * mcount is called on entry to each function compiled with the profiling - * switch set. _mcount(), which is declared in a machine-dependent way - * with _MCOUNT_DECL, does the actual work and is either inlined into a - * C routine or called by an assembly stub. In any case, this magic is - * taken care of by the MCOUNT definition in <machine/profile.h>. - * - * _mcount updates data structures that represent traversals of the - * program's call graph edges. frompc and selfpc are the return - * address and function address that represents the given call graph edge. - * - * Note: the original BSD code used the same variable (frompcindex) for - * both frompcindex and frompc. Any reasonable, modern compiler will - * perform this optimization. 
- */ -_MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */ - register u_long frompc, selfpc; -{ - register u_short *frompcindex; - register struct tostruct *top, *prevtop; - register struct gmonparam *p; - register long toindex; -#ifdef KERNEL - register int s; - u_long save_eflags; -#endif - - p = &_gmonparam; - /* - * check that we are profiling - * and that we aren't recursively invoked. - */ - if (p->state != GMON_PROF_ON) - return; -#ifdef KERNEL - MCOUNT_ENTER; -#else - p->state = GMON_PROF_BUSY; -#endif - /* - * check that frompcindex is a reasonable pc value. - * for example: signal catchers get called from the stack, - * not from text space. too bad. - */ - frompc -= p->lowpc; - if (frompc > p->textsize) - goto done; - - frompcindex = &p->froms[frompc / (p->hashfraction * sizeof(*p->froms))]; - toindex = *frompcindex; - if (toindex == 0) { - /* - * first time traversing this arc - */ - toindex = ++p->tos[0].link; - if (toindex >= p->tolimit) - /* halt further profiling */ - goto overflow; - - *frompcindex = toindex; - top = &p->tos[toindex]; - top->selfpc = selfpc; - top->count = 1; - top->link = 0; - goto done; - } - top = &p->tos[toindex]; - if (top->selfpc == selfpc) { - /* - * arc at front of chain; usual case. - */ - top->count++; - goto done; - } - /* - * have to go looking down chain for it. - * top points to what we are looking at, - * prevtop points to previous top. - * we know it is not at the head of the chain. - */ - for (; /* goto done */; ) { - if (top->link == 0) { - /* - * top is end of the chain and none of the chain - * had top->selfpc == selfpc. - * so we allocate a new tostruct - * and link it to the head of the chain. - */ - toindex = ++p->tos[0].link; - if (toindex >= p->tolimit) - goto overflow; - - top = &p->tos[toindex]; - top->selfpc = selfpc; - top->count = 1; - top->link = *frompcindex; - *frompcindex = toindex; - goto done; - } - /* - * otherwise, check the next arc on the chain. 
- */ - prevtop = top; - top = &p->tos[top->link]; - if (top->selfpc == selfpc) { - /* - * there it is. - * increment its count - * move it to the head of the chain. - */ - top->count++; - toindex = prevtop->link; - prevtop->link = top->link; - top->link = *frompcindex; - *frompcindex = toindex; - goto done; - } - - } -done: -#ifdef KERNEL - MCOUNT_EXIT; -#else - p->state = GMON_PROF_ON; -#endif - return; -overflow: - p->state = GMON_PROF_ERROR; -#ifdef KERNEL - MCOUNT_EXIT; -#endif - return; -} - -/* - * Actual definition of mcount function. Defined in <machine/profile.h>, - * which is included by <sys/gmon.h>. - */ -MCOUNT +#include "../lib/libc/gmon/mcount.c" /* XXX */ diff --git a/sys/sys/gmon.h b/sys/sys/gmon.h index 9b3882de918a..619e94c45acb 100644 --- a/sys/sys/gmon.h +++ b/sys/sys/gmon.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)gmon.h 8.2 (Berkeley) 1/4/94 - * $Id: gmon.h,v 1.6 1995/05/30 08:14:22 rgrimes Exp $ + * $Id: gmon.h,v 1.7 1995/08/29 03:09:14 bde Exp $ */ #ifndef _SYS_GMON_H_ @@ -49,18 +49,33 @@ struct gmonhdr { int version; /* version number */ int profrate; /* profiling clock rate */ int spare[3]; /* reserved */ + /* XXX should record counter size and density */ }; #define GMONVERSION 0x00051879 /* - * histogram counters are unsigned shorts (according to the kernel). + * Type of histogram counters used in the kernel. */ +#ifdef GPROF4 +#define HISTCOUNTER unsigned +#else #define HISTCOUNTER unsigned short +#endif /* - * fraction of text space to allocate for histogram counters here, 1/2 + * Fraction of text space to allocate for histogram counters. + * We allocate counters at the same or higher density as function + * addresses, so that each counter belongs to a unique function. + * A lower density of counters would give less resolution but a + * higher density would be wasted. + * + * Assume that function addresses are at least 4-byte-aligned. 
+ * It would be better to get the linker to align functions more + * strictly so that we could use smaller tables. */ -#define HISTFRACTION 2 +#define FUNCTION_ALIGNMENT 4 +#define HISTFRACTION (FUNCTION_ALIGNMENT / sizeof(HISTCOUNTER) == 0 \ + ? 1 : FUNCTION_ALIGNMENT / sizeof(HISTCOUNTER)) /* * Fraction of text space to allocate for from hash buckets. @@ -90,7 +105,23 @@ struct gmonhdr { * profiling data structures without (in practice) sacrificing * any granularity. */ -#define HASHFRACTION 2 +/* + * XXX I think the above analysis completely misses the point. I think + * the point is that addresses in different functions must hash to + * different values. Since the hash is essentially division by + * sizeof(unsigned short), the correct formula is: + * + * HASHFRACTION = MIN_FUNCTION_ALIGNMENT / sizeof(unsigned short) + * + * Note that the unsigned short here has nothing to do with the one for + * HISTFRACTION. + * + * Hash collisions from a two call sequence don't matter. They get + * handled like collisions for calls to different addresses from the + * same address through a function pointer. + */ +#define HASHFRACTION (FUNCTION_ALIGNMENT / sizeof(unsigned short) == 0 \ + ? 1 : FUNCTION_ALIGNMENT / sizeof(unsigned short)) /* * percent of text space to allocate for tostructs with a minimum. 
@@ -132,17 +163,33 @@ struct rawarc { */ struct gmonparam { int state; - u_short *kcount; + HISTCOUNTER *kcount; u_long kcountsize; u_short *froms; u_long fromssize; struct tostruct *tos; u_long tossize; long tolimit; - u_long lowpc; - u_long highpc; + fptrint_t lowpc; + fptrint_t highpc; u_long textsize; u_long hashfraction; + u_long profrate; + HISTCOUNTER *cputime_count; + u_int cputime_overhead; + u_int cputime_overhead_frac; + u_int cputime_overhead_resid; + u_int cputime_overhead_sub; + HISTCOUNTER *mcount_count; + u_int mcount_overhead; + u_int mcount_overhead_frac; + u_int mcount_overhead_resid; + u_int mcount_overhead_sub; + HISTCOUNTER *mexitcount_count; + u_int mexitcount_overhead; + u_int mexitcount_overhead_frac; + u_int mexitcount_overhead_resid; + u_int mexitcount_overhead_sub; }; extern struct gmonparam _gmonparam; @@ -153,6 +200,7 @@ extern struct gmonparam _gmonparam; #define GMON_PROF_BUSY 1 #define GMON_PROF_ERROR 2 #define GMON_PROF_OFF 3 +#define GMON_PROF_HIRES 4 /* * Sysctl definitions for extracting profiling information from the kernel. 
diff --git a/usr.bin/Makefile b/usr.bin/Makefile index 4f2edb9e6f6f..37844eb3beb7 100644 --- a/usr.bin/Makefile +++ b/usr.bin/Makefile @@ -1,5 +1,5 @@ # From: @(#)Makefile 8.3 (Berkeley) 1/7/94 -# $Id: Makefile,v 1.62 1995/10/03 12:29:28 bde Exp $ +# $Id: Makefile,v 1.63 1995/10/23 16:44:22 peter Exp $ # XXX MISSING: deroff diction graph learn plot # spell spline struct units xsend @@ -25,6 +25,7 @@ SUBDIR= apply ar at banner basename biff cal calendar \ unexpand unifdef uniq unvis users uudecode uuencode vacation \ vgrind vi vis w wall wc what whereis which who whois window \ write xargs xinstall xstr yacc yes ypcat ypmatch ypwhich +SUBDIR+=gprof4 .if !exists(../secure) || defined(NOSECURE) SUBDIR+=telnet diff --git a/usr.bin/gprof4/Makefile b/usr.bin/gprof4/Makefile new file mode 100644 index 000000000000..f5c2c83bd33d --- /dev/null +++ b/usr.bin/gprof4/Makefile @@ -0,0 +1,14 @@ +# This was cloned from the Makefile for gprof by changing PROG from gprof +# to gprof4, adding NOMAN and PATH, adding -DGPROF4 to CFLAGS and deleting +# beforeinstall. + +# @(#)Makefile 5.17 (Berkeley) 5/11/90 + +PROG= gprof4 +NOMAN= noman +SRCS= gprof.c arcs.c dfn.c lookup.c ${MACHINE}.c hertz.c \ + printgprof.c printlist.c +CFLAGS+=-DGPROF4 +.PATH: ${.CURDIR}/../../usr.bin/gprof + +.include <bsd.prog.mk> diff --git a/usr.sbin/config/config.8 b/usr.sbin/config/config.8 index 5f93a861d256..32cb3609bfdd 100644 --- a/usr.sbin/config/config.8 +++ b/usr.sbin/config/config.8 @@ -88,6 +88,11 @@ will configure a system for profiling; for example, .Xr kgmon 8 and .Xr gprof 1 . +If two or more +.Fl p +options are supplied, +.Nm config +will configure a system for high resolution profiling. 
.It Fl n If the .Fl n diff --git a/usr.sbin/config/mkmakefile.c b/usr.sbin/config/mkmakefile.c index 8f6ca3ae878b..f8e01d03d225 100644 --- a/usr.sbin/config/mkmakefile.c +++ b/usr.sbin/config/mkmakefile.c @@ -157,8 +157,10 @@ makefile() } fprintf(ofp, "KERN_IDENT=%s\n", raise(ident)); fprintf(ofp, "IDENT="); - if (profiling) + if (profiling >= 1) fprintf(ofp, " -DGPROF"); + if (profiling >= 2) + fprintf(ofp, " -DGPROF4 -DGUPROF"); if (cputype == 0) { printf("cpu type must be specified\n"); @@ -202,8 +204,10 @@ makefile() fprintf(ofp, "%s=%s\n", op->op_name, op->op_value); if (debugging) fprintf(ofp, "DEBUG=-g\n"); - if (profiling) + if (profiling >= 1) fprintf(ofp, "PROF=-pg\n"); + if (profiling >= 2) + fprintf(ofp, "PROF+=-mprofiler-epilogue\n"); while (fgets(line, BUFSIZ, ifp) != 0) { if (*line != '%') { fprintf(ofp, "%s", line); diff --git a/usr.sbin/kgmon/kgmon.8 b/usr.sbin/kgmon/kgmon.8 index 114fea8dea69..4a915826c37d 100644 --- a/usr.sbin/kgmon/kgmon.8 +++ b/usr.sbin/kgmon/kgmon.8 @@ -39,7 +39,7 @@ .Nd generate a dump of the operating system's profile buffers .Sh SYNOPSIS .Nm kgmon -.Op Fl bhpr +.Op Fl Bbhpr .Op Fl M core .Op Fl N system .Sh DESCRIPTION @@ -62,8 +62,10 @@ file suitable for later analysis by .Pp The options are as follows: .Bl -tag -width Ds +.It Fl B +Resume the collection of high resolution profile data. .It Fl b -Resume the collection of profile data. +Resume the collection of low resolution profile data. .It Fl h Stop the collection of profile data. .It Fl p @@ -86,6 +88,8 @@ default ``/kernel''. .El .Pp If neither +.Fl B +nor .Fl b nor .Fl h @@ -96,6 +100,9 @@ flag is specified and profile data is being collected, profiling will be momentarily suspended, the operating system profile buffers will be dumped, and profiling will be immediately resumed. +.Pp +The profile buffers should be reset when the resolution +of the profile data is changed. 
.Sh FILES .Bl -tag -width /dev/kmemx -compact .It Pa /kernel diff --git a/usr.sbin/kgmon/kgmon.c b/usr.sbin/kgmon/kgmon.c index 1a2923bd5b25..c283f169a4ab 100644 --- a/usr.sbin/kgmon/kgmon.c +++ b/usr.sbin/kgmon/kgmon.c @@ -69,7 +69,7 @@ struct kvmvars { struct gmonparam gpm; }; -int bflag, hflag, kflag, rflag, pflag; +int Bflag, bflag, hflag, kflag, rflag, pflag; int debug = 0; void setprof __P((struct kvmvars *kvp, int state)); void dumpstate __P((struct kvmvars *kvp)); @@ -87,7 +87,7 @@ main(int argc, char **argv) seteuid(getuid()); kmemf = NULL; system = NULL; - while ((ch = getopt(argc, argv, "M:N:bhpr")) != EOF) { + while ((ch = getopt(argc, argv, "M:N:Bbhpr")) != EOF) { switch((char)ch) { case 'M': @@ -99,6 +99,10 @@ main(int argc, char **argv) system = optarg; break; + case 'B': + Bflag = 1; + break; + case 'b': bflag = 1; break; @@ -117,7 +121,7 @@ main(int argc, char **argv) default: (void)fprintf(stderr, - "usage: kgmon [-bhrp] [-M core] [-N system]\n"); + "usage: kgmon [-Bbhrp] [-M core] [-N system]\n"); exit(1); } } @@ -140,6 +144,8 @@ main(int argc, char **argv) mode = getprof(&kvmvars); if (hflag) disp = GMON_PROF_OFF; + else if (Bflag) + disp = GMON_PROF_HIRES; else if (bflag) disp = GMON_PROF_ON; else @@ -151,7 +157,12 @@ main(int argc, char **argv) if (accessmode == O_RDWR) setprof(&kvmvars, disp); (void)fprintf(stdout, "kgmon: kernel profiling is %s.\n", - disp == GMON_PROF_OFF ? "off" : "running"); + disp == GMON_PROF_OFF ? "off" : + disp == GMON_PROF_HIRES ? "running (high resolution)" : + disp == GMON_PROF_ON ? "running" : + disp == GMON_PROF_BUSY ? "busy" : + disp == GMON_PROF_ERROR ? 
"off (error)" : + "in an unknown state"); return (0); } @@ -176,8 +187,9 @@ openfiles(system, kmemf, kvp) "kgmon: profiling not defined in kernel.\n"); exit(20); } - if (!(bflag || hflag || rflag || - (pflag && state == GMON_PROF_ON))) + if (!(Bflag || bflag || hflag || rflag || + (pflag && + (state == GMON_PROF_HIRES || state == GMON_PROF_ON)))) return (O_RDONLY); (void)seteuid(0); if (sysctl(mib, 3, NULL, NULL, &state, size) >= 0) @@ -186,7 +198,8 @@ openfiles(system, kmemf, kvp) kern_readonly(state); return (O_RDONLY); } - openmode = (bflag || hflag || pflag || rflag) ? O_RDWR : O_RDONLY; + openmode = (Bflag || bflag || hflag || pflag || rflag) + ? O_RDWR : O_RDONLY; kvp->kd = kvm_openfiles(system, kmemf, NULL, openmode, errbuf); if (kvp->kd == NULL) { if (openmode == O_RDWR) { @@ -221,15 +234,17 @@ kern_readonly(mode) { (void)fprintf(stderr, "kgmon: kernel read-only: "); - if (pflag && mode == GMON_PROF_ON) + if (pflag && (mode == GMON_PROF_HIRES || mode == GMON_PROF_ON)) (void)fprintf(stderr, "data may be inconsistent\n"); if (rflag) (void)fprintf(stderr, "-r supressed\n"); + if (Bflag) + (void)fprintf(stderr, "-B supressed\n"); if (bflag) (void)fprintf(stderr, "-b supressed\n"); if (hflag) (void)fprintf(stderr, "-h supressed\n"); - rflag = bflag = hflag = 0; + rflag = Bflag = bflag = hflag = 0; } /* @@ -324,7 +339,9 @@ dumpstate(kvp) h.hpc = kvp->gpm.highpc; h.ncnt = kvp->gpm.kcountsize + sizeof(h); h.version = GMONVERSION; - h.profrate = getprofhz(kvp); + h.profrate = kvp->gpm.profrate; + if (h.profrate == 0) + h.profrate = getprofhz(kvp); /* ancient kernel */ fwrite((char *)&h, sizeof(h), 1, fp); /* |