PMU performance counters in Beagle Board C4

I don't seem to be able to enable the performance counters on my
beagleboard, and I'm certain that there's an obvious step that I'm
missing. The cycle counter works just fine, but the 4 performance
counter registers only ever return 0. When I read from any of the
setup registers like the PMNC etc they seem to have the right values,
or at least the values that I'm putting in them.

I'm running a fairly recent build of Angstrom (2.6.32 kernel) on my C4
board. Here's the code that I'm using to set things up:

<blockquote>

inline void _setCounting( int enable )
{
    unsigned bits;
    asm ("MRC p15, 0, %0, C9, C12, 0\n\t" : "=r" (bits));
    bits |= enable; // counting enable
    asm ("MCR p15, 0, %0, C9, C12, 0\n\t" :: "r" (bits));
}

inline void _resetClockCounter()
{
    unsigned bits;
    asm ("MRC p15, 0, %0, C9, C12, 0\n\t" : "=r" (bits));
    bits |= (1<<2);
    asm ("MCR p15, 0, %0, C9, C12, 0\n\t" :: "r" (bits));
}

inline void _resetEventCounters()
{
    unsigned bits;
    asm ("MRC p15, 0, %0, C9, C12, 0\n\t" : "=r" (bits));
    bits |= (1<<1);
    asm ("MCR p15, 0, %0, C9, C12, 0\n\t" :: "r" (bits));
}

inline void _enableCounters( int cycleCounter
                           , int counter0
                           , int counter1
                           , int counter2
                           , int counter3 )
{
    unsigned bits = cycleCounter << 31
        > counter0 << 0
        > counter1 << 1
        > counter2 << 2
        > counter3 << 3;
    asm ("MCR p15, 0, %0, C9, C12, 1\n\t" :: "r" (bits));
}

inline void _disableCounters()
{
    asm ("MCR p15, 0, %0, C9, C12, 2\n\t" :: "r" (1<<31 | 1<<3 | 1<<2

1<<1 | 1<<0));

}

inline void _incPerfCounter( int counter )
{
    asm ("MCR p15, 0, %0, C9, C12, 4\n\t" :: "r" (1<<counter));
}

inline void _selectPerfCounter( int counter )
{
    asm ("MCR p15, 0, %0, C9, C12, 5\n\t" :: "r" (counter));
}

inline void _setPerfCounterFunction( int function )
{
    asm ("MCR p15, 0, %0, C9, C13, 1\n\t" :: "r" (function));
}

inline void _writeToPerfCounter( unsigned val )
{
    asm ("MCR p15, 0, %0, C9, C13, 2\n\t" :: "r" (val));
}

inline void _writeToCycleCounter( unsigned val )
{
    asm ("MCR p15, 0, %0, C9, C13, 0\n\t" :: "r" (val));
}

inline unsigned _readFromCycleCounter()
{
    unsigned retval;
    asm ("MRC p15, 0, %0, C9, C13, 0\n\t" : "=r" (retval));
    return retval;
}

inline unsigned _readFromPerfCounter()
{
    unsigned retval;
    asm ("MRC p15, 0, %0, C9, C13, 2\n\t" : "=r" (retval));
    return retval;
}

/*
 * Configure and start the cycle counter and the four event counters.
 *
 * count0func..count3func: event numbers passed to
 * _setPerfCounterFunction() for event counters 0..3 respectively.
 *
 * Sequence: turn counting on, request a reset of the cycle counter and
 * the event counters, assign one event to each counter, then enable all
 * five counters.
 */
inline void startCounters( int count0func
           , int count1func
           , int count2func
           , int count3func )
{
    int events[4];
    int idx;

    events[0] = count0func;
    events[1] = count1func;
    events[2] = count2func;
    events[3] = count3func;

    _setCounting(1);
    _resetClockCounter();
    _resetEventCounters();

    /* Select each counter in turn and give it its event number. */
    for (idx = 0; idx < 4; ++idx)
    {
        _selectPerfCounter(idx);
        _setPerfCounterFunction( events[idx] );
    }

    _enableCounters(1, 1, 1, 1, 1);
}

/*
 * Stop all counters and report their values.
 *
 * cycles:      receives the cycle counter value.
 * counter0..3: receive the values of event counters 0..3.
 *
 * All pointers are dereferenced unconditionally, so none may be NULL.
 * The counters are disabled first so the values do not change while they
 * are being read one by one; finally _setCounting(0) is called.
 */
inline void stopCounters( unsigned * cycles
          , unsigned * counter0
          , unsigned * counter1
          , unsigned * counter2
          , unsigned * counter3 )
{
    unsigned *results[4];
    int idx;

    results[0] = counter0;
    results[1] = counter1;
    results[2] = counter2;
    results[3] = counter3;

    _disableCounters();

    /* Select each event counter in turn and store its value. */
    for (idx = 0; idx < 4; ++idx)
    {
        _selectPerfCounter(idx);
        *results[idx] = _readFromPerfCounter();
    }

    *cycles = _readFromCycleCounter();
    _setCounting(0);
}

</blockquote>

... and then I'm using it around a bit of code that I want to test:

<blockquote>

    /* Measure the printf() call: assign event numbers 0x8, 0xf, 0x11 and
       0x40 to event counters 0..3 and start counting.
       NOTE(review): these are presumably Cortex-A8 PMU event codes -
       confirm their meaning against the TRM. */
    startCounters( 0x8, 0xf, 0x11, 0x40 );

    printf( "Hello PMU\n" );

    /* Stop counting and collect the cycle count and the four event counts. */
    stopCounters( &cycles, &count0, &count1, &count2, &count3 );

</blockquote>

Can anyone see what I'm doing wrong here?

More information to help out - the disassembly of all of this looks
like:

...

;; _setCounting(1)
ee19 1f1c mrc 15, 0, r1, cr9, cr12, {0}
f041 0001 orr.w r0, r1, #1
ee09 0f1c mcr 15, 0, r0, cr9, cr12, {0}

;; _resetClockCounter();
ee19 3f1c mrc 15, 0, r3, cr9, cr12, {0}
f043 0704 orr.w r7, r3, #4
ee09 7f1c mcr 15, 0, r7, cr9, cr12, {0}

;; _resetEventCounters();
ee19 6f1c mrc 15, 0, r6, cr9, cr12, {0}
f046 0c02 orr.w ip, r6, #2
ee09 cf1c mcr 15, 0, ip, cr9, cr12, {0}

;; _selectPerfCounter(0);
;; _setPerfCounterFunction( count0func=0x8 );
2200 movs r2, #0
ee09 2fbc mcr 15, 0, r2, cr9, cr12, {5}
2108 movs r1, #8
ee09 1f3d mcr 15, 0, r1, cr9, cr13, {1}

;; _selectPerfCounter(1);
;; _setPerfCounterFunction( count1func=0xf );
2001 movs r0, #1
ee09 0fbc mcr 15, 0, r0, cr9, cr12, {5}
230f movs r3, #15
ee09 3f3d mcr 15, 0, r3, cr9, cr13, {1}

;; _selectPerfCounter(2);
;; _setPerfCounterFunction( count2func=0x11 );
2702 movs r7, #2
ee09 7fbc mcr 15, 0, r7, cr9, cr12, {5}
2611 movs r6, #17
ee09 6f3d mcr 15, 0, r6, cr9, cr13, {1}

;; _selectPerfCounter(3);
;; _setPerfCounterFunction( count3func=0x40 );
2203 movs r2, #3
ee09 2fbc mcr 15, 0, r2, cr9, cr12, {5}
2140 movs r1, #64 ; 0x40
ee09 1f3d mcr 15, 0, r1, cr9, cr13, {1}

;; _enableCounters(1, 1, 1, 1, 1);
200f movs r0, #15
f2c8 0000 movt r0, #32768 ; 0x8000
ee09 0f3c mcr 15, 0, r0, cr9, cr12, {1}

...

Also, after calling startCounters() but before calling
stopCounters(), this is what I get when I read from various
PMU registers. As far as I can tell, it all seems to be ok, but I
still get 0 from the perf counter regs.

PMNC: 41002001
CNTENS: 8000000f
CNTENC: 8000000f
FLAG: 00000000
SWINCR: 00000000
CCNT: 0000009f
USEREN: 00000001

Hi Ted,

This is most likely because the performance counters were not enabled when the Linux kernel was built. If you look at DBGAUTHSTATUS, you may see that NIDEN is disabled. If you have the kernel sources, enable "OMAP 3 Debug Peripherals" in the kernel configuration and rebuild the kernel.

Thanks,
Bijesh

Interesting - is NIDEN specifically de-asserted by the Linux startup?
Since I wrote this I've tried the above code in u-boot and the PMU
works just fine. In Angstrom, either in user mode or in kernel mode in
a kernel module the above code fails (the counters don't work). This
seems to suggest that it's not just that something isn't being
initialized, but rather that something is actively being turned off.
Could that be DBGAUTHSTATUS?