~ft/h264bsd

4b7065cc238365afc13bd3a92c500b4629ea629b — Sigrid Haflínudóttir 2 months ago fbf22d9
add faster x86 version of clz
6 files changed, 17 insertions(+), 32 deletions(-)

A src/clz.386.s
A src/clz.amd64.s
M src/h264bsd_util.c
M src/h264bsd_util.h
M src/h264bsd_vlc.c
M src/mkfile
A src/clz.386.s => src/clz.386.s +4 -0
@@ 0,0 1,4 @@
/*
 * u32 h264bsdCountLeadingZeros(u32 value)
 *
 * Returns the number of leading zero bits in the 32-bit value,
 * or 32 when value is 0.
 *
 * On the 386 the argument arrives on the stack, so it is loaded
 * into AX explicitly before the bit scan.
 */
TEXT h264bsdCountLeadingZeros(SB),$0
	MOVL value+0(FP), AX
	TESTL AX, AX
	JEQ zero	/* BSR leaves its destination undefined for a zero source */
	BYTE $0x0F; BYTE $0xBD; BYTE $0xC0 /* BSRL AX, AX */
	XORL $31, AX	/* index of highest set bit -> leading-zero count */
	RET
zero:
	MOVL $32, AX	/* all 32 bits are zero */
	RET

A src/clz.amd64.s => src/clz.amd64.s +4 -0
@@ 0,0 1,4 @@
/*
 * u32 h264bsdCountLeadingZeros(u32 value)
 *
 * Returns the number of leading zero bits in the 32-bit value,
 * or 32 when value is 0.
 *
 * The argument is read from BP (RARG), the register the encoded
 * BSR instruction below uses as its source.
 */
TEXT h264bsdCountLeadingZeros(SB),$0
	TESTL BP, BP
	JEQ zero	/* BSR leaves its destination undefined for a zero source */
	BYTE $0x0F; BYTE $0xBD; BYTE $0xC5 /* BSRL RARG, AX */
	XORL $31, AX	/* index of highest set bit -> leading-zero count */
	RET
zero:
	MOVL $32, AX	/* all 32 bits are zero */
	RET

M src/h264bsd_util.c => src/h264bsd_util.c +7 -20
@@ 68,7 68,6 @@ const u32 h264bsdQpC[52] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,

        Inputs:
            value   code word
            length  number of bits in the code word

        Outputs:
            none


@@ 77,26 76,14 @@ const u32 h264bsdQpC[52] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
            number of leading zeros in the code word

------------------------------------------------------------------------------*/
#ifndef __plan9__
/* Portable fallback, compiled only when __plan9__ is not defined.
 * Counts the zero bits above the most significant set bit of a 32-bit
 * word and returns 32 when the word is zero. */
u32 h264bsdCountLeadingZeros(u32 value)
{

/* Variables */

    u32 zeros = 0;

/* Code */

    /* a zero word has no set bit to stop the scan below */
    if (value == 0)
        return(32);

    /* shift left until the top bit is set, counting the shifts */
    while ((value & 0x80000000U) == 0)
    {
        value <<= 1;
        zeros++;
    }

    return(zeros);

}
#endif
/*------------------------------------------------------------------------------

M src/h264bsd_util.h => src/h264bsd_util.h +0 -4
@@ 163,11 163,7 @@ extern const u32 h264bsdQpC[52];
/*------------------------------------------------------------------------------
    4. Function prototypes
------------------------------------------------------------------------------*/
/* Returns the number of leading zero bits in a 32-bit code word.
 * The C and assembly implementations all take the value alone, so a
 * single prototype replaces the old H264DEC_NEON conditional. */
u32 h264bsdCountLeadingZeros(u32 value);
u32 h264bsdRbspTrailingBits(strmData_t *strmData);

u32 h264bsdMoreRbspData(strmData_t *strmData);

M src/h264bsd_vlc.c => src/h264bsd_vlc.c +0 -4
@@ 150,11 150,7 @@ u32 h264bsdDecodeExpGolombUnsigned(strmData_t *pStrmData, u32 *codeNum)
    /* other code lengths */
    else
    {
#ifndef H264DEC_NEON
        numZeros = 4 + h264bsdCountLeadingZeros(bits, 28);
#else
        numZeros = h264bsdCountLeadingZeros(bits);
#endif
        /* all 32 bits are zero */
        if (numZeros == 32)
        {

M src/mkfile => src/mkfile +2 -4
@@ 34,6 34,7 @@ HFILES=\
	h264bsd_vui.h\

OFILES=\
	clz.$objtype.$O\
	h264bsd_byte_stream.$O\
	h264bsd_cavlc.$O\
	h264bsd_conceal.$O\


@@ 61,7 62,4 @@ OFILES=\
	h264bsd_vlc.$O\
	h264bsd_vui.$O\

/sys/include/%.h:
	cp ../include/$stem.h /sys/include/$stem.h

</sys/src/cmd/mksyslib
</sys/src/cmd/mklib