/* Intel SIMD MMX implementation of Viterbi ACS butterflies for 256-state (k=9) convolutional code Copyright 2004 Phil Karn, KA9Q This code may be used under the terms of the GNU Lesser General Public License (LGPL) void update_viterbi29_blk_mmx(struct v29 *vp,unsigned char *syms,int nbits); */ # These are offsets into struct v29, defined in viterbi29.h .set DP,512 .set OLDMETRICS,516 .set NEWMETRICS,520 .text .global update_viterbi29_blk_mmx,Mettab29_1,Mettab29_2 .type update_viterbi29_blk_mmx,@function .align 16 # MMX (64-bit SIMD) version # requires Pentium-MMX, Pentium-II or better update_viterbi29_blk_mmx: pushl %ebp movl %esp,%ebp pushl %esi pushl %edi pushl %edx pushl %ebx movl 8(%ebp),%edx # edx = vp movl 8(%ebp),%edx # edx = vp testl %edx,%edx jnz 0f movl -1,%eax jmp err 0: movl OLDMETRICS(%edx),%esi # esi -> old metrics movl NEWMETRICS(%edx),%edi # edi -> new metrics movl DP(%edx),%edx # edx -> decisions 1: movl 16(%ebp),%eax # eax = nbits decl %eax jl 2f # passed zero, we're done movl %eax,16(%ebp) movl 12(%ebp),%ebx # ebx = syms movw (%ebx),%ax # ax = second symbol : first symbol addl $2,%ebx movl %ebx,12(%ebp) movb %ah,%bl andl $255,%eax andl $255,%ebx # shift into first array index dimension slot shll $7,%eax shll $7,%ebx # each invocation of this macro will do 8 butterflies in parallel .MACRO butterfly GROUP # Compute branch metrics movq (Mettab29_1+8*\GROUP)(%eax),%mm3 movq fifteens,%mm0 paddb (Mettab29_2+8*\GROUP)(%ebx),%mm3 paddb ones,%mm3 # emulate pavgb - this may not be necessary psrlq $1,%mm3 pand %mm0,%mm3 movq (8*\GROUP)(%esi),%mm6 # Incoming path metric, high bit = 0 movq ((8*\GROUP)+128)(%esi),%mm2 # Incoming path metric, high bit = 1 movq %mm6,%mm1 movq %mm2,%mm7 paddb %mm3,%mm6 paddb %mm3,%mm2 pxor %mm0,%mm3 # invert branch metric paddb %mm3,%mm7 # path metric for inverted symbols paddb %mm3,%mm1 # live registers 1 2 6 7 # Compare mm6 and mm7; mm1 and mm2 pxor %mm3,%mm3 movq %mm6,%mm4 movq %mm1,%mm5 psubb %mm7,%mm4 # mm4 = mm6 - mm7 psubb %mm2,%mm5 # mm5 = mm1 - mm2 pcmpgtb %mm3,%mm4 # mm4 = first set of decisions (ff = 1 better) pcmpgtb %mm3,%mm5 # mm5 = second set of decisions # live registers 1 2 4 5 6 7 # select survivors movq %mm4,%mm0 pand %mm4,%mm7 movq %mm5,%mm3 pand %mm5,%mm2 pandn %mm6,%mm0 pandn %mm1,%mm3 por %mm0,%mm7 # mm7 = first set of survivors por %mm3,%mm2 # mm2 = second set of survivors # live registers 2 4 5 7 # interleave & store decisions in mm4, mm5 # interleave & store new branch metrics in mm2, mm7 movq %mm4,%mm3 movq %mm7,%mm0 punpckhbw %mm5,%mm4 punpcklbw %mm5,%mm3 punpcklbw %mm2,%mm7 # interleave second 8 new metrics punpckhbw %mm2,%mm0 # interleave first 8 new metrics movq %mm4,(16*\GROUP+8)(%edx) movq %mm3,(16*\GROUP)(%edx) movq %mm7,(16*\GROUP)(%edi) movq %mm0,(16*\GROUP+8)(%edi) .endm # invoke macro 16 times for a total of 128 butterflies butterfly GROUP=0 butterfly GROUP=1 butterfly GROUP=2 butterfly GROUP=3 butterfly GROUP=4 butterfly GROUP=5 butterfly GROUP=6 butterfly GROUP=7 butterfly GROUP=8 butterfly GROUP=9 butterfly GROUP=10 butterfly GROUP=11 butterfly GROUP=12 butterfly GROUP=13 butterfly GROUP=14 butterfly GROUP=15 addl $256,%edx # bump decision pointer # swap metrics movl %esi,%eax movl %edi,%esi movl %eax,%edi jmp 1b 2: emms movl 8(%ebp),%ebx # ebx = vp # stash metric pointers movl %esi,OLDMETRICS(%ebx) movl %edi,NEWMETRICS(%ebx) movl %edx,DP(%ebx) # stash incremented value of vp->dp xorl %eax,%eax err: popl %ebx popl %edx popl %edi popl %esi popl %ebp ret .data .align 8 fifteens: .byte 15,15,15,15,15,15,15,15 .align 8 ones: .byte 1,1,1,1,1,1,1,1