@;
@; utils.s
@; -------
@;  Part of the Shine MP3 encoder for MidiCon
@;  16/9/2025
@;
@; Assembler utilities.
@;
@; Signed fractional multiply by partial products.
@; For use on non StrongARM machines.
@; All the multiply functions have the following c declaration:
@;  long mul(long a1, long a2);
@;
        .text
        .align 2
@; mul_a:
@; ------
@; Fractional multiply.
@;   long mul(long a1, long a2)
@; Returns the most significant 32 bits of the signed 64-bit product
@; a1 * a2.  The MUL instruction only yields the low 32 bits, so the
@; full product is built from four 16x16 partial products.  The scheme
@; is unsigned, so signs are stripped first and the 64-bit result is
@; negated at the end if exactly one input was negative.
@; In:  a1, a2 = signed operands.   Out: a1 = msw of product.
@; Clobbers a2-a4; v1-v3 preserved via the stack.
        .global mul
mul:
        stmfd   sp!,{v1,v2,v3,lr}
@; strip signs
        cmp     a1, #0              @; check sign of input
        rsbmi   a1, a1, #0          @; create abs of input
        mvnmi   a3, #0              @; save sign (a3 = -1 if negative,
        movpl   a3, #0              @;  0 otherwise) of input
        cmp     a2, #0              @; check sign of other input
        rsbmi   a2, a2, #0          @; create abs of other input
        mvnmi   a4, #0              @; save sign,
        movpl   a4, #0              @; of other input
        eor     v3, a3, a4          @; combine signs: non-zero -> negate result
@; unsigned long multiply: 32x32 -> 64 via four 16x16 products
        mov     a3, a1, lsr #16     @; a3 = high 16 bits of input,
        eor     v1, a1, a3, lsl #16 @; v1 = low 16 bits of input.
        mov     a4, a2, lsr #16     @; a4 = high 16 bits of other input,
        eor     v2, a2, a4, lsl #16 @; v2 = low 16 bits of other input.
        mul     a1, a3, a4          @; hi*hi -> msw seed,
        mul     a2, v1, v2          @; lo*lo -> lsw seed,
        mul     a3, v2, a3          @; cross product lo2*hi1,
        mul     a4, v1, a4          @; cross product lo1*hi2.
        adds    a2, a2, a4, lsl #16 @; add cross product into lsw,
        adc     a1, a1, a4, lsr #16 @; carry + top half into msw,
        adds    a2, a2, a3, lsl #16 @; and the same for the,
        adc     a1, a1, a3, lsr #16 @; other cross product.
@; make result signed
        cmp     v3, #0              @; check result sign
        beq     exit_a              @; don't negate result if positive
        rsbs    a2, a2, #0          @; else 64-bit negate: 0 - lsw (sets borrow),
        rsc     a1, a1, #0          @; 0 - msw - borrow.
exit_a:
        ldmfd   sp!,{v1,v2,v3,pc}   @; return, a1 = msw of result.

@; muls_a:
@; -------
@; Fractional multiply with single bit left shift.
@;   long muls(long a1, long a2)
@; Same partial-product multiply as mul above, but the 64-bit product
@; is shifted left one bit before the msw is returned, i.e. the result
@; is bits [62:31] of the signed product (Q31 fractional multiply).
@; In:  a1, a2 = signed operands.   Out: a1 = msw of shifted product.
        .global  muls
        .align 2
muls:
        stmfd   sp!,{v1,v2,v3,lr}
@; strip signs
        cmp     a1, #0              @; check sign of input
        rsbmi   a1, a1, #0          @; create abs of input
        mvnmi   a3, #0              @; save sign (a3 = -1 if negative,
        movpl   a3, #0              @;  0 otherwise) of input
        cmp     a2, #0              @; check sign of other input
        rsbmi   a2, a2, #0          @; create abs of other input
        mvnmi   a4, #0              @; save sign,
        movpl   a4, #0              @; of other input
        eor     v3, a3, a4          @; combine signs: non-zero -> negate result
@; unsigned long multiply: 32x32 -> 64 via four 16x16 products
        mov     a3, a1, lsr #16     @; a3 = high 16 bits of input,
        eor     v1, a1, a3, lsl #16 @; v1 = low 16 bits of input.
        mov     a4, a2, lsr #16     @; a4 = high 16 bits of other input,
        eor     v2, a2, a4, lsl #16 @; v2 = low 16 bits of other input.
        mul     a1, a3, a4          @; hi*hi -> msw seed,
        mul     a2, v1, v2          @; lo*lo -> lsw seed,
        mul     a3, v2, a3          @; cross product lo2*hi1,
        mul     a4, v1, a4          @; cross product lo1*hi2.
        adds    a2, a2, a4, lsl #16 @; add cross product into lsw,
        adc     a1, a1, a4, lsr #16 @; carry + top half into msw,
        adds    a2, a2, a3, lsl #16 @; and the same for the,
        adc     a1, a1, a3, lsr #16 @; other cross product.
@; make result signed
        cmp     v3, #0              @; check result sign
        beq     exit_b              @; don't negate result if positive
        rsbs    a2, a2, #0          @; else 64-bit negate: 0 - lsw (sets borrow),
        rsc     a1, a1, #0          @; 0 - msw - borrow.
exit_b:
@; single bit left shift of the 64-bit result (no rounding here)
        movs    a2, a2, lsl #1      @; shift lsw 1 bit left, top bit into carry,
        adc     a1, a1, a1          @; msw = msw*2 + carry, for fractional result.
        ldmfd   sp!,{v1,v2,v3,pc}   @; return, a1 = msw of result.

@; mulr_a:
@; -------
@; Fractional multiply with rounding.
@;   long mulr(long a1, long a2)
@; Same partial-product multiply as mul above, but the msw is rounded
@; to nearest by adding the top bit of the lsw (bit 31 of the 64-bit
@; product) before it is returned.
@; In:  a1, a2 = signed operands.   Out: a1 = rounded msw of product.
        .global mulr
        .align 2
mulr:
        stmfd   sp!,{v1,v2,v3,lr}
@; strip signs
        cmp     a1, #0              @; check sign of input
        rsbmi   a1, a1, #0          @; create abs of input
        mvnmi   a3, #0              @; save sign (a3 = -1 if negative,
        movpl   a3, #0              @;  0 otherwise) of input
        cmp     a2, #0              @; check sign of other input
        rsbmi   a2, a2, #0          @; create abs of other input
        mvnmi   a4, #0              @; save sign,
        movpl   a4, #0              @; of other input
        eor     v3, a3, a4          @; combine signs: non-zero -> negate result
@; unsigned long multiply: 32x32 -> 64 via four 16x16 products
        mov     a3, a1, lsr #16     @; a3 = high 16 bits of input,
        eor     v1, a1, a3, lsl #16 @; v1 = low 16 bits of input.
        mov     a4, a2, lsr #16     @; a4 = high 16 bits of other input,
        eor     v2, a2, a4, lsl #16 @; v2 = low 16 bits of other input.
        mul     a1, a3, a4          @; hi*hi -> msw seed,
        mul     a2, v1, v2          @; lo*lo -> lsw seed,
        mul     a3, v2, a3          @; cross product lo2*hi1,
        mul     a4, v1, a4          @; cross product lo1*hi2.
        adds    a2, a2, a4, lsl #16 @; add cross product into lsw,
        adc     a1, a1, a4, lsr #16 @; carry + top half into msw,
        adds    a2, a2, a3, lsl #16 @; and the same for the,
        adc     a1, a1, a3, lsr #16 @; other cross product.
@; make result signed
        cmp     v3, #0              @; check result sign
        beq     exit_c              @; don't negate result if positive
        rsbs    a2, a2, #0          @; else 64-bit negate: 0 - lsw (sets borrow),
        rsc     a1, a1, #0          @; 0 - msw - borrow.
exit_c:
@; round to nearest
        adds    a2, a2, #0x80000000 @; add 1 to the bit below,
        adc     a1, a1, #0          @; the msw for rounding.
        ldmfd   sp!,{v1,v2,v3,pc}   @; return, a1 = msw of result.

@; SA_test:
@; --------
@; This tests for a strongarm by checking
@; if self modifying code works. If it does
@; then this is not a strongarm.
@; Returns true (a1 = 1) if strongarm detected, 0 otherwise.
@; Credit goes to Jan Vlietinick, taken from his most excellent
@; Trackman2 program.
@;
@; Mechanism: the instruction at 'modify' is decremented in memory on
@; each pass.  On a StrongARM the split I/D caches mean the change is
@; not seen by the second pass, so a1 stays 1.  On earlier ARMs the
@; modified instruction (mov a1,#0) executes and a1 ends up 0.
@; FIX: the original left the probe word modified after the loop
@; (0xE3A00001 - 2 = 0xE39FFFFF, a corrupt instruction), so a second
@; call on a non-StrongARM would execute garbage.  The pristine word is
@; now saved before the loop and restored afterwards.
@; NOTE(review): relies on .text being writable and on no unified cache
@; flush between passes - assumed valid on the target OS@; confirm.

        .global SA_test
        .align 2
SA_test:
        mov     a3, #2
        ldr     a4, modify          @; snapshot the pristine probe instruction
modify:
        mov     a1, #1              @; this instruction may be modified
        ldr     a2, modify
        sub     a2, a2, #1          @; next time a1 may be loaded with 0
        str     a2, modify
        subs    a3, a3, #1
        bne     modify
        str     a4, modify          @; restore probe so repeated calls behave
        mov     pc, lr


@; quantize
@; --------


@    IMPORT pow2_4
@    IMPORT pow3_4
@    IMPORT xrabs
@    IMPORT cutoff
@    IMPORT xrmax

@; address holders: literal-pool words containing the addresses of the
@; external C tables/variables named above.  quantize_sa loads these
@; with PC-relative ldr instructions, since the symbols themselves may
@; be out of immediate-offset range.
pow2_4_addr: .word pow2_4
pow3_4_addr: .word pow3_4
xrabs_addr:  .word xrabs
cutoff_addr: .word cutoff
xrmax_addr:  .word xrmax

@; int quantize(int ix[samp_per_frame2], int stepsize )
@; StrongARM version (uses smull, ARMv3M+ long multiply).
@; Computes scale = pow2_4[stepsize-1], then for each sample
@;   ix[i] = (unsigned short)pow3_4[round_msw(xrabs[i] * scale)]
@; for i = cutoff-1 down to 0.
@; Returns 0 (ix untouched) if the msw of scale * xrmax is >= 1000,
@; i.e. the quantized values would overflow the pow3_4 table@;
@; otherwise returns 1.
@; FIX: guard added for cutoff <= 0 - the original ran the loop once
@; with i = -1, reading xrabs[-1] and writing ix[-1] out of bounds.
    .global quantize_sa
quantize_sa:
@; a1 = &ix, a2 = stepsize
    ldr     a3, pow2_4_addr
    sub     a2, a2, #1
    ldr     a3, [a3, a2, lsl #2]  @; scale = pow2_4[stepsize-1]
    ldr     a4, xrmax_addr
    ldr     a4, [a4]              @; xrmax
    smull   a2, a4, a3, a4        @; scale * xrmax, a4 = msw of product
    cmp     a4, #1000             @; >= 1000 ?
    movge   a1, #0                @;  if so,
    movge   pc, lr                @;  return 0.
@; a1 = &ix, a3 = scale
    stmfd   sp!, {v1-v3, lr}
    ldr     a2, cutoff_addr
    ldr     a2, [a2]
    subs    a2, a2, #1            @; i = cutoff - 1
    bmi     quantize_done         @; nothing to quantize if cutoff <= 0
    ldr     a4, xrabs_addr        @; xrabs[]
    ldr     v1, pow3_4_addr       @; pow3_4[]
@;  for(i=cutoff-1@; i>=0@; i--)
@;    ix[i] = pow3_4[xrabs[i] * scale]@;
@; register roles in the loop:
@; a1 = &ix
@; a2 = i
@; a3 = scale
@; a4 = &xrabs
@; v1 = &pow3_4
@; v2 = temp
@; v3 = temp
quantize_loop:
    ldr     v2, [a4, a2, lsl #2]  @; xrabs[i]
    smull   v3, v2, a3, v2        @; xrabs[i] * scale, 64-bit product
    adds    v3, v3, #0x80000000   @; add 1 to the bit below the msw,
    adc     v2, v2, #0            @; rounding the msw to nearest.

@; unsigned halfword load of pow3_4[v2] without ldrh:
@; fetch the aligned word holding the halfword pair, then use the
@; carry from halving the index to pick which 16 bits to keep.
    movs    v2, v2, lsr #1        @; word index@; carry = halfword select
    ldr     v2, [v1, v2, lsl #2]  @; load both halfwords of the pair
    movcc   v2, v2, lsl #16       @; even index: move low half to the top,
    mov     v2, v2, lsr #16       @; bring wanted half down, zero-extended.

    str     v2, [a1, a2, lsl #2]  @; = ix[i]
    subs    a2, a2, #1
    bpl     quantize_loop
quantize_done:
    mov     a1, #1                @; return 1
    ldmfd   sp!, {v1-v3, pc}


@;	EXPORT	quantize_sa
@;quantize_sa
@;	mov	ip, sp
@;	stmfd	sp!, {v1, v2, v3, v4, v5, v6, fp, ip, lr, pc}
@;	sub	fp, ip, #4
@;	ldr	ip, |L..58|
@;	ldr	a3, |L..58|+4
@;	sub	a2, a2, #1
@;	ldr	v2, [ip, a2, asl #2]
@;	mov	v5, a1
@;	ldr	a1, [a3, #0]
@;	mov	a2, v2
@;	bl	|mulr|
@;	mov	ip, #9984
@;	add	ip, ip, #15
@;	cmp	a1, ip
@;	movgt	a1, #0
@;	ldmgtea	fp, {v1, v2, v3, v4, v5, v6, fp, sp, pc}
@;|L..51|
@;	ldr	a3, |L..58|+8
@;	mov	v1, #0
@;	ldr	ip, [a3, #0]
@;	mov	v6, a3
@;	cmp	v1, ip
@;	bge	|L..53|
@;	ldr	v4, |L..58|+12
@;	ldr	v3, |L..58|+16
@;|L..55|
@;	ldr	a1, [v4, v1, asl #2]
@;	mov	a2, v2
@;	bl	|mulr|
@;	ldr	ip, [v3, a1, asl #1]	@; movhi
@;	mov	ip, ip, asl #16
@;	mov	ip, ip, asr #16
@;	str	ip, [v5, v1, asl #2]
@;	ldr	a3, [v6, #0]
@;	add	v1, v1, #1
@;	cmp	v1, a3
@;	blt	|L..55|
@;|L..53|
@;	mov	a1, #1
@;	ldmea	fp, {v1, v2, v3, v4, v5, v6, fp, sp, pc}
@;|L..59|
@;	ALIGN
@;|L..58|
@;	DCD	|pow2_4|
@;	DCD	|xrmax|
@;	DCD	|cutoff|
@;	DCD	|xrabs|
@;	DCD	|pow3_4|

@;-----------------------------------------------------------------------
        .align 2
        .end

