Skip to content

Instantly share code, notes, and snippets.

@devinacker
Created July 10, 2018 01:13
Show Gist options
  • Save devinacker/b3658c6c671bdf3abf58da33f15ef284 to your computer and use it in GitHub Desktop.
Save devinacker/b3658c6c671bdf3abf58da33f15ef284 to your computer and use it in GitHub Desktop.

Revisions

  1. devinacker created this gist Jul 10, 2018.
    364 changes: 364 additions & 0 deletions exomizer-816.asm
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,364 @@
    .include "libSFX.i"
    .feature force_range

    .export Decrunch

    /*
    Exomizer (raw mode) decruncher for 65c816 / ca65.
    by Devin Acker (Revenant/RSE), 2018

    For use on SNES, and theoretically other 65c816-based platforms.
    Currently requires libSFX for some convenience macros and stuff, but it
    will probably be made more assembler-agnostic (or at least to support
    stock ca65) at some point.

    Unlike some other implementations, this one provides its own read/write
    calls internally, instead of requiring you to export your own, since it is
    assumed everything will just involve ROM/RAM instead of external I/O.

    To use, just import and JSL to Decrunch with the following register values:
    Input: X = destination address
    AL = destination address (bank)
    Y = source address
    AH = source address (bank)

    Output: X = decrunched size

    See the defines/comments below for some fine tuning options.

    Current assembled code size:
    With default options: 272 bytes
    Without literal support: 250 bytes

    */

    ; Build-time options.
    ;
    ; EXO_SCRATCHPAD: address of a fixed RAM buffer that Decrunch loads into the
    ; direct page register and uses for all of its working variables
    ; (LocalsSize bytes).
    ; comment out to use stack memory instead of a fixed buffer in RAM.
    ; using stack memory makes the decrunch routine reentrant, and doesn't require
    ; having a buffer available at a fixed address all the time.
    ; using a page-aligned scratchpad provides a speed advantage, though.
    ; NOTE(review): RPAD is not defined in this file; presumably it is provided
    ; by libSFX.i - confirm.
    EXO_SCRATCHPAD = RPAD
    ; EXO_INLINE_RW: selects how the read_byte/write_byte/copy_byte macros expand.
    ; set to 1 to inline reads/writes. makes the code faster, but slightly larger.
    ; with any other value they expand to a jsr to ReadByte/WriteByte/CopyByte.
    EXO_INLINE_RW = 0
    ; EXO_NO_LITERALS: set to 1 to disable literal support (the literal data block
    ; handler in Decrunch is then not assembled). makes the decrunch code smaller,
    ; if you don't need literals in your crunched data
    EXO_NO_LITERALS = 0

    ;-------------------------------------------------------------------------------
    .code

    ; Layout of the decruncher's working variables.
    ; Every symbol below is a byte offset that the code accesses with z: (direct
    ; page) addressing after Decrunch has set the direct page register.
    ;
    ; The "Bits" tables hold, per entry, the number of extra bits to read from the
    ; stream; the "Base" tables hold the value that those bits are added to.
    ; Entries are 16-bit words: 16 for lengths, 16 each for the Offsets3 and
    ; Offsets2 tables, and 4 for the Offsets1 table. The Base group mirrors the
    ; Bits group, so TablesBase+n is the base belonging to TablesBits+n.
    TablesBits = 0
    LengthsBits = TablesBits
    Offsets3Bits = LengthsBits+16*2
    Offsets2Bits = Offsets3Bits+16*2
    Offsets1Bits = Offsets2Bits+16*2
    TablesBase = Offsets1Bits+4*2
    LengthsBase = TablesBase
    Offsets3Base = LengthsBase+16*2
    Offsets2Base = Offsets3Base+16*2
    Offsets1Base = Offsets2Base+16*2
    ; remaining length of the sequence currently being copied (16-bit)
    SequenceLen = Offsets1Base+4*2

    ; bits not yet consumed from the most recently fetched stream byte (16-bit slot)
    BitBuffer = SequenceLen+2
    ; 24-bit source pointer: 16-bit address followed by the bank byte
    SourceAddr = BitBuffer+2
    ; 16-bit write pointer, and a copy of its initial value for the size result.
    ; In stack mode Decrunch creates SourceAddr/DestAddr/OrigDest with pushes, so
    ; their order and their combined size (7 bytes) must match that code.
    DestAddr = SourceAddr+3
    OrigDest = DestAddr+2

    ; total size of the working area in bytes
    LocalsSize = OrigDest+2

    ; for measuring code size
    StartExoCode:

    ;-------------------------------------------------------------------------------
    ; _read: load the byte at the 24-bit pointer SourceAddr into A and advance
    ; the pointer by one.
    ; Written for 16-bit A: the lda fetches two bytes and the final and keeps
    ; only the low one, so A ends up as $00xx.
    ; Only the address word of SourceAddr is incremented; the bank byte at
    ; SourceAddr+2 is not touched here.
    .macro _read
    lda [SourceAddr]
    inc z:SourceAddr
    and #$00ff
    .endmac

    ; _write: store the low byte of A at the address held in DestAddr and
    ; advance DestAddr by one.
    ; A is narrowed to 8 bits just for the store so that exactly one byte is
    ; written, then widened again before the 16-bit pointer increment.
    ; (DestAddr) is a 16-bit indirect pointer, so the bank is supplied by the
    ; data bank register, which Decrunch loads with the destination bank.
    .macro _write
    RW a8
    sta (DestAddr)
    RW a16
    inc z:DestAddr
    .endmac

    ; Byte I/O front end. The rest of the file only uses the read_byte,
    ; write_byte and copy_byte macros; EXO_INLINE_RW decides whether they expand
    ; to a jsr to a shared subroutine or to the instructions themselves.
    .if EXO_INLINE_RW <> 1
    ; non-inline read/write calls

    ; ReadByte: next source byte -> A (high byte zero), source pointer advanced
    proc ReadByte, a16
    _read
    rts
    endproc
    .macro read_byte
    jsr ReadByte
    .endmac

    ; WriteByte: low byte of A -> destination, destination pointer advanced
    proc WriteByte, a16
    _write
    rts
    endproc
    .macro write_byte
    jsr WriteByte
    .endmac

    ; CopyByte: move one byte from source to destination.
    ; Branches into WriteByte instead of calling it, so WriteByte's rts returns
    ; directly to CopyByte's caller.
    proc CopyByte, a16
    jsr ReadByte
    bra WriteByte
    endproc
    .macro copy_byte
    jsr CopyByte
    .endmac

    .else
    ; inline read/write calls
    .macro read_byte
    _read
    .endmac

    .macro write_byte
    _write
    .endmac

    ; inline copy: the byte is moved with A at 8 bits, so no masking is needed;
    ; both 16-bit pointers are advanced once A is back at 16 bits
    .macro copy_byte
    RW a8
    lda [SourceAddr]
    sta (DestAddr)
    RW a16
    inc z:SourceAddr
    inc z:DestAddr
    .endmac

    .endif

    ;-------------------------------------------------------------------------------
    /*
    Input: X = destination address
    AL = destination address (bank)
    Y = source address
    AH = source address (bank)

    Output: X = decrunched size
    */
    proc Decrunch, a16
    ; Decrunch a crunched stream from the source pointer to the destination
    ; pointer and return the number of bytes written in X.
    ;
    ; P, the direct page register and the data bank register are pushed on entry
    ; and pulled again before the rtl. A and Y are not preserved.
    ; NOTE(review): X and Y are stored/pushed as addresses right after entry, so
    ; the caller presumably has to enter with 16-bit index registers (and 16-bit
    ; A, per the a16 on the proc line) - confirm against libSFX's proc/RW macros.

    ; break
    php
    phd
    phb

    ; AL (destination bank) goes into the data bank register via the stack;
    ; xba then brings AH (source bank) into the low byte of A
    RW a8
    pha
    plb
    xba

    .ifdef ::EXO_SCRATCHPAD
    ; use scratchpad
    ; point the direct page at the fixed buffer, then fill in the pointers
    pea EXO_SCRATCHPAD
    pld
    sty z:SourceAddr
    sta z:SourceAddr+2
    stx z:DestAddr
    stx z:OrigDest
    RW a16
    .else
    ; use stack
    ; these pushes create the last 7 bytes of the working area, highest field first
    phx ; OrigDest
    phx ; DestAddr
    pha ; SourceAddr+2
    phy ; SourceAddr
    RW a16
    tsc
    sec
    sbc #LocalsSize-7 ; allocate everything but what we just pushed
    tcs
    inc ; the stack pointer is one below the first byte of the area
    tcd
    .endif

    ; prime the bit buffer with the first byte of the stream
    read_byte
    sta z:BitBuffer

    RW i8
    ; build the bits/base tables from the start of the stream.
    ; GenerateTable advances X and preserves Y, so the three 16-entry tables
    ; are generated back to back without reloading either register.
    ; lengths table
    ldx #TablesBits
    ldy #16
    jsr GenerateTable
    ; offsets table 3
    jsr GenerateTable
    ; offsets table 2
    jsr GenerateTable
    ; offsets table 1
    ldy #4
    jsr GenerateTable

    NextByte:
    ; main loop: one flag bit; 1 = copy a single byte from source to destination
    ldy #1
    jsr ReadBits
    lsr
    bcc GetGamma
    copy_byte
    bra NextByte

    GetGamma:
    ; count 0 bits up to the first 1 bit
    ; X is the 'length index' but treat it as a 16-bit pointer index (i.e. mult by 2)
    ldx #-2
    : inx
    inx
    ldy #1
    jsr ReadBits
    lsr
    bcc :-

    ; index 16 ends decrunching
    cpx #16*2
    beq end

    .if ::EXO_NO_LITERALS <> 1
    ; index 17 introduces a literal block; anything else is a sequence
    cpx #17*2
    bne Sequence

    ; literal data block
    ; a 16-bit byte count follows, then that many bytes are copied unchanged
    ldy #16
    jsr ReadBits
    RW i16
    tax
    : copy_byte
    dex
    bne :-
    RW i8
    bra NextByte
    .endif

    Sequence:
    ; sliding window sequence
    ; length = base for this index + extra bits (if the entry asks for any)
    ; at this point A = 0 (from checking/shifting out gamma bits)
    ldy z:LengthsBits,x
    beq :+
    jsr ReadBits
    ; at this point carry is clear either from comparing gamma code or calling ReadBits
    : adc z:LengthsBase,x
    sta z:SequenceLen

    ; choose the offsets table by length: 1 -> table 1 (2 index bits),
    ; 2 -> table 2 (4 index bits), anything else -> table 3 (4 index bits)
    ldy #4 ; argument of ReadBits
    cmp #1
    beq len1
    cmp #2
    beq len2

    ; in each case A = entry number within the combined tables
    ; (table start offset halved, plus the index just read)
    jsr ReadBits
    adc #Offsets3Bits>>1
    bra :+
    len2:
    jsr ReadBits
    adc #Offsets2Bits>>1
    bra :+
    len1:
    ldy #2
    jsr ReadBits
    adc #Offsets1Bits>>1

    ; get sequence offset
    ; entry number * 2 = byte offset of the entry
    : asl
    tax
    ldy z:TablesBits,x
    tya ; use 0 if we don't call readbits
    beq :+
    jsr ReadBits
    ; at this point carry is clear either from shifting out or calling ReadBits
    : adc z:TablesBase,x
    ; make offset relative to current output position
    ; (invert, then add with carry set: A = DestAddr - offset)
    eor #-1
    sec
    adc z:DestAddr
    RW i16
    tax
    ; copy SequenceLen bytes from already written output, in ascending order
    ; destination bank in DBR will be used here
    : lda a:0,x
    write_byte
    inx
    dec z:SequenceLen
    bne :-
    ; end of sequence
    RW i8
    brl NextByte

    end:
    ; break
    ; result: X = DestAddr - OrigDest
    RW i16
    lda z:DestAddr
    sec
    sbc z:OrigDest
    tax

    .ifndef ::EXO_SCRATCHPAD
    ; deallocate stack mem
    ; (the whole working area, including the 7 bytes that were pushed)
    tsc
    clc
    adc #LocalsSize
    tcs
    .endif

    ; pull in reverse order of the pushes at entry
    plb
    pld
    plp
    rtl
    endproc

    ;-------------------------------------------------------------------------------
    /*
    Pull bits out of the crunched stream, one at a time, into A.
    The first bit read ends up as the most significant bit of the result.

    Input: Y = number of bits to read (1-16)
    (must not be 0: the count is only tested after the first decrement)

    Output: A = value
    Y = 0
    carry clear
    X is left alone
    */
    proc ReadBits, a16
    lda #0
    next_bit:
    ; shift the next bit into carry; a zero result means the bit that just
    ; fell out was the refill marker, not data
    lsr z:BitBuffer
    bne have_bit
    ; refill: fetch a byte, plant a new marker above it in bit 8, and shift
    ; once so the first data bit is in carry and the marker sits in bit 7
    pha
    read_byte
    ora #$0100
    lsr
    sta z:BitBuffer
    ; pla restores the partial result without disturbing carry
    pla
    have_bit:
    rol
    dey
    bne next_bit
    end:
    rts
    endproc

    ;-------------------------------------------------------------------------------
    /*
    Fill one group of bits/base table entries from the crunched stream.
    For every entry a 4-bit bit count is read and stored in the bits table;
    the entry's base is 1 for the first entry and, for each later one, the
    previous base plus 2^(previous bit count).

    Input: X = offset into bits/base tables
    Y = number of table entries (preserved to make repeated calls smaller)

    Output: X = next offset into tables
    A is overwritten
    */
    proc GenerateTable, a16i8
    ; keep the caller's entry count for the final ply
    phy
    lda #1
    next_entry:
    sta z:TablesBase,x
    ; remaining-entries counter is parked on the stack while Y is reused
    phy
    ldy #4
    jsr ReadBits
    sta z:TablesBits,x

    ; A = 1 << bit count: the first rol shifts the set carry into bit 0,
    ; every further pass doubles it (the loop runs bit count + 1 times)
    tay
    lda #0
    sec
    pow2_loop:
    rol
    dey
    bpl pow2_loop
    ; the last rol shifted out a zero, so carry is clear for this add
    adc z:TablesBase,x
    ; step to the next word-sized entry
    inx
    inx
    ply
    dey
    bne next_entry
    ply
    rts
    endproc

    .out .sprintf("exomizer code size: %u", *-StartExoCode)