nes-cdl-disasm.py with syntax coloring

This HTML file was generated with Kalle's syntaxcolor.py


"""
NES disassembler with CDL (FCEUX Code/Data Logger) file support
By Kalle (http://qalle.net)

Format of each PRG-ROM CDL byte (from FCEUX help file): -PdcAADC:
    P  = logged as PCM audio data
    d  = indirectly accessed as data; e.g. LDA ($nn),Y
    c  = indirectly accessed as code; e.g. JMP ($nnnn)
    AA = ROM bank when last accessed; $8000 + AA * $2000
    D  = accessed as data
    C  = accessed as code
"""
  13
import bisect
import os.path
import sys
  16
# Prefix for indented output lines (directives, instructions and comments);
# labels are printed without it.
INDENT = 4 * " "

# Usage text shown when the number of command line arguments is wrong.
HELP_TEXT = """\
NES disassembler with CDL (FCEUX Code/Data Logger) file support.
Doesn't support games with more than 32 kB of PRG-ROM.
The output can be assembled with Ophis.

Arguments: SourceFile LogFile OmitUnreadBytes
    SourceFile
        iNES ROM file (.nes) to read
    LogFile
        Code/data log file (.cdl) to read
    OmitUnreadBytes:
        "0" (include unread bytes) or "1" (omit unread bytes)\
"""

# Comment lines printed (each prefixed with INDENT) before the PRG-ROM
# disassembly to explain the chunk types used in the output.
PRG_ROM_INTRO_LINES = (
    "; Types of PRG-ROM chunks (sequences of bytes of the same type):",
    ";   - unread (neither code nor data)",
    ";   - read as code (but not data)",
    ";   - read as data (but not code)",
    ";   - read as both code and data",
    "; Chunks are separated by an empty line.",
    "; Code-only and data-only chunks aren't indicated in the comments.",
)

# Chunk types: a CDL byte ANDed with 0b11 (bit 0 = accessed as code,
# bit 1 = accessed as data).
CDL_UNREAD = 0x00
CDL_CODE = 0x01
CDL_DATA = 0x02
CDL_BOTH = 0x03

# Comments for the three interrupt vectors, in the order in which they are
# stored in the last six bytes of PRG-ROM.
VECTOR_COMMENTS = ("NMI", "Reset", "IRQ")
  49
  50OPERAND_SIZES = {
  51    "imp"   : 0,
  52    "acc"   : 0,
  53    "imm"   : 1,
  54    "zp"    : 1,
  55    "zp,x"  : 1,
  56    "zp,y"  : 1,
  57    "(zp,x)": 1,
  58    "(zp),y": 1,
  59    "rel"   : 1,
  60    "abs"   : 2,
  61    "abs,x" : 2,
  62    "abs,y" : 2,
  63    "(abs)" : 2,
  64}
  65
# Format strings for printing a numeric operand after the mnemonic, keyed by
# addressing mode. Each non-empty string starts with the space that separates
# the operand from the mnemonic. "rel" uses four hex digits because the
# relative offset is converted to the absolute target address before
# formatting.
OPERAND_FORMATS = {
    "imp"   : "",
    "acc"   : "",
    "imm"   : " #${:02x}",
    "zp"    : " ${:02x}",
    "zp,x"  : " ${:02x},x",
    "zp,y"  : " ${:02x},y",
    "(zp,x)": " (${:02x},x)",
    "(zp),y": " (${:02x}),y",
    "rel"   : " ${:04x}",
    "abs"   : " ${:04x}",
    "abs,x" : " ${:04x},x",
    "abs,y" : " ${:04x},y",
    "(abs)" : " (${:04x})",
}
  81
# Format strings for printing an operand as a symbol (NES register name or
# label reference) instead of a number. Defined only for the relative and
# absolute addressing modes.
OPERAND_FORMATS_LABEL = {
    "rel"   : " {:s}",
    "abs"   : " {:s}",
    "abs,x" : " {:s},x",
    "abs,y" : " {:s},y",
    "(abs)" : " ({:s})",
}
  89
# Opcode table: opcode byte -> (mnemonic, addressing mode). The addressing
# mode is a key of OPERAND_SIZES and OPERAND_FORMATS. Bytes that are missing
# from this table can't be disassembled; they are reported as
# "(unofficial opcode)" when printed as data.
OPCODES = {
    0x00: ("brk", "imp"),
    0x01: ("ora", "(zp,x)"),
    0x05: ("ora", "zp"),
    0x06: ("asl", "zp"),
    0x08: ("php", "imp"),
    0x09: ("ora", "imm"),
    0x0A: ("asl", "acc"),
    0x0D: ("ora", "abs"),
    0x0E: ("asl", "abs"),
    0x10: ("bpl", "rel"),
    0x11: ("ora", "(zp),y"),
    0x15: ("ora", "zp,x"),
    0x16: ("asl", "zp,x"),
    0x18: ("clc", "imp"),
    0x19: ("ora", "abs,y"),
    0x1D: ("ora", "abs,x"),
    0x1E: ("asl", "abs,x"),
    0x20: ("jsr", "abs"),
    0x21: ("and", "(zp,x)"),
    0x24: ("bit", "zp"),
    0x25: ("and", "zp"),
    0x26: ("rol", "zp"),
    0x28: ("plp", "imp"),
    0x29: ("and", "imm"),
    0x2A: ("rol", "acc"),
    0x2C: ("bit", "abs"),
    0x2D: ("and", "abs"),
    0x2E: ("rol", "abs"),
    0x30: ("bmi", "rel"),
    0x31: ("and", "(zp),y"),
    0x35: ("and", "zp,x"),
    0x36: ("rol", "zp,x"),
    0x38: ("sec", "imp"),
    0x39: ("and", "abs,y"),
    0x3D: ("and", "abs,x"),
    0x3E: ("rol", "abs,x"),
    0x40: ("rti", "imp"),
    0x41: ("eor", "(zp,x)"),
    0x45: ("eor", "zp"),
    0x46: ("lsr", "zp"),
    0x48: ("pha", "imp"),
    0x49: ("eor", "imm"),
    0x4A: ("lsr", "acc"),
    0x4C: ("jmp", "abs"),
    0x4D: ("eor", "abs"),
    0x4E: ("lsr", "abs"),
    0x50: ("bvc", "rel"),
    0x51: ("eor", "(zp),y"),
    0x55: ("eor", "zp,x"),
    0x56: ("lsr", "zp,x"),
    0x58: ("cli", "imp"),
    0x59: ("eor", "abs,y"),
    0x5D: ("eor", "abs,x"),
    0x5E: ("lsr", "abs,x"),
    0x60: ("rts", "imp"),
    0x61: ("adc", "(zp,x)"),
    0x65: ("adc", "zp"),
    0x66: ("ror", "zp"),
    0x68: ("pla", "imp"),
    0x69: ("adc", "imm"),
    0x6A: ("ror", "acc"),
    0x6C: ("jmp", "(abs)"),
    0x6D: ("adc", "abs"),
    0x6E: ("ror", "abs"),
    0x70: ("bvs", "rel"),
    0x71: ("adc", "(zp),y"),
    0x75: ("adc", "zp,x"),
    0x76: ("ror", "zp,x"),
    0x78: ("sei", "imp"),
    0x79: ("adc", "abs,y"),
    0x7D: ("adc", "abs,x"),
    0x7E: ("ror", "abs,x"),
    0x81: ("sta", "(zp,x)"),
    0x84: ("sty", "zp"),
    0x85: ("sta", "zp"),
    0x86: ("stx", "zp"),
    0x88: ("dey", "imp"),
    0x8A: ("txa", "imp"),
    0x8C: ("sty", "abs"),
    0x8D: ("sta", "abs"),
    0x8E: ("stx", "abs"),
    0x90: ("bcc", "rel"),
    0x91: ("sta", "(zp),y"),
    0x94: ("sty", "zp,x"),
    0x95: ("sta", "zp,x"),
    0x96: ("stx", "zp,y"),
    0x98: ("tya", "imp"),
    0x99: ("sta", "abs,y"),
    0x9A: ("txs", "imp"),
    0x9D: ("sta", "abs,x"),
    0xA0: ("ldy", "imm"),
    0xA1: ("lda", "(zp,x)"),
    0xA2: ("ldx", "imm"),
    0xA4: ("ldy", "zp"),
    0xA5: ("lda", "zp"),
    0xA6: ("ldx", "zp"),
    0xA8: ("tay", "imp"),
    0xA9: ("lda", "imm"),
    0xAA: ("tax", "imp"),
    0xAC: ("ldy", "abs"),
    0xAD: ("lda", "abs"),
    0xAE: ("ldx", "abs"),
    0xB0: ("bcs", "rel"),
    0xB1: ("lda", "(zp),y"),
    0xB4: ("ldy", "zp,x"),
    0xB5: ("lda", "zp,x"),
    0xB6: ("ldx", "zp,y"),
    0xB8: ("clv", "imp"),
    0xB9: ("lda", "abs,y"),
    0xBA: ("tsx", "imp"),
    0xBC: ("ldy", "abs,x"),
    0xBD: ("lda", "abs,x"),
    0xBE: ("ldx", "abs,y"),
    0xC0: ("cpy", "imm"),
    0xC1: ("cmp", "(zp,x)"),
    0xC4: ("cpy", "zp"),
    0xC5: ("cmp", "zp"),
    0xC6: ("dec", "zp"),
    0xC8: ("iny", "imp"),
    0xC9: ("cmp", "imm"),
    0xCA: ("dex", "imp"),
    0xCC: ("cpy", "abs"),
    0xCD: ("cmp", "abs"),
    0xCE: ("dec", "abs"),
    0xD0: ("bne", "rel"),
    0xD1: ("cmp", "(zp),y"),
    0xD5: ("cmp", "zp,x"),
    0xD6: ("dec", "zp,x"),
    0xD8: ("cld", "imp"),
    0xD9: ("cmp", "abs,y"),
    0xDD: ("cmp", "abs,x"),
    0xDE: ("dec", "abs,x"),
    0xE0: ("cpx", "imm"),
    0xE1: ("sbc", "(zp,x)"),
    0xE4: ("cpx", "zp"),
    0xE5: ("sbc", "zp"),
    0xE6: ("inc", "zp"),
    0xE8: ("inx", "imp"),
    0xE9: ("sbc", "imm"),
    0xEA: ("nop", "imp"),
    0xEC: ("cpx", "abs"),
    0xED: ("sbc", "abs"),
    0xEE: ("inc", "abs"),
    0xF0: ("beq", "rel"),
    0xF1: ("sbc", "(zp),y"),
    0xF5: ("sbc", "zp,x"),
    0xF6: ("inc", "zp,x"),
    0xF8: ("sed", "imp"),
    0xF9: ("sbc", "abs,y"),
    0xFD: ("sbc", "abs,x"),
    0xFE: ("inc", "abs,x"),
}
 243
# NES registers: address -> symbolic name. The names are printed as .alias
# definitions at the top of the output and replace matching operand values in
# the disassembly.
# http://wiki.nesdev.com/w/index.php/2A03
# http://wiki.nesdev.com/w/index.php/PPU_registers
NES_REGS = {
    # PPU
    0x2000: "PPUCTRL",
    0x2001: "PPUMASK",
    0x2002: "PPUSTATUS",
    0x2003: "OAMADDR",
    0x2004: "OAMDATA",
    0x2005: "PPUSCROLL",
    0x2006: "PPUADDR",
    0x2007: "PPUDATA",
    # CPU
    0x4000: "SQ1_VOL",
    0x4001: "SQ1_SWEEP",
    0x4002: "SQ1_LO",
    0x4003: "SQ1_HI",
    0x4004: "SQ2_VOL",
    0x4005: "SQ2_SWEEP",
    0x4006: "SQ2_LO",
    0x4007: "SQ2_HI",
    0x4008: "TRI_LINEAR",
    0x400a: "TRI_LO",
    0x400b: "TRI_HI",
    0x400c: "NOISE_VOL",
    0x400e: "NOISE_LO",
    0x400f: "NOISE_HI",
    0x4010: "DMC_FREQ",
    0x4011: "DMC_RAW",
    0x4012: "DMC_START",
    0x4013: "DMC_LEN",
    0x4014: "OAMDMA",
    0x4015: "SND_CHN",
    0x4016: "JOY1",
    0x4017: "JOY2",
}
 281
 282def read_iNES_header(handle):
 283    """
 284    Read iNES file header, return info in a dict.
 285    """
 286
 287    # file size must be at least 16 bytes
 288    fileSize = handle.seek(0, 2)
 289    if fileSize < 16:
 290        exit("Invalid iNES file (less than 16 bytes).".format(maxSize))
 291
 292    # read header
 293    handle.seek(0)
 294    header = handle.read(16)
 295
 296    # validate identifier
 297    if header[:4] != b"NES\x1a":
 298        exit("Invalid iNES file (invalid identifier).")
 299
 300    # read PRG-ROM and CHR-ROM size and check if trainer is present
 301    PRGROMSize = header[4]  # unit: 16 kB
 302    CHRROMSize = header[5]  # unit 8 kB
 303    trainerPresent = (header[6] >> 2) & 0b1
 304
 305    # file size must match header
 306    correctSize = (
 307        16 +
 308        trainerPresent * 512 +
 309        PRGROMSize * 16384 +
 310        CHRROMSize * 8192
 311    )
 312    if fileSize != correctSize:
 313        exit(
 314            "Incorrect iNES file size (should be {:d} bytes, is {:d} bytes)."
 315            .format(correctSize, fileSize)
 316        )
 317
 318    return {
 319        "prg": PRGROMSize,
 320        "chr": CHRROMSize,
 321        "mapper": (header[6] >> 4) | (header[7] & 0xF0),
 322        "vertical": header[6] & 0b1,
 323        "fourScreen": (header[6] >> 3) & 0b1,
 324        "save": (header[6] >> 1) & 0b1,
 325        "trainer": trainerPresent,
 326    }
 327
 328def disassemble_header(header):
 329    """Disassemble iNES header."""
 330
 331    # encode bitfields
 332    bitField1 = (
 333        ((header["mapper"] & 0xF) << 4) |
 334        (header["fourScreen"] << 3) |
 335        (header["trainer"] << 2) |
 336        (header["save"] << 1) |
 337        (header["vertical"])
 338    )
 339    bitField2 = header["mapper"] & 0xF0
 340
 341    print(INDENT + ".text")
 342    print(INDENT + ".org $0000")
 343    print(INDENT + ".byte $4E, $45, $53, $1A")
 344    print(INDENT + ".byte {:d}".format(header["prg"]))
 345    print(INDENT + ".byte {:d}".format(header["chr"]))
 346    print(INDENT + ".byte %{:08b}".format(bitField1))
 347    print(INDENT + ".byte %{:08b}".format(bitField2))
 348    print(INDENT + ".byte $00, $00, $00, $00, $00, $00, $00, $00")
 349
 350def CDL_to_chunks(hnd, PRGSize):
 351    """
 352    Read PRG-ROM part of CDL file except for interrupt vectors, return as
 353    chunks of consecutive bytes of the same type.
 354
 355    Args:
 356        hnd: CDL file handle
 357        PRGSize: size of PRG-ROM part in bytes
 358
 359    Return:
 360        chunks: [(address1, length1, type1), (address2, length2, type2), ...]
 361        (type = CDL byte AND 0b11)
 362    """
 363
 364    # read PRG-ROM part of CDL file except for interrupt vectors
 365    hnd.seek(0)
 366    CDLData = hnd.read(PRGSize - 6)
 367
 368    chunks = []
 369    chunkType = -1
 370
 371    for (addr, byte) in enumerate(CDLData):
 372        byte &= 0b11
 373
 374        if byte != chunkType:
 375            if addr > 0:
 376                chunks.append((chunkStart, addr - chunkStart, chunkType))
 377            chunkStart = addr
 378            chunkType = byte
 379
 380    # add last chunk
 381    chunks.append((chunkStart, PRGSize - 6 - chunkStart, chunkType))
 382
 383    return chunks
 384
 385def attempt_disassembly(chunk):
 386    """
 387    Try to disassemble a code chunk.
 388
 389    Return how many bytes we were able to disassemble.
 390    """
 391
 392    chunkLen = len(chunk)
 393    offset = 0
 394
 395    while offset < chunkLen:
 396        byte = chunk[offset]
 397
 398        try:
 399            addrMode = OPCODES[byte][1]
 400        except KeyError:
 401            # invalid opcode; can't disassemble from now on
 402            return offset
 403
 404        operandSize = OPERAND_SIZES[addrMode]
 405        if operandSize >= chunkLen - offset:
 406            # operand extends past end of chunk; can't disassemble from now on
 407            return offset
 408
 409        offset += 1 + operandSize
 410
 411    # whole chunk can be disassembled
 412    return offset
 413
 414def collect_instruction_start_addresses(chunk, org):
 415    """
 416    Disassemble a code chunk (already verified to be disassemblable) but
 417    don't print anything, only addresses of instructions.
 418    """
 419
 420    offset = 0
 421    addresses = set()
 422
 423    while offset < len(chunk):
 424        addresses.add(org + offset)
 425        offset += 1 + OPERAND_SIZES[OPCODES[chunk[offset]][1]]
 426
 427    return addresses
 428
 429def collect_valid_labels(PRGROM, org, CDLChunks):
 430    """
 431    Disassemble PRG-ROM, but don't print anything, just collect addresses that
 432    are suitable for labels (don't fall inside an instruction).
 433
 434    Args:
 435        PRGROM: PRG-ROM data as bytes
 436        org: value to add to addresses to get PRG-ROM address
 437        CDLchunks: [(addr1, length1, type1), (addr2, length2, type2), ...]
 438    """
 439
 440    validLabelAddresses = set()
 441
 442    for (addr, len_, type_) in CDLChunks:
 443        # chunk start address
 444        validLabelAddresses.add(org + addr)
 445
 446        # if code chunk, also address of each instruction
 447        if type_ in (CDL_CODE, CDL_BOTH):
 448            len_ = attempt_disassembly(PRGROM[addr : addr + len_])
 449            chunk = PRGROM[addr : addr + len_]
 450            validLabelAddresses.update(
 451                collect_instruction_start_addresses(chunk, org + addr)
 452            )
 453
 454    return validLabelAddresses
 455
 456def collect_labels_from_code(chunk, org, addr):
 457    """
 458    Disassemble a code chunk (already verified to be disassemblable) but
 459    don't print anything, only collect label addresses.
 460    """
 461
 462    labels = set()
 463    chunkLen = len(chunk)
 464    chunkAddr = org + addr
 465    offset = 0
 466    comment = ""
 467
 468    while offset < chunkLen:
 469        byte = chunk[offset]
 470        fileAddr = chunkAddr + offset  # address in PRG-ROM
 471
 472        (instruction, addrMode) = OPCODES[byte]
 473        operandSize = OPERAND_SIZES[addrMode]
 474
 475        # read value of operand
 476        operand = 0
 477        if operandSize >= 1:
 478            operand += chunk[offset + 1]
 479            if operandSize >= 2:
 480                operand += chunk[offset + 2] * 0x100
 481
 482        if addrMode == "rel":
 483            operand = \
 484            (fileAddr + 2 - (operand & 0x80) + (operand & 0x7f)) & 0xffff
 485
 486        if addrMode in ("rel", "abs", "abs,x", "abs,y", "(abs)") and \
 487        operand >= org:
 488            labels.add(operand)
 489
 490        offset += 1 + operandSize
 491
 492    return labels
 493
 494def collect_labels(PRGROM, org, CDLChunks):
 495    """
 496    Disassemble PRG-ROM, but don't print anything, only collect addresses of
 497    labels.
 498
 499    Args:
 500        PRGROM: PRG-ROM data as bytes
 501        org: value to add to addresses to get PRG-ROM address
 502        CDLchunks: [(addr1, length1, type1), (addr2, length2, type2), ...]
 503    """
 504
 505    labels = set((org,))
 506
 507    for (addr, len_, type_) in CDLChunks:
 508        if type_ in (CDL_CODE, CDL_BOTH):
 509            len_ = attempt_disassembly(PRGROM[addr : addr + len_])
 510            chunk = PRGROM[addr : addr + len_]
 511            labels.update(collect_labels_from_code(chunk, org, addr))
 512        elif type_ == CDL_DATA:
 513            labels.add(org + addr)
 514
 515    # interrupt vectors
 516    for pos in range(len(PRGROM) - 6, len(PRGROM), 2):
 517        labels.add(PRGROM[pos] + PRGROM[pos+1] * 0x100)
 518
 519    return labels
 520
 521def print_data_bytes(chunk):
 522    """Print data chunk eight bytes at a time."""
 523
 524    for pos in range(0, len(chunk), 8):
 525        print(INDENT + ".byte " + ", ".join(
 526            "${:02x}".format(byte) for byte in chunk[pos : pos + 8]
 527        ))
 528
 529def print_data_bytes_and_disassembly(chunk):
 530    """Print bytes one by one and the disassembly of each."""
 531
 532    for byte in chunk:
 533        try:
 534            (instruction, addrMode) = OPCODES[byte]
 535            disassembly = instruction + " " + addrMode
 536        except KeyError:
 537            disassembly = "(unofficial opcode)"
 538
 539        print(INDENT + ".byte ${:02x}  ; {:s}".format(byte, disassembly))
 540
 541def create_label_reference(addr, labels):
 542    """
 543    Create reference to an address using labels, e.g. "Label_6" or "Label_6+9".
 544    """
 545
 546    if addr < labels[0]:
 547        return "${:04x}".format(addr)
 548
 549    # find index to largest label address that's <= target address
 550    for (labelIndex, label) in enumerate(labels):
 551        if label > addr:
 552            labelIndex -= 1
 553            break
 554
 555    # how much we have to add to the label
 556    offset = addr - labels[labelIndex]
 557
 558    return "Label_{:d}{:s}".format(
 559        labelIndex + 1, ("+" + str(offset) if offset > 0 else "")
 560    )
 561
 562def print_disassembly(chunk, chunkAddr, asBytesToo, labels):
 563    """
 564    Print disassembly of code chunk (already verified to be disassemblable).
 565
 566    Args:
 567        chunk: code chunk as bytes
 568        chunkAddr: address in PRG-ROM
 569        asBytesToo: also print instructions as bytes in comments
 570        labels: [label1Address, label2Address, ...]
 571    """
 572
 573    chunkLen = len(chunk)
 574    offset = 0
 575    comment = ""
 576
 577    while offset < chunkLen:
 578        byte = chunk[offset]
 579        fileAddr = chunkAddr + offset  # address in PRG-ROM
 580
 581        # print label if needed (label's been already printed at start of
 582        # every chunk)
 583        if offset > 0 and fileAddr in labels:
 584            print("Label_{:d}:".format(labels.index(fileAddr) + 1))
 585
 586        (instruction, addrMode) = OPCODES[byte]
 587        operandSize = OPERAND_SIZES[addrMode]
 588
 589        # read value of operand
 590        operand = 0
 591        if operandSize >= 1:
 592            operand += chunk[offset + 1]
 593            if operandSize >= 2:
 594                operand += chunk[offset + 2] * 0x100
 595
 596        if addrMode == "rel":
 597            operand = \
 598            (fileAddr + 2 - (operand & 0x80) + (operand & 0x7f)) & 0xffff
 599
 600        regName = NES_REGS.get(operand, None)
 601        if regName is not None:
 602            formattedOperand = OPERAND_FORMATS_LABEL[addrMode].format(regName)
 603        else:
 604            if operand < labels[0]:
 605                formattedOperand = OPERAND_FORMATS[addrMode].format(operand)
 606            else:
 607                formattedOperand = OPERAND_FORMATS_LABEL[addrMode].format(
 608                    create_label_reference(operand, labels)
 609                )
 610
 611        if asBytesToo:
 612            comment = "  ; " + ", ".join(
 613                "${:02x}".format(byte) for byte
 614                in chunk[offset : offset + 1 + operandSize]
 615            )
 616
 617        print(INDENT + instruction + formattedOperand + comment)
 618
 619        offset += 1 + operandSize
 620
 621def disassemble_PRG_ROM(PRGROM, org, CDLChunks, labels, omitUnreadChunks):
 622    """
 623    Disassemble PRG-ROM.
 624
 625    Args:
 626        PRGROM: PRG-ROM data as bytes
 627        org: value to add to addresses to get PRG-ROM address
 628        chunks: [(address1, length1, type1), (address2, length2, type2), ...]
 629        labels: [label1Address, label2Address, ...]
 630        omitUnreadChunks: .advance past unread chunks (0 or 1)
 631    """
 632
 633    for (addr, len_, type_) in CDLChunks:
 634        PRGAddr = org + addr
 635
 636        # print label if needed
 637        if PRGAddr in labels:
 638            print("Label_{:d}:".format(labels.index(PRGAddr) + 1))
 639
 640        if type_ == CDL_DATA:
 641            chunk = PRGROM[addr : addr + len_]
 642            print_data_bytes(chunk)
 643
 644        elif type_ != CDL_UNREAD:
 645            chunk = PRGROM[addr : addr + len_]
 646
 647            # try to disassemble chunk (don't print anything yet)
 648            bytesDisassemblable = attempt_disassembly(chunk)
 649            asBytesToo = (type_ != CDL_CODE)
 650
 651            if type_ == CDL_BOTH:
 652                print(INDENT + "; both code and data")
 653
 654            # disassemble as much as possible
 655            print_disassembly(
 656                chunk[:bytesDisassemblable], PRGAddr, asBytesToo, labels
 657            )
 658
 659            if bytesDisassemblable == 0:
 660                print(INDENT + "; chunk couldn't be disassembled")
 661            elif bytesDisassemblable < len_:
 662                print(INDENT + "; rest of chunk couldn't be disassembled")
 663
 664            # print the rest as bytes
 665            print_data_bytes_and_disassembly(chunk[bytesDisassemblable:])
 666
 667        else:
 668            # unread chunk; never disassemble as it would generate instructions
 669            # that some assemblers would try to optimize (e.g. "lda $00ff" ->
 670            # "lda $ff")
 671
 672            if omitUnreadChunks:
 673                print(INDENT + ".advance ^+{:d}".format(len_))
 674            else:
 675                print(INDENT + "; unread")
 676                print_data_bytes_and_disassembly(PRGROM[addr : addr + len_])
 677
 678        print()
 679
 680def str_to_ASCII(str_):
 681    return str_.encode("ascii", errors = "backslashreplace").decode("ascii")
 682
 683def main():
 684    if len(sys.argv) != 4:
 685        exit(HELP_TEXT)
 686
 687    (source, CDL, omitUnreadChunks) = sys.argv[1:]
 688
 689    try:
 690        omitUnreadChunks = int(omitUnreadChunks)
 691        if omitUnreadChunks not in (0, 1):
 692            raise ValueError
 693    except ValueError:
 694        exit("Invalid OmitUnreadBytes argument.")
 695
 696    # read iNES file
 697    try:
 698        with open(source, "rb") as hnd:
 699            sourceSize = hnd.seek(0, 2)
 700
 701            # get header info
 702            header = read_iNES_header(hnd)
 703
 704            # validate game type
 705            if not 1 <= header["prg"] <= 2:
 706                exit("Game must have 16 or 32 kB of PRG-ROM.")
 707
 708            # read PRG-ROM
 709            hnd.seek(16 + header["trainer"] * 512)
 710            PRGROM = hnd.read(header["prg"] * 16384)
 711    except FileNotFoundError:
 712        exit("iNES file not found.")
 713    except PermissionError:
 714        exit("iNES file permission denied.")
 715    except OSError:
 716        exit("Error reading iNES file.")
 717
 718    # read CDL file
 719    try:
 720        with open(CDL, "rb") as hnd:
 721            if hnd.seek(0, 2) != sourceSize - 16:
 722                exit("The .cdl file size doesn't match the .nes file size.")
 723
 724            CDLChunks = CDL_to_chunks(hnd, header["prg"] * 16384)
 725    except FileNotFoundError:
 726        exit("CDL file not found.")
 727    except PermissionError:
 728        exit("CDL file permission denied.")
 729    except OSError:
 730        exit("Error reading CDL file.")
 731
 732    # print names of source files
 733    print(INDENT + '; "{:s}" disassembled with "{:s}"'.format(
 734        str_to_ASCII(os.path.basename(source)),
 735        str_to_ASCII(os.path.basename(CDL))
 736    ))
 737    print()
 738
 739    # print NES register definitions
 740    print(INDENT + "; NES registers")
 741    maxRegNameLen = max(len(NES_REGS[reg]) for reg in NES_REGS)
 742    format_ = INDENT + ".alias {:" + str(maxRegNameLen) + "s} ${:04x}"
 743    for reg in sorted(NES_REGS):
 744        print(format_.format(NES_REGS[reg], reg))
 745    print()
 746
 747    # print iNES header disassembly
 748    print(INDENT + "; iNES header")
 749    disassemble_header(header)
 750    print()
 751
 752    # determine PRG-ROM origin address
 753    PRGOrg = (0xC000 if header["prg"] == 1 else 0x8000)
 754
 755    # create a list of label addresses by collecting addresses from operands
 756    # and discarding those that fall e.g. inside instructions
 757    labels = sorted(
 758        collect_labels(PRGROM, PRGOrg, CDLChunks) &
 759        collect_valid_labels(PRGROM, PRGOrg, CDLChunks)
 760    )
 761
 762    # get number of unread chunks and their total size
 763    unreadChunks = sum(1 for chunk in CDLChunks if chunk[2] == CDL_UNREAD)
 764    unreadBytes = sum(
 765        chunk[1] for chunk in CDLChunks if chunk[2] == CDL_UNREAD
 766    )
 767
 768    print(INDENT + "; PRG-ROM")
 769    print()
 770    for line in PRG_ROM_INTRO_LINES:
 771        print(INDENT + line)
 772    print()
 773    unreadPercentage = unreadBytes / (header["prg"] * 16384) * 100
 774    print(
 775        "{:s}; {:d} unread byte(s) ({:.2f}% of PRG-ROM) in {:d} chunk(s)"
 776        .format(INDENT, unreadBytes, unreadPercentage, unreadChunks)
 777    )
 778    print()
 779    print(INDENT + ".org ${:04x}".format(PRGOrg))
 780    print()
 781
 782    # the real disassembly
 783    disassemble_PRG_ROM(PRGROM, PRGOrg, CDLChunks, labels, omitUnreadChunks)
 784
 785    print(INDENT + "; Interrupt vectors")
 786    print()
 787    for i in range(3):
 788        vector = PRGROM[-6+i*2] + PRGROM[-5+i*2] * 0x100
 789        print("{:s}.word {:s}  ; {:s}".format(
 790            INDENT, create_label_reference(vector, labels), VECTOR_COMMENTS[i]
 791        ))
 792
 793    if header["chr"] > 0:
 794        print()
 795        print(INDENT + "; CHR-ROM")
 796        print()
 797        print(INDENT + ".org $0000")
 798        print(INDENT + '.incbin "CHR_ROM_FILE_HERE"')
 799        print(INDENT + ".advance ${:04x}".format(header["chr"] * 8192))
 800
 801if __name__ == "__main__":
 802    main()