mirror of
https://github.com/Pecusx/scorch_src.git
synced 2026-05-20 22:33:43 +02:00
225 lines
6.8 KiB
Python
225 lines
6.8 KiB
Python
#!/usr/bin/env python3
|
|
"""Pack Scorch talk texts into 5-bit stream.
|
|
|
|
Reads artwork/talk.asm and generates artwork/talk_packed.asm.

Design goals:
- Keep the original artwork/talk.asm as the editable source of strings.
- Generate a MADS-friendly .asm include with:
    - .proc talk (namespace-compatible)
    - talk5_alphabet (32 chars)
    - talk5_data: records of [len][packed bytes...]
    - constants (NumberOfOffensiveTexts, etc.) copied verbatim
    - hoverFull/hoverEmpty blocks copied verbatim (uncompressed)

Bit packing:
- 5-bit codes are packed LSB-first.
- For each string record:
    - 1 byte length (0..63)
    - packed bytes little-endian (first char in bits 0..4)

The 6502 decoder should read 5-bit codes from the low bits.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Iterable, List, Tuple
|
|
|
|
|
|
# 32-symbol alphabet: a character's index in this string is its 5-bit code
# (index 0 is the space). The string is also emitted verbatim as
# `talk5_alphabet` in the generated include.
# Note: 'X' is intentionally omitted to make room for punctuation.
# The order must match the decoder table.
ALPHABET = " ABCDEFGHIJKLMNOPQRSTUVWYZ'!,-.?" # length must be 32
|
|
|
|
|
|
# Matches a source line of the form `dta d"TEXT"^`, optionally followed by a
# `;` comment. Group 1 captures TEXT (the characters between the quotes).
_DTA_STR_RE = re.compile(r"^\s*dta\s+d\"(.*?)\"\^\s*(?:;.*)?$")
|
|
|
|
|
|
def _iter_lines(path: Path) -> List[str]:
    """Read *path* as UTF-8 text and return its lines without line endings.

    Bytes that are not valid UTF-8 are replaced rather than raising.
    """
    text = path.read_text(encoding="utf-8", errors="replace")
    return text.splitlines()
|
|
|
|
|
|
def _find_section(lines: List[str], start_pat: re.Pattern[str], end_pat: re.Pattern[str]) -> Tuple[int, int]:
    """Locate a section delimited by two patterns.

    Returns ``(start, end)``: the index of the first line matching
    *start_pat*, and the index of the first line strictly after it that
    matches *end_pat*.

    Raises:
        ValueError: if either pattern cannot be found.
    """
    opening = next(
        (idx for idx, text in enumerate(lines) if start_pat.search(text)),
        None,
    )
    if opening is None:
        raise ValueError(f"Start pattern not found: {start_pat.pattern}")

    # The end marker is only looked for after the start line itself.
    closing = next(
        (idx for idx in range(opening + 1, len(lines)) if end_pat.search(lines[idx])),
        None,
    )
    if closing is None:
        raise ValueError(f"End pattern not found: {end_pat.pattern}")

    return opening, closing
|
|
|
|
|
|
def extract_talk_strings(lines: List[str]) -> List[str]:
    """Collect the talk strings to pack from the source lines.

    Only lines inside `.proc talk`, from the `.proc` line up to (but not
    including) the `LEND` marker, are considered. Each line matching
    ``_DTA_STR_RE`` contributes the text between its quotes.

    Raises:
        ValueError: if the `.proc talk` / `.endp` pair is missing, if no
            `LEND` marker exists between them, or if no strings are found.
    """
    proc_start, proc_end = _find_section(
        lines,
        re.compile(r"^\s*\.proc\s+talk\b"),
        re.compile(r"^\s*\.endp\b"),
    )

    # Stop the LEND search at `.endp`: a LEND that only appears after the
    # proc must not cause strings outside `.proc talk` to be packed. This
    # matches the bounded search in extract_constants_block.
    lend_re = re.compile(r"^\s*LEND\b")
    lend_idx = next(
        (i for i in range(proc_start, proc_end) if lend_re.match(lines[i])),
        None,
    )
    if lend_idx is None:
        raise ValueError("LEND marker not found inside .proc talk")

    strings: List[str] = [
        m.group(1)
        for m in map(_DTA_STR_RE.match, lines[proc_start:lend_idx])
        if m
    ]
    if not strings:
        raise ValueError("No talk strings found to pack")

    return strings
|
|
|
|
|
|
def extract_constants_block(lines: List[str]) -> List[str]:
    """Return the lines of `.proc talk` from the `LEND` line up to `.endp`.

    The `LEND` line itself is included; the `.endp` line is not.

    Raises:
        ValueError: if the `.proc talk` / `.endp` pair is missing or no
            `LEND` marker is found within it.
    """
    proc_start, proc_end = _find_section(
        lines,
        re.compile(r"^\s*\.proc\s+talk\b"),
        re.compile(r"^\s*\.endp\b"),
    )

    lend_marker = re.compile(r"^\s*LEND\b")
    for idx in range(proc_start, proc_end + 1):
        if lend_marker.match(lines[idx]):
            # Everything from LEND through the line before `.endp`.
            return lines[idx:proc_end]

    raise ValueError("LEND marker not found inside .proc talk")
|
|
|
|
|
|
def extract_tail_after_talk_proc(lines: List[str]) -> List[str]:
    """Return every line that follows the `.endp` closing `.proc talk`.

    Per the original note, this tail includes the hoverFull/hoverEmpty
    blocks, which are copied through unchanged.

    Raises:
        ValueError: if the `.proc talk` / `.endp` pair is missing.
    """
    talk_open = re.compile(r"^\s*\.proc\s+talk\b")
    talk_close = re.compile(r"^\s*\.endp\b")
    end_index = _find_section(lines, talk_open, talk_close)[1]
    return lines[end_index + 1 :]
|
|
|
|
|
|
def validate_alphabet() -> None:
    """Check that ALPHABET holds exactly 32 distinct characters.

    Raises:
        ValueError: if the length is not 32 or any character repeats.
    """
    size = len(ALPHABET)
    if size != 32:
        raise ValueError(f"ALPHABET must be 32 chars, got {size}")

    distinct = set(ALPHABET)
    if len(distinct) != size:
        raise ValueError("ALPHABET has duplicate characters")
|
|
|
|
|
|
def pack_string_5bit(s: str, mapping: dict[str, int]) -> bytes:
    """Encode *s* as one record: a length byte followed by packed 5-bit codes.

    Args:
        s: Text to encode; at most 63 characters.
        mapping: Character -> code table; only the low 5 bits of each code
            are used.

    Returns:
        ``len(s)`` as a single byte, then ``ceil(5 * len(s) / 8)`` payload
        bytes. Codes are packed LSB-first, so the first character occupies
        bits 0..4 of the first payload byte; unused high bits of the last
        byte are zero.

    Raises:
        ValueError: if *s* is longer than 63 characters or contains a
            character missing from *mapping*.
    """
    char_count = len(s)
    if char_count > 63:
        raise ValueError(f"String too long ({char_count}): {s!r}")

    # Build the whole bit stream as one integer: character k occupies
    # bits 5k..5k+4, so serialising little-endian yields LSB-first packing.
    stream = 0
    for position, ch in enumerate(s):
        try:
            code = mapping[ch]
        except KeyError as e:
            raise ValueError(f"Character {ch!r} not in alphabet") from e
        stream |= (code & 0x1F) << (5 * position)

    payload_size = (5 * char_count + 7) // 8
    return bytes([char_count & 0xFF]) + stream.to_bytes(payload_size, "little")
|
|
|
|
|
|
def format_dta_bytes(data: bytes, indent: str = " ", per_line: int = 16) -> List[str]:
    """Render *data* as `dta b(...)` source lines.

    Args:
        data: Bytes to render.
        indent: Prefix placed before each `dta` directive.
        per_line: Maximum number of bytes listed on one line.

    Returns:
        One line per chunk, each byte written as `$` plus two lowercase hex
        digits, comma-separated. Empty *data* yields an empty list.
    """
    return [
        indent + "dta b(" + ",".join("$%02x" % value for value in data[start : start + per_line]) + ")"
        for start in range(0, len(data), per_line)
    ]
|
|
|
|
|
|
def generate_output(
    source_path: Path,
    strings: List[str],
    constants_block: List[str],
    tail_lines: List[str],
) -> str:
    """Build the full text of the generated assembly include.

    Args:
        source_path: Path of the source file; only its file name is used,
            in the header comment.
        strings: Talk strings to pack, in output order.
        constants_block: Lines copied inside `.proc talk` after the data.
        tail_lines: Lines copied after `.endp`; the tail section is
            omitted when this is empty.

    Returns:
        The file content, newline-joined and ending with a newline.

    Raises:
        ValueError: propagated from pack_string_5bit for strings that are
            too long or contain characters outside ALPHABET.
    """
    code_of = dict(zip(ALPHABET, range(len(ALPHABET))))

    # Pack everything first so an unencodable string aborts before any
    # output is assembled.
    records: List[bytes] = [pack_string_5bit(text, code_of) for text in strings]

    rendered: List[str] = [
        "; AUTO-GENERATED FILE - DO NOT EDIT",
        f"; Generated by {source_path.name} -> pack_talk5.py",
        "; Source: artwork/talk.asm",
        "",
        ".proc talk",
        "; 5-bit packed talk strings (len + packed bytes)",
        f"talk5_alphabet dta d\"{ALPHABET}\"",
        "talk5_data",
    ]

    for record in records:
        rendered += format_dta_bytes(record)

    rendered += [";", "; Constants copied from source", *constants_block, ".endp"]

    if tail_lines:
        rendered += ["", "; Tail copied from source (uncompressed)", *tail_lines]

    # A final empty element makes the joined text end with a newline.
    rendered.append("")
    return "\n".join(rendered)
|
|
|
|
|
|
def main(argv: List[str] | None = None) -> int:
    """Command-line entry point: read the source file and write the packed include.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use the
            process arguments.

    Returns:
        0 on success. Failures surface as exceptions from the helpers.
    """
    cli = argparse.ArgumentParser(description="Pack artwork/talk.asm into a 5-bit blob for MADS.")
    cli.add_argument(
        "--src",
        default="artwork/talk.asm",
        help="Path to source talk.asm (default: artwork/talk.asm)",
    )
    cli.add_argument(
        "--out",
        default="artwork/talk_packed.asm",
        help="Path to output .asm include (default: artwork/talk_packed.asm)",
    )
    options = cli.parse_args(argv)

    # Fail early on a broken alphabet, before touching any files.
    validate_alphabet()

    source = Path(options.src)
    destination = Path(options.out)

    source_lines = _iter_lines(source)
    strings = extract_talk_strings(source_lines)
    constants_block = extract_constants_block(source_lines)
    tail_lines = extract_tail_after_talk_proc(source_lines)

    content = generate_output(source, strings, constants_block, tail_lines)
    destination.write_text(content, encoding="utf-8")

    encoded_size = len(content.encode("utf-8"))
    print(f"Wrote {destination} ({encoded_size} bytes text)")
    print(f"Packed {len(strings)} strings")
    return 0
|
|
|
|
|
|
# Script entry point: exit the process with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|