mirror of
https://github.com/pkali/scorch_src.git
synced 2026-05-20 22:34:21 +02:00
5 bits workinggit status!
This commit is contained in:
@@ -0,0 +1,224 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Pack Scorch talk texts into 5-bit stream.
|
||||
|
||||
Reads artwork/talk.asm and generates artwork/talk_packed.asm.
|
||||
|
||||
Design goals:
|
||||
- Keep the original artwork/talk.asm as editable source of strings.
|
||||
- Generate a MADS-friendly .asm include with:
|
||||
- .proc talk (namespace-compatible)
|
||||
- talk5_alphabet (32 chars)
|
||||
- talk5_data: records of [len][packed bytes...]
|
||||
- constants (NumberOfOffensiveTexts, etc.) copied verbatim
|
||||
- hoverFull/hoverEmpty blocks copied verbatim (uncompressed)
|
||||
|
||||
Bit packing:
|
||||
- 5-bit codes are packed LSB-first.
|
||||
- For each string record:
|
||||
- 1 byte length (0..63)
|
||||
- packed bytes little-endian (first char in bits 0..4)
|
||||
|
||||
The decoder in 6502 should read 5-bit codes from the low bits.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Tuple
|
||||
|
||||
|
||||
# 32-symbol alphabet.
|
||||
# Note: We intentionally omit 'X' to make room for punctuation.
|
||||
# Order must match the decoder table.
|
||||
ALPHABET = " ABCDEFGHIJKLMNOPQRSTUVWYZ'!,-.?" # length must be 32
|
||||
|
||||
|
||||
_DTA_STR_RE = re.compile(r"^\s*dta\s+d\"(.*?)\"\^\s*(?:;.*)?$")
|
||||
|
||||
|
||||
def _iter_lines(path: Path) -> List[str]:
|
||||
return path.read_text(encoding="utf-8", errors="replace").splitlines()
|
||||
|
||||
|
||||
def _find_section(lines: List[str], start_pat: re.Pattern[str], end_pat: re.Pattern[str]) -> Tuple[int, int]:
|
||||
start_idx = None
|
||||
for i, line in enumerate(lines):
|
||||
if start_pat.search(line):
|
||||
start_idx = i
|
||||
break
|
||||
if start_idx is None:
|
||||
raise ValueError(f"Start pattern not found: {start_pat.pattern}")
|
||||
|
||||
for j in range(start_idx + 1, len(lines)):
|
||||
if end_pat.search(lines[j]):
|
||||
return start_idx, j
|
||||
raise ValueError(f"End pattern not found: {end_pat.pattern}")
|
||||
|
||||
|
||||
def extract_talk_strings(lines: List[str]) -> List[str]:
|
||||
# Only pack strings inside `.proc talk` up to the `LEND` marker.
|
||||
proc_start, _ = _find_section(lines, re.compile(r"^\s*\.proc\s+talk\b"), re.compile(r"^\s*\.endp\b"))
|
||||
|
||||
lend_idx = None
|
||||
for i in range(proc_start, len(lines)):
|
||||
if re.match(r"^\s*LEND\b", lines[i]):
|
||||
lend_idx = i
|
||||
break
|
||||
if lend_idx is None:
|
||||
raise ValueError("LEND marker not found inside .proc talk")
|
||||
|
||||
strings: List[str] = []
|
||||
for line in lines[proc_start:lend_idx]:
|
||||
m = _DTA_STR_RE.match(line)
|
||||
if m:
|
||||
strings.append(m.group(1))
|
||||
|
||||
if not strings:
|
||||
raise ValueError("No talk strings found to pack")
|
||||
|
||||
return strings
|
||||
|
||||
|
||||
def extract_constants_block(lines: List[str]) -> List[str]:
|
||||
# Copy constant definitions from after LEND up to `.endp` (inclusive of constants, exclusive of .endp).
|
||||
proc_start, proc_end = _find_section(lines, re.compile(r"^\s*\.proc\s+talk\b"), re.compile(r"^\s*\.endp\b"))
|
||||
|
||||
lend_idx = None
|
||||
for i in range(proc_start, proc_end + 1):
|
||||
if re.match(r"^\s*LEND\b", lines[i]):
|
||||
lend_idx = i
|
||||
break
|
||||
if lend_idx is None:
|
||||
raise ValueError("LEND marker not found inside .proc talk")
|
||||
|
||||
# Keep from LEND line through the line before `.endp`.
|
||||
return lines[lend_idx:proc_end]
|
||||
|
||||
|
||||
def extract_tail_after_talk_proc(lines: List[str]) -> List[str]:
|
||||
# Copy everything after `.endp` for talk proc. This includes hoverFull/hoverEmpty.
|
||||
_, proc_end = _find_section(lines, re.compile(r"^\s*\.proc\s+talk\b"), re.compile(r"^\s*\.endp\b"))
|
||||
return lines[proc_end + 1 :]
|
||||
|
||||
|
||||
def validate_alphabet() -> None:
|
||||
if len(ALPHABET) != 32:
|
||||
raise ValueError(f"ALPHABET must be 32 chars, got {len(ALPHABET)}")
|
||||
if len(set(ALPHABET)) != len(ALPHABET):
|
||||
raise ValueError("ALPHABET has duplicate characters")
|
||||
|
||||
|
||||
def pack_string_5bit(s: str, mapping: dict[str, int]) -> bytes:
|
||||
if len(s) > 63:
|
||||
raise ValueError(f"String too long ({len(s)}): {s!r}")
|
||||
|
||||
out = bytearray()
|
||||
out.append(len(s) & 0xFF)
|
||||
|
||||
bitbuf = 0
|
||||
bitcount = 0
|
||||
|
||||
for ch in s:
|
||||
try:
|
||||
code = mapping[ch]
|
||||
except KeyError as e:
|
||||
raise ValueError(f"Character {ch!r} not in alphabet") from e
|
||||
|
||||
bitbuf |= (code & 0x1F) << bitcount
|
||||
bitcount += 5
|
||||
|
||||
while bitcount >= 8:
|
||||
out.append(bitbuf & 0xFF)
|
||||
bitbuf >>= 8
|
||||
bitcount -= 8
|
||||
|
||||
if bitcount:
|
||||
out.append(bitbuf & 0xFF)
|
||||
|
||||
return bytes(out)
|
||||
|
||||
|
||||
def format_dta_bytes(data: bytes, indent: str = " ", per_line: int = 16) -> List[str]:
|
||||
lines: List[str] = []
|
||||
for i in range(0, len(data), per_line):
|
||||
chunk = data[i : i + per_line]
|
||||
nums = ",".join(f"${b:02x}" for b in chunk)
|
||||
lines.append(f"{indent}dta b({nums})")
|
||||
return lines
|
||||
|
||||
|
||||
def generate_output(
|
||||
source_path: Path,
|
||||
strings: List[str],
|
||||
constants_block: List[str],
|
||||
tail_lines: List[str],
|
||||
) -> str:
|
||||
mapping = {ch: i for i, ch in enumerate(ALPHABET)}
|
||||
|
||||
packed_records: List[bytes] = [pack_string_5bit(s, mapping) for s in strings]
|
||||
|
||||
out_lines: List[str] = []
|
||||
out_lines.append("; AUTO-GENERATED FILE - DO NOT EDIT")
|
||||
out_lines.append(f"; Generated by {source_path.name} -> pack_talk5.py")
|
||||
out_lines.append("; Source: artwork/talk.asm")
|
||||
out_lines.append("")
|
||||
|
||||
out_lines.append(".proc talk")
|
||||
out_lines.append("; 5-bit packed talk strings (len + packed bytes)")
|
||||
out_lines.append(f"talk5_alphabet dta d\"{ALPHABET}\"")
|
||||
out_lines.append("talk5_data")
|
||||
|
||||
for rec in packed_records:
|
||||
out_lines.extend(format_dta_bytes(rec))
|
||||
|
||||
out_lines.append(";")
|
||||
out_lines.append("; Constants copied from source")
|
||||
out_lines.extend(constants_block)
|
||||
out_lines.append(".endp")
|
||||
|
||||
if tail_lines:
|
||||
out_lines.append("")
|
||||
out_lines.append("; Tail copied from source (uncompressed)")
|
||||
out_lines.extend(tail_lines)
|
||||
|
||||
out_lines.append("")
|
||||
return "\n".join(out_lines)
|
||||
|
||||
|
||||
def main(argv: List[str] | None = None) -> int:
|
||||
parser = argparse.ArgumentParser(description="Pack artwork/talk.asm into a 5-bit blob for MADS.")
|
||||
parser.add_argument(
|
||||
"--src",
|
||||
default="artwork/talk.asm",
|
||||
help="Path to source talk.asm (default: artwork/talk.asm)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--out",
|
||||
default="artwork/talk_packed.asm",
|
||||
help="Path to output .asm include (default: artwork/talk_packed.asm)",
|
||||
)
|
||||
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
validate_alphabet()
|
||||
|
||||
src_path = Path(args.src)
|
||||
out_path = Path(args.out)
|
||||
|
||||
lines = _iter_lines(src_path)
|
||||
strings = extract_talk_strings(lines)
|
||||
constants_block = extract_constants_block(lines)
|
||||
tail_lines = extract_tail_after_talk_proc(lines)
|
||||
|
||||
content = generate_output(src_path, strings, constants_block, tail_lines)
|
||||
|
||||
out_path.write_text(content, encoding="utf-8")
|
||||
print(f"Wrote {out_path} ({len(content.encode('utf-8'))} bytes text)")
|
||||
print(f"Packed {len(strings)} strings")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
Reference in New Issue
Block a user