~quf/tocs

34c71793be14532adc59ee4ea772becc3e6ab4a5 — Lukas Himbert 2 months ago dfd8ad9
roundtrip reverie battle scripts
A tocs/src/aldat/deser.rs => tocs/src/aldat/deser.rs +238 -0
@@ 0,0 1,238 @@
use super::{ActionTableEntry, AlDat, AlDatEntry, AlgoTableEntry};

use crate::io::MemReader;

/// Checks the given condition and returns `None` from the enclosing function if it is false.
///
/// The macro expands to an early `return None;`, so it can only be used inside a function
/// (or closure) whose return type is `Option<_>`.
macro_rules! ensure {
    ($cond:expr) => {
        // the parentheses keep the negation applied to the whole condition
        if !($cond) {
            return None;
        }
    };
}

/// Strips the trailing padding from the raw bytes of a table.
///
/// The padding is a single `1` byte followed by any number of `0` bytes. Returns the bytes in
/// front of the `1` marker, or `None` if the slice is empty, contains only zeros, or its last
/// non-zero byte is not `1`.
fn remove_end_padding_from_entry_data(padded: &[u8]) -> Option<&[u8]> {
    // Skip the trailing zeros; whatever byte comes before them has to be the padding marker.
    let marker_pos = padded.iter().rposition(|&byte| byte != 0)?;
    match padded[marker_pos] {
        1 => Some(&padded[..marker_pos]),
        _ => None,
    }
}

/// Parses a script file from its raw bytes.
///
/// Layout as read here (all integers are little endian):
/// - two `u32` magic values, both `0x20`
/// - `u32` offset of the table list
/// - `u32` `n1`, which must equal `4 * n_tables`
/// - `u32` `n2`, which must equal `table_list_offset + n1`
/// - `u32` number of tables
/// - `u32` offset of the end of the table names
/// - four bytes of unknown meaning, followed by the script name
/// - at the table list offset: one `u32` data offset per table (each a multiple of 4),
///   followed by one `u16` name offset per table
///
/// Every table is currently returned as [`AlDatEntry::Generic`]; its data runs from its own
/// offset up to the offset of the next table (or the end of the file) with the trailing
/// `1, 0, 0, ...` padding removed.
///
/// Returns `None` if the data is truncated or violates any of the checks above. Malformed
/// input never causes a panic.
pub fn parse(data: &[u8]) -> Option<AlDat> {
    let mut reader = MemReader::new(data);

    // check magic bytes
    let a = reader.read_u32_le()?;
    let b = reader.read_u32_le()?;

    ensure!(a == 0x20);
    ensure!(b == 0x20);

    // number of tables and a bunch of redundant values
    let table_list_offset = reader.read_u32_le()?;
    let n1 = reader.read_u32_le()?;
    let n2 = reader.read_u32_le()?;
    let n_tables_u32 = reader.read_u32_le()?;
    let n_tables = usize::try_from(n_tables_u32).ok()?;
    let table_names_end_offset = reader.read_u32_le()?;

    // these values come straight from the file, so the arithmetic must not overflow
    ensure!(n_tables_u32.checked_mul(4) == Some(n1));
    ensure!(table_list_offset.checked_add(n1) == Some(n2));

    // Each table takes 6 bytes in the table list (u32 data offset + u16 name offset). Rejecting
    // impossible counts up front keeps the allocations below bounded by the input size.
    ensure!(n_tables <= data.len() / 6);

    // read more
    let unk2 = reader.read_chunk::<4>()?;
    let script_name = reader.read_c_ascii_str().ok()?;

    // read offsets of table names and data
    let table_list_offset = usize::try_from(table_list_offset).ok()?;
    // `set_offset` asserts that the offset is in bounds, so check it here first
    ensure!(table_list_offset <= data.len());
    reader.set_offset(table_list_offset);

    let mut table_data_offsets = std::vec::Vec::with_capacity(n_tables);
    for _ in 0..n_tables {
        let off = reader.read_u32_le()?;
        ensure!(off % 4 == 0);
        table_data_offsets.push(usize::try_from(off).ok()?);
    }

    let mut table_name_offsets = std::vec::Vec::with_capacity(n_tables);
    for _ in 0..n_tables {
        table_name_offsets.push(reader.read_u16_le()?);
    }

    // read table names and contents, record alignments
    let mut tables = std::vec::Vec::with_capacity(n_tables);
    let mut alignment = std::collections::BTreeMap::new();
    for i in 0..n_tables {
        let name_offset = usize::from(table_name_offsets[i]);
        // `clone_at_offset` asserts that the offset is in bounds as well
        ensure!(name_offset <= data.len());
        let name = reader.clone_at_offset(name_offset).read_c_ascii_str().ok()?;

        // we assume the table data is always sorted. it seems to work out.
        let data_start = table_data_offsets[i];
        // Offset 0 is where the magic bytes live, so no table can start there. Rejecting it also
        // keeps the alignment computation below well-defined.
        ensure!(data_start != 0);
        let data_end = table_data_offsets.get(i + 1).copied().unwrap_or(data.len());
        let table_data_including_terminator = data.get(data_start..data_end)?;
        let table_data = remove_end_padding_from_entry_data(table_data_including_terminator)?;

        // TODO: decode "ActionTable" and "AlgoTable" into their typed variants once the typed
        // readers are wired up; until then every table is kept as raw bytes.
        let table = AlDatEntry::Generic {
            name: name.to_owned(),
            data: table_data.to_owned(),
        };

        // tables sharing a name keep the smallest alignment observed
        let align = largest_power_of_two_that_divides(data_start);
        alignment.entry(name.to_owned()).and_modify(|a| *a = std::cmp::min(*a, align)).or_insert(align);

        tables.push(table);
    }

    // check that entry names are densely packed
    if let Some(start_offset) = table_name_offsets.first().copied() {
        let combined_length_of_entry_names = tables.iter().map(|ent| ent.name_len_with_padding()).sum::<usize>();
        let expected_end = usize::from(start_offset).checked_add(combined_length_of_entry_names)?;
        ensure!(usize::try_from(table_names_end_offset).ok()? == expected_end);
    }

    Some(AlDat {
        unk2,
        alignment,
        name: script_name.to_owned(),
        entries: tables,
    })
}

/// Returns the largest power of two that divides `n`.
///
/// Every power of two divides zero, so for `n == 0` the largest power of two representable in
/// a `usize` is returned. A plain `1 << n.trailing_zeros()` would overflow the shift in that
/// case, because `0usize.trailing_zeros()` equals `usize::BITS`.
fn largest_power_of_two_that_divides(n: usize) -> usize {
    const LARGEST_POWER_OF_TWO: usize = 1 << (usize::BITS - 1);
    1usize.checked_shl(n.trailing_zeros()).unwrap_or(LARGEST_POWER_OF_TWO)
}

/// Decodes the records of an `ActionTable`.
///
/// Records are read back to back until a record id of `1` shows up; at that point the
/// remaining bytes (`reader.rest()`) must be exactly `[0, 0]`. Returns `None` if `data` ends
/// prematurely, if the terminator check fails, or if one of the embedded strings cannot be
/// read.
///
/// NOTE(review): `parse` strips the trailing `1, 0, 0, ...` padding before it would hand table
/// data to a typed reader, while this function expects the terminator to still be present.
/// Confirm which side should change before wiring this up.
pub fn read_action_table_entries(data: &[u8]) -> Option<std::vec::Vec<ActionTableEntry>> {
    let mut reader = MemReader::new(data);
    // 212 is presumably the on-disk size of one record; it is only used as a capacity hint
    let mut entries = std::vec::Vec::with_capacity(data.len() / 212);

    loop {
        let id = reader.read_i16_le()?;
        if id == 1 {
            // terminator reached: nothing but two zero bytes may follow
            ensure!(reader.rest() == &[0, 0]);
            return Some(entries);
        }

        // The field initializers run top to bottom, which is exactly the order in which the
        // values are laid out in the record.
        let entry = ActionTableEntry {
            id,
            unk1: reader.read_u16_le()?,
            unk2: reader.read_chunk::<4>()?,
            unk3: reader.read_f32_le()?,
            unk4: reader.read_f32_le()?,
            unk5: reader.read_f32_le()?,
            unk10: reader.read_chunk::<4>()?,
            unk11: reader.read_u16_le()?,
            unk12: reader.read_u16_le()?,
            unk13: reader.read_u16_le()?,
            unk14: reader.read_u16_le()?,
            unk15: reader.read_u16_le()?,
            unk16: reader.read_u16_le()?,
            unk17: {
                let mut values = [0i32; 15];
                for slot in values.iter_mut() {
                    *slot = reader.read_i32_le()?;
                }
                values
            },
            unk18: reader.read_u16_le()?,
            unk19: reader.read_u16_le()?,
            flags: reader.read_padded_ascii_str::<16>().ok()?.to_owned(),
            animation: reader.read_padded_utf8_str::<32>().ok()?.to_owned(),
            name: reader.read_padded_utf8_str::<64>().ok()?.to_owned(),
        };
        entries.push(entry);
    }
}

/// Decodes the records of an `AlgoTable`.
///
/// A record is read for as long as more than 32 bytes remain. What is left afterwards must be
/// the final padding: a single `1` byte followed only by zero bytes. Returns `None` if a read
/// fails or the padding check does not hold.
pub fn read_algo_table_entries(data: &[u8]) -> Option<std::vec::Vec<AlgoTableEntry>> {
    let mut reader = MemReader::new(data);
    let mut entries = std::vec::Vec::with_capacity(data.len() / 32);

    while reader.offset() + 32 < data.len() {
        // The field initializers run top to bottom, matching the order of the bytes on disk:
        // i16 id, six single bytes, six u32 parameters.
        let entry = AlgoTableEntry {
            action_or_magic_id: reader.read_i16_le()?,
            condition: reader.read_byte()?,
            chance: reader.read_byte()?,
            use_limit: reader.read_byte()?,
            target_type: reader.read_byte()?,
            unk_byte_1: reader.read_byte()?,
            unk_byte_2: reader.read_byte()?,
            params: {
                let mut values = [0u32; 6];
                for slot in &mut values {
                    *slot = reader.read_u32_le()?;
                }
                values
            },
        };
        entries.push(entry);
    }

    // check final padding
    let marker = reader.read_byte()?;
    ensure!(marker == 1);
    let only_zeros_left = reader.rest().iter().all(|&byte| byte == 0);
    ensure!(only_zeros_left);

    Some(entries)
}

A tocs/src/aldat/mod.rs => tocs/src/aldat/mod.rs +84 -0
@@ 0,0 1,84 @@
mod deser;
mod ser;

#[cfg(all(test, feature = "private-tests"))]
mod test;

pub use deser::parse;
pub use ser::serialize;

use crate::io::{PaddedAsciiStr, PaddedAsciiString, PaddedUtf8String};

/// In-memory form of a script file, as produced by `parse` and consumed by `serialize`.
#[derive(Debug)]
pub struct AlDat {
    /// Four bytes of unknown meaning stored in front of the script name; kept verbatim.
    unk2: [u8; 4],
    /// Name of the script as stored in the file header.
    name: PaddedAsciiString<1>,
    /// Alignment per table name: the largest power of two dividing the table's data offset
    /// (the smallest such value if a name occurs more than once). The serializer pads the
    /// data in front of a table according to this value.
    alignment: std::collections::BTreeMap<PaddedAsciiString<1>, usize>, // TODO: builtin known alignments
    /// The tables in the order in which they appear in the file.
    entries: std::vec::Vec<AlDatEntry>,                                 // TODO: BTreeSet? (with name as key)
}

/// A single table of an [`AlDat`] file.
#[derive(Debug)]
pub enum AlDatEntry {
    /// Decoded records of the table named `ActionTable`.
    ActionTable(std::vec::Vec<ActionTableEntry>),
    /// Decoded records of the table named `AlgoTable`.
    AlgoTable(std::vec::Vec<AlgoTableEntry>),
    /// Any other table: its name plus its raw data with the trailing padding removed.
    Generic { name: PaddedAsciiString<1>, data: std::vec::Vec<u8> },
}

impl AlDatEntry {
    /// Number of bytes the table's name occupies in the file, padding included.
    ///
    /// The typed variants use fixed lengths (12 for `ActionTable`, 10 for `AlgoTable`,
    /// presumably the name plus its terminator); generic tables report the length of their
    /// stored name.
    pub fn name_len_with_padding(&self) -> usize {
        match self {
            Self::Generic { name, .. } => name.len_with_padding().get(),
            Self::ActionTable(_) => 12,
            Self::AlgoTable(_) => 10,
        }
    }

    /// The name under which the table is stored in the file.
    pub fn name(&self) -> PaddedAsciiStr<'_, 1> {
        let fixed_name = match self {
            Self::Generic { name, .. } => return name.as_padded_ascii_str(),
            Self::ActionTable(_) => "ActionTable",
            Self::AlgoTable(_) => "AlgoTable",
        };
        // TODO: constify PaddedAsciiStr
        PaddedAsciiStr::new(fixed_name).unwrap()
    }
}

/// One record of an `ActionTable`.
///
/// The fields are declared in the order in which `read_action_table_entries` reads them.
/// The meaning of the `unk*` fields is unknown.
#[derive(Debug)]
pub struct ActionTableEntry {
    /// Record id. The reader treats an id of `1` as the end-of-table marker, so stored
    /// records never carry that value.
    pub id: i16,
    pub unk1: u16,
    pub unk2: [u8; 4],
    pub unk3: f32,
    pub unk4: f32,
    pub unk5: f32,

    pub unk10: [u8; 4],

    pub unk11: u16,
    pub unk12: u16,
    pub unk13: u16,
    pub unk14: u16,
    pub unk15: u16,
    pub unk16: u16,

    pub unk17: [i32; 15],

    pub unk18: u16,
    pub unk19: u16,

    /// ASCII string read with a padding parameter of 16.
    pub flags: PaddedAsciiString<16>,
    /// UTF-8 string read with a padding parameter of 32.
    pub animation: PaddedUtf8String<32>,
    /// UTF-8 string read with a padding parameter of 64.
    pub name: PaddedUtf8String<64>,
}

/// One record of an `AlgoTable`.
///
/// The fields are declared in the order in which `read_algo_table_entries` reads them:
/// a little-endian `i16`, six single bytes and six little-endian `u32` values (32 bytes).
#[derive(Debug)]
pub struct AlgoTableEntry {
    pub action_or_magic_id: i16,
    pub condition: u8,
    pub chance: u8,
    pub use_limit: u8,
    pub target_type: u8,
    /// Meaning unknown.
    pub unk_byte_1: u8,
    /// Meaning unknown.
    pub unk_byte_2: u8,
    pub params: [u32; 6],
}

A tocs/src/aldat/ser.rs => tocs/src/aldat/ser.rs +156 -0
@@ 0,0 1,156 @@
use super::{ActionTableEntry, AlDat, AlDatEntry, AlgoTableEntry};
use crate::io::MemWriter;

pub fn serialize(aldat: &AlDat) -> Option<std::vec::Vec<u8>> {
    let mut writer = MemWriter::with_capacity(1 << 16);

    // magic header or something like that
    writer.write_data(&[0x20, 0, 0, 0, 0x20, 0, 0, 0]);

    // offset of the table list - the name is padded such that it is a multiple of 4
    let padding_before_table_list = {
        // for no discernible reason, the padding is at least 3 and the last pad byte is 0xff
        let n = aldat.name.len_with_padding().get() + 1; // + 1 for the 0xff byte which is there for some reason???
        let mut p = 3;
        // TODO: make this better
        while ((n + p) % 4) != 0 {
            p += 1;
        }
        p
    };

    let table_list_offset: usize = 33 + aldat.name.len_with_padding().get() + padding_before_table_list;
    writer.write_u32_le(table_list_offset.try_into().unwrap());

    // redundant stuff
    let n1 = 4 * aldat.entries.len();
    writer.write_u32_le(n1.try_into().unwrap());
    writer.write_u32_le((table_list_offset + n1).try_into().unwrap());

    // number of tables
    writer.write_u32_le(aldat.entries.len().try_into().unwrap());

    // calculate table data offsets and name offsets
    let table_names_offset: usize = table_list_offset + 6 * aldat.entries.len();
    let table_names_end_offset = table_names_offset + aldat.entries.iter().map(|ent| ent.name_len_with_padding()).sum::<usize>();
    let table_data_padding = if table_names_end_offset % 4 == 0 { 0 } else { 4 - table_names_end_offset % 4 };
    let table_data_offset = table_names_end_offset + table_data_padding;
    writer.write_u32_le(table_names_end_offset.try_into().unwrap());

    // unknown
    writer.write_data(aldat.unk2.as_slice());

    // script name
    writer.write_padded_ascii_str(aldat.name.as_padded_ascii_str());

    // padding
    if padding_before_table_list != 0 {
        let buf = [0u8; 8];
        writer.write_data(&buf[..padding_before_table_list]);
    }
    writer.write_u8(0xff);

    {
        // table data offsets
        let mut off = table_data_offset;
        for (i, entry) in aldat.entries.iter().enumerate() {
            writer.write_u32_le(off.try_into().ok()?);
            off += entry.data_len();

            if let Some(next_entry) = aldat.entries.get(i + 1) {
                let pad_target = aldat.alignment.get(&next_entry.name().to_owned()).copied()?; // TODO: datastructure that doesn't require cloning
                let padding_len = if off % pad_target == 0 { pad_target } else { pad_target - off % pad_target };
                off += padding_len;
            } else {
                off += 1
            }
        }
    }

    {
        // table name offsets
        let mut off = table_names_offset;
        for ent in aldat.entries.iter() {
            writer.write_u16_le(off.try_into().ok()?);
            off += ent.name_len_with_padding();
        }
    }

    // table names
    for ent in aldat.entries.iter() {
        writer.write_padded_ascii_str(ent.name());
    }

    // pad start of data blocks to multiple of 4
    {
        let buf = [0u8; 4];
        writer.write_data(&buf[..table_data_padding]);
    }

    // write table data
    for (i, entry) in aldat.entries.iter().enumerate() {
        entry.serialize_data(&mut writer);
        // padding
        if let Some(next_entry) = aldat.entries.get(i + 1) {
            let pad_target = aldat.alignment.get(&next_entry.name().to_owned()).copied()?; // TODO: datastructure that doesn't require copying
            let n = writer.len();
            let padding_len = pad_target - n % pad_target;
            assert!(padding_len > 0);
            writer.write_u8(1);
            for i in 1..padding_len {
                writer.write_u8(0);
            }
        } else {
            // last entry
            writer.write_u8(1);
        }
    }

    // TODO: final padding
    if aldat.entries.is_empty() {}

    Some(writer.into_inner())
}

impl AlDatEntry {
    /// Appends the table's data (without any trailing padding) to `writer`.
    ///
    /// Typed tables write their records one after the other and stop at the first record
    /// that fails to serialize; generic tables write their raw bytes unchanged.
    fn serialize_data(&self, writer: &mut MemWriter) -> Option<()> {
        match self {
            AlDatEntry::Generic { data, .. } => {
                writer.write_data(data.as_slice());
                Some(())
            }
            AlDatEntry::ActionTable(entries) => entries.iter().try_for_each(|entry| entry.serialize_data(writer)),
            AlDatEntry::AlgoTable(entries) => entries.iter().try_for_each(|entry| entry.serialize_data(writer)),
        }
    }
}

impl ActionTableEntry {
    /// Writes this record to `vec`.
    ///
    /// Not implemented yet: calling this panics via `todo!()`. An implementation has to write
    /// the fields in the order in which `read_action_table_entries` reads them.
    fn serialize_data(&self, vec: &mut MemWriter) -> Option<()> {
        todo!()
    }
}

impl AlgoTableEntry {
    /// Writes this record to `vec` in the layout that `read_algo_table_entries` reads: the
    /// little-endian `i16` id, the six single-byte fields in declaration order and the six
    /// little-endian `u32` parameters (32 bytes in total).
    ///
    /// Writing to the in-memory writer cannot fail, so this always returns `Some(())`.
    fn serialize_data(&self, vec: &mut MemWriter) -> Option<()> {
        vec.write_i16_le(self.action_or_magic_id);

        let single_bytes = [
            self.condition,
            self.chance,
            self.use_limit,
            self.target_type,
            self.unk_byte_1,
            self.unk_byte_2,
        ];
        for byte in single_bytes {
            vec.write_u8(byte);
        }

        for param in self.params {
            vec.write_u32_le(param);
        }

        Some(())
    }
}

impl AlDatEntry {
    /// Number of bytes `serialize_data` writes for this table (trailing padding excluded).
    ///
    /// Typed tables consist of fixed-size records, 212 bytes per `ActionTable` record and
    /// 32 bytes per `AlgoTable` record, so their length scales with the number of records.
    fn data_len(&self) -> usize {
        match self {
            AlDatEntry::ActionTable(entries) => 212 * entries.len(),
            AlDatEntry::AlgoTable(entries) => 32 * entries.len(),
            AlDatEntry::Generic { name: _, data } => data.len(),
        }
    }
}

A tocs/src/aldat/test.rs => tocs/src/aldat/test.rs +58 -0
@@ 0,0 1,58 @@
use super::{parse, serialize};

/// Panics unless `expected` and `actual` are identical.
///
/// The panic message names the first position at which the slices differ. A slice that ends
/// early shows up as `None` at that position.
fn compare_data(expected: &[u8], actual: &[u8]) {
    let longest = expected.len().max(actual.len());
    let first_difference = (0..longest).find(|&pos| expected.get(pos) != actual.get(pos));

    if let Some(i) = first_difference {
        let x1 = expected.get(i);
        let x2 = actual.get(i);
        panic!("results differ, first difference at {i} (0x{i:x}). expected {x1:?}, found {x2:?}");
    }
}

/// Parses every battle script of the private test data set, serializes it again and checks
/// that the result is byte-identical to the original file.
#[test]
fn test_battle_scripts_reverie() {
    let dir = "private-test-data/cs5/nisa-pc-1.0.6/scripts/battle/dat_en/";
    //let dir = "private-test-data/cs5/nisa-pc-1.0.6/scripts/scena/dat_en/";
    //let dir = "private-test-data/cs5/nisa-pc-1.0.6/scripts/ani/dat_en/";

    // The most recent roundtrip result is kept in the platform's temporary directory so that
    // a mismatch can be inspected by hand.
    let dump_path = std::env::temp_dir().join("foo.tmp");

    for dirent in std::fs::read_dir(dir).unwrap() {
        let path = dirent.unwrap().path();
        println!("{}", path.display());
        let data = std::fs::read(&path).unwrap();
        let aldat = parse(data.as_slice()).unwrap_or_else(|| panic!("failed to parse {}", path.display()));
        let roundtripped = serialize(&aldat).unwrap_or_else(|| panic!("failed to serialize {}", path.display()));
        std::fs::write(&dump_path, &roundtripped).unwrap();
        compare_data(&data, &roundtripped);
    }
}

// Scratch test. Its body is commented out at the moment, so it passes trivially.
// The disabled code walks the battle script directory and asserts that `parse` returns
// `None` for each file named in `skip`; it ends in a `todo!()`.
#[test]
fn test_tmp() {
    /*
    let dir = "private-test-data/cs5/nisa-pc-1.0.6/scripts/battle/dat_en/";
    let skip = [
        "btl1000.dat",
        "btl1001.dat",
        "btlwin.dat",
        "btl_00_01_02.dat",
        "btl_A1_11_01.dat",
        "btl_A4_20_01.dat",
        "btl_TU_00_00_00.dat",
        "btl_TU_00_02_00.dat",
        "btl_TU_00_03_00.dat",
        "btl_TU_A1_03_00.dat",
        "btl_TU_B1_06_00.dat",
        "btlsys.dat",
    ];
    for dirent in std::fs::read_dir(dir).unwrap() {
        let path = dirent.unwrap().path();
        if skip.iter().any(|f| path.ends_with(f)) {
            println!("{}", path.display());
            let data = std::fs::read(path).unwrap();
            assert!(parse(data.as_slice()).is_none());
        }
    }
    todo!()
    */
}

M tocs/src/io/mem_reader.rs => tocs/src/io/mem_reader.rs +6 -3
@@ 14,12 14,17 @@ impl<'a> MemReader<'a> {
        self.off
    }

    /// Moves the read position to `off`.
    ///
    /// # Panics
    ///
    /// Panics if `off` is greater than the length of the underlying data. An offset equal to
    /// the length (i.e. the end of the data) is allowed.
    #[allow(dead_code)]
    pub fn set_offset(&mut self, off: usize) {
        assert!(off <= self.data.len());
        self.off = off;
    }

    /// Returns a copy of this reader whose read position is `off`.
    ///
    /// The position of `self` is not changed, which is why a shared borrow is sufficient.
    ///
    /// # Panics
    ///
    /// Panics if `off` is greater than the length of the underlying data.
    pub fn clone_at_offset(&self, off: usize) -> Self {
        let mut result = self.clone();
        result.set_offset(off);
        result
    }

    /// Returns `true` if the read position equals the length of the underlying data.
    pub fn at_end(&self) -> bool {
        self.off == self.data.len()
    }


@@ 81,7 86,6 @@ impl<'a> MemReader<'a> {
        Ok(bytes)
    }

    #[allow(dead_code)]
    pub fn read_padded_ascii_str<const PADDING: usize>(&mut self) -> Result<super::PaddedAsciiStr<'a, PADDING>, super::PaddedStringReadError> {
        let (ascii, _rest) = super::PaddedAsciiStr::<'_, PADDING>::read_from_slice(self.rest())?;
        self.off += ascii.len_with_padding().get();


@@ 89,7 93,6 @@ impl<'a> MemReader<'a> {
        Ok(ascii)
    }

    #[allow(dead_code)]
    pub fn read_padded_utf8_str<const PADDING: usize>(&mut self) -> Result<super::PaddedUtf8Str<'a, PADDING>, super::PaddedStringReadError> {
        let (utf8, _rest) = super::PaddedUtf8Str::<'_, PADDING>::read_from_slice(self.rest())?;
        self.off += utf8.len_with_padding().get();

A tocs/src/io/mem_writer.rs => tocs/src/io/mem_writer.rs +65 -0
@@ 0,0 1,65 @@
use super::{PaddedAsciiStr, PaddedByteStr, PaddedUtf8Str};

/// Writer that appends everything it is given to a growable in-memory byte buffer.
pub struct MemWriter {
    /// The bytes written so far.
    inner: std::vec::Vec<u8>,
}

impl MemWriter {
    /// Creates an empty writer without reserving any capacity.
    pub fn new() -> Self {
        Self::with_capacity(0)
    }

    /// Creates an empty writer whose buffer has room for at least `cap` bytes.
    pub fn with_capacity(cap: usize) -> Self {
        let inner = std::vec::Vec::with_capacity(cap);
        Self { inner }
    }

    /// Number of bytes written so far.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Consumes the writer and hands out the written bytes.
    pub fn into_inner(self) -> std::vec::Vec<u8> {
        self.inner
    }
}

/// Generates a method `pub fn $f(&mut self, x: $t)` that appends the little-endian bytes of
/// `x` (obtained via `to_le_bytes`) to `self.inner`.
///
/// Has to be invoked inside an `impl` block of a type with an `inner` field that supports
/// `extend_from_slice`.
macro_rules! write_primitive {
    ($f:ident, $t:ty) => {
        #[inline(always)]
        pub fn $f(&mut self, x: $t) {
            self.inner.extend_from_slice(&x.to_le_bytes());
        }
    };
}

// Writers for primitive values, generated by `write_primitive!`. Each method appends the
// little-endian encoding of its argument to the buffer.
impl MemWriter {
    write_primitive! {write_u8, u8}
    write_primitive! {write_i8, i8}
    write_primitive! {write_u16_le, u16}
    write_primitive! {write_u32_le, u32}
    write_primitive! {write_i16_le, i16}
    write_primitive! {write_i32_le, i32}
    write_primitive! {write_f32_le, f32}
}

impl MemWriter {
    /// Appends the bytes of `s` unchanged.
    pub fn write_data(&mut self, s: &[u8]) {
        self.inner.extend_from_slice(s);
    }

    /// Appends a padded byte string in its on-disk form.
    pub fn write_padded_byte_str<const PADDING: usize>(&mut self, s: PaddedByteStr<'_, PADDING>) {
        s.write_to(&mut self.inner).expect("writing to a Vec should not fail");
    }

    /// Appends a padded ASCII string in its on-disk form.
    pub fn write_padded_ascii_str<const PADDING: usize>(&mut self, s: PaddedAsciiStr<'_, PADDING>) {
        s.write_to(&mut self.inner).expect("writing to a Vec should not fail");
    }

    /// Appends a padded UTF-8 string in its on-disk form.
    #[allow(dead_code)] // not used yet: the typed table serializers are not implemented
    pub fn write_padded_utf8_str<const PADDING: usize>(&mut self, s: PaddedUtf8Str<'_, PADDING>) {
        s.write_to(&mut self.inner).expect("writing to a Vec should not fail");
    }
}

M tocs/src/io/mod.rs => tocs/src/io/mod.rs +3 -1
@@ 8,10 8,12 @@ pub use bytestring::{PaddedByteStr, PaddedByteString};
pub use utf8::{PaddedUtf8Str, PaddedUtf8String};

mod mem_reader;
mod mem_writer;

pub use mem_reader::MemReader;
pub use mem_writer::MemWriter;

// TODO: std::io::Read reader; MemWriter; maybe std::io::Write writer?
// TODO: std::io::Read reader; maybe std::io::Write writer?

pub fn read_byte(f: impl std::io::Read) -> Result<u8, std::io::Error> {
    let a: [u8; 1] = read_bytes(f)?;

M tocs/src/lib.rs => tocs/src/lib.rs +2 -0
@@ 14,3 14,5 @@ pub mod pka;
pub mod pkg;

pub mod tbl;

pub mod aldat;