~quf/libcs3tio

1d04dc73d0f2d16e360226c3263ce9aeaeca68fa — Lukas Himbert 7 months ago f995057
fix errors in English and French tbls for testing
5 files changed, 200 insertions(+), 7 deletions(-)

M .builds/arch-clang.yml
M .builds/arch-gcc.yml
M compile.sh
A test/fixtbls.cpp
M test/roundtrip.cpp
M .builds/arch-clang.yml => .builds/arch-clang.yml +3 -0
@@ 18,6 18,9 @@ tasks:
      mkdir test-data
      cd test-data
      gpg --decrypt --passphrase-file "$HOME/data-v1.0.5-password" --batch --pinentry-mode loopback < ../.builds/data-v1.0.5.tar.xz.gpg | xzcat | tar xf -
  - fix-data: |
      cd libcs3tio
      build/test/fixtbls test-data/text/
  - test-roundtrip: |
      cd libcs3tio
      build/test/roundtrip test-data/text/

M .builds/arch-gcc.yml => .builds/arch-gcc.yml +3 -0
@@ 18,6 18,9 @@ tasks:
      mkdir test-data
      cd test-data
      gpg --decrypt --passphrase-file "$HOME/data-v1.0.5-password" --batch --pinentry-mode loopback < ../.builds/data-v1.0.5.tar.xz.gpg | xzcat | tar xf -
  - fix-data: |
      cd libcs3tio
      build/test/fixtbls test-data/text/
  - test-roundtrip: |
      cd libcs3tio
      build/test/roundtrip test-data/text/

M compile.sh => compile.sh +1 -1
@@ 18,7 18,7 @@ done

ar cr build/output/libcs3tio.a build/obj/*.o build/obj-ksp/*.o

for x in compiles duplicates header_names tio roundtrip; do
for x in compiles duplicates fixtbls header_names tio roundtrip; do
  "$CXX" -std=c++20 -W -Wall -Werror -Wfatal-errors -fsanitize=address,undefined -I src test/$x.cpp build/output/libcs3tio.a -o build/test/$x
  #build/test/$x
done

A test/fixtbls.cpp => test/fixtbls.cpp +192 -0
@@ 0,0 1,192 @@
// Certain .tbl files in the localized versions have incorrect length bytes. This program fixes them.

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <exception>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <streambuf>
#include <string>
#include <vector>

// One entry of the fix table: a single 16-bit value to patch in one .tbl file.
struct Fix {
  // Subdirectory (below the directory passed on the command line) that holds the file.
  const char *dir;
  // File name of the table inside `dir`.
  const char *name;
  // Byte offset of the value's low byte; the high byte lives at pos + 1 (little endian).
  size_t pos;
  // Value that must currently be stored at `pos`; apply_fixes throws if it differs.
  uint16_t expected;
  // Value that is written to `pos` in place of `expected`.
  uint16_t fix;
};

// Flat table of every value to patch, one entry per 16-bit value.
// get_files_to_fix() groups these entries by (dir, name); apply_fixes() then
// verifies `expected` at `pos` before overwriting it with `fix`.
const std::array<Fix, 88> fixlist = {
    Fix{.dir = "dat_en", .name = "t_name.tbl", .pos = 22392, .expected = 87, .fix = 85},
    Fix{.dir = "dat_en", .name = "t_name.tbl", .pos = 22493, .expected = 87, .fix = 85},
    Fix{.dir = "dat_en", .name = "t_name.tbl", .pos = 23898, .expected = 85, .fix = 83},
    Fix{.dir = "dat_en", .name = "t_name.tbl", .pos = 23997, .expected = 84, .fix = 82},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 914, .expected = 44, .fix = 48},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 972, .expected = 53, .fix = 57},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1039, .expected = 52, .fix = 56},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1105, .expected = 47, .fix = 51},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1166, .expected = 43, .fix = 47},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1223, .expected = 51, .fix = 55},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1288, .expected = 44, .fix = 48},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1346, .expected = 44, .fix = 48},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1404, .expected = 50, .fix = 54},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1468, .expected = 43, .fix = 47},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1525, .expected = 46, .fix = 50},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1585, .expected = 45, .fix = 49},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1644, .expected = 52, .fix = 56},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1710, .expected = 54, .fix = 58},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1778, .expected = 46, .fix = 50},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1838, .expected = 52, .fix = 56},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1904, .expected = 52, .fix = 56},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 1970, .expected = 53, .fix = 57},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2037, .expected = 51, .fix = 55},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2102, .expected = 38, .fix = 42},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2154, .expected = 45, .fix = 49},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2213, .expected = 46, .fix = 50},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2273, .expected = 53, .fix = 57},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2340, .expected = 52, .fix = 56},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2406, .expected = 45, .fix = 49},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2465, .expected = 54, .fix = 58},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2533, .expected = 54, .fix = 58},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2601, .expected = 48, .fix = 52},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2663, .expected = 54, .fix = 58},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2731, .expected = 53, .fix = 57},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2798, .expected = 52, .fix = 56},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2864, .expected = 52, .fix = 56},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2930, .expected = 53, .fix = 57},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 2997, .expected = 54, .fix = 58},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 3065, .expected = 40, .fix = 44},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 3119, .expected = 49, .fix = 53},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 3182, .expected = 53, .fix = 57},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 3249, .expected = 53, .fix = 57},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 3316, .expected = 53, .fix = 57},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 3383, .expected = 53, .fix = 57},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 3450, .expected = 53, .fix = 57},
    Fix{.dir = "dat_en", .name = "t_quest.tbl", .pos = 3517, .expected = 47, .fix = 51},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 921, .expected = 39, .fix = 43},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 974, .expected = 54, .fix = 58},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1042, .expected = 57, .fix = 61},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1113, .expected = 51, .fix = 55},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1178, .expected = 52, .fix = 56},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1244, .expected = 52, .fix = 56},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1310, .expected = 50, .fix = 54},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1374, .expected = 55, .fix = 59},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1443, .expected = 49, .fix = 53},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1506, .expected = 52, .fix = 56},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1572, .expected = 55, .fix = 59},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1641, .expected = 56, .fix = 60},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1711, .expected = 53, .fix = 57},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1778, .expected = 51, .fix = 55},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1843, .expected = 54, .fix = 58},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1911, .expected = 49, .fix = 53},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 1974, .expected = 54, .fix = 58},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2042, .expected = 50, .fix = 54},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2106, .expected = 55, .fix = 59},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2175, .expected = 51, .fix = 55},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2240, .expected = 57, .fix = 61},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2311, .expected = 52, .fix = 56},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2377, .expected = 56, .fix = 60},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2447, .expected = 49, .fix = 53},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2510, .expected = 53, .fix = 57},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2577, .expected = 52, .fix = 56},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2643, .expected = 52, .fix = 56},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2709, .expected = 45, .fix = 49},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2768, .expected = 53, .fix = 57},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2835, .expected = 52, .fix = 56},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2901, .expected = 56, .fix = 60},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 2971, .expected = 58, .fix = 62},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 3043, .expected = 40, .fix = 44},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 3097, .expected = 45, .fix = 49},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 3156, .expected = 44, .fix = 48},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 3214, .expected = 56, .fix = 60},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 3284, .expected = 56, .fix = 60},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 3354, .expected = 56, .fix = 60},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 3424, .expected = 56, .fix = 60},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 3494, .expected = 56, .fix = 60},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 3564, .expected = 55, .fix = 59},
    Fix{.dir = "dat_fr", .name = "t_quest.tbl", .pos = 3633, .expected = 51, .fix = 55},
};

// Identifies one table file relative to the data directory: <dir>/<name>.
// Both pointers are non-owning C strings (string literals in this program).
struct TblPath {
  const char *dir;
  const char *name;
};

// A single patch inside one file: the 16-bit value at byte offset `pos` must
// equal `expected` and is replaced by `fix`.
struct FixLoc {
  size_t pos;
  uint16_t expected;
  uint16_t fix;
};

// Orders TblPath keys lexicographically: first by `dir`, then by `name`.
//
// The directory comparison must be decided before the name is looked at;
// OR-ing the two "less than" tests is not a strict weak ordering (two keys can
// each compare less than the other), which std::map requires of its comparator.
struct TblPathComparator {
  bool operator()(const TblPath &p1, const TblPath &p2) const {
    const int dir_order = std::strcmp(p1.dir, p2.dir);
    if (dir_order != 0) {
      return dir_order < 0;
    }
    return std::strcmp(p1.name, p2.name) < 0;
  }
};

// Groups the flat fix table by file.
//
// Returns a map from (dir, name) to the list of patches for that file; the
// patches of each file keep the order in which they appear in `fixlist`.
const std::map<TblPath, std::vector<FixLoc>, TblPathComparator> get_files_to_fix() {
  std::map<TblPath, std::vector<FixLoc>, TblPathComparator> grouped;
  for (const Fix &entry : fixlist) {
    const TblPath key{entry.dir, entry.name};
    const FixLoc loc{entry.pos, entry.expected, entry.fix};
    // operator[] creates an empty vector the first time a file is seen.
    grouped[key].push_back(loc);
  }
  return grouped;
}

// Reads the whole file `fn` into a string, byte for byte.
//
// The stream is opened in binary mode because .tbl files are binary data; a
// text-mode stream may translate line endings on some platforms and shift the
// byte offsets the fixes rely on.
//
// Throws std::runtime_error if the file cannot be opened.
std::string read_file(const std::filesystem::path &fn) {
  auto ifs = std::ifstream(fn, std::ios::binary);
  if (!ifs) {
    throw std::runtime_error("could not open file for reading: " + fn.string());
  }
  std::ostringstream os;
  os << ifs.rdbuf();
  return os.str();
}

// Replaces the contents of `fn` with `data`, byte for byte.
//
// The stream is opened in binary mode so no line-ending translation can
// alter the table data.
//
// Throws std::runtime_error if the file cannot be opened or the write fails,
// so a failed write is not mistaken for a successfully patched file.
void write_file(const std::filesystem::path &fn, const std::string &data) {
  auto ofs = std::ofstream(fn, std::ios::binary | std::ios::trunc);
  if (!ofs) {
    throw std::runtime_error("could not open file for writing: " + fn.string());
  }
  ofs.write(data.data(), static_cast<std::streamsize>(data.size()));
  ofs.flush(); // surface write errors here rather than silently in the destructor
  if (!ofs) {
    throw std::runtime_error("could not write file: " + fn.string());
  }
}

// Applies every patch in `fixes` to the in-memory file image `data`.
//
// For each patch the 16-bit little-endian value at `pos` is read and compared
// with `expected`; on a match it is overwritten with `fix`.
//
// Throws std::runtime_error if a value differs from `expected` (patches
// earlier in the list have already been applied to `data` by then), and
// std::out_of_range (from std::string::at) if `pos` or `pos + 1` lies outside
// `data`.
void apply_fixes(std::string &data, const std::vector<FixLoc> &fixes) {
  for (const FixLoc &fix : fixes) {
    // Go through unsigned char: where plain char is signed, a byte >= 0x80
    // would otherwise be sign-extended and corrupt the assembled value.
    const auto low = static_cast<unsigned char>(data.at(fix.pos));
    const auto high = static_cast<unsigned char>(data.at(fix.pos + 1));
    // note: little endianness
    const auto actual = static_cast<uint16_t>(low | (high << 8));
    if (actual != fix.expected) {
      std::cerr << "Unexpected value at position " << fix.pos << ", expected " << fix.expected << ", got " << actual << std::endl;
      throw std::runtime_error("Unexpected value.");
    }
    data.at(fix.pos) = static_cast<char>(fix.fix & 0xFF);
    data.at(fix.pos + 1) = static_cast<char>(fix.fix >> 8);
  }
}

// Entry point: patches every file named in the fix table in place.
//
// Expects exactly one argument, the directory that contains the per-language
// subdirectories. Each file is read, patched in memory and written back; its
// path is printed to stdout before it is processed.
//
// Returns 0 on success. Any error is reported on stderr and yields exit
// status 1 instead of escaping main, where it would end in std::terminate
// without a guarantee that the message is printed.
int main(int argc, char *argv[]) {
  try {
    if (argc != 2) {
      throw std::runtime_error("expected directory name as single cli argument");
    }
    const auto dir = std::filesystem::path(argv[1]);
    const auto files_to_fix = get_files_to_fix();
    for (const auto &[tblpath, fixes] : files_to_fix) {
      const auto fn = dir / tblpath.dir / tblpath.name;
      std::cout << fn << std::endl;
      std::string data = read_file(fn);
      apply_fixes(data, fixes);
      write_file(fn, data);
    }
    return 0;
  } catch (const std::exception &e) {
    std::cerr << "fixtbls: " << e.what() << std::endl;
    return 1;
  }
}

M test/roundtrip.cpp => test/roundtrip.cpp +1 -6
@@ 26,12 26,6 @@ std::string serialize(const cs3tio::Tbl &tbl) {
}

void check_file(const std::filesystem::path &fn) {
  if (
      (fn.stem() == "t_quest" && fn.parent_path().stem() == "dat_en") || (fn.stem() == "t_name" && fn.parent_path().stem() == "dat_en") || (fn.stem() == "t_quest" && fn.parent_path().stem() == "dat_fr")) {
    std::cout << "NOT CHECKING '" << fn << "'" << std::endl; // these files have broken lengths in the original
    // TODO: patch the length bytes in memory somehow?
    return;
  }
  std::cout << "Checking '" << fn << "'." << std::endl;
  auto original = read_file(fn);
  auto table = parse(original);


@@ 39,6 33,7 @@ void check_file(const std::filesystem::path &fn) {
  for (size_t i = 0; i < std::min(original.size(), serialized.size()); ++i) {
    if (original.at(i) != serialized.at(i)) {
      std::cout << "original and serialized differ at byte " << i << std::endl;
      std::cout << "expected " << int(uint8_t(original.at(i))) << ", got " << int(uint8_t(serialized.at(i))) << std::endl;
      throw std::logic_error("BUG");
    }
  }