~nch/glue

5bf1fe8612fb89e8e79df6be9b42b70ae2ac569c — nc 1 year, 4 months ago cb6e03c
add table implementation
2 files changed, 95 insertions(+), 0 deletions(-)

A table.py
A table_test.py
A table.py => table.py +66 -0
@@ 0,0 1,66 @@
from dataclasses import dataclass
from typing import List, Dict
import numpy as np # type: ignore
from collections.abc import Iterable

# TODO: implement pure relational algebra version
# TODO: implement tuple calculus version

def _ensure_columns_match(a, b):
    if set(a) != set(b):
        raise ValueError('columns do not match', a, b)

class Table:
    """A small column-oriented table backed by one NumPy array per column.

    Attributes:
        columns: list of column names, in positional order.
        column_data: list of arrays, parallel to ``columns``.
    """

    def __init__(self, x, columns=None):
        """Build a table from a mapping of column name to column values.

        Args:
            x: dict mapping each column name to its values. Every value is
                converted with ``np.array``.
            columns: optional column ordering. It must name exactly the
                keys of ``x``; the stored data follows this order.

        Raises:
            TypeError: if ``x`` is not a dict.
            ValueError: if ``x`` has no columns, or ``columns`` does not
                name the same columns as ``x``.
        """
        if not isinstance(x, dict):
            raise TypeError('Table expects a dict of column name -> values', x)
        if not x:
            raise ValueError('Table needs at least one column')

        if columns is None:
            names = list(x)
        else:
            _ensure_columns_match(list(x), columns)
            names = list(columns)

        # Build the data from the final ordering so that columns[i] always
        # names column_data[i], even when `columns` reorders the dict keys.
        self.columns = names
        self.column_data = [np.array(x[name]) for name in names]

    def coli(self, name: str) -> int:
        """Return the positional index of the column called *name*.

        Raises:
            AttributeError: if no column has that name.
        """
        try:
            return self.columns.index(name)
        except ValueError:
            raise AttributeError('No such column', name) from None

    def tuples(self):
        """Yield the rows one at a time, each as a tuple in column order."""
        yield from zip(*self.column_data)

    def tuple(self):
        """Return a single-row table as a plain tuple of Python scalars.

        Raises:
            ValueError: if any column is not a zero-dimensional array.
        """
        if any(col.shape != () for col in self.column_data):
            raise ValueError('tuple() requires a table holding a single row')
        return tuple(col.item() for col in self.column_data)

    def __getitem__(self, key):
        """Select columns and/or rows.

        Supported keys:
            * ``int``: the column array at that position.
            * ``str``: the column array with that name.
            * ``list`` of names: a new ``Table`` with just those columns.
            * ``slice``: a new ``Table`` with every column restricted to
              the sliced rows.
            * ``(colkey, rowkey)``: ``colkey`` selects columns as above
              (a slice here slices the column list) and ``rowkey`` is
              applied to each selected column. With a list or slice
              ``colkey`` and an ``int`` ``rowkey`` the row is returned as
              a tuple; otherwise a ``Table`` is returned.

        Raises:
            AttributeError: if a named column does not exist.
            TypeError: if the key is of an unsupported type.
        """
        if isinstance(key, int):
            return self.column_data[key]
        if isinstance(key, str):
            return self.column_data[self.coli(key)]

        if isinstance(key, list):
            # All rows of the requested columns.
            return self[key, slice(None)]
        if isinstance(key, slice):
            # The requested rows of every column.
            return self[self.columns, key]
        if isinstance(key, tuple):
            colkey, rowkey = key
            if isinstance(colkey, (int, str)):
                return self[colkey][rowkey]
            if isinstance(colkey, list):
                picked = Table({
                    col: self.column_data[self.coli(col)][rowkey]
                    for col in colkey
                })
                if isinstance(rowkey, int):
                    return picked.tuple()
                return picked
            if isinstance(colkey, slice):
                return self[self.columns[colkey], rowkey]

        raise TypeError('Unsupported table key', key)

    def append(self, vals):
        """Append rows given as a dict of column name to value(s).

        Args:
            vals: dict with exactly the table's column names as keys. Each
                value may be a scalar (one row) or a sequence (many rows).

        Raises:
            TypeError: if ``vals`` is not a dict.
            ValueError: if the keys of ``vals`` do not match the columns.
        """
        if not isinstance(vals, dict):
            raise TypeError('append expects a dict of column name -> values', vals)
        _ensure_columns_match(vals.keys(), self.columns)
        # atleast_1d lets scalar values (and zero-dimensional columns) be
        # concatenated like one-element arrays.
        self.column_data = [
            np.concatenate([np.atleast_1d(col),
                            np.atleast_1d(np.asarray(vals[name]))])
            for name, col in zip(self.columns, self.column_data)
        ]

    # TODO: to_str

A table_test.py => table_test.py +29 -0
@@ 0,0 1,29 @@
from table import *
import unittest

class TestTable(unittest.TestCase):
    """Unit tests for column lookup and subscript access on ``Table``."""

    def test_coli(self):
        """``coli`` returns the index of a known column and rejects others."""
        table = Table({'a': [1, 2, 3]})
        self.assertEqual(table.coli('a'), 0)
        self.assertRaises(AttributeError, table.coli, 'b')

    def test_subscript(self):
        """Subscripting by position, name, and (column, row) pairs."""
        numbers = np.array([1, 2, 3])
        letters = np.array(['a', 'b', 'c'])
        table = Table({'a': numbers, 'b': letters}, columns=['a', 'b'])

        # Positional and named access reach the same column.
        self.assertEqual(list(table[0]), list(table['a']))

        whole_column = [1, 2, 3]
        for column_key in (0, 'a'):
            self.assertEqual(list(table[column_key]), whole_column)

        # (column, row-slice) access, first by name and then by position.
        first_two = slice(None, 2)
        leading_values = [1, 2]
        for column_key in ('a', 0):
            self.assertEqual(list(table[column_key, first_two]), leading_values)
        narrowed = table[['a', 'b'], first_two]
        self.assertEqual(list(narrowed['a']), leading_values)

        # A boolean row mask keeps every column.
        filtered = table[:, table['a'] == 2]
        self.assertEqual(set(filtered.columns), set(table.columns))

        # An integer row with a column slice yields the row as a tuple.
        self.assertEqual(table[:, 1], (2, 'b'))

# Run the unittest test runner when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()