~tuxpup/xlsx-dict-reader

1c3d62daaf904fae6dfaf01131cc37c36b11c1c4 — Geoff Beier a month ago 1f822be
Strip leading and trailing whitespace from header field names by default. Fixes #1.
A tests/data/whitespace_in_header.xlsx => tests/data/whitespace_in_header.xlsx +0 -0
M tests/test_dict_reader.py => tests/test_dict_reader.py +34 -0
@@ 31,6 31,11 @@ def merged_cells_sheet():
    return load_workbook(FIXTURE_DIR / "ignore_merged.xlsx").active


@pytest.fixture
def whitespace_in_header_sheet():
    """Return the "Sheet1" worksheet of the whitespace_in_header.xlsx fixture."""
    workbook = load_workbook(FIXTURE_DIR / "whitespace_in_header.xlsx")
    return workbook["Sheet1"]


def test_simple_headers(simple_sheet):
    from xlsx_dict_reader import DictReader



@@ 122,3 127,32 @@ def test_ignore_merged_cells(merged_cells_sheet):
    assert len(dicts) == 11
    assert dicts[0]["Day"] == "Sunday"
    assert dicts[-1]["Day"] == "Wednesday"


def test_whitespace_in_header(whitespace_in_header_sheet):
    """Check that "Level" is a key of the first row with default options.

    The reader is built without passing ``strip_whitespace_in_headers``,
    so whatever default DictReader applies is what is being exercised.
    """
    from xlsx_dict_reader import DictReader

    rows = list(
        DictReader(
            whitespace_in_header_sheet,
            min_row=3,
            min_col=1,
            max_col=4,
            skip_blank_rows=True,
        )
    )
    first_row_keys = rows[0].keys()
    assert "Level" in first_row_keys


def test_whitespace_in_header_override(whitespace_in_header_sheet):
    """Check that "Level " (trailing space) is a key when stripping is off.

    Passing ``strip_whitespace_in_headers=False`` is expected to leave the
    header text exactly as it appears in the worksheet.
    """
    from xlsx_dict_reader import DictReader

    reader_options = {
        "min_row": 3,
        "min_col": 1,
        "max_col": 4,
        "skip_blank_rows": True,
        "strip_whitespace_in_headers": False,
    }
    rows = list(DictReader(whitespace_in_header_sheet, **reader_options))
    first_row_keys = rows[0].keys()
    # The trailing space in the key is intentional.
    assert "Level " in first_row_keys

M xlsx_dict_reader/dict_reader.py => xlsx_dict_reader/dict_reader.py +4 -0
@@ 15,6 15,7 @@ class DictReader:
        headers: List[str] = None,
        values_only=True,
        skip_blank_rows=False,
        strip_whitespace_in_headers=True,
    ):
        self.worksheet = worksheet
        self.min_row = min_row


@@ 23,6 24,7 @@ class DictReader:
        self.max_col = max_col
        self.values_only = values_only
        self.skip_blank_rows = skip_blank_rows
        self.strip_whitespace_in_headers = strip_whitespace_in_headers
        self.curr_row = 1

        if headers:


@@ 43,6 45,8 @@ class DictReader:
            hv = col[0].value
            if not hv:
                break
            if self.strip_whitespace_in_headers:
                hv = hv.strip()
            if hv in headers:
                raise ValueError(f"Duplicate header found: {hv}: {headers}")
            headers.append(hv)