~fkfd/closure-filter-poc

e9912ec70fb4be21dce44957da0c879563b9a414 — Frederick Yin 3 years ago 63c88bd master
New applications for filter
4 files changed, 99 insertions(+), 63 deletions(-)

M .gitignore
M filter.py
A images.py
R objects.py => webcomics.py
M .gitignore => .gitignore +2 -1
@@ 1,3 1,4 @@
.vscode/
__pycache__

img/
irc_log*

M filter.py => filter.py +49 -60
@@ 1,6 1,6 @@
import sys
from datetime import datetime
from objects import Webcomic


def get_attribute(obj, attr: str):
    if type(obj) == dict:


@@ 9,46 9,50 @@ def get_attribute(obj, attr: str):
        except KeyError:
            return None
    else:
        if not attr:
            return obj
        try:
            return getattr(obj, attr)
        except AttributeError:
            return None


def filter_closure(filter_string: str) -> list:
def gen_closure(filter_string: str) -> list:
    # Test each object given against provided criteria.
    # Criteria are separated with ampersands(&) TODO: OR(|),
    # each of which is of format 'attribute[.conversion]<operator>value'.
    # `operator` can be one of =, <, >, <=, >=, and !=.
    # [TODO: -> (is a superset of), and <- (is a subset of).]
    criteria = filter_string.split('&')
    def as_is(v): return v
    # `operator` can be one of =, <, >, <=, >=, !=, <-, ->, !<-, !->.
    criteria = filter_string.split("&")

    def as_is(v):
        return v

    convertions = {
        # convertions to apply to left and right, respectively
        # applied with: convertions[i][0 or 1](value of either side)
        'str': (str, str),
        'str[]': (list, str),
        'int': (int, int),
        'int[]': (list, int),
        'len': (len, int),  # get number of elems in key
        "str": (str, str),
        "str[]": (list, str),
        "int": (int, int),
        "int[]": (list, int),
        "len": (len, int),  # get number of elems in key
        # syntactic sugar; analogous to True if s == 'true' else False
        'bool': (bool, lambda s: s == 'true' and True or False)
        "bool": (bool, lambda s: s == "true" and True or False),
    }
    operators = {
        '<=': lambda a, b: a <= b,
        '>=': lambda a, b: a >= b,
        '!=': lambda a, b: a != b,
        '=': lambda a, b: a == b,
        '!<-': lambda a, b: a not in b,
        '!->': lambda a, b: b not in a,
        '<-': lambda a, b: a in b,
        '->': lambda a, b: b in a,
        '<': lambda a, b: a < b,
        '>': lambda a, b: a > b,
        "<=": lambda a, b: a <= b,
        ">=": lambda a, b: a >= b,
        "!=": lambda a, b: a != b,
        "=": lambda a, b: a == b,
        "!<-": lambda a, b: a not in b,
        "!->": lambda a, b: b not in a,
        "<-": lambda a, b: a in b,
        "->": lambda a, b: b in a,
        "<": lambda a, b: a < b,
        ">": lambda a, b: a > b,
    }

    for criterion in criteria:
        attr, op, value = '', '', ''
        attr, op, value = "", "", ""
        for o in operators.keys():
            # This attempts to separate the criterion string into 3 parts.
            # If o is not present, op and value are set blank by


@@ 59,54 63,39 @@ def filter_closure(filter_string: str) -> list:
                break

        if not op in operators.keys():
            print('No operator found in expression. Skip.', file=sys.stderr)
            print("No operator found in expression. Skip.", file=sys.stderr)
            continue

        # split key further for type conversion
        attr, conv = attr.split('.', 1)
        attr, conv = attr.split(".", 1)
        # print(attr, conv, op, value)

        if not conv in convertions.keys():
            print(
                f'Type conversion "{conv}" for attribute "{attr}" is not understood. Skip.',
                file=sys.stderr)
                file=sys.stderr,
            )
            continue  # skip this criterion

        yield lambda obj: operators[op](
            convertions[conv][0](
                get_attribute(obj, attr)
            ),
            convertions[conv][1](value)
            convertions[conv][0](get_attribute(obj, attr)), convertions[conv][1](value)
        )


# Demo entry point: build three sample Webcomic objects and print, for every
# criterion of two filter strings, whether each comic satisfies it.
if __name__ == '__main__':
    # NOTE(review): the positional arguments look like name, URL, authors,
    # themes, start date and update schedule -- confirm against
    # Webcomic.__init__, which is not visible here.
    xkcd = Webcomic(
        'xkcd',
        'https://xkcd.com',
        ['Randall Munroe'],
        ['humor', 'romance', 'sarcasm', 'math', 'language', 'compsci'],
        datetime(2006, 1, 1),
        'triweekly')
    smbc = Webcomic(
        'SMBC',
        'https://smbc-comics.com',
        ['Zach Weinersmith'],
        ['humor', 'philosophy', 'sociology', 'sci-fi', 'graph jokes'],
        datetime(2002, 9, 5),
        'daily')
    cnh = Webcomic(
        'C&H',
        'http://explosm.net',
        ['Kris Wilson', 'Dave McElfatrick', 'Rob DenBleyker', 'Matt Melvin'],
        ['satire', 'surreal', 'puns', 'adult content'],
        datetime(2005, 1, 25),
        'daily')

    # str1: len(authors) < 2 and int(age) < 15
    str1 = 'authors.len<2&age.int<15'
    # str2: bool(ssl) equals True and 'humor' is contained in list(themes)
    str2 = 'ssl.bool=true&themes.str[]->humor'
    # One lazy generator per filter string; each yields one predicate per
    # '&'-separated criterion.
    closures = [filter_closure(s) for s in (str1, str2)]
    for closure in closures:
        # Keep `test` as the outer loop: every predicate is applied to all
        # comics before the generator advances to the next criterion.
        for test in closure:
            for webcomic in (xkcd, smbc, cnh):
                # Results are printed per criterion; they are not AND-ed here.
                print(webcomic.name, test(webcomic))
def exec_filter(objs: list, criteria: str):
    """Return the objects in ``objs`` that satisfy every criterion.

    objs: objects to filter. Any iterable is accepted; it is read exactly
        once, so one-shot iterators work as well as lists.
    criteria: filter string as described in gen_closure.

    Returns a new list with the matching objects in their original order.
    ``objs`` itself is never modified.
    """
    # Snapshot the input once so that iterators are not exhausted before the
    # first test runs.
    survivors = list(objs)

    # Apply each test to all remaining objects before pulling the next one
    # from the generator: the lambdas yielded by gen_closure read its loop
    # variables at call time, so collecting the tests up front would make
    # every test evaluate the last criterion.
    for test in gen_closure(criteria):
        # An object that already failed is never tested again.
        survivors = [o for o in survivors if test(o)]
    return survivors


A images.py => images.py +9 -0
@@ 0,0 1,9 @@
from filter import exec_filter
from PIL import Image

# File stems of the sample pictures; each one is opened from img/<stem>.png.
_SAMPLE_STEMS = ("cate", "catto", "freddiecat", "politecat", "tiredcat")

# Open every sample picture up front so the filter can read its attributes.
images = [Image.open(f"img/{stem}.png") for stem in _SAMPLE_STEMS]

# Print the pictures whose width and height attributes are both >= 100.
print(exec_filter(images, "width.int>=100&height.int>=100"))

R objects.py => webcomics.py +39 -2
@@ 1,3 1,5 @@
from filter import exec_filter

# Example classes to demonstrate how closure filtering works
from datetime import datetime



@@ 22,7 24,42 @@ class Webcomic:
    @property
    def age(self):
        """Return the current calendar year minus the year of ``self.since``.

        Only the year components are compared; month and day are ignored.
        """
        current_year = datetime.now().year
        start_year = self.since.year
        return current_year - start_year
        

    @property
    def ssl(self):
        return self.url.startswith('https://')
\ No newline at end of file
        return self.url.startswith("https://")

    def __repr__(self):
        """Return a short debug label of the form ``Webcomic<name>``."""
        return "Webcomic<{}>".format(self.name)


# Filter strings for the demo (syntax is described in filter.gen_closure).
# str1: len(authors) < 2 and int(age) < 15
str1 = "authors.len<2&age.int<15"
# str2: bool(ssl) equals True and "humor" is contained in list(themes)
str2 = "ssl.bool=true&themes.str[]->humor"

# Sample comics to run the filters against.
# NOTE(review): the positional arguments look like name, URL, authors, themes,
# start date and update schedule -- confirm against Webcomic.__init__.
xkcd = Webcomic(
    "xkcd",
    "https://xkcd.com",
    ["Randall Munroe"],
    ["humor", "romance", "sarcasm", "math", "language", "compsci"],
    datetime(2006, 1, 1),
    "triweekly",
)
smbc = Webcomic(
    "SMBC",
    "https://smbc-comics.com",
    ["Zach Weinersmith"],
    ["humor", "philosophy", "sociology", "sci-fi", "graph jokes"],
    datetime(2002, 9, 5),
    "daily",
)
cnh = Webcomic(
    "C&H",
    "http://explosm.net",
    ["Kris Wilson", "Dave McElfatrick", "Rob DenBleyker", "Matt Melvin"],
    ["satire", "surreal", "puns", "adult content"],
    datetime(2005, 1, 25),
    "daily",
)
comics = [xkcd, smbc, cnh]

# Print the comics that pass each filter string, one result list per line.
for criteria_string in (str1, str2):
    print(exec_filter(comics, criteria_string))