~amirouche/sudopython

c38c49914587dc97287ad3083d241d27538f9acc — Amirouche 1 year, 5 months ago 6583e10
bbkh: replace `big` with `little`, major bugfix...

... some cleanup.
2 files changed, 9 insertions(+), 20 deletions(-)

M bbkh.py
M benchmark-typofix.py
M bbkh.py => bbkh.py +5 -8
@@ 53,10 53,9 @@ def merkletree(booleans):
        new = []
        for (right, left) in chunks(booleans, 2):
            value = right or left
            # new.append(value)
            # TODO: maybe this is better:
            new.insert(0, value)
            new.append(value)
        booleans = new
    assert index == 0
    return out




@@ 73,7 72,7 @@ def bbkh(string):
    fuzz = ''.join('1' if x else '0' for x in tree)
    buzz = int(fuzz, 2)
    assert buzz <= 2 ** (BYTES_COUNT * 8)
    hash = buzz.to_bytes(BYTES_COUNT, 'big')
    hash = buzz.to_bytes(BYTES_COUNT, 'little')
    return hash




@@ 99,8 98,7 @@ def search(db, space, query, distance, limit=10):
            break
        _, _, other = lexode.unpack(key)
        score = distance(query, other)
        if score > 65:  # depends on fuzzywuzzy and wild approximation
            scores[other] = score
        scores[other] = score

    # select candidates backward
    candidates = db.iterator(stop=near, start=lexode.pack((space,)), reverse=True)


@@ 109,7 107,6 @@ def search(db, space, query, distance, limit=10):
            break
        _, _, other = lexode.unpack(key)
        score = distance(query, other)
        if score > 65:
            scores[other] = score
        scores[other] = score

    return scores.most_common(limit)

M benchmark-typofix.py => benchmark-typofix.py +4 -12
@@ 53,12 53,9 @@ options = dict(
    multiple_processes=False,
    transaction_log=False,
    page_size=1024 ** 2,
    block_size=10 * 1024 ** 2,    
    block_size=10 * 1024 ** 2,
)

db = Path('typofix.okvslite')
if db.exists():
    db.unlink()

def index(name):
    name = name.lower()


@@ 112,24 109,21 @@ def progress(args):
    name, key = args

    total += 1
    

    if name is None:
        return
    

    key = bbkh.lexode.pack((b'foobar', key, name))
    if len(key) > size:
        print("new max key", len(key))
        size = len(key)

    db.put(key, b'')
    

    if (total % 1_000) == 0:
        print(total, name, size, len(key), int(time() - start))





async def main(loop):

    with futures.ProcessPoolExecutor(max_workers=multiprocessing.cpu_count()) as pool:


@@ 141,8 135,6 @@ loop = asyncio.get_event_loop()
loop.run_until_complete(main(loop))
loop.close()



start = time()
top = bbkh.search(db, b'foobar', query, score)
print(time() - start)