~minus/stewdio-api

dc64b0744db288437aa48d854261785a348dbd26 — minus 5 years ago 9f5e2e8
Improve search performance for tags/favs

Replacing the use of HAVING with WHERE massively improves performance
with many tags/favs because HAVING filters at the very end and has to
build a complete rendering on the database first. Filtering there is the
slowest part because it needs to filter everything without any indexes.
M stewdio/search/__init__.py => stewdio/search/__init__.py +1 -1
@@ 1,3 1,3 @@
#!/usr/bin/env python3

from .query import search, search_by_hash, search_favorites, get_random
from .query import search, search_favorites, get_random

M stewdio/search/ast.py => stewdio/search/ast.py +6 -8
@@ 5,7 5,6 @@ from psycopg2.sql import Literal, SQL

class Ops:
    ILIKE = 'ILIKE'
    IN = 'IN'
    IN_LOWERCASE = 'IN_LOWERCASE'
    EQUALS = 'EQUALS'
    GREATER_THAN = 'GREATER_THAN'


@@ 14,11 13,10 @@ class Ops:

OP_MAP = {
    'ILIKE': lambda k, v: k + SQL(" ILIKE '%' || ") + v + SQL(" || '%'"),
    'IN': lambda k, v: SQL("ARRAY[") + v + SQL("] <@ ") + k,
    'IN_LOWERCASE': lambda k, v: SQL("ARRAY[lower(") + v + SQL(")] <@ ") + k,
    'EQUALS': lambda k, v: k + SQL(" ILIKE ") + v,
    'GREATER_THAN': lambda k, v: k + SQL(" > ") + v,
    'LESS_THAN': lambda k, v: k + SQL(" < ") + v,
    'IN_LOWERCASE': lambda k, v: SQL('EXISTS(') + k.format(SQL('lower({})').format(v)) + SQL(')'),
    'EQUALS': lambda k, v: k + SQL(' ILIKE ') + v,
    'GREATER_THAN': lambda k, v: k + SQL(' > ') + v,
    'LESS_THAN': lambda k, v: k + SQL(' < ') + v,
}




@@ 36,8 34,8 @@ QUALIFIERS = {
    'hash': OpsConfig(SQL('songs.hash'), {':': Ops.ILIKE, '=': Ops.EQUALS}, Ops.ILIKE),
    'path': OpsConfig(SQL('songs.path'), {':': Ops.ILIKE, '=': Ops.EQUALS}, Ops.ILIKE),
    'duration': OpsConfig(SQL('songs.duration'), {'>': Ops.GREATER_THAN, '<': Ops.LESS_THAN}, None),
    'fav': OpsConfig(SQL('array_agg(users.name)'), {':': Ops.IN_LOWERCASE,}, Ops.IN_LOWERCASE),
    'tag': OpsConfig(SQL('array_agg(tags.name)'), {':': Ops.IN,}, Ops.IN),
    'fav': OpsConfig(SQL('SELECT 1 FROM users JOIN favorites ON (favorites.user_id = users.id) WHERE favorites.song = songs.id AND users.name = {}'), {':': Ops.IN_LOWERCASE,}, Ops.IN_LOWERCASE),
    'tag': OpsConfig(SQL('SELECT 1 FROM taggings JOIN tags ON (taggings.tag = tags.id) WHERE taggings.song = songs.id AND tags.name = {}'), {':': Ops.IN_LOWERCASE,}, Ops.IN_LOWERCASE),
}

# when no qualifier is given, look at all those; must have a default op

M stewdio/search/parse_test.py => stewdio/search/parse_test.py +5 -2
@@ 20,9 20,10 @@ cases = (
    ('''(artist:mizuki OR artist:水樹) AND NOT fav:minus AND album:'supernal liberty' OR million''', Or(And(And(Or(Qualified('artist', String('mizuki')), Qualified('artist', String('水樹'))), Not(Qualified('fav', String('minus')))), Qualified('album', String('supernal liberty'))), Unqualified(String('million'))), None),
    ('''world.execute(me)''', (ValueError, "Ran into a Token('RPAREN', ')') where it wasn't expected"), None),
    ('''path:"comet lucifer" -inst''', And(Qualified('path', String('comet lucifer')), Not(Unqualified(String('inst')))), None),
    ('''(fav:minus OR fav:nyc OR fav:jdiez) NOT fav:sircmpwn''', And(Or(Or(Qualified('fav', String('minus')), Qualified('fav', String('nyc'))), Qualified('fav', String('jdiez'))), Not(Qualified('fav', String('sircmpwn')))), Composed([SQL('('), SQL('('), SQL('('), SQL('ARRAY[lower('), Literal('minus'), SQL(')] <@ '), SQL('array_agg(users.name)'), SQL(' OR '), SQL('ARRAY[lower('), Literal('nyc'), SQL(')] <@ '), SQL('array_agg(users.name)'), SQL(')'), SQL(' OR '), SQL('ARRAY[lower('), Literal('jdiez'), SQL(')] <@ '), SQL('array_agg(users.name)'), SQL(')'), SQL(' AND '), SQL('NOT '), SQL('ARRAY[lower('), Literal('sircmpwn'), SQL(')] <@ '), SQL('array_agg(users.name)'), SQL(')')])),
    ('''(fav:minus OR fav:nyc OR fav:jdiez) NOT fav:sircmpwn''', And(Or(Or(Qualified('fav', String('minus')), Qualified('fav', String('nyc'))), Qualified('fav', String('jdiez'))), Not(Qualified('fav', String('sircmpwn')))), Composed([SQL('('), SQL('('), SQL('('), SQL('EXISTS('), SQL('SELECT 1 FROM users JOIN favorites ON (favorites.user_id = users.id) WHERE favorites.song = songs.id AND users.name = '), Composed([SQL('lower('), Literal('minus'), SQL(')')]), SQL(')'), SQL(' OR '), SQL('EXISTS('), SQL('SELECT 1 FROM users JOIN favorites ON (favorites.user_id = users.id) WHERE favorites.song = songs.id AND users.name = '), Composed([SQL('lower('), Literal('nyc'), SQL(')')]), SQL(')'), SQL(')'), SQL(' OR '), SQL('EXISTS('), SQL('SELECT 1 FROM users JOIN favorites ON (favorites.user_id = users.id) WHERE favorites.song = songs.id AND users.name = '), Composed([SQL('lower('), Literal('jdiez'), SQL(')')]), SQL(')'), SQL(')'), SQL(' AND '), SQL('NOT '), SQL('EXISTS('), SQL('SELECT 1 FROM users JOIN favorites ON (favorites.user_id = users.id) WHERE favorites.song = songs.id AND users.name = '), Composed([SQL('lower('), Literal('sircmpwn'), SQL(')')]), SQL(')'), SQL(')')])),
    ('''title="why?"''', Qualified('title', String('why?'), op=Ops.EQUALS), None),
    ('''#op @minus''', And(Qualified('tag', String('op')), Qualified('fav', String('minus'))), None),
    ('''@minus''', Qualified('fav', String('minus')), Composed([SQL('EXISTS('), SQL('SELECT 1 FROM users JOIN favorites ON (favorites.user_id = users.id) WHERE favorites.song = songs.id AND users.name = '), Composed([SQL('lower('), Literal('minus'), SQL(')')]), SQL(')')])),
    ('''duration>10 AND duration<500''', And(Qualified('duration', String('10'), op=Ops.GREATER_THAN), Qualified('duration', String('500'), op=Ops.LESS_THAN)), None),
)



@@ 34,4 35,6 @@ def test_parse(input, expected_ast, expected_sql):
        parsed = e.__class__, *e.args
    assert expected_ast == parsed
    if expected_sql is not None:
        assert parsed.build() == expected_sql
        generated_sql = parsed.build()
        print(generated_sql)
        assert generated_sql == expected_sql

M stewdio/search/query.py => stewdio/search/query.py +10 -35
@@ 1,5 1,5 @@
#!/usr/bin/env python3

from .ast import Qualified
from .parse import parse
from psycopg2.sql import SQL, Literal



@@ 13,55 13,30 @@ SELECT
    songs.path AS path,
    songs.duration AS duration,
    songs.status AS status,
    array_remove(array_agg(DISTINCT tags.name), NULL) AS tags,
    array_remove(array_agg(DISTINCT users.name), NULL) AS favored_by
    ARRAY(SELECT tags.name FROM taggings JOIN tags ON (taggings.tag = tags.id) WHERE taggings.song = songs.id) AS tags,
    ARRAY(SELECT users.name FROM users JOIN favorites ON (favorites.user_id = users.id) WHERE favorites.song = songs.id) AS favored_by
FROM songs
JOIN artists ON songs.artist = artists.id
JOIN albums ON songs.album = albums.id
LEFT JOIN favorites ON songs.id = favorites.song
LEFT JOIN users ON favorites.user_id = users.id
LEFT JOIN taggings ON songs.id = taggings.song
LEFT JOIN tags ON taggings.tag = tags.id
{where}
GROUP BY
    songs.id,
    songs.hash,
    songs.title,
    artists.name,
    albums.name,
    songs.path,
    songs.duration,
    songs.status
''')


def search(cursor, query, limit=None):
    where = SQL("WHERE songs.status = 'active'")
    having = parse(query).build()
    where = SQL("WHERE songs.status = 'active' AND ") + parse(query).build()
    q = BASE_QUERY.format(where=where)
    q += SQL(' HAVING ') + having
    if limit:
        q += SQL(' LIMIT ') + Literal(limit)
    cursor.execute(q)
    return [dict(r) for r in cursor]


def search_by_hash(cursor, hash):
    q = BASE_QUERY.format(where=SQL(''))
    q += SQL(" HAVING songs.hash ILIKE %s || '%%'")
    cursor.execute(q, (hash,))
    if cursor.rowcount > 1:
        raise ValueError(f"Expected one result, got {cursor.rowcount}")
    elif cursor.rowcount == 0:
        return None
    return dict(cursor.fetchone())

def search_favorites(cursor, user):
    q = BASE_QUERY.format(where=SQL(''))
    q += SQL(" HAVING ARRAY[%s] <@ array_agg(users.name)")
    cursor.execute(q, (user.lower(),))
    q = BASE_QUERY.format(where=Qualified('fav', user).build())
    cursor.execute(q, (user,))
    return [dict(r) for r in cursor]


def get_random(cursor, off_vocal_regex=None):
    where = SQL("WHERE songs.status = 'active'")
    if off_vocal_regex:


@@ 88,11 63,11 @@ if __name__ == '__main__':
    print("original query from user input:")
    print(q)

    having = parse(q).build()
    where = parse(q).build()
    q = BASE_QUERY.format(where=SQL(''))
    q += SQL(' HAVING ') + having
    q += SQL(' WHERE ') + where
    print("generated SQL condition:")
    print(having)
    print(where)
    print("generated SQL query:")
    print(cursor.mogrify(q).decode())
    cursor.execute(q)