~pjjw/cephfs-layout-tool

0df4021193d46348d8f326bb2d28bb044e0a566c — peter woodman 4 years ago 2b76e21
adding memoization of layout checks and tmpdir creation
1 files changed, 36 insertions(+), 24 deletions(-)

M migrate_pools.py
M migrate_pools.py => migrate_pools.py +36 -24
@@ 1,23 1,33 @@
#!/usr/bin/env python3
from __future__ import print_function

import os
import shutil
import sys
import tempfile
from collections import namedtuple
from typing import Optional
from typing import Optional, Callable

import xattr  # type: ignore
import humanize  # type: ignore

# Layout record for a file or directory. Each field name is used as the suffix
# of a "ceph.dir.layout.<field>" extended attribute by mkdtemp_layout().
CephLayout = namedtuple("CephLayout", ["stripe_unit", "stripe_count", "object_size", "pool"])
# NOTE(review): this second assignment rebinds CephLayout without "stripe_unit";
# extract_layout() deletes that key before building the tuple, so this
# three-field form is the one in effect.
CephLayout = namedtuple("CephLayout", ["stripe_count", "object_size", "pool"])

# Parent directory passed to tempfile.mkdtemp() for scratch directories.
# NOTE(review): assigned again further down in this view; the later value wins.
TMPDIR = "/c/tmp"

# Pool names accepted by the "in wrong pool" check in main().
# NOTE(review): assigned again further down in this view; the later value wins.
OK_POOLS = {"cephfs_crs52data", "cephfs_crs52data2"}


TMPDIR = "/c/scratch/convert"

OK_POOLS = {"cephfs_crs52data", "cephfs_crs52data2", "cephfs_crs52data3", "cephfs_crs52data4"}


def memoize(fn: Callable) -> Callable:
    """Return a caching wrapper around *fn*.

    Results are cached per distinct combination of positional and keyword
    arguments, so decorated functions may be called with keywords (for
    example ``mkdtemp_layout(layout, prefix=...)``). A bare
    ``dict.__getitem__`` wrapper accepts exactly one positional argument
    and raises ``TypeError`` on such calls.

    All arguments must be hashable. The cache is unbounded and lives as
    long as the returned wrapper. A call that raises is not cached.
    """
    # Imported locally so this block does not depend on the file's import list.
    import functools

    return functools.lru_cache(maxsize=None)(fn)


@memoize
def extract_layout(filename: str) -> Optional[CephLayout]:
    filetype = "file"
    if os.path.isdir(filename):


@@ 32,19 42,23 @@ def extract_layout(filename: str) -> Optional[CephLayout]:
        )
    except OSError:
        # no layout on given file/dir
        if filetype == "dir":
            return extract_layout(os.path.dirname(filename))
        return None
    for attr in xattrs:
        n = attr.split("=")
        cephlayout[n[0]] = n[1]
    print(cephlayout)
    del cephlayout["stripe_unit"]
    return CephLayout(**cephlayout)


@memoize
def _mkdtemp_layout_cached(key: tuple) -> str:
    """Create a temp dir for a ``(layout, prefix)`` pair; cached per pair."""
    layout, prefix = key
    tempdir = tempfile.mkdtemp(dir=prefix)
    xattrs = xattr.xattr(tempdir)
    for attr in layout._fields:
        # bytes(..., "utf-8") requires a str value; extract_layout() builds
        # the layout from split xattr text, so the fields are expected to be str.
        xattrs.set("ceph.dir.layout.{}".format(attr), bytes(getattr(layout, attr), "utf-8"))
    return tempdir


def mkdtemp_layout(layout: CephLayout, prefix: str = TMPDIR) -> str:
    """Make a temp dir under *prefix* with the same layout as *layout*.

    Every field of *layout* is written to the new directory as a
    ``ceph.dir.layout.<field>`` extended attribute. The result is cached
    per ``(layout, prefix)`` pair, so repeated calls with the same
    arguments return the same directory rather than creating a new one.

    The cache goes through a single-argument helper so that *prefix* is
    part of the cache key and may be passed by keyword.

    Returns:
        Path of the temporary directory.
    """
    return _mkdtemp_layout_cached((layout, prefix))




@@ 54,30 68,29 @@ def main():
    total_savings = 0
    total_moved = 0

    session_tmpdir = tempfile.mkdtemp(dir=TMPDIR)

    print("starting scan of {}".format(startdir), file=sys.stderr)
    for root, _, files in os.walk(startdir, topdown=False):
        print("looking at {}".format(root), file=sys.stderr)
        print("## total savings so far: {} ##".format(humanize.naturalsize(total_savings)))
        if root.startswith("/c/archive"):
            continue
        dir_layout = extract_layout(root)
        print("layout for {}: {}".format(root, dir_layout))
        tmp_layout_dir = mkdtemp_layout(dir_layout, prefix=session_tmpdir)
        for name in files:
            filename = os.path.join(root, name)
            fstat = os.stat(filename)
            if fstat.st_nlink > 1:
                print("skipping {} due to multiple hard links".format(name))
                continue
            cephlayout = {}
            try:
                for attr in str(xattr.getxattr(filename, "ceph.file.layout")).strip("'").split():
                    n = str(attr).split("=")
                    cephlayout[str(n[0])] = str(n[1])
            except IOError:
                pass
            if "pool" in cephlayout.keys() and cephlayout["pool"] not in OK_POOLS:
                print("%s in wrong pool: %s" % (name, cephlayout["pool"]))
            file_layout = extract_layout(filename)
            if not file_layout:
                continue
            if dir_layout.pool != file_layout.pool:
                print("%s in wrong pool: %s" % (name, file_layout.pool))
                statinfo = os.stat(filename)
                tmploc = os.path.join(TMPDIR, name)
                print("copying {} to new location and pool {}".format(filename, tmploc))
                tmploc = os.path.join(tmp_layout_dir, name)
                print("copying {} to temp location {}".format(filename, tmploc))
                shutil.copy2(filename, tmploc)
                print("moving back on top of original")
                shutil.move(tmploc, filename)


@@ 87,10 100,9 @@ def main():
                total_moved += 1
                total_savings += savings
                print("saved {}".format(humanize.naturalsize(savings)))
            else:
                pass

    print("saved space in total: {}".format(humanize.naturalsize(total_savings)))
    os.rmdir(session_tmpdir)


if __name__ == "__main__":