~pjjw/cephfs-layout-tool

96124a42ad1d6e7729ce263205c48190351a27e8 — peter woodman 5 years ago fb539fc
a script to migrate pools
1 file changed, 61 insertions(+), 42 deletions(-)

M migrate_pools.py
M migrate_pools.py => migrate_pools.py +61 -42
@@ 10,45 10,64 @@ import humanize

# Scratch directory that each file is copied into before being moved back.
# NOTE(review): the move back is only a rename if this is on the same
# filesystem as the scanned tree — confirm.
TMPDIR = "/c/scratch/convert"

# Root of the tree to scan, taken from the first command-line argument.
STARTDIR = sys.argv[1]

# Files whose layout already names one of these pools are left untouched.
OK_POOLS = {'cephfs_crs52data', 'cephfs_crs52data2', 'cephfs_crs52data3', 'cephfs_crs52data4'}

total_savings = 0
total_moved = 0

print("starting scan of {}".format(STARTDIR), file=sys.stderr)
for root, dirs, files in os.walk(STARTDIR, topdown=False):
    print("looking at {}".format(root), file=sys.stderr)
    print("## total savings so far: {} ##".format(humanize.naturalsize(total_savings)))
    if root.startswith("/c/archive"):
        continue
    for name in files:
        filename = os.path.join(root, name)
        if os.path.islink(filename):
            # getxattr and copy2 both follow symlinks, so moving the copy back
            # would replace the link itself with a regular file.
            continue
        try:
            raw = xattr.getxattr(filename, "ceph.file.layout")
        except OSError:
            # No readable layout attribute: nothing to decide on.
            continue
        # Decode explicitly; str() on bytes yields the "b'...'" repr, which
        # glues "b'" onto the first key.
        if isinstance(raw, bytes):
            raw = raw.decode("utf-8", "replace")
        cephlayout = {}
        for attr in str(raw).split():
            key, sep, value = attr.partition("=")
            if sep:  # ignore fields that are not key=value
                cephlayout[key] = value
        pool = cephlayout.get("pool")
        if pool is None or pool in OK_POOLS:
            continue
        statinfo = os.stat(filename)
        if statinfo.st_nlink > 1:
            # Replacing one name of a hard-linked file would split it from
            # its other names.
            print("skipping {} due to multiple hard links".format(name))
            continue
        print("%s in wrong pool: %s" % (name, pool))
        tmploc = os.path.join(TMPDIR, name)
        print("copying {} to new location and pool {}".format(filename, tmploc))
        shutil.copy2(filename, tmploc)
        # copy2 does not carry over owner/group; restore them before the copy
        # takes the original's place.
        try:
            os.chown(tmploc, statinfo.st_uid, statinfo.st_gid)
        except OSError as err:
            print(
                "warning: could not restore ownership of {}: {}".format(filename, err),
                file=sys.stderr,
            )
        print("moving back on top of original")
        shutil.move(tmploc, filename)
        # Estimated raw usage before and after: size * 6/4 versus size * 7/5
        # (presumably the old and new pool overhead factors — confirm).
        oldusage = (statinfo.st_size / 4) * 6
        newusage = (statinfo.st_size / 5) * 7
        savings = oldusage - newusage
        total_moved += 1
        total_savings += savings
        print("saved {}".format(humanize.naturalsize(savings)))

print("saved space in total: {}".format(humanize.naturalsize(total_savings)))
# Pools that main() accepts as-is: a file whose ceph.file.layout names any
# other pool is copied out and moved back over its original path.
OK_POOLS = set(
    (
        "cephfs_crs52data",
        "cephfs_crs52data2",
        "cephfs_crs52data3",
        "cephfs_crs52data4",
    )
)


def _read_layout(filename):
    """Return the ``ceph.file.layout`` xattr of *filename* as a dict.

    The attribute value is split on whitespace into ``key=value`` fields.
    Returns an empty dict when the attribute cannot be read.
    """
    try:
        raw = xattr.getxattr(filename, "ceph.file.layout")
    except OSError:
        # No readable layout attribute: the caller treats this as "no pool".
        return {}
    # Decode explicitly; str() on bytes yields the "b'...'" repr, which glues
    # "b'" onto the first key and a quote onto the last value.
    text = raw.decode("utf-8", "replace") if isinstance(raw, bytes) else str(raw)
    layout = {}
    for field in text.split():
        key, sep, value = field.partition("=")
        if sep:  # ignore fields that are not key=value
            layout[key] = value
    return layout


def _rewrite_in_place(filename, name, fstat):
    """Copy *filename* into TMPDIR, then move the copy over the original path.

    *fstat* is the stat result of the original; its owner and group are
    re-applied to the copy because ``shutil.copy2`` does not preserve them.
    """
    tmploc = os.path.join(TMPDIR, name)
    print("copying {} to new location and pool {}".format(filename, tmploc))
    copied = False
    try:
        shutil.copy2(filename, tmploc)
        copied = True
    finally:
        # Only clean up a failed *copy*; once the copy is complete it may be
        # the only good version of the data and must not be removed.
        if not copied and os.path.lexists(tmploc):
            os.remove(tmploc)
    try:
        os.chown(tmploc, fstat.st_uid, fstat.st_gid)
    except OSError as err:
        print(
            "warning: could not restore ownership of {}: {}".format(filename, err),
            file=sys.stderr,
        )
    print("moving back on top of original")
    # NOTE(review): this is only a rename if TMPDIR is on the same filesystem
    # as the target — confirm.
    shutil.move(tmploc, filename)


def main():
    """Scan the tree named by ``sys.argv[1]`` and rewrite misplaced files.

    Every regular, singly-linked file whose ``ceph.file.layout`` names a pool
    outside OK_POOLS is copied into TMPDIR and moved back over its original
    path. Directories whose path starts with ``/c/archive`` are skipped.
    Scan progress goes to stderr; per-file messages and the estimated savings
    go to stdout.

    Exits with status 2 if no start directory is given on the command line.
    """
    import stat  # local import: the file's import block is outside this block

    if len(sys.argv) < 2:
        print("usage: {} STARTDIR".format(sys.argv[0]), file=sys.stderr)
        sys.exit(2)
    startdir = sys.argv[1]

    total_savings = 0
    total_moved = 0

    print("starting scan of {}".format(startdir), file=sys.stderr)
    for root, _dirs, files in os.walk(startdir, topdown=False):
        print("looking at {}".format(root), file=sys.stderr)
        print(
            "## total savings so far: {} ##".format(humanize.naturalsize(total_savings))
        )
        if root.startswith("/c/archive"):
            continue
        for name in files:
            filename = os.path.join(root, name)
            try:
                # lstat, not stat: a dangling symlink must not abort the walk.
                fstat = os.lstat(filename)
            except OSError as err:
                print("skipping {}: {}".format(filename, err), file=sys.stderr)
                continue
            if not stat.S_ISREG(fstat.st_mode):
                # Symlinks in particular: the copy would follow the link and
                # the move back would replace the link with a regular file.
                print("skipping {} (not a regular file)".format(name))
                continue
            if fstat.st_nlink > 1:
                print("skipping {} due to multiple hard links".format(name))
                continue
            pool = _read_layout(filename).get("pool")
            if pool is None or pool in OK_POOLS:
                continue
            print("%s in wrong pool: %s" % (name, pool))
            _rewrite_in_place(filename, name, fstat)
            # Estimated raw usage before and after: size * 6/4 versus
            # size * 7/5 (presumably the old and new pool overhead
            # factors — confirm).
            oldusage = (fstat.st_size / 4) * 6
            newusage = (fstat.st_size / 5) * 7
            savings = oldusage - newusage
            total_moved += 1
            total_savings += savings
            print("saved {}".format(humanize.naturalsize(savings)))

    print("moved {} files".format(total_moved), file=sys.stderr)
    print("saved space in total: {}".format(humanize.naturalsize(total_savings)))


# Run the migration only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()