@@ 1,23 1,33 @@
#!/usr/bin/env python3
-from __future__ import print_function
-
import os
import shutil
import sys
import tempfile
from collections import namedtuple
-from typing import Optional
+from typing import Optional, Callable
import xattr # type: ignore
import humanize # type: ignore
-CephLayout = namedtuple("CephLayout", ["stripe_unit", "stripe_count", "object_size", "pool"])
+CephLayout = namedtuple("CephLayout", ["stripe_count", "object_size", "pool"])
+
+TMPDIR = "/c/tmp"
+
+OK_POOLS = {"cephfs_crs52data", "cephfs_crs52data2"}
+
-TMPDIR = "/c/scratch/convert"
+def memoize(fn: Callable):
+ """ Memoization decorator for a function taking a single argument """
-OK_POOLS = {"cephfs_crs52data", "cephfs_crs52data2", "cephfs_crs52data3", "cephfs_crs52data4"}
+ class MemoDict(dict):
+ def __missing__(self, key):
+ ret = self[key] = fn(key)
+ return ret
+ return MemoDict().__getitem__
+
+@memoize
def extract_layout(filename: str) -> Optional[CephLayout]:
filetype = "file"
if os.path.isdir(filename):
@@ 32,19 42,23 @@ def extract_layout(filename: str) -> Optional[CephLayout]:
)
except OSError:
# no layout on given file/dir
+ if filetype == "dir":
+ return extract_layout(os.path.dirname(filename))
return None
for attr in xattrs:
n = attr.split("=")
cephlayout[n[0]] = n[1]
- print(cephlayout)
+ del cephlayout["stripe_unit"]
return CephLayout(**cephlayout)
# make a temp dir with the same layout as the given dir
-def mkdtemp_layout(layout: CephLayout, prefix: str = "/c/tmp") -> str:
+@memoize
+def mkdtemp_layout(layout: CephLayout, prefix: str = TMPDIR) -> str:
tempdir = tempfile.mkdtemp(dir=prefix)
+ xattrs = xattr.xattr(tempdir)
for attr in layout._fields:
- xattr.setxattr(tempdir, "ceph.dir.layout.{}".format(attr), getattr(layout, attr))
+ xattrs.set("ceph.dir.layout.{}".format(attr), bytes(getattr(layout, attr), "utf-8"))
return tempdir
@@ 54,30 68,29 @@ def main():
total_savings = 0
total_moved = 0
+ session_tmpdir = tempfile.mkdtemp(dir=TMPDIR)
+
print("starting scan of {}".format(startdir), file=sys.stderr)
for root, _, files in os.walk(startdir, topdown=False):
print("looking at {}".format(root), file=sys.stderr)
print("## total savings so far: {} ##".format(humanize.naturalsize(total_savings)))
- if root.startswith("/c/archive"):
- continue
+ dir_layout = extract_layout(root)
+ print("layout for {}: {}".format(root, dir_layout))
+ tmp_layout_dir = mkdtemp_layout(dir_layout, prefix=session_tmpdir)
for name in files:
filename = os.path.join(root, name)
fstat = os.stat(filename)
if fstat.st_nlink > 1:
print("skipping {} due to multiple hard links".format(name))
continue
- cephlayout = {}
- try:
- for attr in str(xattr.getxattr(filename, "ceph.file.layout")).strip("'").split():
- n = str(attr).split("=")
- cephlayout[str(n[0])] = str(n[1])
- except IOError:
- pass
- if "pool" in cephlayout.keys() and cephlayout["pool"] not in OK_POOLS:
- print("%s in wrong pool: %s" % (name, cephlayout["pool"]))
+ file_layout = extract_layout(filename)
+ if not file_layout:
+ continue
+ if dir_layout.pool != file_layout.pool:
+ print("%s in wrong pool: %s" % (name, file_layout.pool))
statinfo = os.stat(filename)
- tmploc = os.path.join(TMPDIR, name)
- print("copying {} to new location and pool {}".format(filename, tmploc))
+ tmploc = os.path.join(tmp_layout_dir, name)
+ print("copying {} to temp location {}".format(filename, tmploc))
shutil.copy2(filename, tmploc)
print("moving back on top of original")
shutil.move(tmploc, filename)
@@ 87,10 100,9 @@ def main():
total_moved += 1
total_savings += savings
print("saved {}".format(humanize.naturalsize(savings)))
- else:
- pass
print("saved space in total: {}".format(humanize.naturalsize(total_savings)))
+ os.rmdir(session_tmpdir)
if __name__ == "__main__":