~gpanders/pushbroom

ref: 68d19f75979d2060a9ee13b2ba4dd5098d7628bd pushbroom/src/pushbroom/sweep.py -rw-r--r-- 2.0 KiB
68d19f75Greg Anders Use pathlib for path operations 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import errno
import logging
import os
import re
import shutil
import time
from pathlib import Path

# Number of seconds in one day (seconds/minute * minutes/hour * hours/day).
SECONDS_PER_DAY = 60 * 60 * 24


def delete(path, shred):
    """Delete the file at the given path.

    If ``shred`` is True, first write over the file with random data before deleting.

    :path:  Path of the file to delete
    :shred: Overwrite the file contents with random bytes before unlinking

    """
    if shred:
        # Open with "r+b" rather than an append mode: in append mode writes may
        # always land at the end of the file regardless of the seek position,
        # which would leave the original contents untouched.
        with open(path, "r+b") as fil:
            remaining = fil.seek(0, os.SEEK_END)
            fil.seek(0)
            # Overwrite in bounded chunks so a large file does not require a
            # random buffer as large as the file itself.
            while remaining > 0:
                chunk = min(remaining, 1024 * 1024)
                fil.write(os.urandom(chunk))
                remaining -= chunk
            # Push the overwritten data to disk before the file is unlinked.
            fil.flush()
            os.fsync(fil.fileno())

    Path(path).unlink()


def sweep(name, path, num_days, ignore, match, trash, dry_run, shred):
    # pylint: disable = too-many-arguments
    """Remove old files from a directory

    Walks ``path`` recursively. Directories whose name does not match ``match``
    or does match ``ignore`` are not descended into, and files are filtered by
    the same rule. A remaining file is removed (or moved to ``trash``) when its
    modification time is older than ``num_days`` days.

    :name:     Name of the section being cleaned
    :path:     Path to remove files from
    :num_days: Remove files older than this many days
    :ignore:   Regular expression pattern of paths to ignore
    :match:    Regular expression pattern of paths to remove
    :trash:    If set, move files to this directory instead of deleting them
    :dry_run:  Only show what would happen without actually doing anything
    :shred:    Securely delete file data before removing

    """
    logging.info("Sweeping %s", name)
    num_seconds = num_days * SECONDS_PER_DAY
    thresh = time.time() - num_seconds
    trash_dir = Path(trash) if trash else None
    for root, dirs, files in os.walk(path):
        # Assign in place so os.walk only descends into the remaining directories.
        dirs[:] = [d for d in dirs if re.match(match, d) and not re.match(ignore, d)]
        files = [f for f in files if re.match(match, f) and not re.match(ignore, f)]
        root_path = Path(root)
        for file in files:
            fpath = root_path.joinpath(file)
            # Stat once instead of exists() followed by stat(): the file may
            # disappear between the two calls, and a broken symlink cannot be
            # inspected either. Such entries are skipped.
            try:
                mtime = fpath.stat().st_mtime
            except OSError as err:
                logging.debug("Skipping %s: %s", fpath, err)
                continue

            # Files modified at or after the threshold are not old enough.
            if mtime >= thresh:
                continue

            if trash_dir is not None:
                logging.info("Moving %s to %s", fpath, trash)
                if not dry_run:
                    dest = trash_dir.joinpath(fpath.name)
                    try:
                        fpath.rename(dest)
                    except OSError as err:
                        # rename() cannot cross filesystems; fall back to a
                        # copy-and-remove move in that case only.
                        if err.errno != errno.EXDEV:
                            raise
                        shutil.move(str(fpath), str(dest))
            else:
                if shred:
                    logging.info("Securely deleting %s", fpath)
                else:
                    logging.info("Deleting %s", fpath)

                if not dry_run:
                    delete(fpath, shred)