From 0361908320f307a6c79786890a87f8af9d0d6966 Mon Sep 17 00:00:00 2001
From: Thomas Spurden
Date: Sat, 17 Aug 2019 12:10:21 +0100
Subject: [PATCH] Add script to remove deleted photos from database

---
 README.md |  8 ++++----
 clean-db  | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100755 clean-db

diff --git a/README.md b/README.md
index d278541..ca29553 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,10 @@ Example usage:
 That should be all you need to find duplicates. But read on if you want more
 info.
 
+`clean-db` will delete entries from the database which no longer exist on disk.
+
+    ./clean-db photos/db photos/
+
 # Details
 
 `fingerprint-files.py` reads a list of (newline-separated) filenames from stdin
@@ -53,7 +57,3 @@ Note that whilst the method is the same this implementation uses OpenCV rather
 than ImageMagick, so fingerprints will not be comparable.
 
 [1]: http://www.jhnc.org/findimagedupes/
-
-# TODO
-
- - Some automated way to remove files from the db which don't exist on disc?
diff --git a/clean-db b/clean-db
new file mode 100755
index 0000000..7db6573
--- /dev/null
+++ b/clean-db
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# clean-db: remove entries from a photo fingerprint database whose files no
+# longer exist on disk.
+#
+# Usage: clean-db db paths...
+#   db     database file, rewritten in place; each line is assumed to be a
+#          64-character fingerprint, one space, then the file path
+#   paths  files and/or directories to search for files that still exist
+#
+# How it works: every file found under paths is emitted as a fake database
+# line carrying a placeholder fingerprint. Sorted by path, a database entry
+# whose file still exists ends up next to its placeholder twin. uniq -D keeps
+# only lines that have such a twin (comparing the path part only), and grep
+# then drops the placeholders, leaving just the entries that are still valid.
+
+# Fail the whole pipeline if any stage fails: otherwise a failing find (e.g. a
+# mistyped path) would silently replace the database with an empty result.
+set -o pipefail
+
+# Compare bytes, not locale collation, so sort and uniq agree on which paths
+# are identical and grep never treats oddly-encoded file names as binary.
+export LC_ALL=C
+
+# Assumed to be as wide as a real fingerprint, so that uniq -s65 skips
+# "fingerprint + space" and compares only the path.
+placeholder=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
+if test $# -lt 2; then
+  echo "Usage: $0 db paths..."
+  exit 1
+fi
+
+db=$1
+tmpdb=$db.tmp
+shift
+
+if test -e "$tmpdb"; then
+  echo "Looks like you have a temp db left at $tmpdb"
+  echo "If you are sure you do not want it then delete it"
+  exit 1
+fi
+
+# A missing or unreadable db is tolerated and treated as empty. grep exits 1
+# when it selects no lines (every entry is stale); that is a valid result, so
+# only grep exit codes above 1 count as a failure.
+if ! { cat "$db" 2>/dev/null; find "$@" -type f -printf "$placeholder %p\n"; } |
+    sort -k2 | uniq -D -s65 |
+    { grep -v "^$placeholder " || test $? -eq 1; } > "$tmpdb"; then
+  echo "Scan failed, leaving $db unchanged" >&2
+  rm -f -- "$tmpdb"
+  exit 1
+fi
+
+mv -- "$tmpdb" "$db"
-- 
2.45.2