1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
(ns io.dominic.clj-classpath-duplicates.core
(:require [clojure.string :as string]
[clojure.edn :as edn]
[io.aviso.columns :as c]
[clojure.java.io :as io]
[clojure.tools.cli :as cli])
(:import [io.github.classgraph ClassGraph]))
;; Paths that are expected to appear in more than one classpath element and
;; are therefore never reported as duplicates.
(let [common-ignore-res [#"data_readers\.cljc?"
                         #"project\.clj"
                         #"(?i)(ABOUT|LICENSE|AUTHORS|COPYRIGHT|README)(\.(md|txt|html))?"
                         #"META-INF/services/.*"
                         #"\.keep"
                         #".*\.pom$"
                         #"module-info\.class$"
                         #"(?i)META-INF/.*\.(?:MF|SF|RSA|DSA)"
                         #"(?i)META-INF/(?:INDEX\.LIST|DEPENDENCIES|NOTICE|LICENSE|LGPL-3\.0|ASL-2\.0)(?:\.txt)?"]
      ;; Each pattern is wrapped in its own non-capturing group before being
      ;; joined with `|`. Without the wrapper an inline flag such as `(?i)`
      ;; stays active for every alternative that follows it in the combined
      ;; expression, silently making unrelated patterns case-insensitive.
      re (->> common-ignore-res
              (map #(str "(?:" % ")"))
              (string/join "|")
              re-pattern)]
  (defn- common-ignore?
    "Returns true when `path-str` matches, in its entirety, one of the
  built-in ignore patterns; false otherwise."
    [path-str]
    (boolean (re-matches re path-str))))
(defn- match-ignore-patterns?
  "Returns true when at least one regex in `patterns` matches the whole of
  `s`, false otherwise (including when `patterns` is empty or nil)."
  [s patterns]
  (reduce (fn [_ pattern]
            ;; Stop at the first pattern that matches the full string.
            (if (re-matches pattern s)
              (reduced true)
              false))
          false
          patterns))
(defn- duplicates
  "Scans the classpath with ClassGraph and returns a lazy sequence of the
  duplicate-path entries it reports, minus every entry whose key is matched by
  `common-ignore?` or by one of the regexes in `ignore-patterns`.

  With no arguments, no extra patterns are applied."
  ([] (duplicates []))
  ([ignore-patterns]
   (let [ignored? (fn [entry]
                    (let [path (key entry)]
                      ;; Built-in ignores are checked first, then the caller's.
                      (or (common-ignore? path)
                          (match-ignore-patterns? path ignore-patterns))))
         scan-result (.scan (ClassGraph.))
         all-resources (.getAllResources scan-result)]
     (remove ignored? (.findDuplicatePaths all-resources)))))
(defn- ck-resource
  "Reads `resource` to the end and returns the SHA-1 digest of its content as
  a byte array.

  `resource` must respond to `.open` by returning an input stream. A new
  MessageDigest is created for every call: a digest shared between calls would
  keep the bytes of a read that failed part-way and mix them into the next
  checksum, and a single digest instance cannot be used from several threads."
  [resource]
  (let [sha1 (java.security.MessageDigest/getInstance "SHA-1")]
    (with-open [is (.open resource)
                dis (java.security.DigestInputStream. is sha1)
                os (java.io.OutputStream/nullOutputStream)]
      ;; The bytes themselves are discarded; copying only drives the stream
      ;; through the digest.
      (io/copy dis os)
      (.digest sha1))))
(defn- ba->hex
  "Renders the byte array `ba` as a lowercase hexadecimal string, two
  characters per byte."
  [ba]
  (let [hex (StringBuilder. (* 2 (alength ba)))]
    (run! (fn [b] (.append hex (format "%02x" b))) ba)
    (str hex)))
;; Lookup table built once at load time from the EDN file named by the
;; `clojure.libfile` system property (nil when the property is not set).
;; The file is read as a map; for each key, every entry of its value's :paths
;; becomes a key of the table pointing back at the original key.
(let [deps-libfile (some->> (System/getProperty "clojure.libfile")
                            slurp
                            edn/read-string
                            (reduce-kv
                             (fn [m k v]
                               (reduce
                                (fn [m p] (assoc m p k))
                                m
                                (:paths v)))
                             {}))]
  (defn- format-resource-path
    "Returns a string describing where `resource` was found.

  When the libfile table is available and contains the absolute path of the
  resource's classpath element file, the table's value for that path is used.
  Otherwise the classpath element URL is used, so an element that is missing
  from the table (NOTE(review): presumably the project's own source
  directories - confirm) still gets a location instead of an empty string.
  The result is always a string, whichever branch produced it."
    [resource]
    (let [f (when deps-libfile (.getClasspathElementFile resource))
          lib (when f (get deps-libfile (.getAbsolutePath f)))]
      (str (or lib (.getClasspathElementURL resource))))))
(defn- print-dupes
  "Writes one row per (path, resource) pair found in `dupes`, a collection of
  entries whose key is a path and whose value is a collection of resources.

  Options (optional map as the second argument):
    :pretty-path?  when true (default) locations are rendered with
                   `format-resource-path`, otherwise with the classpath
                   element URL
    :sha?          when true (default) a 40 character wide column holding the
                   hex SHA-1 of each resource is printed between the path and
                   the location"
  [dupes & [{:keys [pretty-path? sha?]
             :or {pretty-path? true
                  sha? true}}]]
  (let [location (if pretty-path?
                   format-resource-path
                   (memfn getClasspathElementURL))
        ;; Column widths are sized to the longest value that will be printed.
        path-width (c/max-length (map key dupes))
        location-width (c/max-length (map location (mapcat val dupes)))
        formatter (c/format-columns
                   [:left path-width]
                   (when sha? " (") (when sha? [:left 40]) (when sha? ")")
                   " | "
                   [:left location-width])
        ;; Each row is a [path resource] pair; these pull out the cell values.
        path-of first
        sha-of (fn [row] (ba->hex (ck-resource (second row))))
        location-of (comp location second)
        extractors (if sha?
                     [path-of sha-of location-of]
                     [path-of location-of])
        rows (mapcat (fn [[path resources]]
                       (map (fn [resource] [path resource]) resources))
                     dupes)]
    (c/write-rows formatter extractors rows)))
;; REPL scratch form (never evaluated on load): scan the classpath and print
;; the duplicates using the default options.
(comment
(print-dupes (duplicates)))
;; Option specs handed to clojure.tools.cli/parse-opts.
;; Both --ignore-re and --ignore store into :ignore-patterns, so each occurrence
;; is conj'ed onto whatever is already there instead of replacing it.
(def ^:private cli-options
  (let [collect (fn [opts k x] (update opts k conj x))
        ;; Turns a literal string into a regex that matches exactly that text.
        literal-pattern (fn [s] (re-pattern (java.util.regex.Pattern/quote s)))]
    [[nil "--ignore-re RE" "Regex to ignore"
      :id :ignore-patterns
      :parse-fn re-pattern
      :assoc-fn collect]
     [nil "--ignore STR" "Literal String to ignore"
      :id :ignore-patterns
      :parse-fn literal-pattern
      :assoc-fn collect]
     [nil "--full-path" "Use full path instead of coordinate where found"
      :default false
      :update-fn not]
     [nil "--no-sha" "Hide the sha"
      :default false
      :update-fn not]
     ["-h" "--help"]]))
;; REPL scratch form (never evaluated on load): parse a single --ignore argument
;; against cli-options.
(comment (cli/parse-opts ["--ignore" "foobar"] cli-options))
(defn -main
  "Command line entry point.

  Parses `args` against `cli-options`, then:
    * --help            prints the option summary and returns;
    * parse errors      prints them to stderr and exits with status 255;
    * otherwise         prints the duplicates found on the classpath (or
                        \"No duplicates\" on stderr) and exits with the number
                        of duplicate paths, capped at 254.

  The cap matters because the operating system keeps only the low 8 bits of
  the exit status: an uncapped count of 256 would be reported as 0 (success).
  254 is used rather than 255 so that 255 keeps meaning \"bad arguments\"."
  [& args]
  (let [{:keys [summary errors]
         {:keys [help ignore-patterns full-path no-sha]} :options} (cli/parse-opts args cli-options)]
    (cond
      help
      (println summary)

      (seq errors)
      (do
        ;; Diagnostics go to stderr, like the "No duplicates" notice below, so
        ;; stdout only ever carries the duplicate report.
        (binding [*out* *err*]
          (println "The following errors occurred during parsing:\n")
          (println (string/join \newline errors)))
        (System/exit 255))

      :else
      (let [dupes (duplicates ignore-patterns)
            dupe-count (count dupes)]
        (if (pos? dupe-count)
          (print-dupes dupes {:pretty-path? (not full-path)
                              :sha? (not no-sha)})
          (binding [*out* *err*]
            (println "No duplicates")))
        (System/exit (min dupe-count 254))))))