M src/straatnaam/data.clj => src/straatnaam/data.clj +10 -1
@@ 50,10 50,19 @@
SELECT * FROM " sn ".verblijfsobject_pand")])
(log/info "version activated:" sn))
+(defn- current-sn
+ "Determine schema name of the currently exposed version (whichever was
+ imported successfully last); nil when no row in bag_syncs is marked ok."
+ [db]
+ (-> db
+ (sql/query "SELECT schema FROM bag_syncs WHERE ok ORDER BY finished_at DESC LIMIT 1")
+ first
+ :schema))
+
(defn version-ok?
"Returns true when version passes sanity checks."
[db sn]
- (and (sanity/count-stats-ok? db sn)
+ (and (sanity/count-stats-ok? db sn (current-sn db)) ; (current-sn db) is the schema of the last successful import, or nil
(sanity/common-addresses-available? db sn)))
(defn versions
M src/straatnaam/sanity.clj => src/straatnaam/sanity.clj +39 -43
@@ 23,68 23,64 @@
(defn ratio-vals
"Return map of ratios between values in given maps a and b. Missing
- entry in one of the maps is replaced by zero values yielding either
- 0 or (-)infinity."
+ entries (keys absent from either map) are omitted from the result."
[a b]
(reduce (fn [m k]
- (let [a (get a k 0)
- b (get b k 0)]
- (assoc m k (if (zero? a)
- (if (neg? b)
- Double/NEGATIVE_INFINITY
- Double/POSITIVE_INFINITY)
- (/ b a)))))
+ (let [a (get a k)
+ b (get b k)]
+ (if (and a b) ; a ratio is only produced when k has a value in both maps
+ (assoc m k (/ b a)) ; NOTE(review): an integer a of 0 throws ArithmeticException here (the removed code guarded this) - confirm values are never 0
+ m)))
{}
(set (concat (keys a)
(keys b)))))
-(defn count-by-object-type
- "Returns a map of object_type to count in given sn."
- [db sn]
- (reduce (fn [m {:keys [n object_type]}]
- (assoc m object_type n))
- {}
- (sql/query db [(str "SELECT COUNT(*) AS n, object_type
- FROM " sn ".bag
- GROUP BY object_type")])))
+;; Reference counts per table, based on the import of 2023-01-08; count-stats-ok? falls back to these when no current counts are available. Rebound by tests.
+(def ^:dynamic *baseline-count-by-table*
+ {"ligplaats" 17985
+ "nummeraanduiding" 12187041
+ "pand" 20845820
+ "standplaats" 54179
+ "verblijfsobject" 21684895
+ "woonplaats" 3968})
(defn- table-or-view-exists? [db name & [sn]]
(sql/with-db-metadata [md db]
(.next (.getTables md nil sn name nil))))
-(defn virgin?
- "Test if a current version is available and contains data."
- [db]
- (or (not (table-or-view-exists? db "bag" "public"))
- (-> db (sql/query "SELECT COUNT(*) AS n FROM public.bag")
- first :n
- (= 0))))
-
-;; Values based on import of 2021-09-14. Overridden by tests.
-(def ^:dynamic *baseline-count-by-object-type*
- {"ligplaats" 13000, "standplaats" 30000, "verblijfsobject" 9537000})
+(defn count-by-table
+ "Returns a map of table name to row count for each baseline table that exists in schema sn."
+ [db sn]
+ (reduce (fn [m table]
+ (if (table-or-view-exists? db table sn) ; tables absent from schema sn are left out of the result
+ (assoc m table
+ (-> db
+ (sql/query (str "SELECT COUNT(*) AS n FROM " sn "." table)) ; NOTE(review): sn and table are concatenated into the SQL unescaped - assumes both are trusted identifiers
+ first
+ :n))
+ m))
+ {}
+ (keys *baseline-count-by-table*))) ; only tables named in the (possibly rebound) baseline are counted
(defn count-stats-ok?
"Are the count stats of the given version sn good compared to the
- current version in public. Return true if the object_types counts
- in the bag table differs between -5% and 10% percent from the
- current version or between -10% and 20% percent from the hardcoded
- baseline values. Bad stats are logged."
- [db sn]
- (let [[pred
- baseline] [#(< 0.90 (val %) 1.20)
- (if (virgin? db)
- *baseline-count-by-object-type*
- (count-by-object-type db "public"))]
- new (count-by-object-type db sn)
- bad-ratios (remove pred (ratio-vals baseline new))]
+ current version in schema current-sn. Return true if every compared table
+ count is between 0% and 20% higher than in the current version or, when no
+ current counts are available, than the hardcoded baseline values. Tables which are not in both schemas are
+ not considered to allow the introduction of extra imported data in
+ newer versions. Bad stats are logged."
+ [db new-sn current-sn]
+ (let [current (when current-sn (count-by-table db current-sn)) ; nil when there is no current schema name
+ current (if (seq current) current *baseline-count-by-table*) ; fall back to the baseline when nothing could be counted
+ new (count-by-table db new-sn)
+ bad-ratios (remove #(<= 1 (val %) 1.20) (ratio-vals current new))] ; ratio is new / current; anything outside [1, 1.20] is bad
(if (seq bad-ratios)
(do
(log/info "difference too big for:"
(string/join ", "
(map #(let [[k] %]
- (str k " (" (get baseline k)
- " => " (get new k 0) ")"))
+ (str k " (" (get current k)
+ " => " (get new k) ")"))
bad-ratios)))
false)
true)))
M test/straatnaam/data_test.clj => test/straatnaam/data_test.clj +7 -5
@@ 58,11 58,13 @@
(compose-fixtures
test-db/each-fixture
(fn [f]
- (binding [sanity/*baseline-count-by-object-type* {"ligplaats" 1
- "standplaats" 1
- "verblijfsobject" 3}
- sanity/*common-addresses* [["1011AB" 105
- "De Ruijterkade" "Amsterdam"]]]
+ (binding [sanity/*baseline-count-by-table* {"ligplaats" 1
+ "nummeraanduiding" 5
+ "verblijfsobject" 1
+ "standplaats" 1
+ "pand" 1
+ "woonplaats" 3}
+ sanity/*common-addresses* [["1011AB" 105 "De Ruijterkade" "Amsterdam"]]]
(let [srv (run-jetty handler {:port test-port, :join? false})]
(try
(f)