~etalab/codegouvfr-consolidate-data

76fa40c9a4a212b6b4fe80dcd9f16a87d221213c — Bastien Guerry 12 days ago 17c6372
Add CODEGOUVFR_DAYS_INTERVAL env var

Small refactoring.
3 files changed, 15 insertions(+), 19 deletions(-)

M src/core.clj
M src/hut.clj
M src/utils.clj
M src/core.clj => src/core.clj +3 -4
@@ 274,8 274,7 @@
;;; Prepare data for json generation

(def prepare-repos
  (let [repo-mapping (:repos utils/mappings)
        max-desc     utils/max-description-length]
  (let [repo-mapping (:repos utils/mappings)]
    (comp
     ;; Add is_lib if repo is also listed in libraries
     (map #(assoc % :is_lib (is-lib (:repository_url %))))


@@ 289,8 288,8 @@
     (map #(assoc % :dp (count (:libraries (:dependencies %)))))
     ;; Remap licenses
     (map #(assoc % :li (get (:licenses utils/mappings) (:li %))))
     ;; Limit description
     (map #(update % :d (fn [d] (if d (subs d 0 (min (count d) max-desc)) ""))))
     ;; Limit description - hardcode it to 200
     (map #(update % :d (fn [d] (if d (subs d 0 (min (count d) 200)) ""))))
     ;; Replace emojis
     (map #(update
            %

M src/hut.clj => src/hut.clj +1 -1
@@ 20,7 20,7 @@
          :headers conj {"content-type" "application/json"}))

(defn- query-hut-api [subdomain q]
  (Thread/sleep utils/thread-interval)
  (Thread/sleep (:thread-interval utils/env-vars))
  (let [res (try (curl/get (str "https://" subdomain ".sr.ht/query")
                           (hut-parameters q))
                 (catch Exception e

M src/utils.clj => src/utils.clj +11 -14
@@ 15,15 15,11 @@
            [hickory.select :as hs]
            [taoensso.timbre :as timbre]))

;; Maximum number of characters kept when a description (the :d key) is
;; truncated in src/core.clj.
(defonce max-description-length 200)

;; Number of days used by needs-updating?, both as the age threshold and as
;; the exclusive upper bound of its random offset.
(defonce updating-after-days 30)

;; Value passed to Thread/sleep (milliseconds) before querying remote APIs,
;; read from the CODEGOUVFR_GET_INTERVAL environment variable.
;; NOTE(review): System/getenv returns nil when the variable is unset, in
;; which case the Integer constructor throws while this namespace is being
;; loaded - confirm the variable is always exported.
(defonce thread-interval (Integer. (System/getenv "CODEGOUVFR_GET_INTERVAL")))

;; Settings read from the process environment, evaluated once (defonce).
;; This diff view shows two map literals: the first is the previous value,
;; the second is the updated one that adds the two integer settings.
;;   :gh-user / :gh-token   - used together as the :basic-auth pair
;;   :thread-interval       - passed to Thread/sleep (milliseconds)
;;   :updating-after-days   - day count used by needs-updating?
;; NOTE(review): System/getenv returns nil for an unset variable, so both
;; (Integer. ...) calls throw at load time when CODEGOUVFR_GET_INTERVAL or
;; CODEGOUVFR_DAYS_INTERVAL is missing. The former literal default of 30 days
;; no longer applies - confirm both variables are set in every deployment.
(defonce env-vars
  {:gh-user  (System/getenv "CODEGOUVFR_GITHUB_USER")
   :gh-token (System/getenv "CODEGOUVFR_GITHUB_ACCESS_TOKEN")})
  {:gh-user             (System/getenv "CODEGOUVFR_GITHUB_USER")
   :gh-token            (System/getenv "CODEGOUVFR_GITHUB_ACCESS_TOKEN")
   :thread-interval     (Integer. (System/getenv "CODEGOUVFR_GET_INTERVAL"))
   :updating-after-days (Integer. (System/getenv "CODEGOUVFR_DAYS_INTERVAL"))})

(defonce urls
  {:sources    "https://git.sr.ht/~etalab/codegouvfr-sources/blob/master/comptes-organismes-publics.yml"


@@ 185,14 181,15 @@
  (merge user-agent {:basic-auth [(:gh-user env-vars) (:gh-token env-vars)]}))

;; Decide whether data stamped with `date-str` should be refreshed.
;; Returns true when `date-str` is not a string. Otherwise returns true when
;; the instant parsed from `date-str`, moved back by a random number of days
;; (rand-int: 0 inclusive up to the day count exclusive), is before the
;; current instant minus the day count.
;; Because of the random offset the result is not deterministic for dates
;; newer than the threshold.
;; This diff view shows two bodies: the first reads the day count from
;; `updating-after-days`, the second binds it as `delay` from
;; (:updating-after-days env-vars).
(defn needs-updating? [date-str]
  (if-not (string? date-str)
    true
    (t/before?
     (t/minus (t/instant date-str) (t/days (rand-int updating-after-days)))
     (t/minus (t/instant) (t/days updating-after-days)))))
  (let [delay (:updating-after-days env-vars)]
    (if-not (string? date-str)
      true
      (t/before?
       (t/minus (t/instant date-str) (t/days (rand-int delay)))
       (t/minus (t/instant) (t/days delay))))))

(defn get-contents [s]
  (Thread/sleep 1000)
  (Thread/sleep (:thread-interval env-vars))
  (let [url?    (re-find #"https://" s)
        gh-api? (and url? (re-find #"https://api.github.com" s))
        res     (try (apply