~ihabunek/triglav
triglav/lib/triglav/import/zet.ex
ref: 5725e408 "Validate and regenerate derived data after update" (Ivan Habunek, 4 months ago)

defmodule Triglav.Import.Zet do
  @moduledoc """
  Imports the latest GTFS data from ZET.

  See:
  https://www.zet.hr/odredbe/datoteke-u-gtfs-formatu/669
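
  Example usage (a sketch; assumes `Triglav.Repo` and the `:inets`/`:ssl`
  applications are running):

      iex> Triglav.Import.Zet.run()
      {:ok, :no_update_available}

  Pass `force: true` to reimport even when no newer feed version is available.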
  """

  alias Triglav.Repo
  alias Triglav.Schemas.Zet.FeedInfo

  require Logger

  @type option :: {:force, boolean()}
  @type options :: [option]

  @spec run(options) ::
          {:ok, :no_update_available} | {:ok, {:updated_to, FeedInfo.t()}} | {:error, term}
  def run(opts \\ []) do
    force = Keyword.get(opts, :force, false)

    web_feed_archive = get_web_feed_archive()
    web_info = extract_feed_info(web_feed_archive)
    local_info = get_local_feed_info()

    web_version = String.to_integer(web_info.version)
    local_version = if local_info, do: String.to_integer(local_info.version)
    upgrade_available? = is_nil(local_info) or web_version > local_version

    Logger.info("Local version: #{local_version || "none"}")
    Logger.info("  Web version: #{web_version}")

    if force or upgrade_available? do
      Logger.info("Updating...")

      # TODO: better error handling:
      # - make functions return {:ok, _} or {:error, _}
      # - use Ecto.Multi to run every step in a transaction (see the sketch below)
      # - email on error?
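      #
      # A rough sketch of what the Ecto.Multi pipeline could look like (step names
      # are hypothetical; assumes the helpers are changed to return {:ok, _} or
      # {:error, _} as noted above):
      #
      #     Ecto.Multi.new()
      #     |> Ecto.Multi.run(:extract, fn _repo, _ -> extract_archive(web_feed_archive, temp_dir) end)
      #     |> Ecto.Multi.run(:schema, fn _repo, _ -> run_sql("priv/gtfs/schema.sql") end)
      #     |> Ecto.Multi.run(:load, fn _repo, _ -> run_sql("priv/gtfs/load.sql") end)
      #     |> Ecto.Multi.run(:transform, fn _repo, _ -> run_sql("priv/gtfs/transform.sql") end)
      #     |> Repo.transaction()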

      temp_dir = get_temp_dir()
      extract_archive(web_feed_archive, temp_dir)

      File.cd!(temp_dir)
      setup_db_env()
      run_sql("priv/gtfs/schema.sql")
      run_sql("priv/gtfs/load.sql")
      run_sql("priv/gtfs/transform.sql")

      File.rm_rf!(temp_dir)

      {:ok, {:updated_to, web_info}}
    else
      Logger.info("You already have the latest data. Use the :force option to import anyway.")

      {:ok, :no_update_available}
    end
  end

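  # Downloads the current GTFS zip archive from the ZET website and returns its
  # contents as a binary.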
  def get_web_feed_archive() do
    url = download_url()
    Logger.info("Downloading: #{url}")

    {:ok, {{_http, 200, 'OK'}, _headers, content}} =
      :httpc.request(:get, {url, []}, [], body_format: :binary)

    content
  end

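  # Reads feed_info.txt from the downloaded archive in memory, without writing
  # anything to disk.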
  defp extract_feed_info(archive) do
    {:ok, [{'feed_info.txt', feed_info}]} =
      :zip.extract(archive, [:memory, file_list: ['feed_info.txt']])

    parse_feed_info(feed_info)
  end

  defp extract_archive(archive, path) do
    {:ok, _file_list} = :zip.extract(archive, [:verbose, cwd: to_charlist(path)])
  end

  # Parses feed_info.txt into a map
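  #
  # Illustrative example (hypothetical values):
  #
  #     "feed_publisher_name,feed_version\nZET,\"20240101\""
  #     => %{publisher_name: "ZET", version: "20240101"}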
  defp parse_feed_info(feed_info) do
    feed_info
    |> String.trim()
    |> String.split("\n")
    |> Enum.map(&String.split(&1, ","))
    |> Enum.zip()
    |> Map.new(fn {k, v} ->
      {
        String.replace_leading(k, "feed_", "") |> String.to_atom(),
        String.trim(v, "\"")
      }
    end)
  end

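  # Returns the feed info of the locally imported data, or nil if no import has
  # been run yet.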
  def get_local_feed_info() do
    if zet_schema_exists() do
      Repo.one(FeedInfo)
    end
  end

  defp get_temp_dir() do
    path = Path.join([System.tmp_dir!(), "triglav_tmp"])
    File.mkdir_p!(path)
    path
  end

  def get_temp_dir(path) do
    get_temp_dir() |> Path.join(path)
  end

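  # Checks whether the zet.feed_info table exists, i.e. whether an import has
  # already created the schema.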
  defp zet_schema_exists() do
    {:ok, %{rows: [[exists?]]}} =
      Repo.query("""
        SELECT EXISTS (
          SELECT FROM information_schema.tables
          WHERE table_schema = 'zet'
          AND table_name = 'feed_info'
        );
      """)

    exists?
  end

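  # Runs a SQL script from the application's priv directory through psql.
  # Connection settings are taken from the PG* environment variables set by
  # setup_db_env/0.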
  defp run_sql(path) do
    Logger.info("Running: #{path}")
    path = Application.app_dir(:triglav, path)
    {_, 0} = System.cmd("psql", ["-f", path])
  end

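  # Scrapes the ZET GTFS page for the link to the archive under /UserDocsImages/
  # and returns it with the path URI-encoded.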
  defp download_url() do
    html = get("https://www.zet.hr/odredbe/datoteke-u-gtfs-formatu/669")
    pattern = ~r/https:\/\/www.zet.hr\/UserDocsImages\/[^"]+/

    Regex.run(pattern, html)
    |> List.first()
    |> URI.parse()
    |> Map.update!(:path, &URI.encode(&1))
    |> URI.to_string()
  end

  defp get(url) do
    {:ok, {{'HTTP/1.1', 200, 'OK'}, _headers, body}} =
      :httpc.request(:get, {to_charlist(url), []}, [], [])

    to_string(body)
  end

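  # Exports the repo's connection settings as libpq environment variables so that
  # psql (invoked by run_sql/1) connects to the same database. For example, a
  # hypothetical url of "ecto://user:pass@localhost/triglav" would set PGUSER=user,
  # PGPASSWORD=pass, PGHOST=localhost and PGDATABASE=triglav.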
  defp setup_db_env() do
    Application.fetch_env!(:triglav, Triglav.Repo)
    |> Keyword.get(:url)
    |> Ecto.Repo.Supervisor.parse_url()
    |> Enum.each(fn
      {:hostname, hostname} -> System.put_env("PGHOST", hostname)
      {:database, database} -> System.put_env("PGDATABASE", database)
      {:username, username} -> System.put_env("PGUSER", username)
      {:password, password} -> System.put_env("PGPASSWORD", password)
      {:port, port} -> System.put_env("PGPORT", to_string(port))
    end)
  end
end