~ihabunek/triglav

ref: 26f2c3b921aea5c0e33d1f7c4320e9e6ceb08ea0 triglav/lib/triglav/import/zet.ex -rw-r--r-- 3.8 KiB
26f2c3b9Ivan Habunek Calculate and store derived data for public transport 4 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
defmodule Triglav.Import.Zet do
  @moduledoc """
  Imports the latest GTFS data from ZET.

  See:
  https://download.geofabrik.de/europe/croatia.html
  """

  alias Triglav.Repo
  alias Triglav.Schemas.Zet.FeedInfo

  @type options :: [option]
  @type option :: {:force, boolean()}

  @doc """
  Downloads the ZET GTFS feed and imports it if it is newer than the local data.

  The version found in the downloaded `feed_info.txt` is compared with the
  version of the locally stored feed info. The import is performed when there
  is no local feed info, when the web version is greater than the local one,
  or when the `:force` option is set.

  ## Options

    * `:force` - when `true`, import even if the local data is up to date.
      Defaults to `false`.

  Returns `:ok`. Raises if the download, the extraction or any of the SQL
  scripts fails.
  """
  @spec run(options) :: :ok
  def run(opts \\ []) do
    force = Keyword.get(opts, :force, false)

    web_feed_archive = get_web_feed_archive()
    web_info = extract_feed_info(web_feed_archive)
    local_info = get_local_feed_info()

    web_version = String.to_integer(web_info.version)
    local_version = if local_info, do: String.to_integer(local_info.version)
    upgrade_available? = is_nil(local_info) or web_version > local_version

    IO.puts("Local version: #{local_version || "none"}")
    IO.puts("  Web version: #{web_version}")

    if force or upgrade_available? do
      IO.puts("Updating...")

      temp_dir = get_temp_dir()

      try do
        extract_archive(web_feed_archive, temp_dir)

        # File.cd!/2 restores the previous working directory afterwards, so the
        # VM is never left sitting in a directory that is about to be deleted.
        # NOTE(review): presumably load.sql reads the extracted feed files via
        # paths relative to the working directory - confirm.
        File.cd!(temp_dir, fn ->
          setup_db_env()
          run_sql("priv/gtfs/schema.sql")
          run_sql("priv/gtfs/load.sql")
          run_sql("priv/gtfs/transform.sql")
        end)
      after
        # Remove the extracted files even when one of the steps above crashes.
        File.rm_rf!(temp_dir)
      end

      :ok
    else
      IO.puts("You already have the latest data. Use :force option to import anyway.")
    end
  end

  @doc """
  Downloads the GTFS feed archive from the ZET web site.

  The archive URL is looked up on the ZET download page first. Returns the
  response body as a binary. Raises `MatchError` when the request fails or the
  response status is not 200.
  """
  @spec get_web_feed_archive() :: binary()
  def get_web_feed_archive() do
    url = download_url()
    IO.puts("Downloading: #{url}")

    # The URL is converted to a charlist, the same way get/1 does it, since
    # :httpc is an Erlang API built around Erlang strings. Only the status code
    # is asserted; the HTTP version and the reason phrase are not significant.
    {:ok, {{_http, 200, _reason}, _headers, content}} =
      :httpc.request(:get, {to_charlist(url), []}, [], body_format: :binary)

    content
  end

  # Pulls only feed_info.txt out of the zip archive (in memory, nothing is
  # written to disk) and returns its parsed contents.
  defp extract_feed_info(archive) do
    file_name = ~c"feed_info.txt"

    {:ok, [{^file_name, contents}]} =
      :zip.extract(archive, [:memory, {:file_list, [file_name]}])

    parse_feed_info(contents)
  end

  # Unpacks the whole zip archive into the directory `path`.
  # Crashes with a MatchError unless the extraction succeeds.
  defp extract_archive(archive, path) do
    target_dir = to_charlist(path)
    {:ok, _extracted_files} = :zip.extract(archive, [:verbose, {:cwd, target_dir}])
  end

  # Parses the contents of feed_info.txt (a header line followed by a single
  # data line) into a map.
  #
  # Keys are the header names with the leading "feed_" removed, converted to
  # atoms. Values are strings with surrounding double quotes removed.
  #
  # A leading UTF-8 byte order mark, CRLF line endings and quoted header names
  # are tolerated, so they do not end up inside keys or values.
  #
  # NOTE(review): lines are split on every comma, so a quoted value which
  # itself contains a comma would misalign keys and values.
  # NOTE(review): atoms are created from downloaded data; the number is bounded
  # by the column count of a single file.
  defp parse_feed_info(feed_info) do
    feed_info
    |> String.trim_leading("\uFEFF")
    |> String.trim()
    |> String.split(["\r\n", "\n"])
    |> Enum.map(&String.split(&1, ","))
    |> Enum.zip()
    |> Map.new(fn {k, v} ->
      key =
        k
        |> String.trim("\"")
        |> String.replace_leading("feed_", "")
        |> String.to_atom()

      {key, String.trim(v, "\"")}
    end)
  end

  @doc """
  Returns the locally stored feed info.

  Gives `nil` when the `zet.feed_info` table does not exist; otherwise returns
  the result of `Repo.one(FeedInfo)`.
  """
  def get_local_feed_info() do
    if zet_schema_exists(), do: Repo.one(FeedInfo), else: nil
  end

  # Makes sure the "triglav_tmp" directory exists inside the system temporary
  # directory and returns its path.
  defp get_temp_dir() do
    dir = Path.join(System.tmp_dir!(), "triglav_tmp")
    File.mkdir_p!(dir)
    dir
  end

  @doc """
  Returns `path` joined onto the temporary working directory.

  The temporary directory itself is created if missing; `path` is not.
  """
  def get_temp_dir(path) do
    Path.join(get_temp_dir(), path)
  end

  # Asks the database whether the table "feed_info" exists in the "zet" schema.
  # Returns the value of the EXISTS query; crashes if the query fails.
  defp zet_schema_exists() do
    sql = """
      SELECT EXISTS (
       SELECT FROM information_schema.tables
       WHERE table_schema = 'zet'
       AND table_name = 'feed_info'
    );
    """

    {:ok, %{rows: [[table_present?]]}} = Repo.query(sql)
    table_present?
  end

  # Runs the SQL script at `path` (relative to the :triglav application
  # directory) through the psql command line client.
  #
  # Connection settings are taken by psql from the environment (see
  # setup_db_env/0). Crashes with a MatchError unless psql exits with status 0.
  defp run_sql(path) do
    IO.puts("Running: #{path}")
    path = Application.app_dir(:triglav, path)

    # Without ON_ERROR_STOP psql carries on after a failing statement and still
    # exits with status 0, which would make the assertion below meaningless
    # for SQL errors.
    {_, 0} = System.cmd("psql", ["-v", "ON_ERROR_STOP=1", "-f", path])
  end

  # Finds the URL of the GTFS archive by scraping the ZET download page.
  #
  # Returns the first link pointing into /UserDocsImages/, with its path
  # component passed through URI.encode/1. Raises when the page contains no
  # such link.
  defp download_url() do
    html = get("https://www.zet.hr/odredbe/datoteke-u-gtfs-formatu/669")

    # Dots are escaped so that they only match literal dots in the host name.
    pattern = ~r/https:\/\/www\.zet\.hr\/UserDocsImages\/[^"]+/

    case Regex.run(pattern, html) do
      [link | _] ->
        link
        |> URI.parse()
        |> Map.update!(:path, &URI.encode/1)
        |> URI.to_string()

      nil ->
        raise "Could not find a GTFS download link on the ZET web page"
    end
  end

  # Fetches `url` with an HTTP GET request and returns the response body as a
  # binary. Crashes with a MatchError when the request fails or the response
  # status is not 200.
  defp get(url) do
    # The body is requested as a binary: converting a charlist of bytes with
    # to_string/1 would treat every byte as a code point and mangle any
    # non-ASCII UTF-8 text in the page.
    {:ok, {{_http, 200, _reason}, _headers, body}} =
      :httpc.request(:get, {to_charlist(url), []}, [], body_format: :binary)

    body
  end

  # Exports the repository's connection settings as the PG* environment
  # variables so that the psql client connects to the same database.
  #
  # The settings are parsed from the :url entry of the Triglav.Repo
  # configuration.
  defp setup_db_env() do
    Application.fetch_env!(:triglav, Triglav.Repo)
    |> Keyword.get(:url)
    |> Ecto.Repo.Supervisor.parse_url()
    |> Enum.each(fn
      {:hostname, hostname} -> System.put_env("PGHOST", hostname)
      {:database, database} -> System.put_env("PGDATABASE", database)
      {:username, username} -> System.put_env("PGUSER", username)
      {:password, password} -> System.put_env("PGPASSWORD", password)
      {:port, port} -> System.put_env("PGPORT", to_string(port))
      # Any other entry produced by parse_url is not mapped to an environment
      # variable; ignore it instead of crashing with a FunctionClauseError.
      _other -> :ok
    end)
  end
end