~ihabunek/triglav

ref: 0266d54b9e454b6f482eba8ed374c9c905e5857c triglav/lib/triglav/import/zet.ex -rw-r--r-- 3.7 KiB
0266d54bIvan Habunek Improve zet import 1 year, 1 month ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
defmodule Triglav.Import.Zet do
  @moduledoc """
  Imports the latest GTFS data from ZET.

  See:
  https://download.geofabrik.de/europe/croatia.html
  """

  alias Triglav.Repo
  alias Triglav.Schemas.Zet.FeedInfo

  @type options :: [option]
  @type option :: :force

  @doc """
  Downloads the current ZET GTFS archive and imports it when it is newer than
  the locally stored feed.

  The version found in the archive's `feed_info.txt` is compared with the
  version of the local feed info record. The import runs when there is no
  local feed info, when the web version is greater than the local one, or when
  `:force` is present in `opts`.

  Returns `:ok`. Raises if the download, the extraction or one of the SQL
  scripts fails.
  """
  @spec run(options) :: :ok
  def run(opts \\ []) do
    force = :force in opts

    web_feed_archive = get_web_feed_archive()
    web_info = extract_feed_info(web_feed_archive)
    local_info = get_local_feed_info()

    web_version = String.to_integer(web_info.version)
    local_version = if local_info, do: String.to_integer(local_info.version)
    upgrade_available? = is_nil(local_info) or web_version > local_version

    IO.puts("Local version: #{local_version || "none"}")
    IO.puts("  Web version: #{web_version}")

    if force or upgrade_available? do
      IO.puts("Updating...")
      import_archive(web_feed_archive)
    else
      IO.puts("You already have the latest data. Use :force option to import anyway.")
    end

    :ok
  end

  # Extracts the archive into the temporary directory and runs the GTFS SQL
  # scripts against the database. The temporary directory is removed afterwards
  # even when a step raises.
  defp import_archive(archive) do
    temp_dir = get_temp_dir()

    try do
      extract_archive(archive, temp_dir)
      setup_db_env()

      # The scripts run with the extracted feed as the working directory
      # (presumably the load script refers to the feed files by relative path).
      # File.cd!/2 restores the previous working directory when the function
      # returns or raises, so the VM is never left inside the directory that is
      # deleted below.
      File.cd!(temp_dir, fn ->
        run_sql("priv/gtfs/schema.sql")
        run_sql("priv/gtfs/load.sql")
        run_sql("priv/gtfs/indices.sql")
      end)
    after
      File.rm_rf!(temp_dir)
    end
  end

  @doc """
  Downloads the GTFS zip archive from the URL returned by `download_url/0` and
  returns its contents as a binary.

  Raises a `MatchError` if the request fails or the response status is not 200.
  """
  @spec get_web_feed_archive() :: binary
  def get_web_feed_archive() do
    url = download_url()
    IO.puts("Downloading: #{url}")

    # :httpc is an Erlang API, so the URL is passed as a charlist, as get/1 does.
    # Only the status code is matched; the HTTP version and the reason phrase
    # are not significant and may differ between servers.
    {:ok, {{_http_version, 200, _reason}, _headers, content}} =
      :httpc.request(:get, {to_charlist(url), []}, [], body_format: :binary)

    content
  end

  # Reads the feed_info.txt entry out of the zip archive and returns it parsed
  # by parse_feed_info/1. The :memory option makes :zip return the entry's
  # contents instead of writing them to disk.
  # Raises a MatchError unless exactly that one entry is returned.
  defp extract_feed_info(archive) do
    entry_name = ~c"feed_info.txt"
    zip_opts = [:memory, {:file_list, [entry_name]}]

    {:ok, [{^entry_name, contents}]} = :zip.extract(archive, zip_opts)

    parse_feed_info(contents)
  end

  # Unpacks the zip archive into the directory `path`.
  # Returns {:ok, file_list}; raises a MatchError on any other result.
  defp extract_archive(archive, path) do
    destination = to_charlist(path)
    {:ok, _extracted} = :zip.extract(archive, [:verbose, {:cwd, destination}])
  end

  # Parses the contents of feed_info.txt (a CSV header line followed by a single
  # data row) into a map. Keys are the column names without their "feed_"
  # prefix, as atoms (e.g. "feed_version" becomes :version); values are strings
  # with surrounding double quotes removed.
  #
  # Tolerates a leading UTF-8 byte order mark, CRLF line endings, blank lines
  # and commas inside double-quoted fields. Raises a MatchError unless the input
  # consists of exactly one header line and one data row.
  defp parse_feed_info(feed_info) do
    [header, row] =
      feed_info
      |> String.replace_prefix("\uFEFF", "")
      |> String.trim()
      |> String.split(~r/\r?\n/, trim: true)
      |> Enum.map(&split_csv_line/1)

    header
    |> Enum.zip(row)
    |> Map.new(fn {key, value} ->
      {
        # NOTE: atoms are created from the header of a downloaded file. The
        # number of columns is small, but the input is external.
        key |> String.trim("\"") |> String.replace_leading("feed_", "") |> String.to_atom(),
        String.trim(value, "\"")
      }
    end)
  end

  # Splits one CSV line on the commas that lie outside double-quoted fields:
  # a comma is a separator only when an even number of quotes follows it.
  defp split_csv_line(line) do
    String.split(line, ~r/,(?=(?:[^"]*"[^"]*")*[^"]*$)/)
  end

  # Returns the locally stored FeedInfo record, or nil when the feed_info table
  # does not exist yet (see zet_schema_exists/0) or the query finds no row.
  defp get_local_feed_info() do
    schema_present? = zet_schema_exists()

    if schema_present?, do: Repo.one(FeedInfo), else: nil
  end

  # Returns the path of the "triglav_tmp" directory inside the system temporary
  # directory, creating it (and any missing parents) first.
  defp get_temp_dir() do
    dir = Path.join(System.tmp_dir!(), "triglav_tmp")
    :ok = File.mkdir_p!(dir)
    dir
  end

  @doc """
  Returns `path` joined onto the import's temporary directory.

  The temporary directory itself is created if it does not exist; the joined
  path is not.
  """
  def get_temp_dir(path) do
    base_dir = get_temp_dir()
    Path.join(base_dir, path)
  end

  # Returns whether information_schema.tables lists a table named feed_info in
  # the zet schema. Raises a MatchError if the query does not succeed.
  defp zet_schema_exists() do
    result =
      Repo.query("""
        SELECT EXISTS (
         SELECT FROM information_schema.tables
         WHERE table_schema = 'zet'
         AND table_name = 'feed_info'
      );
      """)

    {:ok, %{rows: [[table_present?]]}} = result
    table_present?
  end

  # Runs the SQL script at `path` (relative to the :triglav application
  # directory) with the psql command line client. Connection settings are taken
  # from the PG* environment variables, see setup_db_env/0.
  #
  # Returns {output, 0}; raises a MatchError when psql exits with a non-zero
  # status.
  defp run_sql(path) do
    IO.puts("Running: #{path}")
    path = Application.app_dir(:triglav, path)

    # Without ON_ERROR_STOP psql carries on after a failing statement and still
    # exits with status 0, so the match below would never detect a broken
    # import. With it set, psql stops at the first error and exits non-zero.
    {_output, 0} = System.cmd("psql", ["-v", "ON_ERROR_STOP=1", "-f", path])
  end

  # Fetches the ZET GTFS page and returns the first link it contains that
  # points into https://www.zet.hr/UserDocsImages/, with the path
  # percent-encoded so it can be used in a request.
  # NOTE(review): assumes the first such link on the page is the GTFS archive.
  #
  # Raises a RuntimeError when the page contains no such link.
  defp download_url() do
    html = get("https://www.zet.hr/odredbe/datoteke-u-gtfs-formatu/669")

    # Dots in the host name are escaped so that they only match a literal ".".
    pattern = ~r{https://www\.zet\.hr/UserDocsImages/[^"]+}

    case Regex.run(pattern, html) do
      [url | _] ->
        url
        |> URI.parse()
        |> Map.update!(:path, &URI.encode/1)
        |> URI.to_string()

      nil ->
        raise "Could not find a GTFS download link on the ZET page"
    end
  end

  # Performs an HTTP GET request and returns the response body as a binary.
  # Raises a MatchError if the request fails or the response status is not 200.
  defp get(url) do
    # body_format: :binary returns the raw bytes of the body. The default is a
    # list of bytes, and converting that with to_string/1 would treat every
    # byte as a code point and corrupt non-ASCII UTF-8 text.
    # Only the status code is matched; the HTTP version and the reason phrase
    # are not significant.
    {:ok, {{_http_version, 200, _reason}, _headers, body}} =
      :httpc.request(:get, {to_charlist(url), []}, [], body_format: :binary)

    body
  end

  # Exports the connection settings found in the :url entry of the Triglav.Repo
  # application config as the PGHOST, PGDATABASE, PGUSER, PGPASSWORD and PGPORT
  # environment variables, so that the psql commands started by run_sql/1 pick
  # them up.
  #
  # Raises a KeyError when the repo config has no :url entry.
  defp setup_db_env() do
    Application.fetch_env!(:triglav, Triglav.Repo)
    |> Keyword.fetch!(:url)
    |> Ecto.Repo.Supervisor.parse_url()
    |> Enum.each(fn
      {:hostname, hostname} -> System.put_env("PGHOST", hostname)
      {:database, database} -> System.put_env("PGDATABASE", database)
      {:username, username} -> System.put_env("PGUSER", username)
      {:password, password} -> System.put_env("PGPASSWORD", password)
      {:port, port} -> System.put_env("PGPORT", to_string(port))
      # parse_url/1 may return further options (for example ones taken from the
      # URL's query string); they have no PG* counterpart here and are skipped.
      {_key, _value} -> :ok
    end)
  end
end