~ihabunek/triglav

0266d54b9e454b6f482eba8ed374c9c905e5857c — Ivan Habunek 1 year, 3 days ago 806eea2
Improve zet import
3 files changed, 88 insertions(+), 66 deletions(-)

M lib/triglav/import/osm.ex
M lib/triglav/import/zet.ex
M lib/triglav/release.ex
M lib/triglav/import/osm.ex => lib/triglav/import/osm.ex +13 -9
@@ 41,14 41,15 @@ defmodule Triglav.Import.Osm do
        |> Map.fetch!(:path)
        |> String.trim_leading("/")

      {_, 0} = System.cmd("osm2pgsql", [
        "--hstore-all",
        "--create",
        "--slim",
        "--database",
        database,
        "croatia-latest.osm.pbf"
      ])
      {_, 0} =
        System.cmd("osm2pgsql", [
          "--hstore-all",
          "--create",
          "--slim",
          "--database",
          database,
          "croatia-latest.osm.pbf"
        ])

      Repo.query("ALTER TABLE planet_osm_rels ADD COLUMN tags_hstore hstore")
      Repo.query("ALTER TABLE planet_osm_rels ADD COLUMN type text")


@@ 58,7 59,10 @@ defmodule Triglav.Import.Osm do
      Repo.query("UPDATE planet_osm_rels SET tags_hstore = tags::hstore")
      Repo.query("UPDATE planet_osm_rels SET type = tags_hstore->'type'")
      Repo.query("UPDATE planet_osm_rels SET ref = tags_hstore->'ref'")
      Repo.query("UPDATE planet_osm_rels SET is_zet = lower(tags_hstore->'operator') in ('zet', 'zagrebački električni tramvaj')")

      Repo.query(
        "UPDATE planet_osm_rels SET is_zet = lower(tags_hstore->'operator') in ('zet', 'zagrebački električni tramvaj')"
      )

      Repo.query("CREATE INDEX idx_planet_osm_rels_is_zet ON planet_osm_rels(is_zet)")
      Repo.query("CREATE INDEX idx_planet_osm_rels_type ON planet_osm_rels(type)")

M lib/triglav/import/zet.ex => lib/triglav/import/zet.ex +73 -55
@@ 9,61 9,93 @@ defmodule Triglav.Import.Zet do
  alias Triglav.Repo
  alias Triglav.Schemas.Zet.FeedInfo

  def run(force \\ false) do
    url = get_download_url()
  @type options :: [option]
  @type option :: :force

    filename =
      url
      |> URI.parse()
      |> Map.get(:path)
      |> Path.basename()
      |> String.replace(" ", "")
  def run(opts \\ []) do
    force = :force in opts

    temp_dir = Path.join([System.tmp_dir!(), "triglav"])
    File.mkdir_p!(temp_dir)
    web_feed_archive = get_web_feed_archive()
    web_info = extract_feed_info(web_feed_archive)
    local_info = get_local_feed_info()

    IO.puts("Downloading: #{url}")
    target = Path.join([temp_dir, filename])
    download(url, target)

    IO.puts("Decompressing...")
    {_, 0} = System.cmd("unzip", ["-o", target, "-d", temp_dir])

    [header, row | _] =
      temp_dir
      |> Path.join("feed_info.txt")
      |> File.read!()
      |> String.split("\n")
      |> Enum.map(&String.split(&1, ","))

    version_index = Enum.find_index(header, &(&1 == "feed_version"))
    web_version = Enum.at(row, version_index) |> String.trim("\"")
    local_version = get_local_version()
    web_version = String.to_integer(web_info.version)
    local_version = if local_info, do: String.to_integer(local_info.version)
    upgrade_available? = is_nil(local_info) or web_version > local_version

    IO.puts("Local version: #{local_version}")
    IO.puts("Local version: #{local_version || "none"}")
    IO.puts("  Web version: #{web_version}")

    if force or is_nil(local_version) or
         String.to_integer(web_version) > String.to_integer(local_version) do
    if force or upgrade_available? do
      IO.puts("Updating...")

      temp_dir = get_temp_dir()
      extract_archive(web_feed_archive, temp_dir)

      File.cd!(temp_dir)
      setup_db_env()
      run_sql("priv/gtfs/schema.sql")
      run_sql("priv/gtfs/load.sql")
      run_sql("priv/gtfs/indices.sql")

      File.rm_rf!(temp_dir)
    else
      IO.puts("You already have the latest data. Use --force option to import anyway.")
      IO.puts("You already have the latest data. Use :force option to import anyway.")
    end
  end

  defp get_local_version() do
  # Downloads the current feed archive and returns the response body as a binary.
  #
  # The URL is obtained from download_url/0. Raises MatchError when the
  # request fails or the server answers with a status other than 200.
  def get_web_feed_archive() do
    url = download_url()
    IO.puts("Downloading: #{url}")

    # :httpc is an Erlang API; a charlist URL is accepted by every OTP release,
    # whereas binary URLs are presumably only accepted by newer ones.
    request = {to_charlist(url), []}

    # Match on the status code only. The reason phrase is free text which a
    # server may change or omit, so requiring 'OK' would reject valid replies.
    {:ok, {{_http, 200, _reason}, _headers, content}} =
      :httpc.request(:get, request, [], body_format: :binary)

    content
  end

  # Pulls feed_info.txt out of the zip `archive` (held in memory) and returns
  # it parsed by parse_feed_info/1. Raises MatchError unless exactly that one
  # file is returned by the extraction.
  defp extract_feed_info(archive) do
    wanted = 'feed_info.txt'

    {:ok, [{^wanted, contents}]} =
      :zip.extract(archive, [:memory, {:file_list, [wanted]}])

    parse_feed_info(contents)
  end

  # Unpacks the zip `archive` into the directory `path`, printing progress
  # (:verbose). Returns {:ok, file_list}; raises MatchError on any other result.
  defp extract_archive(archive, path) do
    target_dir = to_charlist(path)
    {:ok, _extracted} = :zip.extract(archive, [:verbose, {:cwd, target_dir}])
  end

  # Parses the contents of feed_info.txt into a map.
  #
  # The first line is treated as the header and the second as the data row;
  # any further lines are ignored. Keys are the header names with a leading
  # "feed_" removed, converted to atoms ("feed_version" becomes :version).
  # Values are strings with surrounding whitespace and double quotes removed.
  #
  # Raises MatchError when the input has fewer than two lines.
  defp parse_feed_info(feed_info) do
    [header, row | _] =
      feed_info
      # A UTF-8 byte order mark would otherwise become part of the first key
      |> String.replace_prefix("\uFEFF", "")
      |> String.trim()
      # Accept CRLF as well as LF so no "\r" sticks to the last column
      |> String.split(~r/\r?\n/)
      |> Enum.map(fn line ->
        line
        # Split only on commas that are outside double-quoted values
        |> String.split(~r/,(?=(?:[^"]*"[^"]*")*[^"]*$)/)
        |> Enum.map(fn field -> field |> String.trim() |> String.trim("\"") end)
      end)

    header
    |> Enum.zip(row)
    |> Map.new(fn {name, value} ->
      # NOTE(review): atoms are created from downloaded text. This is bounded
      # by the number of columns in the single header line, but confirm the
      # feed source is trusted.
      {name |> String.replace_leading("feed_", "") |> String.to_atom(), value}
    end)
  end

  defp get_local_feed_info() do
    if zet_schema_exists() do
      FeedInfo
      |> Repo.one()
      |> Map.get(:feed_version)
      Repo.one(FeedInfo)
    end
  end

  # Returns the path of the "triglav_tmp" directory inside the system temp
  # dir, creating the directory first if it does not exist yet.
  defp get_temp_dir() do
    dir = Path.join(System.tmp_dir!(), "triglav_tmp")
    :ok = File.mkdir_p!(dir)
    dir
  end

  # Returns `path` joined onto the directory returned by get_temp_dir/0.
  def get_temp_dir(path) do
    Path.join(get_temp_dir(), path)
  end

  defp zet_schema_exists() do
    {:ok, %{rows: [[exists?]]}} =
      Repo.query("""


@@ 83,11 115,15 @@ defmodule Triglav.Import.Zet do
    {_, 0} = System.cmd("psql", ["-f", path])
  end

  defp get_download_url() do
  defp download_url() do
    html = get("https://www.zet.hr/odredbe/datoteke-u-gtfs-formatu/669")
    pattern = ~r/https:\/\/www.zet.hr\/UserDocsImages\/[^"]+/

    Regex.run(~r/https:\/\/www.zet.hr\/UserDocsImages\/[^"]+/, html)
    Regex.run(pattern, html)
    |> List.first()
    |> URI.parse()
    |> Map.update!(:path, &URI.encode(&1))
    |> URI.to_string()
  end

  defp get(url) do


@@ 97,24 133,6 @@ defmodule Triglav.Import.Zet do
    to_string(body)
  end

  # Fetches `url` with :httpc, streaming the response body into the file at
  # `target`. An existing file at `target` is removed first. Raises MatchError
  # unless :httpc reports {:ok, :saved_to_file}.
  defp download(url, target) do
    # Encode whitespace in path
    parsed = URI.parse(url)
    encoded_url = URI.to_string(%{parsed | path: URI.encode(parsed.path)})

    if File.exists?(target), do: File.rm(target)

    request = {to_charlist(encoded_url), []}

    {:ok, :saved_to_file} =
      :httpc.request(:get, request, [], stream: to_charlist(target))

    IO.puts("Saved to: #{target}")
  end

  defp setup_db_env() do
    Application.fetch_env!(:triglav, Triglav.Repo)
    |> Keyword.get(:url)

M lib/triglav/release.ex => lib/triglav/release.ex +2 -2
@@ 19,9 19,9 @@ defmodule Triglav.Release do
    Triglav.Import.Osm.run(force)
  end

  def import_zet(force \\ false) do
  def import_zet(opts \\ []) do
    start_repo()
    Triglav.Import.Zet.run(force)
    Triglav.Import.Zet.run(opts)
  end

  defp repos do