~ihabunek/triglav

5a21a522b53dd78688ee57a38d5a9a28ffa5a858 — Ivan Habunek 1 year, 11 months ago 0cfd9d2 zet-import
WIP
M .gitignore => .gitignore +3 -0
@@ 38,3 38,6 @@ npm-debug.log

# Local config
/config/*.local.exs

# Temp dir
/tmp

A feed_info.txt => feed_info.txt +2 -0
@@ 0,0 1,2 @@
feed_publisher_name,feed_publisher_url,feed_lang,feed_start_date,feed_end_date,feed_version
"Zagrebački Električni Tramvaj","http://www.zet.hr","hr",20201005,20201231,"000285"

M lib/mix/tasks/triglav/import_gtfs.ex => lib/mix/tasks/triglav/import_gtfs.ex +29 -23
@@ 1,33 1,39 @@
defmodule Mix.Tasks.Triglav.ImportGtfs do
defmodule Mix.Tasks.Triglav.ImportZet do
  use Mix.Task

  alias Triglav.Repo
  alias Triglav.Zet.Gtfs

  @shortdoc "Imports GTFS data for ZET routes"

  @impl Mix.Task
  # Entry point of the Mix task; the task arguments are ignored.
  #
  # Sets the :repo_only flag in the :triglav application environment before
  # starting the application (presumably so that only the repo is started --
  # confirm against the application module), loads the stored import state and
  # returns whatever get_web_state/0 returns.
  def run(_args) do
    :ok = Application.put_env(:triglav, :repo_only, true)
    {:ok, _started_apps} = Application.ensure_all_started(:triglav)

    # The stored state is loaded but not used for anything yet.
    _db_state = Triglav.DataImport.load_state()
    get_web_state()
  end

  # Fetches the ZET GTFS page and searches it for the first UserDocsImages
  # link. The raw Regex.run/2 result (a one-element list, or nil when nothing
  # matches) is printed with the "matches" label and returned unchanged.
  defp get_web_state() do
    page = get("https://www.zet.hr/odredbe/datoteke-u-gtfs-formatu/669")
    matches = Regex.run(~r/https:\/\/www.zet.hr\/UserDocsImages\/[^"]+/, page)

    IO.inspect(matches, label: "matches")
  end

  # Performs an HTTP GET on `url` and returns the response body as a binary.
  #
  # Only the status code is matched: pinning the full status line
  # ('HTTP/1.1', 200, 'OK') would crash on a server answering with another
  # HTTP version or reason phrase. Any non-200 response or transport error
  # still fails the match and raises MatchError.
  #
  # The body is requested as a binary; converting httpc's default byte list
  # with to_string/1 would re-encode every byte >= 128 as a code point and
  # mangle UTF-8 pages.
  defp get(url) do
    {:ok, {{_http_version, 200, _reason_phrase}, _headers, body}} =
      :httpc.request(:get, {to_charlist(url), []}, [], body_format: :binary)

    body
  end

  # Downloads `url` with :httpc, streaming the response body straight into the
  # file at `target`. Raises MatchError unless httpc reports :saved_to_file.
  #
  # NOTE(review): this listing looks like a rendered diff with the +/- markers
  # stripped (the module has two `defmodule` headers above). The
  # `Triglav.Import.Zet.run()` call and the commented-out block below may
  # belong to `run/1` rather than to this function -- confirm against the
  # actual file before relying on this body.
  defp download(url, target) do
    {:ok, :saved_to_file} =
      :httpc.request(:get, {to_charlist(url), []}, [], stream: to_charlist(target))
    Triglav.Import.Zet.run()

    # Disabled draft of an interactive upgrade flow: compare the file name
    # taken from the remote URL with the locally recorded feed file and ask
    # before upgrading. It refers to `web_url` and `upgrade/2`, neither of
    # which is defined in the visible code.
    # web_filename =
    #   web_url
    #   |> URI.parse()
    #   |> Map.get(:path)
    #   |> Path.basename()
    #   |> String.replace(" ", "")

    # local_filename = Gtfs.get_feed_info() |> IO.inspect() |> Map.get(:file)

    # IO.puts(" Local data: #{local_filename}")
    # IO.puts("Remote data: #{web_filename}")

    # if web_filename != local_filename do
    #   response = IO.gets("Upgrade? [Y|n] ") |> String.downcase() |> String.trim()

    #   if response in ["", "y"] do
    #     IO.puts("Let's go.")
    #     upgrade(web_url, web_filename)
    #   else
    #     IO.puts("Not upgrading. Bye.")
    #   end
    # else
    #   IO.puts("You already have the latest data.")
    # end
  end
end

A lib/triglav/import/zet.ex => lib/triglav/import/zet.ex +107 -0
@@ 0,0 1,107 @@
defmodule Triglav.Import.Zet do
  @moduledoc """
  Imports the latest GTFS data from ZET.

  The download link is scraped from the ZET GTFS page:
  https://www.zet.hr/odredbe/datoteke-u-gtfs-formatu/669

  This is a work in progress: only the schema (re)creation step is enabled in
  `run/0`; the download, unzip and load steps are still commented out.
  """

  alias Triglav.Repo

  @gtfs_page_url "https://www.zet.hr/odredbe/datoteke-u-gtfs-formatu/669"

  @doc """
  Runs the import.

  Looks up the current download URL on the ZET GTFS page, makes sure the
  temporary working directory exists and recreates the `zet` database schema
  by running `priv/gtfs/schema.sql` through `psql`.

  Returns the `{output, 0}` tuple of the last `psql` invocation. Raises when
  the page cannot be fetched, no download link is found, or `psql` exits with
  a non-zero status.
  """
  @spec run() :: {String.t(), 0}
  def run() do
    url = get_download_url()

    temp_dir = Path.join(System.tmp_dir!(), "triglav")
    File.mkdir_p!(temp_dir)

    IO.puts("Downloading: #{url}")

    # TODO: the download / unzip / version detection steps are not enabled yet.
    #
    # filename =
    #   url
    #   |> URI.parse()
    #   |> Map.get(:path)
    #   |> Path.basename()
    #   |> String.replace(" ", "")
    #
    # target = Path.join(temp_dir, filename)
    # download(url, target)

    # IO.puts("Decompressing...")
    # {_, 0} = System.cmd("unzip", ["-o", target, "-d", temp_dir])

    # [header, row | _] =
    #   temp_dir
    #   |> Path.join("feed_info.txt")
    #   |> File.read!()
    #   |> String.split("\n")
    #   |> Enum.map(&String.split(&1, ","))

    # version_index = Enum.find_index(header, & &1 == "feed_version")
    # version = Enum.at(row, version_index)

    # Not inspected/printed on purpose: the config contains the DB password.
    config = db_config()

    # Drop and recreate schema.
    #
    # psql is started inside temp_dir (see run_sql/3) so that the relative
    # file names used by load.sql resolve against the extracted feed. The
    # script paths are therefore resolved to absolute paths first, instead of
    # changing the working directory of the whole VM with File.cd!/1.
    run_sql(config, sql_path("schema.sql"), temp_dir)
    # run_sql(config, sql_path("load.sql"), temp_dir)
    # run_sql(config, sql_path("indices.sql"), temp_dir)
  end

  # Absolute path of a script shipped in the application's priv/gtfs directory.
  defp sql_path(name) do
    Application.app_dir(:triglav, ["priv", "gtfs", name])
  end

  # Executes the SQL script at `path` with psql, using `cwd` as psql's working
  # directory. Raises MatchError when psql exits with a non-zero status.
  defp run_sql(config, path, cwd) do
    {_output, 0} =
      System.cmd("psql", psql_args(config, path), cd: cwd, env: psql_env(config))
  end

  # Builds the psql argument list.
  #
  # * System.cmd/3 does not go through a shell, so a "<" redirection would be
  #   handed to psql as a literal argument; the script is passed with --file.
  # * Every argument must be a binary, while the parsed :port is an integer.
  # * Connection settings missing from the repo URL are left out so psql falls
  #   back to its own defaults.
  # * ON_ERROR_STOP makes psql exit with a non-zero status when a statement in
  #   the script fails, which is what the exit status check relies on.
  defp psql_args(config, path) do
    connection_args =
      [hostname: "--host", port: "--port", username: "--username", database: "--dbname"]
      |> Enum.flat_map(fn {key, flag} ->
        case Keyword.fetch(config, key) do
          {:ok, value} -> [flag, to_string(value)]
          :error -> []
        end
      end)

    connection_args ++ ["--no-password", "--set", "ON_ERROR_STOP=1", "--file", path]
  end

  # psql's --password flag only forces an interactive prompt and takes no
  # value; the password is supplied through the PGPASSWORD environment
  # variable, which also keeps it out of the process argument list.
  defp psql_env(config) do
    case Keyword.fetch(config, :password) do
      {:ok, password} -> [{"PGPASSWORD", password}]
      :error -> []
    end
  end

  # Scrapes the ZET GTFS page and returns the first UserDocsImages link on it.
  # Raises when the page contains no such link.
  defp get_download_url() do
    html = get(@gtfs_page_url)

    case Regex.run(~r{https://www\.zet\.hr/UserDocsImages/[^"]+}, html) do
      [url | _] -> url
      nil -> raise "No GTFS download link found on #{@gtfs_page_url}"
    end
  end

  # Performs an HTTP GET and returns the body as a binary.
  #
  # Only the status code is matched, so a different HTTP version or reason
  # phrase does not crash the import; any non-200 response still raises
  # MatchError. The body is requested as a binary because converting httpc's
  # default byte list with to_string/1 would mangle non-ASCII (UTF-8) content.
  #
  # NOTE(review): no TLS options are passed to httpc here; whether the server
  # certificate is verified depends on the OTP defaults -- confirm.
  defp get(url) do
    {:ok, {{_http_version, 200, _reason_phrase}, _headers, body}} =
      :httpc.request(:get, {to_charlist(url), []}, [], body_format: :binary)

    body
  end

  # Downloads `url` into the file at `target`, replacing any previous copy.
  # Currently only referenced from the disabled steps in run/0.
  defp download(url, target) do
    # Encode whitespace in path
    encoded_url =
      url
      |> URI.parse()
      |> Map.update!(:path, &URI.encode/1)
      |> URI.to_string()

    # A missing file is fine; any other failure to remove the stale copy is a
    # real error and must not be ignored.
    case File.rm(target) do
      :ok -> :ok
      {:error, :enoent} -> :ok
      {:error, reason} -> raise File.Error, reason: reason, action: "remove file", path: target
    end

    {:ok, :saved_to_file} =
      :httpc.request(:get, {to_charlist(encoded_url), []}, [], stream: to_charlist(target))

    IO.puts("Saved to: #{target}")
  end

  # Connection settings parsed from the repo's :url setting. Raises KeyError
  # when the repo is configured without a :url.
  defp db_config() do
    :triglav
    |> Application.fetch_env!(Repo)
    |> Keyword.fetch!(:url)
    |> Ecto.Repo.Supervisor.parse_url()
  end
end

A priv/gtfs/indices.sql => priv/gtfs/indices.sql +0 -0
A priv/gtfs/load.sql => priv/gtfs/load.sql +8 -0
@@ 0,0 1,8 @@
-- Bulk-loads the GTFS text files into the tables of the zet schema using
-- psql's client-side \copy. Each file is read as CSV and its first line is
-- treated as a header (HEADER).
--
-- The file names are relative, so psql is expected to be started from the
-- directory that contains the extracted feed.
--
-- Referenced tables are loaded before the tables that point at them
-- (agency -> routes -> trips -> stop_times, stops -> stop_times), matching
-- the foreign keys declared in priv/gtfs/schema.sql.
--
-- NOTE(review): no column lists are given, so each file presumably has to
-- provide its columns in the same order as the target table -- confirm
-- against the actual feed files.
\copy zet.agency FROM 'agency.txt' (FORMAT CSV, HEADER);
\copy zet.stops FROM 'stops.txt' (FORMAT CSV, HEADER);
\copy zet.routes FROM 'routes.txt' (FORMAT CSV, HEADER);
\copy zet.trips FROM 'trips.txt' (FORMAT CSV, HEADER);
\copy zet.stop_times FROM 'stop_times.txt' (FORMAT CSV, HEADER);
\copy zet.calendar FROM 'calendar.txt' (FORMAT CSV, HEADER);
\copy zet.calendar_dates FROM 'calendar_dates.txt' (FORMAT CSV, HEADER);
\copy zet.feed_info FROM 'feed_info.txt' (FORMAT CSV, HEADER);

A priv/gtfs/schema.sql => priv/gtfs/schema.sql +96 -0
@@ 0,0 1,96 @@
-- Rebuilds the zet schema from scratch.
-- WARNING: DROP ... CASCADE removes the schema together with every object in
-- it, so all previously imported data is lost each time this script runs.
DROP SCHEMA IF EXISTS zet CASCADE;
CREATE SCHEMA zet;

-- Agencies; filled from agency.txt by priv/gtfs/load.sql.
-- Referenced by zet.routes.agency_id.
CREATE TABLE zet.agency
(
  agency_id              text PRIMARY KEY,
  agency_name            text NOT NULL,
  agency_url             text NOT NULL,
  agency_timezone        text NOT NULL,
  agency_lang            text,
  agency_phone           text,
  agency_fare_url        text
);

-- Stops; filled from stops.txt by priv/gtfs/load.sql.
-- Referenced by zet.stop_times.stop_id.
-- parent_station is plain text: no foreign key back to zet.stops is declared.
CREATE TABLE zet.stops
(
  stop_id                text PRIMARY KEY,
  stop_code              text,
  stop_name              text,
  stop_desc              text,
  stop_lat               double precision,
  stop_lon               double precision,
  zone_id                text,
  stop_url               text,
  location_type          integer,
  parent_station         text
);

-- Routes; filled from routes.txt by priv/gtfs/load.sql.
-- agency_id is nullable, so a route may be stored without an agency; when
-- set it must exist in zet.agency.
CREATE TABLE zet.routes
(
  route_id               text PRIMARY KEY,
  agency_id              text REFERENCES zet.agency(agency_id),
  route_short_name       text,
  route_long_name        text,
  route_desc             text,
  route_type             integer NOT NULL,
  route_url              text,
  route_color            text,
  route_text_color       text
);

-- Trips; filled from trips.txt by priv/gtfs/load.sql.
-- route_id references zet.routes without a column list, i.e. its primary key
-- (route_id).
-- service_id carries no foreign key, so it is not checked against
-- zet.calendar or zet.calendar_dates.
CREATE TABLE zet.trips
(
  route_id               text NOT NULL REFERENCES zet.routes,
  service_id             text NOT NULL,
  trip_id                text NOT NULL PRIMARY KEY,
  trip_headsign          text,
  trip_short_name        text,
  direction_id           boolean,
  block_id               text,
  shape_id               text
);

-- Stop times; filled from stop_times.txt by priv/gtfs/load.sql.
-- trip_id and stop_id reference the primary keys of zet.trips and zet.stops.
-- Times are stored as interval rather than time -- presumably because feed
-- times can run past 24:00:00; confirm.
-- NOTE(review): no primary key or unique constraint is declared;
-- (trip_id, stop_sequence) looks like the natural candidate -- confirm
-- against the data before adding one.
-- NOTE(review): arrival_time is nullable while departure_time is NOT NULL --
-- confirm the asymmetry is intended.
CREATE TABLE zet.stop_times
(
  trip_id                text NOT NULL REFERENCES zet.trips,
  arrival_time           interval,
  departure_time         interval NOT NULL,
  stop_id                text NOT NULL REFERENCES zet.stops,
  stop_sequence          integer NOT NULL CHECK (stop_sequence >= 0),
  stop_headsign          text,
  pickup_type            integer,
  drop_off_type          integer,
  shape_dist_traveled    double precision
);

-- Service calendars; filled from calendar.txt by priv/gtfs/load.sql.
-- One row per service_id with a boolean flag per weekday and the date range
-- in which the service applies.
CREATE TABLE zet.calendar
(
  service_id             text PRIMARY KEY,
  monday                 boolean NOT NULL,
  tuesday                boolean NOT NULL,
  wednesday              boolean NOT NULL,
  thursday               boolean NOT NULL,
  friday                 boolean NOT NULL,
  saturday               boolean NOT NULL,
  sunday                 boolean NOT NULL,
  start_date             date NOT NULL,
  end_date               date NOT NULL
);

-- Calendar exceptions; filled from calendar_dates.txt by priv/gtfs/load.sql.
-- NOTE(review): neither a primary key nor a foreign key on service_id is
-- declared, so duplicate (service_id, date) rows are accepted -- confirm
-- this is intended.
CREATE TABLE zet.calendar_dates
(
  service_id             text NOT NULL,
  date                   date NOT NULL,
  exception_type         integer NOT NULL
);

-- Feed metadata; filled from feed_info.txt by priv/gtfs/load.sql.
-- The columns are declared in the same order as the header line of the
-- feed_info.txt file committed alongside this script.
CREATE TABLE zet.feed_info
(
  feed_publisher_name    text NOT NULL,
  feed_publisher_url     text NOT NULL,
  feed_lang              text,
  feed_start_date        date,
  feed_end_date          date,
  feed_version           text
);

A scheduled-000-000285-2.10.2020..zip => scheduled-000-000285-2.10.2020..zip +0 -0