~ihabunek/triglav

089930b8a3598d8b4c17bf98db2e79d25b46cc02 — Ivan Habunek 11 months ago 5273773 hp
Parse HP data from their web site, show on web
A lib/triglav/hp.ex => lib/triglav/hp.ex +29 -0
@@ 0,0 1,29 @@
defmodule Triglav.HP do
  @moduledoc """
  Context for HP post office data: importing post offices fetched through
  `Triglav.HP.API` and listing their mappings to OSM nodes and ways.
  """
  alias Ecto.Multi
  alias Triglav.HP.API
  alias Triglav.HP.Mappings
  alias Triglav.HP.Schemas.PostOffice
  alias Triglav.HP.Schemas.PostOfficeMapping
  alias Triglav.Repo
  import Ecto.Query

  @doc """
  Replaces all stored post offices and their mappings with freshly fetched data.

  The post offices are fetched before the transaction is opened. Inside a
  single transaction the existing mappings and post offices are deleted, the
  fetched post offices are inserted and the mappings are regenerated from them.

  Returns the result of `Repo.transaction/1` for the multi. Raises if fetching
  or parsing the remote data fails.
  """
  @spec import() :: {:ok, map()} | {:error, Multi.name(), any(), map()}
  def import() do
    post_offices = API.fetch_post_offices!()

    Multi.new()
    # Mappings reference post offices, so they have to be removed first.
    |> Multi.delete_all(:delete_mappings, PostOfficeMapping)
    |> Multi.delete_all(:delete, PostOffice)
    |> Multi.insert_all(:insert, PostOffice, post_offices)
    # Mappings are generated from the rows inserted by the previous step, so
    # this has to run as part of the multi rather than being computed up front.
    |> Multi.run(:insert_mappings, fn repo, _changes ->
      {:ok, repo.insert_all(PostOfficeMapping, Mappings.generate())}
    end)
    |> Repo.transaction()
  end

  @doc """
  Returns all post office mappings with `:post_office`, `:way` and `:node`
  preloaded.

  Rows are ordered by mapping id so the listing is stable between calls;
  without an explicit ordering the database may return rows in any order.
  """
  @spec list_post_office_mappings() :: [PostOfficeMapping.t()]
  def list_post_office_mappings() do
    PostOfficeMapping
    |> order_by([m], asc: m.id)
    |> preload([:post_office, :way, :node])
    |> Repo.all()
  end
end

A lib/triglav/hp/api.ex => lib/triglav/hp/api.ex +93 -0
@@ 0,0 1,93 @@
defmodule Triglav.HP.API do
  @moduledoc """
  Loads and parses post office data from posta.hr.

  The page embeds a JavaScript array of `google.maps.LatLng` coordinates and a
  series of `content[N] = '...'` assignments holding HTML snippets. A snippet
  is paired with the coordinate at position `N` of the array.
  """
  alias Triglav.Http

  @doc """
  Loads data from posta.hr, returns maps ready to be inserted as PostOffice schemas.

  Each map has the keys `:post_code`, `:place`, `:street`, `:phone`, `:fax` and
  `:geometry` (a `Geo.Point` with SRID 4326).

  Raises if the page cannot be fetched or does not have the expected structure.
  """
  @spec fetch_post_offices!() :: [map()]
  def fetch_post_offices!() do
    {:ok, body} = Http.get("https://www.posta.hr/mapahp.aspx?lng=_hr")
    coordinates_map = parse_coordinates(body)

    for {index, record} <- parse_contents(body) do
      {latitude, longitude} = Map.fetch!(coordinates_map, index)
      # The page lists latitude first, while the point stores {longitude, latitude}.
      point = %Geo.Point{coordinates: {longitude, latitude}, srid: 4326}
      Map.put(record, :geometry, point)
    end
  end

  # Builds a map of array position => {latitude, longitude} from the
  # `neighborhoods` JavaScript array.
  defp parse_coordinates(body) do
    [_line, match] = Regex.run(~r/var neighborhoods = \[([^\n]+)\];/, body)

    ~r"new google\.maps\.LatLng\(([^)]+)\)"
    |> Regex.scan(match)
    |> Enum.map(fn [_line, latlng] -> parse_latlng(latlng) end)
    |> Enum.with_index()
    |> Map.new(fn {coordinates, index} -> {index, coordinates} end)
  end

  # Turns the comma separated arguments of a LatLng call into a tuple of floats.
  defp parse_latlng(latlng) do
    latlng
    |> String.split(",")
    |> Enum.map(&to_float/1)
    |> List.to_tuple()
  end

  # Returns `{index, record}` tuples for every content entry describing a post
  # office; entries without the "POŠTANSKI URED" heading are skipped.
  defp parse_contents(body) do
    ~r"content\[(\d+)\] = '(.+)';"U
    |> Regex.scan(body)
    |> Enum.filter(fn [_line, _index, content] -> content =~ "POŠTANSKI URED" end)
    |> Enum.map(fn [_line, index, content] ->
      {String.to_integer(index), parse_record(content)}
    end)
  end

  # Extracts the post office attributes from one HTML snippet. Raises a
  # MatchError when the snippet does not have the expected markup.
  defp parse_record(content) do
    pattern =
      ~r'<div class="cloud"><h1>POŠTANSKI URED<br /><br />(\d+) ([^<]+)</h1>([^<]+)<br />Tel:(.*)<br/>Fax:(.*)<br /></div>'

    [_, post_code, place, street, phone, fax] = Regex.run(pattern, content)

    %{
      post_code: post_code,
      place: normalize_place(place),
      street: strip_place(street),
      phone: normalize_phone(phone),
      fax: normalize_phone(fax)
    }
  end

  # Street address ends with place, remove it by dropping the last comma
  # separated part.
  defp strip_place(street) do
    street
    |> String.split(",")
    |> Enum.drop(-1)
    |> Enum.map_join(", ", &String.trim/1)
  end

  defp to_float(string) do
    {float, ""} = string |> String.trim() |> Float.parse()
    float
  end

  # Splits a comma separated list of numbers, strips whitespace and dashes and
  # replaces a leading zero with the +385 prefix. Empty segments are rejected
  # only after stripping, so whitespace-only or dash-only segments do not end
  # up as empty strings in the result.
  defp normalize_phone(string) do
    string
    |> String.split(",")
    |> Enum.map(&String.replace(&1, ~r"[\s-]+", ""))
    |> Enum.reject(&(&1 == ""))
    |> Enum.map(&String.replace(&1, ~r"^0", "+385"))
  end

  @doc """
  Converts a place name to title case, normalizing hyphens to `" - "`.

  Whitespace around words and hyphens is collapsed, so `"ZAGREB-DUBRAVA"` and
  `"ZAGREB - DUBRAVA"` both become `"Zagreb - Dubrava"`.
  """
  @spec normalize_place(String.t()) :: String.t()
  def normalize_place(string) do
    string
    |> String.split("-")
    |> Enum.map_join(" - ", &title_case/1)
  end

  # `String.split/1` splits on whitespace and drops empty fragments, so leading
  # or trailing whitespace does not leak into the joined result.
  defp title_case(string) do
    string
    |> String.split()
    |> Enum.map_join(" ", &String.capitalize/1)
  end
end

A lib/triglav/hp/mappings.ex => lib/triglav/hp/mappings.ex +76 -0
@@ 0,0 1,76 @@
defmodule Triglav.HP.Mappings do
  @moduledoc """
  Generates PostOfficeMapping records which link imported PostOffice records and
  OSM data.

  A post office is matched to an OSM node or way tagged `amenity=post_office`
  whose `ref` tag equals the post office's post code.
  """
  alias Triglav.Repo
  alias Triglav.HP.Schemas.PostOffice
  alias Triglav.Schemas.Osmosis.Node
  alias Triglav.Schemas.Osmosis.Way
  import Ecto.Query
  import Geo.PostGIS

  @typedoc """
  Attributes of a single mapping row. At most one of `node_id` and `way_id` is
  set; all three of `node_id`, `way_id` and `distance` are `nil` when no OSM
  object matched the post office.
  """
  @type mapping :: %{
          post_office_id: integer(),
          node_id: integer() | nil,
          way_id: integer() | nil,
          distance: float() | nil
        }

  @doc """
  Returns mapping attributes for every stored post office, sorted by post
  office id.

  A post office matching several OSM objects yields one entry per match; a
  post office without any match yields a single entry with no node or way.
  The result is a list of plain maps, not `PostOfficeMapping` structs.
  """
  @spec generate() :: [mapping()]
  def generate() do
    matched = fetch_matched_nodes() ++ fetch_matched_ways()

    matched_ids =
      matched
      |> Enum.map(& &1.post_office_id)
      |> Enum.uniq()

    Enum.sort_by(matched ++ fetch_unmatched(matched_ids), & &1.post_office_id)
  end

  # Post offices joined to OSM nodes tagged amenity=post_office with ref equal
  # to the post code.
  defp fetch_matched_nodes() do
    from(p in PostOffice,
      join: n in Node,
      on:
        fragment("? -> ? = ?", n.tags, "amenity", "post_office") and
          fragment("? -> ? = ?", n.tags, "ref", p.post_code),
      select: %{
        post_office_id: p.id,
        node_id: n.id,
        way_id: nil,
        distance: st_distance_in_meters(p.geometry, n.geom)
      }
    )
    |> Repo.all()
  end

  # Same as fetch_matched_nodes/0, but for OSM ways; the distance is measured
  # to the way's linestring.
  defp fetch_matched_ways() do
    from(p in PostOffice,
      join: w in Way,
      on:
        fragment("? -> ? = ?", w.tags, "amenity", "post_office") and
          fragment("? -> ? = ?", w.tags, "ref", p.post_code),
      select: %{
        post_office_id: p.id,
        node_id: nil,
        way_id: w.id,
        distance: st_distance_in_meters(p.geometry, w.linestring)
      }
    )
    |> Repo.all()
  end

  # Post offices whose id is not among the already matched ones.
  defp fetch_unmatched(matched_ids) do
    from(p in PostOffice,
      where: p.id not in ^matched_ids,
      select: %{
        post_office_id: p.id,
        node_id: nil,
        way_id: nil,
        distance: nil
      }
    )
    |> Repo.all()
  end
end

A lib/triglav/hp/schemas/post_office.ex => lib/triglav/hp/schemas/post_office.ex +14 -0
@@ 0,0 1,14 @@
defmodule Triglav.HP.Schemas.PostOffice do
  @moduledoc """
  A post office imported from HP, stored in the `hp_post_offices` table.
  """
  use Ecto.Schema

  @type t() :: %__MODULE__{}

  schema "hp_post_offices" do
    field :post_code, :string
    field :street, :string
    field :place, :string
    # Phone and fax hold lists of numbers, since one office may list several.
    field :phone, {:array, :string}
    field :fax, {:array, :string}
    field :geometry, Geo.PostGIS.Geometry
  end
end

A lib/triglav/hp/schemas/post_office_mapping.ex => lib/triglav/hp/schemas/post_office_mapping.ex +20 -0
@@ 0,0 1,20 @@
defmodule Triglav.HP.Schemas.PostOfficeMapping do
  @moduledoc """
  Maps post offices imported from HP with existing OSM nodes/ways representing
  post offices (amenity=post_office). Includes the distance between them for
  validation.

  Stored in the `hp_post_office_mappings` table.
  """
  use Ecto.Schema

  alias Triglav.HP.Schemas.PostOffice
  alias Triglav.Schemas.Osmosis.Node
  alias Triglav.Schemas.Osmosis.Way

  @type t() :: %__MODULE__{}

  schema "hp_post_office_mappings" do
    belongs_to :post_office, PostOffice
    belongs_to :node, Node
    belongs_to :way, Way
    field :distance, :float
  end
end

A lib/triglav_web/controllers/hp/post_offices_controller.ex => lib/triglav_web/controllers/hp/post_offices_controller.ex +11 -0
@@ 0,0 1,11 @@
defmodule TriglavWeb.HP.PostOfficesController do
  @moduledoc """
  Serves the listing of HP post offices and their OSM mappings.
  """
  use TriglavWeb, :controller

  @doc """
  Renders `index.html` with every post office mapping assigned as `@mappings`.
  """
  def index(conn, _params) do
    render(conn, "index.html", mappings: Triglav.HP.list_post_office_mappings())
  end
end

M lib/triglav_web/router.ex => lib/triglav_web/router.ex +4 -0
@@ 33,6 33,10 @@ defmodule TriglavWeb.Router do
      get "/tracks", TracksController, :index
      get "/tracks/:id", TracksController, :detail
    end

    # Hrvatska Pošta (HP) pages. Controllers in this scope are looked up under
    # the HP alias (presumably TriglavWeb.HP.*, given the enclosing scope — its
    # definition is not visible here; confirm).
    scope "/hp", alias: HP, as: :hps do
      get "/post_offices", PostOfficesController, :index
    end
  end

  # Other scopes may use custom stacks.

A lib/triglav_web/templates/hp/post_offices/index.html.eex => lib/triglav_web/templates/hp/post_offices/index.html.eex +61 -0
@@ 0,0 1,61 @@
<style>
  .bl { border-left: 1px dotted gray }
</style>

<main role="main" class="container">
  <h1>HP Post Offices</h1>

  <p>Post offices loaded from <a href="https://www.posta.hr/interaktivna-karta-postanskih-ureda/6454">HP</a>.</p>

  <p>Post offices in OSM should be ways or nodes with a ref tag containing the numerical post code to be matched to the corresponding HP post office.</p>

  <%# Comparing against [] avoids walking the whole list just to test for emptiness. %>
  <%= if @mappings != [] do %>
    <table>
      <thead>
        <tr>
          <th colspan="2">Hrvatska Pošta</th>
          <th rowspan="2" class="bl">Matched OSM node or way</th>
          <th rowspan="2" class="bl">Distance</th>
        </tr>
        <tr>
          <th>Name</th>
          <th>Address</th>
        </tr>
      </thead>
      <tbody>
        <%= for mapping <- @mappings do %>
          <tr>
            <td><%= mapping.post_office.post_code %> <%= mapping.post_office.place %></td>
            <td><%= mapping.post_office.street %></td>

            <%# Exactly one cell is rendered for the matched object, even if both node and way are set. %>
            <%= cond do %>
              <% mapping.node -> %>
                <td class="bl"><%= osm_link(mapping.node, tags: [], name: true) %></td>
              <% mapping.way -> %>
                <td class="bl"><%= osm_link(mapping.way, tags: [], name: true) %></td>
              <% true -> %>
                <td class="bl text-gray">Not found</td>
            <% end %>

            <%# Distances above 50 are highlighted as errors. %>
            <%= cond do %>
              <% is_nil(mapping.distance) -> %>
                <td></td>
              <% mapping.distance > 50 -> %>
                <td class="error"><%= mapping.distance %></td>
              <% true -> %>
                <td><%= mapping.distance %></td>
            <% end %>
          </tr>
        <% end %>
      </tbody>
    </table>
  <% else %>
    <p>No post offices found. Run the import script.</p>
  <% end %>
</main>

A lib/triglav_web/views/hp/post_offices_view.ex => lib/triglav_web/views/hp/post_offices_view.ex +5 -0
@@ 0,0 1,5 @@
defmodule TriglavWeb.HP.PostOfficesView do
  @moduledoc """
  View for the HP post offices pages.
  """
  use TriglavWeb, :view

  @doc """
  Returns the title for the given template; the assigns are ignored.
  """
  def title("index.html", _assigns) do
    "HP Post Offices"
  end
end

A priv/repo/migrations/20211003151456_create_hp_tables.exs => priv/repo/migrations/20211003151456_create_hp_tables.exs +23 -0
@@ 0,0 1,23 @@
defmodule Triglav.Repo.Migrations.CreateHpTables do
  use Ecto.Migration

  # Creates the tables holding imported HP post offices and their mappings to
  # OSM nodes and ways.
  def change do
    # One row per imported post office; every column is required.
    create table("hp_post_offices") do
      add :post_code, :text, null: false
      add :street, :text, null: false
      add :place, :text, null: false
      add :phone, {:array, :text}, null: false
      add :fax, {:array, :text}, null: false
      add :geometry, :geometry, null: false
    end

    # A post code identifies at most one post office.
    create unique_index("hp_post_offices", [:post_code])

    # Links a post office to a node or a way. node_id, way_id and distance are
    # nullable; node_id and way_id are plain bigints with no foreign key
    # constraint.
    create table("hp_post_office_mappings") do
      add :post_office_id, references("hp_post_offices"), null: false
      add :node_id, :bigint
      add :way_id, :bigint
      add :distance, :float
    end
  end
end