~ihabunek/triglav

4241f6f4dfcb9eaefbd7e45dc1433a33563a877a — Ivan Habunek 23 days ago 6b60529 duckdb
wip
1 files changed, 50 insertions(+), 19 deletions(-)

M lib/triglav/gtfs/duck_db_import.ex
M lib/triglav/gtfs/duck_db_import.ex => lib/triglav/gtfs/duck_db_import.ex +50 -19
@@ 5,24 5,24 @@ defmodule Triglav.Gtfs.DuckDbImport do

  def go do
    dirs = [
      "/home/ihabunek/projects/ihabunek/triglav/tmp/zet/322",
      "/home/ihabunek/projects/ihabunek/triglav/tmp/osijek",
      "/home/ihabunek/projects/ihabunek/triglav/tmp/jadrolinija",
      "/home/ihabunek/projects/ihabunek/triglav/tmp/hzpp",
      "/home/ihabunek/projects/ihabunek/triglav/tmp/autotrolej"
      "/home/ihabunek/projects/ihabunek/triglav/tmp/zet/322"
      # "/home/ihabunek/projects/ihabunek/triglav/tmp/osijek",
      # "/home/ihabunek/projects/ihabunek/triglav/tmp/jadrolinija",
      # "/home/ihabunek/projects/ihabunek/triglav/tmp/hzpp",
      # "/home/ihabunek/projects/ihabunek/triglav/tmp/autotrolej"
    ]

    for dir <- dirs do
      parse_platforms(%{
        agency: "#{dir}/agency.txt",
        stops: "#{dir}/stops.txt",
        routes: "#{dir}/routes.txt",
        trips: "#{dir}/trips.txt",
        stop_times: "#{dir}/stop_times.txt"
      })
      |> Enum.take(10)
      |> IO.inspect(width: 200)
    end
    # for dir <- dirs do
    #   parse_platforms(%{
    #     agency: "#{dir}/agency.txt",
    #     stops: "#{dir}/stops.txt",
    #     routes: "#{dir}/routes.txt",
    #     trips: "#{dir}/trips.txt",
    #     stop_times: "#{dir}/stop_times.txt"
    #   })
    #   |> Enum.take(50)
    #   |> IO.inspect(label: "platforms", width: 200)
    # end

    # for dir <- dirs do
    #   parse_routes(%{


@@ 32,10 32,22 @@ defmodule Triglav.Gtfs.DuckDbImport do
    #     trips: "#{dir}/trips.txt",
    #     stop_times: "#{dir}/stop_times.txt"
    #   })
    #   |> Enum.take(10)
    #   |> IO.inspect(width: 200)
    #   |> Enum.take(5)
    #   |> IO.inspect(label: "routes", width: 200)
    # end

    for dir <- dirs do
      parse_trips(%{
        agency: "#{dir}/agency.txt",
        stops: "#{dir}/stops.txt",
        routes: "#{dir}/routes.txt",
        trips: "#{dir}/trips.txt",
        stop_times: "#{dir}/stop_times.txt"
      })
      |> Enum.take(5)
      |> IO.inspect(label: "trips", width: 200)
    end

    nil
  end



@@ 56,12 68,31 @@ defmodule Triglav.Gtfs.DuckDbImport do
  @doc """
  Reads the GTFS stops file at `files.stops` through DuckDB and returns one
  row per stop.

  The query selects `stop_id`, `parent_station`, `stop_name`, `stop_lat` and
  `stop_lon` under the aliases `gtfs_id`, `parent_gtfs_id`, `name`, `latitude`
  and `longitude`. In each returned row the `:longitude` and `:latitude`
  entries are replaced by a single `:geometry` entry holding a `Geo.Point`
  with SRID 4326 and coordinates in `{longitude, latitude}` order.

  `stop_id` and `parent_station` are read as `VARCHAR` so that the ids have
  the same type as the stop ids produced by `parse_trips/1`, regardless of
  what the CSV type sniffer would infer for a particular feed.
  """
  def parse_platforms(files) do
    duckdb_query!("""
      SELECT stop_id AS gtfs_id,
             parent_station AS parent_gtfs_id,
             stop_name AS name,
             stop_lat AS latitude,
             stop_lon AS longitude
      FROM read_csv('#{files.stops}', types={'stop_id': 'VARCHAR', 'parent_station': 'VARCHAR'});
    """)
    |> Enum.map(fn row ->
      # Geo points are {x, y}, i.e. longitude first.
      point = %Geo.Point{coordinates: {row.longitude, row.latitude}, srid: 4326}

      row
      |> Map.drop([:longitude, :latitude])
      |> Map.put(:geometry, point)
    end)
  end

  @doc """
  Groups the trips of a GTFS feed into distinct stop patterns.

  Joins `files.stop_times` with `files.trips` on `trip_id`, collects the stop
  ids of every trip into a list ordered by `stop_sequence`, and then counts
  how many trips share the same `route_id`, `direction_id` and stop list.

  Returns whatever `duckdb_query!/1` yields for the query; the selected
  columns are `route_id`, `direction_id`, `stops` and `count`.
  """
  def parse_trips(files) do
    # The stop list must be aggregated in stop_sequence order: without an
    # explicit ORDER BY the element order of list() is not guaranteed after the
    # join, so identical trips could end up as different patterns.
    #
    # NOTE(review): the final ORDER BY sorts on column 3 (the stop list), not on
    # the count - confirm whether "4 DESC" (most frequent pattern first) was
    # intended.
    duckdb_query!("""
      WITH trips AS (
          SELECT t.trip_id, t.route_id, t.direction_id,
                 list(st.stop_id ORDER BY st.stop_sequence) AS stops
          FROM read_csv('#{files.stop_times}', types={'stop_id': 'VARCHAR'}) AS st
          JOIN read_csv('#{files.trips}') AS t ON t.trip_id = st.trip_id
          GROUP BY 1, 2, 3
      )
      SELECT route_id, direction_id, stops, count(*) AS count from trips
      GROUP BY 1, 2, 3
      ORDER BY 1 ASC, 2 ASC, 3 DESC;
    """)
  end

  defp duckdb_query!(query) do