~tfardet/gistools

cd745222aacf4db93a4f7146ed324d5ee53bc28e — Tanguy Fardet a month ago a3774b8
Fixed French flows for admin1 + Paris/Lyon/Marseille
2 files changed, 91 insertions(+), 28 deletions(-)

M countries/FRA/fra_pop.py
M countries/FRA/fra_utils.py
M countries/FRA/fra_pop.py => countries/FRA/fra_pop.py +59 -13
@@ 7,8 7,7 @@ import os

from datetime import date, datetime
from functools import cache
from hashlib import sha1
from os.path import isfile, join
from os.path import getmtime, isfile, join
from requests import Session

import geopandas as gpd


@@ 66,6 65,11 @@ def local_higher_education(
    codereg = area.iloc[0].admin1
    area_code = area.iloc[0].geocode

    # special case for Paris
    if area_code == "75056":
        admin_level = 2
        area_code = "75"

    # base for the url request
    url = "https://data.enseignementsup-recherche.gouv.fr/api/v2/catalog/" \
          "datasets/fr-esr-sise-effectifs-d-etudiants-inscrits-esr-public/" \


@@ 78,9 82,18 @@ def local_higher_education(

    yearurl = url + f"&refine=annee:{year}"

    filehash = sha1(yearurl.encode("utf-8")).hexdigest()
    filehash = f"esr_{year}_R{codereg}"
    filename = join(_cachedir, f"{filehash}.parquet")

    if isfile(filename):
        tmstp = datetime.fromtimestamp(getmtime(filename))
        now = datetime.now()

        day_lapse = (now - tmstp).days

        if day_lapse >= 365:
            force_download = True

    if not isfile(filename) or force_download:
        tmpfile = join(_cachedir, f"{filehash}.csv")



@@ 113,7 126,17 @@ def local_higher_education(
    # filter relevant entries
    filters = None

    if admin_level >= 4:
    if area_code == "69123":
        # arrondissements de Lyon
        locs = (area_code,) + tuple(f"6938{i}" for i in range(1, 10))

        filters = [("implantation_code_commune", "in", locs)]
    elif area_code == "13055":
        # arrondissements de Marseille
        locs = (area_code,) + tuple(f"132{i:02d}" for i in range(1, 17))

        filters = [("implantation_code_commune", "in", locs)]
    elif admin_level >= 4:
        # keep only relevant entries
        filters = [("implantation_code_commune", "=", area_code)]
    elif admin_level > 1:


@@ 181,13 204,27 @@ def local_schools(
    codereg = area.iloc[0].admin1
    area_code = area.iloc[0].geocode

    # special case for Paris
    if area_code == "75056":
        admin_level = 2
        area_code = "75"

    url = "https://data.education.gouv.fr/api/explore/v2.1/catalog/" \
          "datasets/fr-en-annuaire-education/exports/geojson" \
          f"?refine=code_region:{codereg}"

    filehash = sha1(url.encode("utf-8")).hexdigest()
    filehash = f"educnat_R{codereg}"
    filename = join(_cachedir, f"{filehash}.parquet")

    if isfile(filename):
        tmstp = datetime.fromtimestamp(getmtime(filename))
        now = datetime.now()

        day_lapse = (now - tmstp).days

        if day_lapse >= 365:
            force_download = True

    if not isfile(filename) or force_download:
        tmpfile = join(_cachedir, f"{filehash}.json")



@@ 233,11 270,21 @@ def local_schools(
    # filter relevant entries
    filters = None

    if admin_level >= 4:
    if area_code == "69123":
        # arrondissements de Lyon (utilise aussi 69380 !!!)
        locs = (area_code,) + tuple(f"6938{i}" for i in range(10))

        filters = [("code_commune", "in", locs)]
    elif area_code == "13055":
        # arrondissements de Marseille (utilise aussi 13200 !!!)
        locs = (area_code,) + tuple(f"132{i:02d}" for i in range(17))

        filters = [("code_commune", "in", locs)]
    elif admin_level >= 4:
        filters = [("code_commune", "=", area_code)]
    elif admin_level > 1:
        base_adm = area.iloc[0].admin2
        adm2 = ("0"*(len(base_adm) - 2)) + base_adm
        adm2 = ("0"*(3 - len(base_adm))) + base_adm
        filters = [("code_departement", "=", adm2)]

    gdf = gpd.read_parquet(filename, filters=filters)


@@ 617,7 664,7 @@ def local_population(
    if not has_pynsee or no_pynsee:
        try:
            res_pop = _local_res_no_pynsee(
                outw, employed_pop, schools, he, instud, outstud, area)
                outw, schools, he, instud, outstud, area)

            day_pop = _local_day_no_pynsee(
                outw, employed_pop, schools, he, instud, outstud, area)


@@ 636,7 683,7 @@ def local_population(
    else:
        try:
            rplatest = _insee_latest("RP", nivgeo=nivgeo, geocode=geocode,
                                    year=year, force_download=force_download)
                                     year=year, force_download=force_download)

            ages = _get_pynsee_localdata(
                variables="SEXE-AGEQ100", dataset_version=rplatest,


@@ 659,7 706,7 @@ def local_population(
                         f"Could not compute detailed demographics: {e}.")

            res_pop = _local_res_no_pynsee(
                outw, employed_pop, schools, he, instud, outstud, area)
                outw, schools, he, instud, outstud, area)

    res: dict[str, list] = {
        "sex": [], "age": [], "group": [], "residents": [], "workforce": [],


@@ 751,7 798,6 @@ def _add_populations(

def _local_res_no_pynsee(
    outw: float,
    employed_pop: float,
    schools: pd.DataFrame,
    highed: float,
    instud: float,


@@ 791,7 837,7 @@ def _local_res_no_pynsee(
    res[(">= 65", None, "senior")] = seniors

    # resident students
    student_res = outstud - (maternelles + primaires + elementaires)
    student_res = max(outstud - (maternelles + primaires + elementaires), 0)

    # hypothesis: fraction between higher and lower education is the same for
    # schoolday and resident kids


@@ 801,7 847,7 @@ def _local_res_no_pynsee(
        frac_he = highed / (college + lycee + highed)

    res_he = frac_he * student_res
    res_teens = student_res - res_he
    res_teens = max(student_res - res_he, 0)

    # hypothesis: flat distribution
    res[("11-14", None, "teenager")] = 4/7 * res_teens

M countries/FRA/fra_utils.py => countries/FRA/fra_utils.py +32 -15
@@ 255,36 255,53 @@ def _get_population_flux(

    filters: list[tuple[tuple]] = []

    if admin_level in (2, 3):
        filters.append(((f"admin{admin_level}_source", "=", location),))
        filters.append(((f"admin{admin_level}_target", "=", location),))
    elif admin_level >= 4:
        filters.append((("CODGEO", "=", location),))
        filters.append(((destination, "=", location),))
    locs: tuple[str] = ()

    # special case for Paris
    if location == "75056":
        admin_level = 2
        location = "75"

    # get the different population fluxes
    if admin_level == 1:
        from gistools.admin import get_admin_boundaries
        dpts = get_admin_boundaries(
            "France", location, area_level=1, admin_level=2)

        incoming = 0.
        outgoing = 0.
        locs = tuple(dpts.geocode)

        for _, series in dpts.iterrows():
            inp, outp = _get_population_flux(series.geocode, ptype, 2)
        filters.append(((f"admin2_source", "in", locs),))
        filters.append(((f"admin2_target", "in", locs),))
    if admin_level in (2, 3):
        filters.append(((f"admin{admin_level}_source", "=", location),))
        filters.append(((f"admin{admin_level}_target", "=", location),))
    elif admin_level >= 4:
        # special cases for Lyon and Marseille
        if location == "69123":
            locs = (location,) + tuple(f"6938{i}" for i in range(1, 10))

            incoming += inp
            outgoing += outp
            filters.append((("CODGEO", "in", locs),))
            filters.append(((destination, "in", locs),))
        elif location == "13055":
            locs = (location,) + tuple(f"132{i:02d}" for i in range(1, 17))

        return incoming, outgoing
            filters.append((("CODGEO", "in", locs),))
            filters.append(((destination, "in", locs),))
        else:
            filters.append((("CODGEO", "=", location),))
            filters.append(((destination, "=", location),))

    df = _get_pop_flux_data(ptype, destination, col, year, number,
                            filters=tuple(filters) if filters else None)

    if admin_level in (2, 3):
    if admin_level == 1:
        outgoing = df[f"admin2_source"].isin(locs)
        incoming = df[f"admin2_target"].isin(locs)
    elif admin_level in (2, 3):
        outgoing = (df[f"admin{admin_level}_source"] == location)
        incoming = (df[f"admin{admin_level}_target"] == location)
    elif location in ("69123", "13055"):
        outgoing = df.CODGEO.isin(locs)
        incoming = df[destination].isin(locs)
    else:
        incoming = (df[destination] == location)
        outgoing = (df.CODGEO == location)