~tfardet/pycafe

3d1e52257c7c5c39c9c2cdcc62890194496a94df — Tanguy Fardet 7 months ago 14d54ed
Improved demographics using gistools only
3 files changed, 3 insertions(+), 309 deletions(-)

M _deposits.py
M utils/__init__.py
D utils/population.py
M _deposits.py => _deposits.py +3 -10
@@ 20,7 20,6 @@ from orgmatt.nutrients import nutrient_from_population as nfp

from .units import *
from .utils.dataframe_tools import _deposits_to_df_args
from .utils.population import compute_popdict


om.set_logging_level("WARN")


@@ 170,16 169,10 @@ def local_deposits(
    # get demographics (kwargs is for caching in multi-area recursion)
    res_pop, day_pop = gt.demographics.get_local_population(
        country, area=area, admin_level=admin_level, metadata=metadata,
        force_download=force_download, mode="age", **kwargs)
        force_download=force_download, mode="group", fmt="dict", **kwargs)

    # get population dict
    if res_pop.population.sum() == 0:
        res_pop = boundary.population.iloc[0]

    if day_pop.population.sum() == 0:
        day_pop = 0

    popdict = compute_popdict(res_pop, day_pop)
    keys = set(res_pop).union(day_pop)
    popdict = {k: (res_pop.get(k, 0), day_pop.get(k, 0)) for k in keys}

    deposits = deposits_from_population(
        popdict, days, ci=ci, force_download=force_download, **kwargs)

M utils/__init__.py => utils/__init__.py +0 -2
@@ 2,5 2,3 @@
# SPDX-FileCopyrightText: 2022 Tanguy Fardet
# SPDX-License-Identifier: AGPL-3.0-or-later
# utils/__init__.py

from .population import compute_popdict

D utils/population.py => utils/population.py +0 -297
@@ 1,297 0,0 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2022 Tanguy Fardet
# SPDX-License-Identifier: AGPL-3.0-or-later
# utils/population.py

from typing import Union

import numpy as np
import pandas as pd


# Age-label patterns used to sort rows into age groups. Raw strings keep
# "\d" a regex character class instead of an invalid string escape.
_ADULT_AGES = r"^[2-5]\d-([2-5]\d|6[0-5])$"
_SENIOR_AGES = r"^(>=? ?[6-9]\d|[6-9]\d-[6-9]\d|(>=? ?)?(1\d{2}))$"
_TEENAGER_AGES = r"^1\d-1\d$"
_KID_AGES = r"^[3-9]-[4-9]$"
# labels that contain babies, possibly mixed with kids ("<5", "0-11m", "0-4")
_YOUNG_AGES = r"^(<=? ?[1-9]|.+m|[0-2]-[1-9])$"

# order in which the age groups are reported
_GROUPS = ("adult", "senior", "teenager", "kid", "baby")

# people strictly younger than this age (in years) are counted as babies
_FIRST_KID_AGE = 3


def compute_popdict(res_pop: Union[float, pd.DataFrame],
                    day_pop: Union[float, pd.DataFrame]):
    '''
    Return a population dict for deposit and impact computation.

    Parameters
    ----------
    res_pop : float or DataFrame
        Resident population. A DataFrame must provide the columns "age"
        (age-range labels such as "25-29", "<5" or ">=65"), "sex" and
        "population".
    day_pop : float or DataFrame
        Daily population, in the same format as `res_pop`.

    Returns
    -------
    popdict : dict
        Maps ``(group, sex)`` to a ``(resident, daily)`` tuple, with `group`
        in ("adult", "senior", "teenager", "kid", "baby") and `sex` taken from
        the "sex" columns (None if no sex information is available).
        If both inputs are plain numbers, only the "adult" group is returned.
        If only one input is a number, it is split evenly between sexes,
        counted as adults, and contributes 0 to the other groups.
    '''
    # fall back on a single, sex-less category if no frame gives sexes
    sexes = _sexes_in(res_pop, day_pop) or {None}

    detailed = (isinstance(res_pop, pd.DataFrame)
                or isinstance(day_pop, pd.DataFrame))

    groups = _GROUPS if detailed else ("adult",)

    popdict = {}

    for sex in sexes:
        res = _counts_by_group(res_pop, sex, len(sexes))
        day = _counts_by_group(day_pop, sex, len(sexes))

        for group in groups:
            popdict[(group, sex)] = (res.get(group, 0), day.get(group, 0))

    return popdict


def pop_dataframe(res_pop: Union[float, pd.DataFrame],
                  day_pop: Union[float, pd.DataFrame]):
    '''
    Return a population DataFrame for deposit and impact computation.

    Parameters
    ----------
    res_pop : float or DataFrame
        Resident population. A DataFrame must provide the columns "age",
        "sex" and "population".
    day_pop : float or DataFrame
        Daily population.

    Returns
    -------
    df : DataFrame
        One row per sex and age group ("adult", "senior", "teenager", "kid",
        "baby") with the columns "resident", "active", "sex" and "group".
        If `res_pop` is a plain number, a single "adult" row with sex None
        is returned.

    Raises
    ------
    ValueError
        If `res_pop` is neither a DataFrame nor a number.
    '''
    if isinstance(res_pop, pd.DataFrame):
        sexes = _sexes_in(res_pop, day_pop)

        # every column is filled once per (sex, group) pair so that all
        # columns always have the same length
        rows = {"resident": [], "active": [], "sex": [], "group": []}

        for sex in sexes:
            res = _counts_by_group(res_pop, sex, len(sexes))
            day = _counts_by_group(day_pop, sex, len(sexes))

            for group in _GROUPS:
                rows["resident"].append(res.get(group, 0))
                rows["active"].append(day.get(group, 0))
                rows["sex"].append(sex)
                rows["group"].append(group)

        return pd.DataFrame(rows)

    if isinstance(res_pop, (int, float, np.integer, np.floating)):
        # without details, consider full adult population
        return pd.DataFrame({
            "group": ["adult"], "sex": [None], "resident": [res_pop],
            "active": [day_pop]
        })

    raise ValueError(f"Invalid population type: '{type(res_pop)}'.")


def _sexes_in(*pops):
    ''' Return the set of values found in the "sex" column of the frames. '''
    found = set()

    for pop in pops:
        if isinstance(pop, pd.DataFrame):
            found.update(pop.sex)

    return found


def _counts_by_group(pop, sex, num_sexes):
    '''
    Return a ``{group: headcount}`` dict for one population and one sex.

    A plain number carries no age detail: it is split evenly between the
    `num_sexes` sexes and fully attributed to the "adult" group.
    '''
    if not isinstance(pop, pd.DataFrame):
        return {"adult": pop / num_sexes}

    ages = pop.age

    def headcount(mask):
        return pop[_with_sex(mask, pop, sex)].population.sum()

    counts = {
        "adult": headcount(ages.str.match(_ADULT_AGES, na=False)),
        "senior": headcount(ages.str.match(_SENIOR_AGES, na=False)),
        "teenager": headcount(ages.str.match(_TEENAGER_AGES, na=False)),
    }

    kids = headcount(ages.str.match(_KID_AGES, na=False))
    babies = 0

    # young age classes may straddle the baby/kid limit: share them out
    young = ages.str.match(_YOUNG_AGES, na=False)

    for label in set(ages[young]):
        baby_share, kid_share = _young_shares(label)
        number = headcount(ages == label)

        babies += baby_share*number
        kids += kid_share*number

    counts["kid"] = kids
    counts["baby"] = babies

    return counts


def _young_shares(label):
    '''
    Return the ``(baby, kid)`` fractions of an age class matching
    `_YOUNG_AGES`, assuming people are evenly spread over the years of the
    class.
    '''
    if "m" in label:
        # ages given in months are babies
        return 1, 0

    if "-" in label:
        low, up = (int(bound) for bound in label.split("-"))
    else:
        low = 0
        up = int(label.lstrip("<= "))

        if not label.startswith("<="):
            up -= 1  # "<N" does not include age N

    # compare numbers, not strings, to find on which side the class falls
    if up < _FIRST_KID_AGE:
        return 1, 0

    span = up - low + 1

    # babies get the years from `low` to 2, kids those from 3 to `up`
    return (_FIRST_KID_AGE - low)/span, (up - _FIRST_KID_AGE + 1)/span


def _with_sex(condition, df, sex):
    ''' Restrict the boolean mask `condition` to rows of `df` with `sex`. '''
    if sex is None:
        return condition

    return condition & (df.sex == sex)