~tfardet/pycafe

57c78dc690d6517916fdfcc110a965da2a502f9b — Tanguy Fardet 6 months ago f935336
Support gistools 0.2.0
8 files changed, 135 insertions(+), 119 deletions(-)

M README.md
M _deposits.py
M impacts/water.py
M plot/gisplots.py
M plot/impacts.py
M plot/local_deposits.py
M units.py
M utils/language.py
M README.md => README.md +1 -1
@@ 13,8 13,8 @@ The main license is AGPL 3.0+.
Requires:

* Python >= 3.9
* [gistools](https://git.sr.ht/~tfardet/gistools) >= 0.2.0
* [OrgMatt](https://orgmatt.readthedocs.io)
* [gistools](https://git.sr.ht/~tfardet/gistools)
* [geopandas](https://geopandas.org)
* [pint](https://pint.readthedocs.io) and [pint-pandas](https://github.com/hgrecco/pint-pandas)
* mapclassify (optional)

M _deposits.py => _deposits.py +65 -71
@@ 3,9 3,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# _deposits.py

import sys

from collections.abc import Iterable
from typing import Container, Literal, Sequence

import geopandas as gpd


@@ 18,6 15,7 @@ import pint_pandas

from geopandas import GeoDataFrame
from orgmatt.nutrients import nutrient_from_population as nfp
from pandas import DataFrame

from .units import *
from .utils.dataframe_tools import _deposits_to_df_args


@@ 40,12 38,13 @@ Nutrients = Literal["N", "P", "K"] | Sequence[Literal["N", "P", "K"]]
def local_deposits(
    country: str,
    area: GeoDataFrame,
    period: str,
    days: dict[str, int],
    admin_level: int | None = None,
    ci: int = 0,
    force_download : bool = False,
    with_tourists: bool = True,
    force_download: bool = False,
    **kwargs
) -> tuple[gpd.GeoDataFrame, dict, pd.DataFrame]:
) -> tuple[GeoDataFrame, DataFrame, DataFrame]:
    '''
    Compute the whole deposit generated over a certain period of time in a
    given area.


@@ 54,17 53,20 @@ def local_deposits(
    ----------
    country : str
        Name or ISO code of the country.
    area : str or list of str
        Name(s) or code(s) for the area(s) of interest (must be administrative
        areas of level 0 to 5).
    area : GeoDataFrame
        Area(s) of interest (must be administrative areas of level 0 to 5),
        following the :func:`~gistools.admin.get_admin_boundaries` format.
    period : str
        Period over which the deposits are calculated, either "year", a season
        ("winter", "spring", "summer", or "fall"/"autumn") or a month.
    days : {"working": days (int), "free": (int)}
        Number of days spent at work or at home (hypothesis for "free" period).
        An additional "studying" entry can be added for students, if not, the
        "working" value will be used.
    admin_level : int, optional
        Administrative level of `area`.
    ci : int (default: 0)
        Confidence interval.
    with_tourists : bool, optional (default: True)
        Include tourists in the population calculation.
    force_download : bool (default: False)
        Whether to download everything anew or use the cached data (default).
    **kwargs : dict


@@ 74,16 76,16 @@ def local_deposits(
    -------
    boundary : GeoDataFrame
        Delimitation of `area`.
    popdict : dict
        Data about the resident and daily populations separated by age group
        ("adult", "senior", "teenager", "kid") and sex ("male", "female", or
        None if they are grouped together).
    population : DataFrame
        Data about the resident, daily, and tourist populations separated by
        age group ("adult", "senior", "teenager", "kid") and sex
        ("male", "female", or None if they are grouped together).
    deposits : DataFrame
        Computed deposits, with the following columns:

        - "compound", either a nutrient or a type of organic matter
        - "source", the origin of the comound, e.g. "urine", or "biowaste",
        - "amount", the quantity present in the local desposit for a given
        - "source", the origin of the compound, e.g. "urine", or "biowaste",
        - "amount", the quantity present in the local deposit for a given
          compound and source,
        - "unit", the unit of the "amount" value.



@@ 99,14 101,7 @@ def local_deposits(
        Fraction of food residues generated at home on working days.
    '''
    # iterative calls if multiple areas
    multiarea = True

    print(area)

    if isinstance(area, gpd.GeoDataFrame):
        multiarea = (len(area) > 1)

    if multiarea:
    if len(area) > 1:
        # order biggest cities first
        order = list(range(len(area)))



@@ 118,7 113,8 @@ def local_deposits(
        # configure progress bar
        pbar_name = "Computing deposits"

        boundary, popdict, deposits = gpd.GeoDataFrame(), {}, pd.DataFrame()
        boundary = gpd.GeoDataFrame()
        population, deposits = pd.DataFrame(), pd.DataFrame()

        with gt.utils.get_progress() as prog:
            # must make id manually to ensure visibility follows settings


@@ 127,64 123,43 @@ def local_deposits(
            for elt in prog.track(area, task_id=tid):
                # use cache keyword to make faster computations
                b, p, d = local_deposits(
                    country, elt, days=days, admin_level=admin_level, ci=ci,
                    force_download=force_download, cache=kwargs.get("cache", True))
                    country, elt, period=period, days=days, ci=ci,
                    force_download=force_download,
                    cache=kwargs.get("cache", True))

                geocode = b.loc[0, "geocode"]

                d["geocode"] = [geocode]*len(d)
                d.loc[:, "geocode"] = geocode
                p.loc[:, "geocode"] = geocode

                deposits = pd.concat((deposits, d), ignore_index=True)
                boundary = gpd.GeoDataFrame(
                    pd.concat((boundary, b), ignore_index=True), crs=b.crs)

                popdict[geocode] = p
                boundary = pd.concat((boundary, b), ignore_index=True)
                population = pd.concat((population, p), ignore_index=True)

        # reset gistools cache
        gt.countries._cache = {}

        return boundary, popdict, deposits
        return boundary, population, deposits

    # single-area code
    boundary = area
    metadata = {}

    if isinstance(area, str):
        meta_entries = ["population", "admin1", "admin2", "admin3", "admin4"]

        boundary = gt.admin.get_admin_boundaries(
            country, area, area_level=admin_level, metadata=metadata,
            meta_entries=meta_entries)
    else:
        geodata = area.iloc[0]
        metadata["population"] = geodata.get("population", np.NaN)

        admin_level = geodata.get("admin_level")
        metadata[f"admin{admin_level}"] = geodata.get("geocode", "")

        for i in range(1, admin_level):
            alevel = f"admin{i}"
            metadata[alevel] = geodata.get(alevel, "")

    # get demographics (kwargs is for caching in multi-area recursion)
    res_pop, day_pop = gt.demographics.get_local_population(
        country, area=area, admin_level=admin_level, metadata=metadata,
        force_download=force_download, mode="group", fmt="dict", **kwargs)

    keys = set(res_pop).union(day_pop)
    popdict = {k: (res_pop.get(k, 0), day_pop.get(k, 0)) for k in keys}
    population = gt.demographics.get_local_population(
        country, area=area, period=period, with_tourists=with_tourists,
        force_download=force_download, **kwargs)

    deposits = deposits_from_population(
        popdict, days, ci=ci, force_download=force_download, **kwargs)
        population, days, ci=ci, with_tourists=with_tourists,
        force_download=force_download, **kwargs)

    return boundary, popdict, deposits
    return area, population, deposits


def deposits_from_population(
    population: int | dict,
    population: DataFrame,
    days: dict[str, int],
    ci: int = 0,
    force_download : bool = False,
    with_tourists: bool = True,
    force_download: bool = False,
    **kwargs
) -> pd.DataFrame:
    '''


@@ 192,7 167,7 @@ def deposits_from_population(

    Parameters
    ----------
    population : int or dict
    population : DataFrame
        The population that generates the organic matter and nutrients.
    days : {"working": days (int), "free": (int)}
        Number of days spent at work or at home (hypothesis for "free" period).


@@ 200,6 175,8 @@ def deposits_from_population(
        "working" value will be used.
    ci : int (default: 0)
        Confidence interval.
    with_tourists : bool, optional (default: True)
        Include tourists in the deposit calculation.
    force_download : bool (default: False)
        Whether to download everything anew or use the cached data (default).
    **kwargs : dict


@@ 209,12 186,11 @@ def deposits_from_population(
        "compound": [], "source": [], "unit": [], "amount": [], "location": []
    }

    nutrients = []
    with_tourists &= "tourists" in population

    if isinstance(population, int):
        population = {("adult", None): (population, population)}
    nutrients = []

    locations = ("at_home", "at_work", "at_school")
    locations = ("at_home", "at_work", "at_school", "other")

    if "studying" not in days:
        days["studying"] = days.get("working", 0)


@@ 223,8 199,14 @@ def deposits_from_population(
        for location in locations:
            nut_res, om_res = None, None

            for (group, sex), (res_pop, day_pop) in population.items():
            for _, row in population.iterrows():
                # skip some combinations
                group = row.group
                sex = None if row.sex == "mixed" else row.sex
                res_pop = row.residents
                day_pop = row.workforce + row.retirees + row.students
                tourists = row.tourists if with_tourists else 0

                skip = (
                    group == "baby" or
                    (location == "at_work" and group != "adult") or


@@ 269,6 251,17 @@ def deposits_from_population(
                    omlist.append(generated_organic_matter(
                        duration, res_pop, source=source, ci=ci,
                        location="at_home", day_type="free", **kw))
                elif location == "other":
                    if tourists:
                        # tourists population is nights over the whole period
                        # so as if they each stayed 1 day
                        nutlist.append(nutrient_deposit(
                            1*day, tourists, source=source, ci=ci,
                            location="at_home", day_type="free", **kw))

                        omlist.append(generated_organic_matter(
                            1*day, tourists, source=source, ci=ci,
                            location="at_home", day_type="free", **kw))
                else:
                    # working/studying days, at work/school
                    duration = days["working"]*day


@@ 502,7 495,8 @@ def nutrient_deposit(

        for nutrient in nutrients:
            nutrient_deposits[nutrient] = frac * nfp(
                population, duration, nutrient, excreta=source, ci=ci, **kwargs)
                population, duration, nutrient, excreta=source, ci=ci,
                **kwargs)
    elif source in ("food", "biowaste"):
        waste_type = "FoodResidues" if source == "food" \
                     else "BiodegradableWaste"

M impacts/water.py => impacts/water.py +39 -14
@@ 6,13 6,20 @@
import numpy as np
import pandas as pd

from pandas import DataFrame

import orgmatt.impacts as omi

from ..units import *
from ..units import Quantity, L, check_dim


@check_dim(arglist=('duration', 1, '[time]'))
def water_consumption_flush(pop : dict, duration, **kwargs):
def water_consumption_flush(
    pop: DataFrame,
    duration: Quantity,
    ci: int = 0,
    **kwargs
) -> DataFrame:
    '''
    Boxplot for the water consumption associated to the flushes from a
    population over a certain duration.


@@ 24,6 31,8 @@ def water_consumption_flush(pop : dict, duration, **kwargs):
        Population dictionnary returned from :func:`~pycafe.local_deposits`.
    duration : float [time]
        Time interval over which water is consumed.
    ci : int, optional (default: 90)
        Confidence interval used to generate the data.
    kwargs : dict, optional
        Additional arguments can be use to restrict the results to a subset of
        the full database. E.g. `region="Europe"` to use only values obtained


@@ 50,11 59,14 @@ def water_consumption_flush(pop : dict, duration, **kwargs):
    from the 5th, 50th, and 95th toilet use percentiles multiplied by the
    following compination:

    - 7L for average toilet flush, 4L for average male urinal flush (at work)
    - 9L for large dual-flush toilet, 6L for large male urinal flush (at work)
    - 3L for small dual-flush toilet, 0.5L for small male urinal flush (at work)
    - 5L for small full toilet flush (only at work)
    - 12L for highest full toilet flush (only at home)
    - 9L for large dual-flush toilet
    - 7L for average toilet flush
    - 6L for large male urinal flush (at work)
    - 5L for small full toilet flush (only at work)
    - 4L for average male urinal flush (at work)
    - 3L for small dual-flush toilet
    - 0.5L for small male urinal flush (at work)
    '''
    import pycafe as pc



@@ 70,8 82,18 @@ def water_consumption_flush(pop : dict, duration, **kwargs):
        "group": [], "percentile": []
    }

    for (group, sex), (res_pop, day_pop) in pop.items():
    delta = 0.5*(100 - ci)
    percentiles = (delta, 50, 100 - delta) if ci else None

    for _, row in pop.iterrows():
        for mode in ("avg", "low", "high", "highest"):
            sex = row.sex
            group = row.group

            tourists = getattr(row, "tourists", 0)
            res_pop = row.residents + tourists
            day_pop = row.workforce + row.students + tourists

            # compute water used at home (first, full days at home)
            uflush = 7*L
            fflush = 7*L


@@ 93,15 115,16 @@ def water_consumption_flush(pop : dict, duration, **kwargs):
                # time spent at home
                home = omi.water_consumption_flush(
                    res_pop, home_duration, avg_flush_urine=uflush,
                    avg_flush_feces=fflush, ci=90, sex=sex, group=group,
                    avg_flush_feces=fflush, ci=ci, sex=sex, group=group,
                    auto_correct=True, **kwargs)

                # then add fraction of working days spent at home
                home += omi.water_consumption_flush(
                    res_pop, work_duration, avg_flush_urine=uflush,
                    avg_flush_feces=fflush, frac_urinations=fraction_voids_home,
                    frac_defecations=fraction_defecation_home, ci=90, sex=sex,
                    group=group, auto_correct=True, **kwargs)
                    avg_flush_feces=fflush,
                    frac_urinations=fraction_voids_home,
                    frac_defecations=fraction_defecation_home, ci=ci,
                    sex=sex, group=group, auto_correct=True, **kwargs)

                # for work, check if we use a urinal (male)
                if sex == "male":


@@ 124,7 147,7 @@ def water_consumption_flush(pop : dict, duration, **kwargs):
                work = omi.water_consumption_flush(
                    day_pop, work_duration, avg_flush_urine=uflush,
                    avg_flush_feces=fflush, frac_urinations=work_urination,
                    frac_defecations=(1-fraction_defecation_home), ci=90,
                    frac_defecations=(1-fraction_defecation_home), ci=ci,
                    sex=sex, group=group, auto_correct=True, **kwargs)

                res["volume"].extend(work)


@@ 136,7 159,7 @@ def water_consumption_flush(pop : dict, duration, **kwargs):
                # we consider that they spend the whole day at home
                home = omi.water_consumption_flush(
                    res_pop, duration, avg_flush_urine=uflush,
                    avg_flush_feces=fflush, ci=90, sex=sex, group=group,
                    avg_flush_feces=fflush, ci=ci, sex=sex, group=group,
                    auto_correct=True, **kwargs)

            if group != "baby":


@@ 153,7 176,9 @@ def water_consumption_flush(pop : dict, duration, **kwargs):
                res["sex"].extend((sex,)*num_entries)
                res["scenario"].extend((mode,)*num_entries)
                res["group"].extend((group,)*num_entries)
                res["percentile"].extend((5, 50, 95)*int(num_entries/3))

                if percentiles:
                    res["percentile"].extend(percentiles*int(num_entries/3))

    # set units
    unit = res["volume"][0].u

M plot/gisplots.py => plot/gisplots.py +4 -4
@@ 14,7 14,7 @@ from matplotlib.axes import Axes
from matplotlib_scalebar.scalebar import ScaleBar
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from pandas import DataFrame
from pyproj import CRS, Transformer
from pyproj import CRS
from shapely import Point

from gistools.osm.get_geometries import _types, get_all_local_features


@@ 104,7 104,7 @@ def plot_local_admin(
        p = boundary.representative_point().iloc[0]

        if spatial_bounds is None:
            spatial_bounds = boundary.bounds.iloc[0]
            spatial_bounds = boundary.bounds.iloc[0].to_numpy()

        radius = radius or 1



@@ 175,7 175,7 @@ def plot_local_features(
    # check the size of the boundary and simplify if necessary
    if len(request_bnd.exterior.xy[0]) >= 200:
        bounds = request_bnd.bounds
        tolerance = 0.005*max(bounds[2] - bounds[0], bounds[3]- bounds[1])
        tolerance = 0.005 * max(bounds[2] - bounds[0], bounds[3] - bounds[1])

        request_bnd = request_bnd.simplify(tolerance)



@@ 252,7 252,7 @@ def _pie_inset(data, names, ilon, ilat, ax, bounds, radius, colors, legend,
    ax_sub.set_xlim(-axrange, axrange)
    ax_sub.set_ylim(-axrange, axrange)

    colors = colors or mpl.colormaps["bone"]((0.2, 0.55, 0.8))
    colors = colors or mpl.colormaps["bone"]((0, 0.4, 0.65, 0.9))

    # note: "frame=True" seems necessary to get correct pie display
    patches, texts = ax_sub.pie(data, radius=0.5*radius*axrange, colors=colors,

M plot/impacts.py => plot/impacts.py +0 -1
@@ 77,4 77,3 @@ def water_consumption(data: pd.DataFrame, x: str = None, hue: str = None,

    if show:
        plt.show()


M plot/local_deposits.py => plot/local_deposits.py +21 -25
@@ 34,7 34,7 @@ __all__ = [

def plot_local_deposits(
    area_name: str,
    population: dict,
    population: DataFrame,
    boundary: GeoDataFrame,
    deposits: DataFrame,
    parcels: GeoDataFrame | None = None,


@@ 53,7 53,7 @@ def plot_local_deposits(
    ----------
    area_name : str
        Name of the area analyzed.
    population : dict
    population : DataFrame
        Size of the population.
    boundary : GeoDataFrame
        Administrative boundary for the area.


@@ 123,7 123,7 @@ def plot_map_deposits(
    deposits: DataFrame,
    parcels: GeoDataFrame | None = None,
    area_name: str | None = None,
    population: dict | None = None,
    population: DataFrame | None = None,
    compound_density: str = "N",
    figsize: tuple | None = None,
    ax: Axes | None = None,


@@ 143,7 143,7 @@ def plot_map_deposits(
        Agricultural parcels.
    area_name : str, optional
        Name of the area analyzed (used for the plot subtitle).
    population : dict, optional
    population : DataFrame, optional
        Size of the population (used for the plot subtitle).
    figsize : tuple, optional (default: (18, 5))
        Size of the figure.


@@ 253,7 253,7 @@ def plot_map_deposits(
                if (rsum / total_amount) >= limit_deposit or not hide_lowest:
                    rd = np.power(rsum / total_amount, exponent) * max_radius

                    plot_scale = (i==(blen-1))
                    plot_scale = (i == (blen-1))

                    plot_local_admin(r.name, r, data=rdata, pie=pie, radius=rd,
                                     spatial_bounds=bounds, ax=ax,


@@ 284,7 284,7 @@ def plot_map_deposits(

            if old:
                handles = old.legend_handles
                labels = [l.get_label() for l in handles]
                labels = [line.get_label() for line in handles]

                bbta = kwargs.get("loc_legend_pie", (1, 0))
                loc = "upper " if bbta[1] > 0.5 else "lower "


@@ 325,7 325,7 @@ def plot_map_deposits(
            old = ax.get_legend()
            handles = old.legend_handles

            labels = [l.get_label() for l in handles]
            labels = [line.get_label() for line in handles]
            first_legend = fig.legend(handles, labels, loc=loc,
                                      bbox_to_anchor=bbta,
                                      title=old.get_title().get_text())


@@ 347,9 347,11 @@ def plot_map_deposits(
            ax.add_artist(first_legend)
            old.remove()
        else:
            boundary.plot(color="grey", alpha=0.5, ax=ax, legend=False)

            plot_local_admin(area_name, boundary, data=data, pie=pie,
                             ax=ax, show=False, language=lang, loc=loc,
                             bbta=bbta, scalebar=False)
                             bbta=bbta, scalebar=True)

    # plot the parcels
    if parcels is not None:


@@ 361,19 363,8 @@ def plot_map_deposits(
                ax=ax, crs=boundary.crs)

    # add subtitle with population
    if population:
        res_pop, day_pop = 0, 0

        for v in population.values():
            if isinstance(v, dict):
                for k2, v2 in v.items():
                    r, d = v2
                    res_pop += r
                    day_pop += d
            else:
                r, d = v
                res_pop += r
                day_pop += d
    if population is not None:
        res_pop = population.residents.sum()

        pstr = translate("population", lang)



@@ 618,7 609,6 @@ def plot_NPK_pie(

        if missing_all:
            df_nutr = nutrients[nutrients.compound == nut]
            print(set(df_nutr.unit))
            skeep = r"urine|feces \(wet\)|biowaste"
            val_all = df_nutr[df_nutr.source.str.match(skeep)].amount.sum()



@@ 665,10 655,16 @@ def plot_NPK_pie(
    for n in nn:
        for source in sources:
            v_ufu.extend(
                nutrients[(nutrients.compound == n)
                          &(nutrients.source == source)].amount)
                nutrients[
                    (nutrients.compound == n) &
                    (nutrients.source == source)
                ].amount
            )

        data = nutrients[
            (nutrients.compound == n) & (nutrients.source == "all")
        ]

        data = nutrients[(nutrients.compound == n)&(nutrients.source == "all")]
        v_all.extend(data.amount)

        labels.append(FormattedQuantity(

M units.py => units.py +3 -1
@@ 3,6 3,8 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# units.py

from pint import Quantity

from orgmatt import units as _omu
from gistools import units as _gtu



@@ 12,4 14,4 @@ from gistools.units import *
_omudir = {elt for elt in dir(_omu) if not elt.startswith("_")}
_gtudir = {elt for elt in dir(_gtu) if not elt.startswith("_")}

__all__ = list(_omudir.union(_gtudir))
__all__ = ["Quantity"] + list(_omudir.union(_gtudir))

M utils/language.py => utils/language.py +2 -2
@@ 5,7 5,6 @@


translations = {
    "en": {",": ","},
    "fr": {
        "food": "résidus alimentaires",
        "feces": "fèces",


@@ 33,7 32,8 @@ translations = {
        "of food": "de nourriture",
        "of food residues": "de résidus alimentaires",
        "Barplot of the deposits of organic matter.": "Diagramme en barre des gisements de matières organiques.",
        "Detailed amount are:": "Les quantités détaillées sont :"
        "Detailed amount are:": "Les quantités détaillées sont :",
        "other": "autre"
    }
}