From 1f08e57c85508e28515c8a48c43b57c460116b96 Mon Sep 17 00:00:00 2001
From: Bryan Brattlof
Date: Tue, 9 Feb 2021 18:54:21 -0500
Subject: [PATCH] add code to generate tickets issued by day of month

---
 tickets-per-day-of-month.py | 64 +++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 tickets-per-day-of-month.py

diff --git a/tickets-per-day-of-month.py b/tickets-per-day-of-month.py
new file mode 100644
index 0000000..4dbb9b2
--- /dev/null
+++ b/tickets-per-day-of-month.py
@@ -0,0 +1,64 @@
+#
+# How many tickets are issued in Boston per day of month?
+#
+import matplotlib.pyplot as plt
+
+import pandas as pd
+import numpy as np
+import utils
+
+data = utils.data
+#data.info()
+
+bdom = data.groupby(pd.Grouper(key="Issued", freq="D"))["Issued"].count()
+fig, ax = plt.subplots(figsize=(10, 4))
+
+# drop day 31 (only 7 months have one, so it is under-represented)
+b31 = bdom[bdom.index.day == 31]
+bdom.drop(b31.index, inplace=True)
+
+# pull 2020 out of the baseline (kept in `covid` for the red overlay below)
+covid = bdom[bdom.index.year == 2020]
+bdom.drop(covid.index, inplace=True)
+
+# median +- one standard deviation of the daily counts, per day of month
+med = bdom.groupby(bdom.index.day).median()
+std = bdom.groupby(bdom.index.day).std()
+
+plt.fill_between(med.index, med-std, med+std, facecolor="white")
+# plt.plot(med.index, med, color="white")
+
+# fit trend line (currently disabled)
+# fit = np.poly1d(np.polyfit(bdom.index.day, bdom.values, 1))
+# plt.plot(bdom.index.day, fit(bdom.index.day), color='black')
+
+# plot each daily count (2020 in red, every other year in black)
+plt.scatter(covid.index.day, covid.values, color="tab:red", alpha=0.2)
+plt.scatter(bdom.index.day, bdom.values, color="black", alpha=0.2)
+
+
+plt.xticks(range(1, 31))
+
+ax.set(
+    title="Tickets Issued on Day of Month",
+    ylabel="Tickets Issued"
+)
+
+plt.tight_layout()
+plt.savefig(
+    utils.FIG_DIR / "tickets-by-day-of-month.svg",
+    transparent=True)
+
+
+# Why so many low values (days with a count below 1000)?
+low = bdom[bdom.values < 1000]
+
+# fairly evenly spaced out between months
+#low.groupby(low.index.month).count()
+
+# and by year
+#low.groupby(low.index.year).count()
+
+# However, day of week shows that 80% (459) of the days
+# fall on a Sunday, when a majority of meters are inactive
+#low.groupby(low.index.dayofweek).count()
-- 
2.45.2