@@ 0,0 1,53 @@
+#
+# What are the most common ticket violations?
+#
+import matplotlib.pyplot as plt
+
+import pandas as pd
+import numpy as np
+import utils
+
+data = utils.data
+#data.info()
+
+# Drop 2020
+covid = data[data["Issued"].dt.year == 2020]
+bvio = data.drop(covid.index)
+
+# group tickets by violation & year
+bvio = bvio.groupby([
+ pd.Grouper(key="Violation Desc Long"),
+ bvio["Issued"].dt.year
+])["Ticket Number"].count().reset_index()
+
+# calculate average number of tickets each violation
+# averages per year and add the values to the dataframe
+bvio = bvio.merge(
+ bvio.groupby(pd.Grouper("Violation Desc Long"))["Ticket Number"].mean(),
+ on="Violation Desc Long")
+bvio.rename(columns={
+ "Ticket Number_x": "Tickets",
+ "Ticket Number_y": "Average",
+}, inplace=True)
+
+bvio.sort_values("Average", ascending=True, inplace=True)
+fig, ax = plt.subplots(figsize=(4, 10))
+
+ax.scatter(y=bvio["Violation Desc Long"],
+ x=bvio["Tickets"], color="black", alpha=0.2)
+
+# flip chart on vertical axis
+ax.invert_xaxis()
+ax.yaxis.set_label_position("right")
+ax.yaxis.tick_right()
+ax.xaxis.tick_top()
+
+ax.set(
+ title="Tickets by Violation")
+
+plt.box(False)
+plt.grid(True, which='major', axis='x', color="black")
+plt.tight_layout()
+plt.savefig(
+ utils.FIG_DIR / "tickets-by-violation.svg",
+ transparent=True)