A => .gitignore +2 -0
@@ 1,2 @@
+.ipynb_checkpoints/
+venv/
A => clean.py +25 -0
@@ 1,25 @@
+import sys
+import json
+
+def convert_date_to_europe(date):
+    """Convert a US-style ``M/D/YYYY`` date string to ``YYYY-MM-DD``.
+
+    Despite the name, the result is year-first (ISO 8601 order), zero-padded
+    to four/two/two digits, so the strings sort chronologically.
+
+    Args:
+        date: A slash-separated month/day/year string. Falsy values
+            (``None``, ``''``) are returned unchanged.
+
+    Returns:
+        The reformatted date string; the falsy input itself; or ``''`` when
+        the input contains only whitespace.
+
+    Raises:
+        ValueError: If the value is not three slash-separated integers.
+    """
+    if not date:
+        return date
+    # str.strip() also removes U+00A0, so a cell holding only a non-breaking
+    # space is treated as "no date" instead of failing to unpack below.
+    text = date.strip()
+    if not text:
+        return text
+    try:
+        month, day, year = (int(part) for part in text.split('/'))
+    except ValueError as err:
+        # Covers both a wrong number of fields and non-numeric fields.
+        raise ValueError(f'expected a M/D/YYYY date, got {date!r}') from err
+    return f'{year:04d}-{month:02d}-{day:02d}'
+
+# Each stdin line holds one JSON array of scraped rows; write one cleaned JSON
+# object per row to stdout (newline-delimited JSON).
+# Iterating sys.stdin directly streams the input instead of buffering all of
+# it in memory the way readlines() does.
+for line in sys.stdin:
+    if not line.strip():
+        # Tolerate blank lines rather than crashing inside json.loads.
+        continue
+    for row in json.loads(line):
+        new_row = {
+            # Two of the scraped column names contain a non-breaking space
+            # (U+00A0); they are renamed to plain identifiers here.
+            'FileNumber': row['File\u00a0#'],
+            'Type': row['Type'],
+            'Status': row['Status'],
+            'Introduced': convert_date_to_europe(row['Introduced']),
+            'FinalAction': convert_date_to_europe(row['Final\u00a0Action']),
+            'Title': row['Title'],
+            'url': row['url'],
+        }
+        print(json.dumps(new_row))
A => get_entities.py +10 -0
@@ 1,10 @@
+import json
+import sys
+import spacy
+# Load the spaCy pipeline once, before any input is read.
+nlp = spacy.load('en_core_web_trf')
+
+# Read one JSON object per stdin line and print one JSON object per named
+# entity found in its Title, tagged with the originating FileNumber.
+for raw_line in sys.stdin:
+    record = json.loads(raw_line)
+    number = record['FileNumber']
+    parsed_title = nlp(record['Title'])
+    for ent in parsed_title.ents:
+        payload = {
+            "FileNumber": number,
+            "Label": ent.label_,
+            "Text": ent.text,
+        }
+        print(json.dumps(payload))
A => legistar.db +0 -0
A => make_sqlite.sh +11 -0
@@ 1,11 @@
+#!/bin/bash
+# Rebuild legistar.db from scratch: scrape, clean, load into SQLite, index.
+# Strict mode: stop on errors, unset variables and failed pipeline stages;
+# -x echoes every command as it runs.
+set -Eeuox pipefail
+
+# scrape.py writes one JSON array per line; clean.py turns that into one
+# JSON object per line, which is what `insert --nl` expects.
+python3 scrape.py > results.json
+python3 clean.py < results.json > cleaned.json
+
+# Start from an empty database so rows from a previous run cannot linger.
+rm -f legistar.db
+sqlite-utils insert --pk FileNumber --nl --alter legistar.db legistar cleaned.json
+
+# One single-column index per column (passing several columns to a single
+# create-index call would build one composite index instead).
+for column in Type Status Introduced FinalAction; do
+    sqlite-utils create-index legistar.db legistar "$column"
+done
+sqlite-utils enable-fts legistar.db legistar Title
+
+# Run ANALYZE last: doing it at insert time happens before the indexes above
+# exist, leaving the query planner without statistics for them.
+sqlite-utils analyze legistar.db
A => scrape.py +15 -0
@@ 1,15 @@
+import urllib3
+urllib3.disable_warnings()
+
+from legistar.bills import LegistarBillScraper
+from json import dumps
+
+def scrape_bills():
+    """Print each page of sfgov.legistar.com legislation search results.
+
+    Every results page is parsed into a list and written to stdout as a
+    single JSON array on its own line.
+    """
+    scraper = LegistarBillScraper()
+    scraper.BASE_URL = 'https://sfgov.legistar.com/'
+    scraper.LEGISLATION_URL = 'https://sfgov.legistar.com/Legislation.aspx'
+    for results_page in scraper.searchLegislation():
+        bills = list(scraper.parseSearchResults(results_page))
+        print(dumps(bills))
+
+
+# Runs at module load; this file is executed as a script.
+scrape_bills()
A => templates/table-legistar-legistar.html +19 -0
@@ 1,19 @@
+{% extends "default:table.html" %}
+
+{#
+  Overrides the "content" block of the default table template: emits a
+  Vega-Lite bar-chart spec (count of rows per year of "Introduced") ahead of
+  the inherited content rendered by super().
+  NOTE(review): the spec declares no "data" source and this template loads no
+  Vega runtime -- presumably both are supplied elsewhere; confirm.
+#}
+{% block content %}
+<div style="width: 100vw">
+<script type="application/vnd.vegalite+json">
+{
+ "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
+ "width": "container",
+ "description": "introduced by date",
+ "mark": "bar",
+ "encoding": {
+ "x": {"timeUnit": "year", "field": "Introduced", "type": "temporal"},
+ "y": {"aggregate": "count"}
+ }
+}
+</script>
+</div>
+{{ super() }}
+{% endblock %}