A => .gitignore +129 -0
@@ 1,129 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
A => AbstractDatabaseConnector.py +106 -0
@@ 1,106 @@
+"""
+ An Abstract Base Class OOP example building on prior example for Code Louisville Fall 2017
+"""
+
+import sqlite3
+import pdb
+from abc import ABCMeta, abstractmethod
+
+class DatabaseConnector(metaclass=ABCMeta):
+    """A class to help you connect to various databases"""
+
+    # Class-level bookkeeping shared by every connector subclass.
+    _max_connections = 3
+    _current_connections = 0
+
+    def __init__(self, server, database, schema):
+        self.server = server
+        self.database = database
+        self.schema = schema
+        self._timeout = None
+
+        # Check the limit before claiming a slot so a refused connection
+        # does not consume one.
+        if self._current_connections >= self._max_connections:
+            raise ConnectionError(
+                "MAX CONNECTIONS: {0} CURRENT CONNECTIONS: {1}\n"
+                "Please close a connection to create a new one.".format(
+                    self._max_connections, self._current_connections))
+
+        self._increment_current_connections()
+
+    @property
+    def timeout(self):
+        return self._timeout
+
+ @timeout.setter
+ def timeout(self, value):
+ self._timeout = value
+
+ @abstractmethod
+ def execute_query(self):
+ raise NotImplementedError()
+
+    # Left non-abstract so subclasses that never bulk export can fall
+    # back on the NotImplementedError default.
+    def bulk_export(self):
+        raise NotImplementedError()
+
+    @staticmethod
+    def sql_syntax_help():
+        print("SELECT {COLUMNS} FROM {TABLE_NAME}\n"
+              "SELECT {COLUMNS} FROM {SCHEMA}.{TABLE} WHERE {CONDITIONAL}\n"
+              "SELECT {COLUMNS} FROM {SCHEMA}.{TABLE} WHERE {CONDITIONAL} "
+              "ORDER BY {COLUMN} {ASC/DESC}\n"
+              "SELECT {AGGREGATE FUNCTION} {COLUMN} FROM {SCHEMA}.{TABLE}")
+
+
+ @classmethod
+ def _increment_current_connections(cls):
+ cls._current_connections += 1
+
+ @classmethod
+ def _decrement_current_connections(cls):
+ cls._current_connections -= 1
+
+ @classmethod
+ def update_max_connections(cls, count):
+ cls._max_connections = count
+
+
+class SQLLiteDatabaseConnector(DatabaseConnector):
+    """Concrete connector backed by the standard-library sqlite3 module."""
+
+    def __init__(self, server, database, schema):
+        super().__init__(server, database, schema)
+        self._connection = None
+        self.type = 'sqlite3'
+
+ @property
+ def connection(self):
+ return self._connection
+
+    @connection.setter
+    def connection(self, connection_string):
+        # Store the connection itself (not a cursor) so it can be closed.
+        self._connection = sqlite3.connect(connection_string)
+
+    @connection.deleter
+    def connection(self):
+        self._connection.close()
+        del self._connection
+        self._decrement_current_connections()
+
+    def execute_query(self, query):
+        # Connection.execute is a shortcut that opens and returns a cursor.
+        return self._connection.execute(query).fetchall()
+
+class MSSqlDatabaseConnection(DatabaseConnector):
+    def __init__(self, server, database, schema):
+        super().__init__(server, database, schema)
+        self.type = 'mssql'
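+
+    # A minimal sketch of the required abstract method, assuming pyodbc and
+    # the "ODBC Driver 17 for SQL Server" driver are installed and that a
+    # trusted connection is acceptable; these are assumptions, not part of
+    # the original example.
+    def execute_query(self, query):
+        import pyodbc  # deferred so the sqlite demo runs without pyodbc
+        connection = pyodbc.connect(
+            "DRIVER={ODBC Driver 17 for SQL Server};"
+            f"SERVER={self.server};DATABASE={self.database};"
+            "Trusted_Connection=yes;")
+        try:
+            return connection.cursor().execute(query).fetchall()
+        finally:
+            connection.close()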
+
+
+if __name__ == "__main__":
+ chinook = SQLLiteDatabaseConnector("localhost", "chinook.db", None)
+ chinook.connection = 'chinook.db'
+ data = chinook.execute_query("SELECT * FROM artists")
+    for row in data:
+        print(row)
+ del chinook.connection
+ chinook.sql_syntax_help()
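+
+    # Quick demonstration of the ABC contract: the base class cannot be
+    # instantiated directly because execute_query is abstract.
+    try:
+        DatabaseConnector("localhost", "chinook.db", None)
+    except TypeError as err:
+        print(f"Cannot instantiate the ABC directly: {err}")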
A => RecurseRemove.sql +38 -0
@@ 1,38 @@
+/*
+Set up a recursive CTE to iterate over ASCII ranges and remove
+any characters within the given ranges. Remember: the max recursion
+depth in SQL Server is 100 by default.
+*/
+
+--Upper bound for the control-character pass
+DECLARE @controlcharacter int = 32,
+--Upper bound for the extended-character pass
+@extendedcharacter int = 256;
+
+;with controlcharacters as (
+    --Anchor: start the counter at 0 with char(0) already stripped.
+    SELECT 0 AS cntr, REPLACE(col, char(0), '') as col
+    FROM source
+    WHERE condition
+    UNION ALL
+    --Recursive step: bump the counter and strip the matching character,
+    --stopping once the control range (0-31) is covered.
+    SELECT cntr + 1, REPLACE(col, char(cntr + 1), '') as col
+    FROM controlcharacters c
+    WHERE cntr + 1 < @controlcharacter),
+
+extendedcharacters as (
+    --Re-anchor at 127 (DEL), starting from the final row of the
+    --first recursive set (controlcharacters).
+    SELECT 127 as cntr, REPLACE(col, char(127), '') as col
+    FROM controlcharacters
+    WHERE cntr = (SELECT MAX(cntr) from controlcharacters)
+    UNION ALL
+    SELECT cntr + 1, REPLACE(col, char(cntr + 1), '') as col
+    FROM extendedcharacters c
+    WHERE cntr + 1 < @extendedcharacter)
+
+SELECT * FROM extendedcharacters WHERE cntr = (SELECT MAX(cntr) from extendedcharacters)
+--Override the default MAXRECURSION of 100 so the second pass can make
+--the 128 recursive steps from 127 through 255.
+OPTION (MAXRECURSION 128);
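+
+/*
+Hypothetical smoke test ("source" and "condition" above are intentionally
+left as placeholders): bind source to a one-row derived table, e.g.
+
+    ;with source(col) as (SELECT 'ab' + char(7) + 'cd' + char(200) + 'ef'),
+    controlcharacters as (...), extendedcharacters as (...)
+
+and the final row should come back as cntr = 255, col = 'abcdef'.
+*/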
A => airflow-log-cleanup.py +59 -0
@@ 1,59 @@
+"""
+Airflow creates ALOT of logs. The first thing you should check is changing your
+log level and dag bag refresh rate. If that solves your space issue great! If
+not the below should be able to help by scheduling the script to run via cron
+at a given interval cleaning up the local airflow logs.
+"""
+
+import os
+from datetime import datetime
+
+# st_mtime and datetime.timestamp() are POSIX timestamps in *seconds*
+HOUR_IN_SECONDS = 3600
+
+def truncate_process_manager_log(log_base_path):
+    """
+    The scheduler records all activity related to DAG processing in the same file.
+    This file can grow large fast and is actively in use. Instead of unlinking the
+    file and pulling it out from under the scheduler, truncate it in place.
+    """
+ dag_process_manager_log = os.path.join(
+ log_base_path, "dag_processor_manager", "dag_processor_manager.log"
+ )
+ open(dag_process_manager_log, "w").close()
+
+
+def traverse_and_unlink(fobject):
+    """
+    Traverse the log directory on the given airflow instance (webserver, scheduler,
+    worker, etc.) and remove any logs not modified in the last hour.
+    """
+    for entry in os.scandir(fobject):
+        # entry.path is already the full path; joining the entry onto
+        # fobject again would duplicate the directory prefix.
+        new_fobject = entry.path
+        if entry.is_file():
+            last_modified = entry.stat().st_mtime
+            # now().timestamp() is the current POSIX timestamp in seconds
+            delta = datetime.now().timestamp() - last_modified
+            if delta > HOUR_IN_SECONDS:
+                print(
+                    f"{new_fobject} has not been used in the last hour.\nCleaning up."
+                )
+                os.unlink(new_fobject)
+        elif entry.is_dir():
+            traverse_and_unlink(new_fobject)
+
+
+def cleanup_logs():
+    """
+    Remove all logs not used within the last hour,
+    then truncate the dag processor log.
+    """
+    base_dir = os.environ["AIRFLOW_HOME"]
+    log_dir = os.path.join(base_dir, "logs")
+
+    traverse_and_unlink(log_dir)
+    truncate_process_manager_log(log_dir)
+
+
+if __name__ == "__main__":
+ cleanup_logs()
+
A => async-after-creation.py +37 -0
@@ 1,37 @@
+"""
+Example extended aioodbc configuration.
+"""
+import asyncio
+import aioodbc
+import pyodbc
+from concurrent.futures import ThreadPoolExecutor
+
+loop = asyncio.get_event_loop()
+
+async def conn_attributes(conn):
+    # Runs once per pooled connection; conn is the underlying pyodbc connection.
+    conn.setdecoding(pyodbc.SQL_CHAR, encoding='utf-8')
+ conn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8')
+ conn.setdecoding(pyodbc.SQL_WMETADATA, encoding='utf-16le')
+ conn.setencoding(encoding='utf-8')
+
+async def odbc_insert_worker(conn, val):
+ async with conn.cursor() as cur:
+ await cur.execute('insert into async_testing values (?)', val)
+ await cur.commit()
+
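+# The insert target is assumed to exist already; a hypothetical definition:
+#   CREATE TABLE async_testing (val int);
+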
+async def db_main(loop, vals):
+    # Placeholder DSN; point this at a real ODBC data source name.
+    dsn = "foo"
+
+    vals = list(vals)
+
+    async with aioodbc.create_pool(
+        dsn=dsn,
+        loop=loop,
+        executor=ThreadPoolExecutor(max_workers=3),
+        after_created=conn_attributes,
+    ) as pool:
+        tasks = [do_insert(pool, val) for val in vals]
+        await asyncio.gather(*tasks)
+
+async def do_insert(pool, val):
+ async with pool.acquire() as conn:
+ await odbc_insert_worker(conn, val)
+
+vals = range(0, 1000)
+
+loop.run_until_complete(db_main(loop, vals))
A => readme.md +1 -0
@@ 1,1 @@
+Collection of one-off scripts that don't belong in a package, a dedicated repo, or anywhere else in particular.
A => scrape-zip-files.py +29 -0
@@ 1,29 @@
+"""
+Quick script to parse an HTML page and extract zip files.
+"""
+
+from bs4 import BeautifulSoup
+from urllib.request import urlopen
+import requests
+import zipfile
+import io
+
+"""
+Fails if file extraction requires a password
+"""
+
+home = 'http://www.dndjunkie.com'
+url = 'http://www.dndjunkie.com/rpgx/datasets/'
+page = BeautifulSoup(requests.get(url).content, 'html.parser')
+
+# Keep only the links that actually point at zip archives.
+files = [link.get('href') for link in page.find_all('a')
+         if link.get('href', '').endswith('.zip')]
+
+for href in files:
+    r = requests.get(home + href)
+    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
+        z.extractall()
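+
+# zipfile supports password-protected archives via the pwd argument; the
+# password value here is purely hypothetical:
+#   z.extractall(pwd=b"example-password")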