~n0mn0m/snippets

675709a8119c87e53215c60c1d9f632657527cc2 — n0mn0m 5 months ago
Initialize

Track one-off scripts I use that don't live in a project.
A  => .gitignore +129 -0
@@ 1,129 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

A  => AbstractDatabaseConnector.py +106 -0
@@ 1,106 @@
"""
An abstract base class OOP example, building on the prior example for Code Louisville Fall 2017.
"""

import sqlite3
from abc import ABCMeta, abstractmethod

class DatabaseConnector(metaclass=ABCMeta):
    """A class to help you connect to various databases"""

    _max_connections = 3
    _current_connections = 0

    def __init__(self, server, database, schema):
        self.server = server
        self.database = database
        self.schema = schema
        self._timeout = None

        self._increment_current_connections()

        if self._current_connections > self._max_connections:
            print("MAX CONNECTIONS: {0} CURRENT CONNECTIONS: {1}\n"
                  "Please close a connection to create a new one."
                  .format(self._max_connections, self._current_connections))
            raise Exception("Too many open connections")

    @property
    def timeout(self):
        return self._timeout

    @timeout.setter
    def timeout(self, value):
        self._timeout = value

    @abstractmethod
    def execute_query(self, query):
        raise NotImplementedError()

    #@abstractmethod
    def bulk_export(self):
        raise NotImplementedError()

    @staticmethod
    def sql_syntax_help():
        print("SELECT {COLUMNS} FROM {TABLE_NAME}\
               \nSELECT {COLUMNS} FROM {SCHEMA}.{TABLE} WHERE {CONDITIONAL}\
               \nSELECT {COLUMNS} FROM {SCHEMA}.{TABLE} WHERE {CONDITIONAL} ORDER BY {COLUMN} {ASC\DESC}\
               \nSELECT {AGGREGATE FUNCTION} {COLUMN} FROM {SCHEMA}.{TABLE}")


    @classmethod
    def _increment_current_connections(cls):
        cls._current_connections += 1

    @classmethod
    def _decrement_current_connections(cls):
        cls._current_connections -= 1

    @classmethod
    def update_max_connections(cls, count):
        cls._max_connections = count


class SQLLiteDatabaseConnector(DatabaseConnector):
    def __init__(self, server, database, schema):
        super().__init__(server, database, schema)
        self._connection = None
        self.type = 'sqlite3'

    @property
    def connection(self):
        return self._connection

    @connection.setter
    def connection(self, connection_string):
        self._connection = sqlite3.connect(connection_string).cursor()

    @connection.deleter
    def connection(self):
        self._connection.close()
        del self._connection
        self._decrement_current_connections()

    def execute_query(self, query):
        result = self._connection.execute(query)
        return list(result)

class MSSqlDatabaseConnection(DatabaseConnector):
    def __init__(self, server, database, schema):
        super().__init__(server, database, schema)
        self.type = 'mssql'


if __name__ == "__main__":
    chinook = SQLLiteDatabaseConnector("localhost", "chinook.db", None)
    chinook.connection = 'chinook.db'
    data = chinook.execute_query("SELECT * FROM artists")
    for row in data:
        print(row)
    del chinook.connection
    chinook.sql_syntax_help()
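
    # Illustrative check (not part of the original script): the base class is
    # abstract, so direct instantiation fails because execute_query has no
    # concrete implementation.
    try:
        DatabaseConnector("localhost", "chinook.db", None)
    except TypeError as err:
        print(f"Cannot instantiate DatabaseConnector: {err}")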

A  => RecurseRemove.sql +38 -0
@@ 1,38 @@
/*
Set up a recursive CTE to iterate over ASCII ranges and remove
any characters within the given ranges. Remember that the max recursion
depth in SQL Server is 100 by default.
*/

--Upper bound for the first pass (control characters)
DECLARE @controlcharacter int = 32,
--Upper bound for the second pass (extended characters)
@extendedcharacter int = 256;

;with controlcharacters as (
  --Initialize the counter for control characters (char(00)).
  --Place the constraint at the proper recursion depth (32 for the ASCII control range).
  SELECT 0 AS cntr, REPLACE(Col, char(00), '') as col
  FROM source
  WHERE condition
  UNION ALL
  --Increment the counter and use replace to remove unwanted ascii
  --characters. Check cntr against declared variable.
  SELECT cntr + 1 as cntr, REPLACE(col, char(cntr), '') as col
  FROM controlcharacters c
  WHERE cntr < @controlcharacter),
  
  extendedcharacters as (
    --Same initialization as above, but using the last row from the
    --first recursive set (controlcharacters) to start.
    SELECT 127 as cntr, REPLACE(col,char(127), '') as col
    FROM controlcharacters
    WHERE cntr = (SELECT MAX(cntr) from controlcharacters)
    UNION ALL
    SELECT cntr + 1, REPLACE(col,char(cntr), '') as col
    FROM extendedcharacters c
    WHERE cntr< @extendedcharacter)
    
SELECT * FROM extendedcharacters where cntr = (SELECT MAX(cntr) from extendedcharacters)
--Override MAXRECURSION 100 so that the second pass can go from 127 to 255
OPTION (MAXRECURSION 128);
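
/*
Hypothetical smoke test (the table and column names below are illustrative
assumptions, not part of the original script):

  CREATE TABLE dbo.source (Col varchar(100));
  INSERT INTO dbo.source VALUES ('abc' + char(7) + 'def' + char(200) + 'ghi');

Point the anchor members above at that table (e.g. "FROM dbo.source WHERE
Col IS NOT NULL" in place of "FROM source WHERE condition") and confirm
that char(7) and char(200) are stripped from the returned col value.
*/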

A  => airflow-log-cleanup.py +59 -0
@@ 1,59 @@
"""
Airflow creates a lot of logs. The first thing you should check is changing your
log level and dag bag refresh rate. If that solves your space issue, great! If
not, the script below can help: schedule it via cron to run at a given interval
and clean up the local airflow logs.
"""

import os
from datetime import datetime

# st_mtime and datetime timestamps are both seconds since the epoch
HOUR_IN_SECONDS = 3600

def truncate_process_manager_log(log_base_path):
    """
    The scheduler records all activity related to dag processing in the same file.
    This file can grow large fast, and is actively in use. Instead of unlinking the
    file and pulling it out from under the scheduler, truncate it.
    """
    dag_process_manager_log = os.path.join(
        log_base_path, "dag_processor_manager", "dag_processor_manager.log"
    )
    open(dag_process_manager_log, "w").close()


def traverse_and_unlink(fobject):
    """
    Traverse the log directory on the given airflow instance (webserver, scheduler,
    worker, etc) and remove any logs not modified in the last hour.
    """
    for entry in os.scandir(fobject):
        new_fobject = entry.path
        if os.path.isfile(new_fobject):
            last_modified = os.stat(new_fobject).st_mtime
            # both values are seconds since the epoch
            delta = datetime.now().timestamp() - last_modified
            if delta > HOUR_IN_SECONDS:
                print(
                    f"{new_fobject} has not been used in the last hour.\nCleaning up."
                )
                os.unlink(new_fobject)
        elif os.path.isdir(new_fobject):
            traverse_and_unlink(new_fobject)


def cleanup_logs():
    """
    Remove all logs not used within the last hour.

    Truncate the dag processor log.
    """
    base_dir = os.environ["AIRFLOW_HOME"]
    log_dir = os.path.join(base_dir, "logs")

    traverse_and_unlink(log_dir)
    truncate_process_manager_log(log_dir)


if __name__ == "__main__":
    cleanup_logs()


A  => async-after-creation.py +37 -0
@@ 1,37 @@
"""
Example extended aioodbc configuration.
"""
import asyncio
import aioodbc
import pyodbc
from concurrent.futures import ThreadPoolExecutor

loop = asyncio.get_event_loop()

async def conn_attributes(conn):
    conn.setdecoding(pyodbc.SQL_CHAR, encoding='utf-8')
    conn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8')
    conn.setdecoding(pyodbc.SQL_WMETADATA, encoding='utf-16le')
    conn.setencoding(encoding='utf-8')

async def odbc_insert_worker(conn, val):
    async with conn.cursor() as cur:
        await cur.execute('insert into async_testing values (?)', val)
        await cur.commit()

async def db_main(loop, vals):
    dsn="foo"

    vals = list(vals)

    async with aioodbc.create_pool(dsn=dsn, loop=loop, executor=ThreadPoolExecutor(max_workers=3), after_created=conn_attributes) as pool:
        tasks = [do_insert(pool, val) for val in vals]
        await asyncio.gather(*tasks)

async def do_insert(pool, val):
    async with pool.acquire() as conn:
        await odbc_insert_worker(conn, val)

vals = range(0,1000)

loop.run_until_complete(db_main(loop, vals))

A  => readme.md +1 -0
@@ 1,1 @@
A collection of one-off scripts that don't belong in a package, repo, or elsewhere.

A  => scrape-zip-files.py +29 -0
@@ 1,29 @@
"""
Quick script to parse an HTML page and download and extract the zip files it links to.
"""

from bs4 import BeautifulSoup
from urllib.request import urlopen
import requests
import zipfile
import io

"""
Fails if file extraction requires a password
"""

home = 'http://www.dndjunkie.com'
url = 'http://www.dndjunkie.com/rpgx/datasets/'
data = urlopen(url).read()
page = BeautifulSoup(data, 'html.parser')
files = []

for link in page.find_all('a'):
    href = link.get('href')
    if href:
        files.append(href)

# Skip the first two links (presumably page navigation rather than zip archives).
for path in files[2:]:
    full_path = home + path
    r = requests.get(full_path)
    z = zipfile.ZipFile(io.BytesIO(r.content))
    z.extractall()