From df6dc3315a5e64472598f636690a6bff69a94e64 Mon Sep 17 00:00:00 2001
From: savoy
Date: Thu, 8 Sep 2022 11:22:27 -0500
Subject: [PATCH] added: fill NaN values with 0.00 prior

Signed-off-by: savoy
---
Review notes (ignored by git-am, not part of the commit message):
- download(): import win32com.client explicitly, pass SaveAsFile a str and
  return None explicitly when no message is unread.
- get(): parse the date with one regex, zero-pad with str.zfill and raise a
  descriptive error when the date or the report is missing.
- update(): use Series.items() (iteritems() is gone in pandas 2.0) and assign
  the fillna() result back instead of filling a column accessor in place.
- __main__: handle "nothing downloaded" before touching Path()/unlink().

 ade/bin/concur.py | 520 ++++++++++++++++++++++++++-----------------
 1 file changed, 285 insertions(+), 235 deletions(-)

diff --git a/ade/bin/concur.py b/ade/bin/concur.py
index e25474f..7cf3ec0 100644
--- a/ade/bin/concur.py
+++ b/ade/bin/concur.py
@@ -1,235 +1,285 @@
-#! /usr/bin/env python
-
-# Preparation of the Concur expense report
-# Copyright (C) 2018-2022 savoy
-
-# Concur is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# Concur is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with Concur. If not, see <https://www.gnu.org/licenses/>.
-
-import datetime as dt
-import glob
-import pandas as pd
-import re
-import sys
-import win32com
-import xlwings as xw
-
-from decimal import Decimal
-from pathlib import Path
-
-sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
-
-from lib import admin, dates, connections
-
-
-def download():
-    ol = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
-    bx = ol.GetDefaultFolder(6)
-    folder = bx.Folders(CONFIG["email_folder"])  # type: ignore - cannot be None at this point
-
-    for msg in folder.Items:
-        if msg.Unread == True:
-            try:
-                att = msg.Attachments[0]
-            except IndexError:
-                att = msg.Attachments[1]
-
-            name = att.Filename
-            try:
-                date = re.findall(r"\d{6}", name)[0]
-            except IndexError:
-                msg.Unread = False
-                return msg.Body
-
-            week = re.findall(r"Week (\d)", name, re.IGNORECASE)[0]
-            save = f"{date[4:]}{date[0:2]}{date[2:4]}_w{week}_concur_data.xlsx"
-            filepath = Path("~/").expanduser() / save
-            att.SaveAsFile(filepath)
-            msg.Unread = False
-            print(
-                f"Expense reporting data for 20{date[4:]}-{date[0:2]}-{date[2:4]} "
-                f"week {week} has been saved to "
-                f"{filepath}"
-            )
-            return filepath
-
-
-@admin.initializor
-def get(body):
-    directory = admin.instance.server / CONFIG["expense_repo_path"]  # type: ignore - cannot be None at this point
-    date = re.findall(r"\d{1,2}/\d{1,2}/\d{1,4}", body)[0]
-    year = re.findall(r"\d+/\d+/(\d+)", date)[0]
-    if len(year) == 2:
-        year = "20" + year
-    month = re.findall(r"(\d+)/\d+/\d+", date)[0]
-    if len(month) == 1:
-        month = "0" + month
-    day = re.findall(r"\d+/(\d+)/\d+", date)[0]
-    if len(day) == 1:
-        day = "0" + day
-
-    close = dates.calendar(
-        int(year), int(month), ref=dt.date(int(year), int(month), int(day))
-    )[2]
-    if len(str(close.month)) == 1:
-        fp = (
-            directory
-            / str(close.year)
-            / f'{str(close.year)[2:]}{str(close.month).rjust(1+len(str(close.month)), "0")}'
-        )
-    else:
-        fp = directory / str(close.year) / f"{str(close.year)[2:]}{str(close.month)}"
-    print(fp)
-    fp = glob.glob(f"{fp.as_posix()}/*{month}{day}{year[2:]}*Reporting*")[0]
-    print(fp)
-    fp = Path(fp)
-
-    return fp
-
-
-@admin.initializor
-def update(path):
-    data = admin.instance.server / CONFIG["csv_output_path"]  # type: ignore - cannot be None at this point
-
-    df = pd.read_excel(
-        path,
-        sheet_name="Data",
-        header=1,
-        usecols=[
-            "Employee Name",
-            "Employee ID",
-            "Home BC",
-            "Home Dept",
-            "Report Entry Expense Type Name",
-            "Report Entry Transaction Date",
-            "Report Entry Description",
-            "Report Entry Vendor Name",
-            "Report Entry Vendor Description",
-            "Recharge BC",
-            "Recharge Dept",
-            "Journal Account Code",
-            "Journal Amount",
-        ],
-        dtype={"Home Dept": "Int64", "Journal Account Code": "Int64"},
-    )
-    # converters={
-    #     'Journal Amount': lambda x: Decimal(x).quantize(
-    #         Decimal('1.11'))
-    #     })
-
-    df.drop(df.loc[df["Employee Name"] == ", "].index, inplace=True)
-    # People are stupid and leave tons of blank lines filled with formula,
-    # so this has to be done after cleaning the blank lines out.
-    df["Journal Amount"] = df["Journal Amount"].map(
-        lambda x: Decimal(x).quantize(Decimal("1.11"))
-    )
-
-    df.rename(
-        columns={
-            "Employee Name": "employeeName",
-            "Employee ID": "employeeId",
-            "Home BC": "businessCenter",
-            "Home Dept": "department",
-            "Report Entry Expense Type Name": "expenseType",
-            "Report Entry Transaction Date": "expenseDate",
-            "Report Entry Description": "expenseDescription",
-            "Report Entry Vendor Name": "expenseVendor",
-            "Report Entry Vendor Description": "vendorDescription",
-            "Recharge BC": "expenseBusinessCenter",
-            "Recharge Dept": "agreement",
-            "Journal Account Code": "glCode",
-            "Journal Amount": "amount",
-        },
-        inplace=True,
-    )
-
-    for key, value in df.employeeId.iteritems():
-        try:
-            df.loc[key, "employeeId"] = int(value)
-        except ValueError:
-            df.loc[key, "employeeId"] = 0
-    df.employeeId = df.employeeId.astype("Int64")
-
-    with connections.Sqlite("concur") as conn:
-        region = pd.read_sql("SELECT * FROM Region", conn)
-
-    df = df.merge(
-        region.loc[:, ["businessCenter", "region"]],
-        how="left",
-        on="businessCenter",
-        validate="m:1",
-    )
-    region.rename(
-        columns={"region": "regionCharged", "businessCenter": "expenseBusinessCenter"},
-        inplace=True,
-    )
-    df = df.merge(
-        region.loc[:, ["expenseBusinessCenter", "regionCharged"]],
-        how="left",
-        on="expenseBusinessCenter",
-        validate="m:1",
-    )
-
-    df["odod"] = df["expenseDescription"].str.extract("(ODOD)")
-
-    df["financialDate"] = dates.vector_cal(df.expenseDate)
-    df["financialYear"] = pd.DatetimeIndex(df.financialDate).year
-    df["financialMonth"] = pd.DatetimeIndex(df.financialDate).month
-    df.drop(columns=["financialDate"], inplace=True)
-
-    with connections.Sqlite("concur", upload=True) as conn:
-        df.to_sql("Concur", conn, if_exists="append", index=False)
-
-    df = df[
-        [
-            "employeeId",
-            "employeeName",
-            "businessCenter",
-            "region",
-            "department",
-            "expenseType",
-            "expenseDate",
-            "expenseDescription",
-            "expenseVendor",
-            "vendorDescription",
-            "expenseBusinessCenter",
-            "glCode",
-            "amount",
-            "agreement",
-            "regionCharged",
-            "odod",
-            "financialYear",
-            "financialMonth",
-        ]
-    ]
-
-    df.to_csv(data / "concur.csv", index=False, header=False, mode="a")
-
-
-if __name__ == "__main__":
-    admin.instance = admin.Admin()
-    CONFIG = admin.instance.get_bin_config(Path(__file__).stem)
-    if not CONFIG:
-        sys.exit()
-
-    x = download()
-
-    if Path(x).is_file():
-        update(x)
-    elif x:
-        x = get(x)
-        update(x)
-    else:
-        print("No new expense data available")
-
-    x.unlink()
+#! /usr/bin/env python
+
+# Preparation of the Concur expense report
+# Copyright (C) 2018-2022 savoy
+
+# Concur is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Concur is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Concur. If not, see <https://www.gnu.org/licenses/>.
+
+import datetime as dt
+import glob
+import pandas as pd
+import re
+import sys
+import win32com
+import win32com.client
+import xlwings as xw
+
+from decimal import Decimal
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from lib import admin, dates, connections
+
+
+def download():
+    """Save the attachment of the first unread e-mail in the Concur folder.
+
+    Looks in the sub-folder of the default Outlook Inbox named by
+    ``CONFIG["email_folder"]`` and handles only the first unread message,
+    marking it as read.
+
+    Returns:
+        pathlib.Path: the workbook saved in the home directory, when the
+            attachment's file name contains a six-digit date.
+        str: the message body, when the file name has no six-digit date.
+        None: when the folder holds no unread message.
+    """
+    ol = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
+    bx = ol.GetDefaultFolder(6)  # 6 is Outlook's olFolderInbox
+    folder = bx.Folders(CONFIG["email_folder"])  # type: ignore - cannot be None at this point
+
+    for msg in folder.Items:
+        if not msg.Unread:
+            continue
+
+        # Try index 0 first; fall back to index 1 when that raises IndexError.
+        try:
+            att = msg.Attachments[0]
+        except IndexError:
+            att = msg.Attachments[1]
+
+        name = att.Filename
+        stamp = re.search(r"\d{6}", name)
+        if stamp is None:
+            # No date in the file name: return the body so the caller can
+            # locate the report with get() instead.
+            msg.Unread = False
+            return msg.Body
+        date = stamp.group()
+
+        week = re.findall(r"Week (\d)", name, re.IGNORECASE)[0]
+        # The six digits are treated as MMDDYY and re-ordered to YYMMDD.
+        save = f"{date[4:]}{date[0:2]}{date[2:4]}_w{week}_concur_data.xlsx"
+        filepath = Path("~/").expanduser() / save
+        # Hand COM a plain string rather than a Path object.
+        att.SaveAsFile(str(filepath))
+        msg.Unread = False
+        print(
+            f"Expense reporting data for 20{date[4:]}-{date[0:2]}-{date[2:4]} "
+            f"week {week} has been saved to "
+            f"{filepath}"
+        )
+        return filepath
+
+    return None
+
+
+@admin.initializor
+def get(body):
+    """Find the archived expense report matching the date in *body*.
+
+    The first ``M/D/Y`` date in the text picks the ``<year>/<YYMM>``
+    sub-folder (taken from element 2 of ``dates.calendar``) and the
+    ``*MMDDYY*Reporting*`` file-name pattern searched for inside it.
+
+    Args:
+        body: text of the notification e-mail.
+
+    Returns:
+        pathlib.Path: the first file that matches the pattern.
+
+    Raises:
+        ValueError: if *body* contains no ``M/D/Y`` date.
+        FileNotFoundError: if no report matches inside the folder.
+    """
+    directory = admin.instance.server / CONFIG["expense_repo_path"]  # type: ignore - cannot be None at this point
+    found = re.search(r"(\d{1,2})/(\d{1,2})/(\d{1,4})", body)
+    if found is None:
+        raise ValueError("no M/D/Y date found in the message body")
+    month, day, year = found.groups()
+    if len(year) == 2:
+        year = "20" + year
+    month = month.zfill(2)
+    day = day.zfill(2)
+
+    close = dates.calendar(
+        int(year), int(month), ref=dt.date(int(year), int(month), int(day))
+    )[2]
+    # Two-digit year followed by the zero-padded month, e.g. "2209".
+    folder = f"{str(close.year)[2:]}{str(close.month).zfill(2)}"
+    fp = directory / str(close.year) / folder
+    print(fp)
+
+    pattern = f"{fp.as_posix()}/*{month}{day}{year[2:]}*Reporting*"
+    matches = glob.glob(pattern)
+    if not matches:
+        raise FileNotFoundError(f"no expense report matches {pattern}")
+    print(matches[0])
+
+    return Path(matches[0])
+
+
+@admin.initializor
+def update(path):
+    """Append the rows of one Concur workbook to the database and the CSV.
+
+    Reads the ``Data`` sheet, drops the rows whose employee name is only
+    ``", "``, renames the columns, adds the region, ``odod`` and financial
+    year/month columns, then appends the result to the ``Concur`` table and
+    to ``concur.csv`` under ``CONFIG["csv_output_path"]``.
+
+    Args:
+        path: location of the workbook to load.
+    """
+    data = admin.instance.server / CONFIG["csv_output_path"]  # type: ignore - cannot be None at this point
+
+    df = pd.read_excel(
+        path,
+        sheet_name="Data",
+        header=1,
+        usecols=[
+            "Employee Name",
+            "Employee ID",
+            "Home BC",
+            "Home Dept",
+            "Report Entry Expense Type Name",
+            "Report Entry Transaction Date",
+            "Report Entry Description",
+            "Report Entry Vendor Name",
+            "Report Entry Vendor Description",
+            "Recharge BC",
+            "Recharge Dept",
+            "Journal Account Code",
+            "Journal Amount",
+        ],
+        dtype={"Home Dept": "Int64", "Journal Account Code": "Int64"},
+    )
+
+    df.drop(df.loc[df["Employee Name"] == ", "].index, inplace=True)
+    # The sheet tends to carry many otherwise-blank rows that hold only
+    # formulas, so the amounts are converted after those rows are dropped
+    # rather than through a read_excel converter.
+    df["Journal Amount"] = df["Journal Amount"].map(
+        lambda x: Decimal(x).quantize(Decimal("1.11"))
+    )
+
+    df.rename(
+        columns={
+            "Employee Name": "employeeName",
+            "Employee ID": "employeeId",
+            "Home BC": "businessCenter",
+            "Home Dept": "department",
+            "Report Entry Expense Type Name": "expenseType",
+            "Report Entry Transaction Date": "expenseDate",
+            "Report Entry Description": "expenseDescription",
+            "Report Entry Vendor Name": "expenseVendor",
+            "Report Entry Vendor Description": "vendorDescription",
+            "Recharge BC": "expenseBusinessCenter",
+            "Recharge Dept": "agreement",
+            "Journal Account Code": "glCode",
+            "Journal Amount": "amount",
+        },
+        inplace=True,
+    )
+
+    # Anything that int() rejects with ValueError is stored as 0.
+    # Series.items() replaces iteritems(), which pandas 2.0 removed.
+    for key, value in df.employeeId.items():
+        try:
+            df.loc[key, "employeeId"] = int(value)
+        except ValueError:
+            df.loc[key, "employeeId"] = 0
+    df.employeeId = df.employeeId.astype("Int64")
+
+    with connections.Sqlite("concur") as conn:
+        region = pd.read_sql("SELECT * FROM Region", conn)
+
+    df = df.merge(
+        region.loc[:, ["businessCenter", "region"]],
+        how="left",
+        on="businessCenter",
+        validate="m:1",
+    )
+    region.rename(
+        columns={"region": "regionCharged", "businessCenter": "expenseBusinessCenter"},
+        inplace=True,
+    )
+    df = df.merge(
+        region.loc[:, ["expenseBusinessCenter", "regionCharged"]],
+        how="left",
+        on="expenseBusinessCenter",
+        validate="m:1",
+    )
+
+    df["odod"] = df["expenseDescription"].str.extract("(ODOD)")
+
+    df["financialDate"] = dates.vector_cal(df.expenseDate)
+    df["financialYear"] = pd.DatetimeIndex(df.financialDate).year
+    df["financialMonth"] = pd.DatetimeIndex(df.financialDate).month
+    df.drop(columns=["financialDate"], inplace=True)
+
+    # making sure `amount` is not NaN; assigning the result back avoids an
+    # in-place fillna on a column accessor, which pandas may apply to a copy
+    df["amount"] = df["amount"].fillna(Decimal("0.00"))
+
+    with connections.Sqlite("concur", upload=True) as conn:
+        df.to_sql("Concur", conn, if_exists="append", index=False)
+
+    df = df[
+        [
+            "employeeId",
+            "employeeName",
+            "businessCenter",
+            "region",
+            "department",
+            "expenseType",
+            "expenseDate",
+            "expenseDescription",
+            "expenseVendor",
+            "vendorDescription",
+            "expenseBusinessCenter",
+            "glCode",
+            "amount",
+            "agreement",
+            "regionCharged",
+            "odod",
+            "financialYear",
+            "financialMonth",
+        ]
+    ]
+
+    df.to_csv(data / "concur.csv", index=False, header=False, mode="a")
+
+
+if __name__ == "__main__":
+    admin.instance = admin.Admin()
+    CONFIG = admin.instance.get_bin_config(Path(__file__).stem)
+    if not CONFIG:
+        sys.exit()
+
+    x = download()
+
+    if not x:
+        # download() returned None (or an empty body): nothing to load,
+        # and nothing to delete.
+        print("No new expense data available")
+    else:
+        if not isinstance(x, Path):
+            # A message body came back instead of a saved workbook.
+            x = get(x)
+        update(x)
+        # NOTE(review): this also deletes the file found by get() -- confirm
+        # that removing it from the report folder is intended.
+        x.unlink()
-- 
2.45.2