From 7683ff5760bc7ea5b2f285eff7b71b8f14c69428 Mon Sep 17 00:00:00 2001
From: "Dustin C. Hatch"
Date: Mon, 1 May 2023 10:07:50 -0500
Subject: [PATCH] Initial commit

---
 .gitignore     |   6 +
 pyproject.toml |  28 +++
 xactfetch.py   | 651 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 685 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 pyproject.toml
 create mode 100644 xactfetch.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8125531
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+/.venv
+/cookies.json
+/screenshot_*.png
+*.egg-info/
+__pycache__/
+*.py[co]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..2aad61c
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,28 @@
+[project]
+name = "xactfetch"
+authors = [
+    {name = "Dustin C. Hatch", email = "dustin@hatch.name"},
+]
+description = "Fetch transaction exports from online banking websites"
+requires-python = ">=3.10"
+license = {text = "CC0"}
+classifiers = [
+    "License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication",
+    "Programming Language :: Python :: 3",
+]
+dependencies = [
+    "playwright~=1.32.1",
+    "requests~=2.29.0",
+]
+dynamic = ["version"]
+
+[project.scripts]
+xactfetch = "xactfetch:main"
+
+[build-system]
+requires = ["setuptools", "setuptools-scm"]
+build-backend = "setuptools.build_meta"
+
+[tool.pyright]
+venvPath = '.'
+venv = '.venv'
diff --git a/xactfetch.py b/xactfetch.py
new file mode 100644
index 0000000..5654654
--- /dev/null
+++ b/xactfetch.py
@@ -0,0 +1,651 @@
+"""Fetch bank transaction exports and import them into Firefly III."""
+
+import contextlib
+import copy
+import datetime
+import json
+import logging
+import os
+import random
+import shlex
+import shutil
+import subprocess
+import tempfile
+from pathlib import Path
+from types import TracebackType
+from typing import Any, Optional, Type
+
+import requests
+from playwright.sync_api import Page
+from playwright.sync_api import TimeoutError as PlaywrightTimeout
+from playwright.sync_api import sync_playwright
+
+
+log = logging.getLogger('xactfetch')
+
+
+NTFY_URL = 'https://ntfy.pyrocufflink.net'
+NTFY_TOPIC = 'dustin'
+FIREFLY_III_URL = 'https://firefly.pyrocufflink.blue'
+# Seconds to wait for ntfy/Firefly III HTTP responses before giving up.
+REQUEST_TIMEOUT = 30
+# Firefly III account IDs; Commerce accounts are keyed by their link text.
+ACCOUNTS = {
+    'commerce': {
+        '8357': 1,
+        '7730': 67,
+    },
+    'chase': 15,
+}
+
+
+def ntfy(
+    message: str,
+    topic: str = NTFY_TOPIC,
+    title: Optional[str] = None,
+    tags: Optional[str] = None,
+) -> None:
+    """Post *message* to an ntfy topic; raise on an HTTP error status."""
+    headers = {
+        'Title': title or 'xactfetch',
+    }
+    if tags:
+        headers['Tags'] = tags
+    r = requests.post(
+        f'{NTFY_URL}/{topic}',
+        headers=headers,
+        data=message,
+        timeout=REQUEST_TIMEOUT,
+    )
+    r.raise_for_status()
+
+
+def rbw_unlocked() -> bool:
+    """Return whether ``rbw unlocked`` reports the vault as unlocked."""
+    log.debug('Checking if rbw vault is locked')
+    cmd = ['rbw', 'unlocked']
+    p = subprocess.run(cmd, check=False, stdout=subprocess.DEVNULL)
+    unlocked = p.returncode == 0
+    log.info('rbw vault is %s', 'unlocked' if unlocked else 'locked')
+    return unlocked
+
+
+def rbw_get(
+    name: str, folder: Optional[str] = None, username: Optional[str] = None
+) -> str:
+    """Return ``rbw get`` output for a vault item, sans trailing newline."""
+    log.info(
+        'Getting password for Bitwarden vault item '
+        '%s (folder: %s, username: %s)',
+        name,
+        folder,
+        username,
+    )
+    cmd = ['rbw', 'get']
+    if folder is not None:
+        cmd += ('--folder', folder)
+    cmd.append(name)
+    if username is not None:
+        cmd.append(username)
+    p = subprocess.run(cmd, check=True, capture_output=True, encoding='utf-8')
+    assert p.stdout is not None
+    return p.stdout.rstrip('\n')
+
+
+def rbw_code(
+    name: str, folder: Optional[str] = None, username: Optional[str] = None
+) -> str:
+    """Return ``rbw code`` output for a vault item, sans trailing newline."""
+    log.info(
+        'Getting OTP code for Bitwarden vault item '
+        '%s (folder: %s, username: %s)',
+        name,
+        folder,
+        username,
+    )
+    cmd = ['rbw', 'code']
+    if folder is not None:
+        cmd += ('--folder', folder)
+    cmd.append(name)
+    if username is not None:
+        cmd.append(username)
+    p = subprocess.run(cmd, check=True, capture_output=True, encoding='utf-8')
+    assert p.stdout is not None
+    return p.stdout.rstrip('\n')
+
+
+def firefly_import(csv: Path, config: dict[str, Any], token: str) -> None:
+    """Run the Firefly III data importer container on one CSV file.
+
+    The CSV and its JSON import configuration are copied into a temporary
+    directory that is mounted read-only at ``/import``.  A non-zero exit
+    status is logged, not raised.  If ``DEBUG_SKIP_IMPORT`` is set, ``true``
+    is run instead of the container.
+    """
+    log.debug('Importing transactions from %s to Firefly III', csv)
+    env = {
+        'PATH': os.environ['PATH'],
+        'FIREFLY_III_ACCESS_TOKEN': token,
+        'IMPORT_DIR_ALLOWLIST': '/import',
+        'FIREFLY_III_URL': FIREFLY_III_URL,
+        'WEB_SERVER': 'false',
+    }
+    with tempfile.TemporaryDirectory() as tmpdir:
+        dest = Path(tmpdir) / 'import.csv'
+        log.debug('Copying %s to %s', csv, dest)
+        shutil.copyfile(csv, dest)
+        configfile = dest.with_suffix('.json')
+        log.debug('Saving config as %s', configfile)
+        with configfile.open('w', encoding='utf-8') as f:
+            json.dump(config, f)
+        cmd = [
+            'podman',
+            'run',
+            '--rm',
+            '-it',
+            '-v',
+            f'{tmpdir}:/import:ro,z',
+            '--env-host',
+            'docker.io/fireflyiii/data-importer',
+        ]
+        if log.isEnabledFor(logging.DEBUG):
+            log.debug(
+                'Running command: %s',
+                ' '.join(shlex.quote(str(a)) for a in cmd),
+            )
+        if os.environ.get('DEBUG_SKIP_IMPORT'):
+            cmd = ['true']
+        p = subprocess.run(cmd, env=env, check=False)
+        if p.returncode == 0:
+            log.info(
+                'Successfully imported transactions from %s to Firefly III',
+                csv,
+            )
+        else:
+            log.error('Failed to import transactions from %s', csv)
+
+
+def get_last_transaction_date(key: int, token: str) -> datetime.date:
+    """Return the day after the newest transaction date for an account.
+
+    Dates come from one request to the Firefly III account-transactions
+    endpoint; unparseable entries are logged and skipped.
+    """
+    url = f'{FIREFLY_III_URL}/api/v1/accounts/{key}/transactions'
+    r = requests.get(
+        url,
+        headers={
+            'Authorization': f'Bearer {token}',
+            'Accept': 'application/vnd.api+json',
+        },
+        timeout=REQUEST_TIMEOUT,
+    )
+    r.raise_for_status()
+    last_date = datetime.datetime.min
+    for xact in r.json()['data']:
+        for split in xact['attributes']['transactions']:
+            try:
+                datestr = split['date'].split('T')[0]
+                date = datetime.datetime.fromisoformat(datestr)
+            except (KeyError, ValueError) as e:
+                log.warning(
+                    'Could not parse date from transaction %s: %s',
+                    xact['id'],
+                    e,
+                )
+                continue
+            if date > last_date:
+                last_date = date
+    return last_date.date() + datetime.timedelta(days=1)
+
+
+def download_chase(page: Page, end_date: datetime.date, token: str) -> None:
+    """Download new Chase transactions and import them into Firefly III."""
+    with Chase(page) as c:
+        c.login()
+        key = ACCOUNTS['chase']
+        try:
+            start_date = get_last_transaction_date(key, token)
+        except (OSError, ValueError) as e:
+            log.error(
+                'Skipping Chase account: could not get last transaction: %s',
+                e,
+            )
+            return
+        if start_date >= end_date:
+            log.info(
+                'Skipping Chase account: last transaction was %s',
+                start_date,
+            )
+            return
+        csv = c.download_transactions(start_date, end_date)
+    log.info('Importing transactions from Chase into Firefly III')
+    c.firefly_import(csv, key, token)
+
+
+def download_commerce(page: Page, end_date: datetime.date, token: str) -> None:
+    """Download new Commerce Bank transactions and import them."""
+    log.info('Downloading transaction lists from Commerce Bank')
+    csvs = []
+    with CommerceBank(page) as c:
+        c.login()
+        for name, key in ACCOUNTS['commerce'].items():
+            try:
+                start_date = get_last_transaction_date(key, token)
+            except (OSError, ValueError) as e:
+                log.error(
+                    'Skipping account %s: could not get last transaction: %s',
+                    name,
+                    e,
+                )
+                continue
+            if start_date >= end_date:
+                log.info(
+                    'Skipping account %s: last transaction was %s',
+                    name,
+                    start_date,
+                )
+                continue
+            log.info(
+                'Getting transactions since %s for account xxx%s',
+                start_date,
+                name,
+            )
+            c.open_account(name)
+            csvs.append((key, c.download_transactions(start_date, end_date)))
+    log.info('Importing transactions from Commerce Bank into Firefly III')
+    for key, csv in csvs:
+        c.firefly_import(csv, key, token)
+
+
+class CommerceBank:
+    """Playwright-driven session for Commerce Bank online banking."""
+
+    URL = 'https://banking.commercebank.com/CBI/Auth/Login'
+    IMPORT_CONFIG = {
+        'version': 3,
+        'source': 'fidi-1.2.2',
+        'created_at': '2023-04-27T08:05:10-05:00',
+        'date': 'n/j/Y',
+        'delimiter': 'comma',
+        'headers': True,
+        'rules': True,
+        'skip_form': False,
+        'add_import_tag': True,
+        'roles': [
+            'date_transaction',
+            'internal_reference',
+            'description',
+            'amount_debit',
+            'amount_credit',
+        ],
+        'do_mapping': [
+            False,
+            False,
+            False,
+            False,
+            False,
+        ],
+        'mapping': [],
+        'duplicate_detection_method': 'classic',
+        'ignore_duplicate_lines': False,
+        'unique_column_index': 0,
+        'unique_column_type': 'internal_reference',
+        'flow': 'file',
+        'identifier': '0',
+        'connection': '0',
+        'ignore_spectre_categories': False,
+        'map_all_data': False,
+        'accounts': [],
+        'date_range': '',
+        'date_range_number': 30,
+        'date_range_unit': 'd',
+        'date_not_before': '',
+        'date_not_after': '',
+        'nordigen_country': '',
+        'nordigen_bank': '',
+        'nordigen_requisitions': [],
+        'nordigen_max_days': '90',
+        'conversion': False,
+        'ignore_duplicate_transactions': True,
+    }
+
+    def __init__(self, page: Page) -> None:
+        """Bind the session to *page*; nothing is loaded until login()."""
+        self.page = page
+        self.username = 'admiraln3mo'
+        self.vault_item = 'Commerce Bank'
+        self.vault_folder = 'Websites'
+        self._logged_in = False
+
+    def __enter__(self) -> 'CommerceBank':
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        tb: Optional[TracebackType],
+    ) -> None:
+        """Log out, if logged in, when the ``with`` block exits."""
+        self.logout()
+
+    def login(self) -> None:
+        """Sign in with the vault password and an OTP code from rbw."""
+        log.debug('Navigating to %s', self.URL)
+        self.page.goto(self.URL)
+        password = rbw_get(self.vault_item, self.vault_folder, self.username)
+        log.debug('Filling username/password login form')
+        self.page.get_by_role('textbox', name='Customer ID').fill(
+            self.username
+        )
+        self.page.get_by_role('textbox', name='Password').fill(password)
+        self.page.get_by_role('button', name='Log In').click()
+        log.debug('Waiting for OTP 2FA form')
+        otp_input = self.page.locator('id=securityCodeInput')
+        otp_input.wait_for()
+        self.page.wait_for_timeout(random.randint(1000, 3000))
+        log.debug('Filling OTP 2FA form')
+        otp = rbw_code(self.vault_item, self.vault_folder, self.username)
+        otp_input.fill(otp)
+        with self.page.expect_event('load'):
+            self.page.get_by_role('button', name='Continue').click()
+        log.debug('Waiting for page load')
+        self.page.wait_for_load_state()
+        log.info('Successfully logged in to Commerce Bank')
+        self._logged_in = True
+
+    def logout(self) -> None:
+        """Click the Logout link; a no-op if login never completed."""
+        if not self._logged_in:
+            return
+        log.debug('Logging out of Commerce Bank')
+        with self.page.expect_event('load'):
+            self.page.get_by_test_id('navWrap').get_by_text('Logout').click()
+        log.info('Logged out of Commerce Bank')
+
+    def open_account(self, account: str) -> None:
+        """Open the activity page for the account link named *account*."""
+        log.debug('Navigating to activity page for account %s', account)
+        if '/Activity/' in self.page.url:
+            self.page.get_by_role('button', name='My Accounts').click()
+        with self.page.expect_event('load'):
+            self.page.get_by_role('link', name=account).click()
+        self.page.wait_for_load_state()
+        self.page.wait_for_timeout(random.randint(1000, 3000))
+        log.info('Loaded activity page for account %s', account)
+
+    def download_transactions(
+        self, from_date: datetime.date, to_date: datetime.date
+    ) -> Path:
+        """Download the open account's CSV export and return its path."""
+        log.info('Downloading transactions from %s to %s', from_date, to_date)
+        idx = self.page.url.rstrip('/').split('/')[-1]
+        href = (
+            f'Download.ashx?Index={idx}'
+            f'&From={from_date}&To={to_date}'
+            f'&Type=csv'
+            '&DurationOfMonths=6'
+        )
+        log.debug('Navigating to %s', href)
+        with self.page.expect_download() as di:
+            self.page.evaluate(f'window.location.href = "{href}";')
+        log.debug('Waiting for download to complete')
+        self.page.wait_for_timeout(random.randint(1000, 3000))
+        path = di.value.path()
+        assert path
+        log.info('Downloaded transactions to %s', path)
+        return path
+
+    def firefly_import(self, csv: Path, account: int, token: str) -> None:
+        """Import *csv* using *account* as the default Firefly III account."""
+        config = copy.deepcopy(self.IMPORT_CONFIG)
+        config['default_account'] = account
+        firefly_import(csv, config, token)
+
+
+class Chase:
+    """Playwright-driven session for the Chase website."""
+
+    URL = 'https://secure26ea.chase.com/web/auth/dashboard'
+    IMPORT_CONFIG = {
+        'version': 3,
+        'source': 'fidi-1.2.2',
+        'created_at': '2023-04-27T09:54:42-05:00',
+        'date': 'n/j/Y',
+        'delimiter': 'comma',
+        'headers': True,
+        'rules': True,
+        'skip_form': False,
+        'add_import_tag': True,
+        'roles': [
+            'date_transaction',
+            'date_process',
+            'description',
+            'tags-comma',
+            '_ignore',
+            'amount',
+            'note',
+        ],
+        'do_mapping': [False, False, False, True, False, False, False],
+        'mapping': [],
+        'duplicate_detection_method': 'classic',
+        'ignore_duplicate_lines': True,
+        'unique_column_index': 0,
+        'unique_column_type': 'internal_reference',
+        'flow': 'file',
+        'identifier': '0',
+        'connection': '0',
+        'ignore_spectre_categories': False,
+        'map_all_data': True,
+        'accounts': [],
+        'date_range': '',
+        'date_range_number': 30,
+        'date_range_unit': 'd',
+        'date_not_before': '',
+        'date_not_after': '',
+        'nordigen_country': '',
+        'nordigen_bank': '',
+        'nordigen_requisitions': [],
+        'nordigen_max_days': '90',
+        'conversion': False,
+        'ignore_duplicate_transactions': True,
+    }
+
+    def __init__(self, page: Page) -> None:
+        """Bind the session to *page*; cookies persist in cookies.json."""
+        self.page = page
+        self.username = 'AdmiralN3mo'
+        self.vault_item = 'Chase'
+        self.vault_folder = 'Websites'
+        self.saved_cookies = Path('cookies.json')
+        self._logged_in = False
+
+    def __enter__(self) -> 'Chase':
+        """Load any saved cookies before the session is used."""
+        self.load_cookies()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        tb: Optional[TracebackType],
+    ) -> None:
+        """Log out, then save cookies even if logging out fails."""
+        try:
+            self.logout()
+        finally:
+            self.save_cookies()
+
+    def load_cookies(self) -> None:
+        """Add cookies from the saved file to the browser context."""
+        log.debug('Loading saved cookies from %s', self.saved_cookies)
+        try:
+            with self.saved_cookies.open(encoding='utf-8') as f:
+                self.page.context.add_cookies(json.load(f))
+        except Exception as e:
+            log.warning(
+                'Could not load saved cookies (%s), '
+                'SMS verification will be required!',
+                e,
+            )
+        else:
+            log.info('Successfully loaded saved cookies')
+
+    def save_cookies(self) -> None:
+        """Write the browser context's cookies to the saved file."""
+        log.debug('Saving cookies to %s', self.saved_cookies)
+        try:
+            with self.saved_cookies.open('w', encoding='utf-8') as f:
+                f.write(json.dumps(self.page.context.cookies()))
+        except Exception as e:
+            log.error('Failed to save cookies: %s', e)
+        else:
+            log.info('Successfully saved cookies to %s', self.saved_cookies)
+
+    def login(self) -> None:
+        """Sign in via the #logonbox frame and wait for post-login content."""
+        log.debug('Navigating to %s', self.URL)
+        self.page.goto(self.URL)
+        self.page.wait_for_load_state()
+        self.page.wait_for_timeout(random.randint(2000, 4000))
+        password = rbw_get(self.vault_item, self.vault_folder, self.username)
+        log.debug('Filling username/password login form')
+        self.page.frame_locator('#logonbox').locator(
+            'input[name=userId]'
+        ).fill(self.username)
+        self.page.frame_locator('#logonbox').locator(
+            'input[name=password]'
+        ).fill(password)
+        self.page.wait_for_timeout(random.randint(500, 750))
+        self.page.frame_locator('#logonbox').get_by_role(
+            'button', name='Sign in'
+        ).click()
+        log.debug('Waiting for page load')
+        self.page.wait_for_load_state()
+        self.page.get_by_text('Amazon Rewards points').wait_for(timeout=60000)
+        self.page.get_by_role('button', name='Open an account').wait_for()
+        log.info('Successfully logged in to Chase')
+        self._logged_in = True
+
+    def download_transactions(
+        self, from_date: datetime.date, to_date: datetime.date
+    ) -> Path:
+        """Fill the activity download form and return the file's path."""
+        log.info('Downloading transactions from %s to %s', from_date, to_date)
+        fmt = '%m/%d/%Y'
+        log.debug('Waiting for page to load completely')
+        self.page.get_by_role('link', name='Sort Options').wait_for()
+        self.page.wait_for_timeout(random.randint(1500, 2500))
+        s = self.page.locator('#downloadActivityIcon')
+        s.wait_for()
+        self.page.wait_for_timeout(random.randint(1500, 2500))
+        log.debug('Clicking "Download account activity" button')
+        s.click()
+        log.debug('Waiting for page to load')
+        s = self.page.locator('button#select-downloadActivityOptionId')
+        s.wait_for()
+        log.debug('Filling account activity download form')
+        s.click()
+        self.page.get_by_text('Choose a date range').nth(1).locator(
+            '../..'
+        ).click()
+        self.page.wait_for_timeout(random.randint(500, 1500))
+        self.page.locator('#accountActivityFromDate-input-input').fill(
+            from_date.strftime(fmt)
+        )
+        self.page.locator('#accountActivityFromDate-input-input').blur()
+        self.page.wait_for_timeout(random.randint(500, 1500))
+        self.page.locator('#accountActivityToDate-input-input').fill(
+            to_date.strftime(fmt)
+        )
+        self.page.locator('#accountActivityToDate-input-input').blur()
+        self.page.wait_for_timeout(random.randint(500, 1500))
+        with self.page.expect_download() as di:
+            self.page.get_by_role(
+                'button', name='Download', exact=True
+            ).click()
+        log.debug('Waiting for download to complete')
+        self.page.wait_for_timeout(random.randint(1000, 2500))
+        path = di.value.path()
+        assert path
+        log.info('Downloaded transactions to %s', path)
+        return path
+
+    def logout(self) -> None:
+        """Click "Sign out"; a no-op if login never completed."""
+        if not self._logged_in:
+            return
+        log.debug('Logging out of Chase')
+        with self.page.expect_event('load'):
+            self.page.get_by_role('button', name='Sign out').click()
+        log.info('Logged out of Chase')
+
+    def firefly_import(self, csv: Path, account: int, token: str) -> None:
+        """Import *csv* using *account* as the default Firefly III account."""
+        config = copy.deepcopy(self.IMPORT_CONFIG)
+        config['default_account'] = account
+        firefly_import(csv, config, token)
+
+
+@contextlib.contextmanager
+def screenshot_failure(page: Page):
+    """Log any exception from the block, save a screenshot, and re-raise."""
+    try:
+        yield
+    except Exception:
+        log.exception('Failed to download transactions:')
+        now = datetime.datetime.now()
+        filename = now.strftime('screenshot_%Y%m%d%H%M%S.png')
+        log.debug('Saving browser screenshot to %s', filename)
+        try:
+            screenshot = page.screenshot()
+            with open(filename, 'wb') as f:
+                f.write(screenshot)
+        except Exception as e:
+            log.error('Failed to save browser screenshot: %s', e)
+        else:
+            log.error('Browser screenshot saved as %s', filename)
+        raise
+
+
+def main() -> None:
+    """Fetch from both banks; exit with status 1 if either one failed."""
+    logging.basicConfig(level=logging.DEBUG)
+    if not rbw_unlocked():
+        ntfy(
+            'xactfetch needs you to unlock the rbw vault',
+            tags='closed_lock_with_key',
+        )
+    log.debug('Getting Firefly III access token from rbw vault')
+    token = rbw_get('xactfetch')
+    end_date = datetime.date.today() - datetime.timedelta(days=1)
+    with sync_playwright() as pw:
+        headless = os.environ.get('DEBUG_HEADLESS_BROWSER', '1') == '1'
+        browser = pw.firefox.launch(headless=headless)
+        page = browser.new_page()
+        failed = False
+        try:
+            with screenshot_failure(page):
+                download_commerce(page, end_date, token)
+        except Exception:
+            ntfy(
+                'Downloading transactions from Commerce Bank failed',
+                tags='warning',
+            )
+            failed = True
+        try:
+            with screenshot_failure(page):
+                download_chase(page, end_date, token)
+        except Exception:
+            ntfy(
+                'Downloading transactions from Chase failed',
+                tags='warning',
+            )
+            failed = True
+    raise SystemExit(1 if failed else 0)
+
+
+if __name__ == '__main__':
+    main()